Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add tests for meval to replicate paper results #605

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"auto_scores": [-3.212421178817749, -1.2346594333648682, -3.337571382522583, -6.383989334106445, -4.984112739562988, -3.6778671741485596, -3.930911064147949]}, {"auto_scores": [-3.6213812828063965, -0.634912371635437, -3.670194387435913, -5.574587821960449, -6.174976825714111, -4.187601089477539, -4.893369197845459]}, {"auto_scores": [-4.3441267013549805, -2.0739285945892334, -4.994687080383301, -5.674907207489014, -4.832435607910156, -4.986924171447754, -5.201959133148193]}, {"auto_scores": [-3.734975814819336, -3.2286689281463623, -3.826882839202881, -4.8123393058776855, -5.000977039337158, -5.222707748413086, -4.795951843261719]}, {"auto_scores": [-4.223236560821533, -4.719715595245361, -4.59467077255249, -5.68583869934082, -6.5522780418396, -5.388362407684326, -5.18690299987793]}, {"auto_scores": [-3.7510688304901123, -2.914438247680664, -4.286332130432129, -7.133133411407471, -5.809284210205078, -3.3124961853027344, -5.612478733062744]}, {"auto_scores": [-0.6680727005004883, -0.6739019155502319, -3.1031758785247803, -5.8908233642578125, -4.66937780380249, -2.0242176055908203, -4.522027492523193]}, {"auto_scores": [-4.329258441925049, -5.275319576263428, -5.579972743988037, -7.09144401550293, -5.283503532409668, -5.104097843170166, -4.941505432128906]}, {"auto_scores": [-2.8284058570861816, -1.1876786947250366, -3.3185195922851562, -4.423750400543213, -4.909323692321777, -3.3485019207000732, -3.490218162536621]}, {"auto_scores": [-6.907121181488037, -4.707708358764648, -4.0600481033325195, -6.627317905426025, -4.6124958992004395, -5.713040828704834, -4.879390239715576]}, {"auto_scores": [-3.482964515686035, -1.8555914163589478, -3.4856302738189697, -4.5765790939331055, -5.007498264312744, -5.3058695793151855, -4.370933532714844]}, {"auto_scores": [-3.649778127670288, -7.154506683349609, -3.289411783218384, -5.8764119148254395, -5.020175933837891, -4.205811023712158, -4.3719482421875]}, {"auto_scores": [-3.525455951690674, -0.6714977025985718, -3.5236685276031494, -5.575921058654785, -4.328283309936523, -4.421144008636475, -5.753363609313965]}, {"auto_scores": [-4.823455810546875, -3.305582284927368, -3.7563633918762207, -7.122200012207031, -6.407985687255859, -6.161899566650391, -6.045509338378906]}, {"auto_scores": [-4.271821022033691, -0.22897133231163025, -1.7710247039794922, -5.976954460144043, -4.617368221282959, -4.148235321044922, -5.564839839935303]}, {"auto_scores": [-4.172784328460693, -0.9746262431144714, -4.971782207489014, -7.950442790985107, -4.324660778045654, -5.159337043762207, -5.440691947937012]}, {"auto_scores": [-3.0683135986328125, -2.433345079421997, -2.8803658485412598, -5.805661678314209, -4.270867824554443, -3.3736610412597656, -5.132525444030762]}, {"auto_scores": [-1.7446292638778687, -0.6325616240501404, -2.5213329792022705, -4.796778202056885, -4.467785835266113, -3.71679949760437, -4.044307708740234]}, {"auto_scores": [-4.092418670654297, -3.5181899070739746, -4.59417200088501, -4.340042591094971, -5.503137111663818, -5.130063533782959, -4.715507507324219]}, {"auto_scores": [-1.9360358715057373, -1.2985289096832275, -1.7955436706542969, -5.797574996948242, -5.288994789123535, -2.147458076477051, -2.362152576446533]}, {"auto_scores": [-3.1864800453186035, -7.8803300857543945, -3.383310079574585, -5.561729431152344, -5.314629554748535, -3.5953166484832764, -5.051906108856201]}, {"auto_scores": [-3.8382568359375, -1.2670823335647583, -4.553617000579834, -5.867466449737549, -4.707453727722168, -5.538605213165283, -4.538035869598389]}, {"auto_scores": [-3.8993802070617676, -3.60168719291687, -3.857508659362793, -4.424567699432373, -5.072203636169434, -4.495748519897461, -4.477273464202881]}, {"auto_scores": [-1.950005054473877, -0.7109898924827576, -3.2378036975860596, -5.4950456619262695, -4.684595108032227, -1.7371671199798584, -4.209641933441162]}, {"auto_scores": [-4.496479034423828, -2.556669235229492, -4.498341083526611, -5.729393005371094, -5.359912872314453, -5.263284206390381, -6.106546401977539]}, {"auto_scores": [-4.054862976074219, -0.45988163352012634, -4.6832661628723145, -4.341195106506348, -5.400857448577881, -4.356181621551514, -5.217522144317627]}, {"auto_scores": [-5.333022117614746, -1.9357249736785889, -4.021650314331055, -5.890506267547607, -4.706431865692139, -5.232513904571533, -5.546692371368408]}, {"auto_scores": [-3.3256938457489014, -0.9897561073303223, -4.210707664489746, -5.635035037994385, -5.4662861824035645, -3.871394157409668, -5.324164390563965]}, {"auto_scores": [-2.717494487762451, -3.7315330505371094, -3.955324172973633, -4.491107940673828, -4.338680744171143, -3.963266372680664, -4.681497573852539]}, {"auto_scores": [-0.8609310984611511, -1.1239956617355347, -1.3126013278961182, -4.678332805633545, -4.1132965087890625, -2.218703269958496, -3.150106906890869]}, {"auto_scores": [-3.643179416656494, -0.9851183891296387, -3.833064317703247, -4.648499011993408, -4.927526950836182, -4.354979038238525, -5.20051383972168]}, {"auto_scores": [-3.9338979721069336, -3.090111494064331, -4.448740482330322, -3.902815818786621, -4.836552619934082, -3.8867716789245605, -5.490387439727783]}, {"auto_scores": [-1.1862435340881348, -1.603387713432312, -4.425495147705078, -4.629676342010498, -6.500186920166016, -1.749828577041626, -3.9402852058410645]}, {"auto_scores": [-3.261005401611328, -0.7430241703987122, -4.377554416656494, -5.855930805206299, -5.323549747467041, -4.947967529296875, -4.609178066253662]}, {"auto_scores": [-2.8447532653808594, -0.9991836547851562, -3.883254051208496, -7.081796646118164, -5.052463531494141, -4.451764106750488, -4.260918140411377]}, {"auto_scores": [-2.7264280319213867, -1.1419219970703125, -1.1307411193847656, -5.69485330581665, -5.184634208679199, -2.312387228012085, -2.2400753498077393]}, {"auto_scores": [-3.8610730171203613, -1.3325031995773315, -3.223046064376831, -5.786430835723877, -5.001805305480957, -4.727075576782227, -4.931162357330322]}, {"auto_scores": [-3.1031854152679443, -2.283616065979004, -3.1693084239959717, -5.209831714630127, -5.4373955726623535, -3.6447370052337646, -5.161511421203613]}, {"auto_scores": [-2.831552505493164, -1.4173871278762817, -2.1985905170440674, -4.8720808029174805, -3.425079345703125, -2.933311700820923, -4.249796390533447]}, {"auto_scores": [-2.482973575592041, -0.8178629279136658, -3.3884518146514893, -4.180578231811523, -4.87364387512207, -1.7275251150131226, -3.002310037612915]}, {"auto_scores": [-3.5216124057769775, -1.315640926361084, -4.976970195770264, -4.588405132293701, -4.988601207733154, -4.338250160217285, -4.258938789367676]}, {"auto_scores": [-2.2196450233459473, -0.6448060274124146, -4.11163330078125, -8.273467063903809, -3.422013521194458, -2.428804397583008, -4.367648601531982]}, {"auto_scores": [-2.8822271823883057, -1.338028073310852, -2.5680463314056396, -5.671765327453613, -3.6491050720214844, -3.923588752746582, -3.6876883506774902]}, {"auto_scores": [-3.0493404865264893, -1.0289462804794312, -2.6373345851898193, -5.77088737487793, -4.87874174118042, -5.060049057006836, -5.282394886016846]}, {"auto_scores": [-4.5092267990112305, -2.1672258377075195, -5.196324348449707, -4.173991680145264, -5.770926475524902, -4.504154205322266, -5.454038619995117]}, {"auto_scores": [-3.9801697731018066, -1.4928231239318848, -4.76878547668457, -4.193707466125488, -4.717602252960205, -4.891543388366699, -4.786087989807129]}, {"auto_scores": [-4.327728271484375, -5.265812873840332, -4.09039831161499, -5.932196140289307, -5.382634162902832, -4.686864376068115, -5.1508002281188965]}, {"auto_scores": [-3.269033432006836, -4.5389723777771, -4.005921840667725, -6.995456695556641, -4.278707981109619, -3.5476715564727783, -4.695621490478516]}, {"auto_scores": [-3.557901620864868, -2.4186339378356934, -4.851772785186768, -4.371306896209717, -5.103922367095947, -4.333691596984863, -4.298647880554199]}, {"auto_scores": [-4.055961608886719, -4.662075519561768, -3.473954439163208, -7.5713982582092285, -6.1275200843811035, -5.409901142120361, -5.5505499839782715]}, {"auto_scores": [-3.688018798828125, -3.572950839996338, -4.312163352966309, -4.746547698974609, -4.910508155822754, -4.222039222717285, -4.782878398895264]}, {"auto_scores": [-3.908001661300659, -1.983359932899475, -4.276918411254883, -5.791119575500488, -5.717682361602783, -4.877152442932129, -4.80209493637085]}, {"auto_scores": [-3.632587194442749, -3.0536139011383057, -4.381106376647949, -5.96414852142334, -4.910465240478516, -3.78712797164917, -5.2252631187438965]}, {"auto_scores": [-3.524134635925293, -1.4702116250991821, -3.7997453212738037, -6.277534484863281, -5.421000003814697, -4.504569053649902, -4.773496150970459]}, {"auto_scores": [-3.3854877948760986, -4.794503211975098, -4.626842498779297, -4.996484756469727, -5.472954273223877, -4.742428302764893, -6.0292253494262695]}, {"auto_scores": [-3.2534337043762207, -1.3036439418792725, -3.159641981124878, -6.059638023376465, -4.791782379150391, -4.999390125274658, -5.129875659942627]}, {"auto_scores": [-3.7950146198272705, -1.0038492679595947, -3.8807947635650635, -6.902472496032715, -5.115923881530762, -4.9170732498168945, -4.917073726654053]}, {"auto_scores": [-4.213415622711182, -4.250669002532959, -3.8901631832122803, -5.633732318878174, -5.176709175109863, -5.110617637634277, -5.267539024353027]}, {"auto_scores": [-4.235969543457031, -2.604471445083618, -4.817269802093506, -6.817267894744873, -4.734187126159668, -4.550678253173828, -4.019895076751709]}, {"auto_scores": [-4.271620273590088, -3.7753279209136963, -4.285919189453125, -6.411378860473633, -4.8667378425598145, -4.203290939331055, -4.631669521331787]}]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[{"auto_scores": [0.47059, 0.9434, 0.21918, 0.05263, 0.19781, 0.22857, 0.21539]}, {"auto_scores": [0.31667, 0.97561, 0.54545, 0.07408, 0.17858, 0.18519, 0.38554]}, {"auto_scores": [0.21052, 0.9, 0.16439, 0.05555, 0.28572, 0.28571, 0.12598]}, {"auto_scores": [0.12372, 0.75, 0.04167, 0.0, 0.09411, 0.15686, 0.12698]}, {"auto_scores": [0.12281, 0.72, 0.10811, 0.0, 0.11628, 0.12766, 0.10526]}, {"auto_scores": [0.2029, 0.85714, 0.23188, 0.15, 0.25715, 0.17241, 0.17647]}, {"auto_scores": [0.64285, 1.0, 0.37255, 0.0, 0.32117, 0.77894, 0.6]}, {"auto_scores": [0.12903, 0.66667, 0.15385, 0.0, 0.10526, 0.11111, 0.11111]}, {"auto_scores": [0.52941, 0.95652, 0.19672, 0.0, 0.16667, 0.53731, 0.45569]}, {"auto_scores": [0.09756, 0.67692, 0.10126, 0.04445, 0.13158, 0.07895, 0.12599]}, {"auto_scores": [0.2078, 0.90909, 0.21739, 0.07407, 0.15001, 0.20833, 0.11321]}, {"auto_scores": [0.08, 0.43479, 0.15686, 0.08334, 0.09412, 0.11765, 0.1282]}, {"auto_scores": [0.34951, 1.0, 0.64285, 0.21053, 0.4, 0.11594, 0.1039]}, {"auto_scores": [0.10937, 0.66667, 0.07142, 0.08333, 0.10126, 0.0, 0.03252]}, {"auto_scores": [0.12903, 1.0, 0.82051, 0.0, 0.16667, 0.43243, 0.33614]}, {"auto_scores": [0.09449, 0.96552, 0.14492, 0.0, 0.07577, 0.3077, 0.06061]}, {"auto_scores": [0.26373, 0.9, 0.33334, 0.0, 0.12612, 0.17544, 0.12903]}, {"auto_scores": [0.33708, 0.98529, 0.26263, 0.02381, 0.34783, 0.5, 0.24242]}, {"auto_scores": [0.18182, 0.66667, 0.15385, 0.0, 0.0, 0.16666, 0.11111]}, {"auto_scores": [0.6055, 0.93507, 0.73333, 0.04545, 0.46729, 0.85715, 0.73333]}, {"auto_scores": [0.0, 0.33333, 0.0, 0.0, 0.0, 0.0, 0.0]}, {"auto_scores": [0.2, 0.97297, 0.21875, 0.0, 0.47369, 0.11111, 0.21818]}, {"auto_scores": [0.25862, 0.70968, 0.2, 0.03636, 0.21539, 0.28948, 0.30769]}, {"auto_scores": [0.48855, 0.98462, 0.23684, 0.09524, 0.17778, 0.76923, 0.40876]}, {"auto_scores": [0.08265, 0.78572, 0.04445, 0.0, 0.08602, 0.03637, 0.075]}, {"auto_scores": [0.22609, 1.0, 0.1608, 0.03509, 0.125, 0.17894, 0.15384]}, {"auto_scores": [0.27118, 0.93549, 0.20513, 0.0, 0.30107, 0.27692, 0.21782]}, {"auto_scores": [0.42222, 0.97436, 0.24, 0.0, 0.26415, 0.73077, 0.29033]}, {"auto_scores": [0.22535, 0.875, 0.3077, 0.0, 0.18182, 0.14634, 0.13333]}, {"auto_scores": [0.49006, 1.0, 0.59259, 0.09091, 0.51807, 0.425, 0.59649]}, {"auto_scores": [0.11764, 0.91892, 0.28169, 0.11428, 0.26966, 0.19231, 0.11429]}, {"auto_scores": [0.07595, 0.82927, 0.14706, 0.0, 0.11111, 0.025, 0.09302]}, {"auto_scores": [0.69822, 0.98148, 0.24096, 0.04838, 0.22377, 0.5, 0.35616]}, {"auto_scores": [0.42857, 0.98182, 0.26471, 0.16666, 0.20472, 0.16129, 0.18182]}, {"auto_scores": [0.58252, 0.9697, 0.12821, 0.05405, 0.26471, 0.21621, 0.25]}, {"auto_scores": [0.61017, 0.98591, 1.0, 0.16394, 0.34568, 0.95652, 1.0]}, {"auto_scores": [0.43678, 0.95652, 0.51613, 0.10526, 0.14545, 0.14545, 0.19048]}, {"auto_scores": [0.28572, 0.91892, 0.20896, 0.11428, 0.19672, 0.28571, 0.19672]}, {"auto_scores": [0.38938, 0.97675, 0.83019, 0.0, 0.37624, 0.56604, 0.35088]}, {"auto_scores": [0.63889, 0.97872, 0.15873, 0.05263, 0.07843, 0.74193, 0.74193]}, {"auto_scores": [0.12195, 0.96296, 0.21818, 0.2069, 0.17242, 0.17242, 0.24]}, {"auto_scores": [0.33463, 0.99479, 0.09216, 0.0, 0.31372, 0.28444, 0.41216]}, {"auto_scores": [0.42718, 0.95833, 0.1, 0.0, 0.15152, 0.10345, 0.4466]}, {"auto_scores": [0.31343, 0.97675, 0.47619, 0.0, 0.14458, 0.16667, 0.10526]}, {"auto_scores": [0.11494, 0.93333, 0.2, 0.0, 0.11112, 0.05, 0.0708]}, {"auto_scores": [0.17204, 0.95, 0.18749, 0.05556, 0.16667, 0.06667, 0.1791]}, {"auto_scores": [0.16901, 0.73077, 0.28986, 0.0, 0.19355, 0.09375, 0.225]}, {"auto_scores": [0.18868, 0.78788, 0.19179, 0.15, 0.24657, 0.1695, 0.17143]}, {"auto_scores": [0.1519, 0.84615, 0.09677, 0.06667, 0.03884, 0.07844, 0.07273]}, {"auto_scores": [0.12307, 0.7317, 0.19047, 0.0, 0.1282, 0.09836, 0.11594]}, {"auto_scores": [0.08772, 0.78261, 0.08696, 0.07143, 0.04706, 0.14545, 0.10666]}, {"auto_scores": [0.20833, 0.91666, 0.24, 0.16666, 0.13636, 0.17857, 0.22222]}, {"auto_scores": [0.08, 0.78431, 0.19178, 0.0, 0.26087, 0.28986, 0.20619]}, {"auto_scores": [0.16868, 0.875, 0.17778, 0.0, 0.28, 0.28, 0.20589]}, {"auto_scores": [0.12371, 0.69565, 0.06558, 0.06667, 0.05797, 0.07692, 0.05633]}, {"auto_scores": [0.20155, 0.93333, 0.2623, 0.0, 0.14457, 0.13793, 0.18666]}, {"auto_scores": [0.15385, 0.97143, 0.16326, 0.25806, 0.19178, 0.09091, 0.09091]}, {"auto_scores": [0.19672, 0.79166, 0.21538, 0.0, 0.10204, 0.21538, 0.18182]}, {"auto_scores": [0.28948, 0.85714, 0.11941, 0.36364, 0.31579, 0.3, 0.2353]}, {"auto_scores": [0.20833, 0.90323, 0.21538, 0.16, 0.10417, 0.10526, 0.13333]}]
Loading