MrLight commited on
Commit
c6b9166
1 Parent(s): 99566ca

Update results.json

Browse files
Files changed (1) hide show
  1. results.json +1 -1
results.json CHANGED
@@ -1 +1 @@
1
- {"vidore/arxivqa_test_subsampled": {"ndcg_at_1": 0.782, "ndcg_at_3": 0.82981, "ndcg_at_5": 0.84375, "ndcg_at_10": 0.8587, "ndcg_at_20": 0.8648, "ndcg_at_100": 0.87028, "ndcg_at_1000": 0.87086, "map_at_1": 0.782, "map_at_3": 0.818, "map_at_5": 0.8257, "map_at_10": 0.83191, "map_at_20": 0.8336, "map_at_100": 0.83446, "map_at_1000": 0.83449, "recall_at_1": 0.782, "recall_at_3": 0.864, "recall_at_5": 0.898, "recall_at_10": 0.944, "recall_at_20": 0.968, "recall_at_100": 0.996, "recall_at_1000": 1.0, "precision_at_1": 0.782, "precision_at_3": 0.288, "precision_at_5": 0.1796, "precision_at_10": 0.0944, "precision_at_20": 0.0484, "precision_at_100": 0.00996, "precision_at_1000": 0.001, "mrr_at_1": 0.78, "mrr_at_3": 0.8166666666666667, "mrr_at_5": 0.8248666666666664, "mrr_at_10": 0.8307190476190476, "mrr_at_20": 0.8325789557192032, "mrr_at_100": 0.8334424012781603, "mrr_at_1000": 0.8334778187538883, "naucs_at_1_max": 0.44742507810931065, "naucs_at_1_std": -0.3467212770230202, "naucs_at_1_diff1": 0.8844169005664965, "naucs_at_3_max": 0.4788715366672357, "naucs_at_3_std": -0.39301359731467017, "naucs_at_3_diff1": 0.8540066564260118, "naucs_at_5_max": 0.45302949652172225, "naucs_at_5_std": -0.501238964042512, "naucs_at_5_diff1": 0.835731722985994, "naucs_at_10_max": 0.4593337334934008, "naucs_at_10_std": -0.462168200613585, "naucs_at_10_diff1": 0.8597939175670272, "naucs_at_20_max": 0.3856500933706777, "naucs_at_20_std": -0.37164449112980097, "naucs_at_20_diff1": 0.852240896358544, "naucs_at_100_max": 0.4225023342669959, "naucs_at_100_std": -0.3699813258636757, "naucs_at_100_diff1": 0.9346405228758466, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/docvqa_test_subsampled": {"ndcg_at_1": 0.46563, "ndcg_at_3": 0.54097, "ndcg_at_5": 0.56463, "ndcg_at_10": 0.58516, "ndcg_at_20": 0.60242, "ndcg_at_100": 0.62688, "ndcg_at_1000": 0.63783, "map_at_1": 0.46563, "map_at_3": 0.52254, "map_at_5": 0.53562, "map_at_10": 0.54436, "map_at_20": 0.54904, "map_at_100": 0.55233, "map_at_1000": 0.55276, "recall_at_1": 0.46563, "recall_at_3": 0.59424, "recall_at_5": 0.65188, "recall_at_10": 0.71397, "recall_at_20": 0.78271, "recall_at_100": 0.91574, "recall_at_1000": 1.0, "precision_at_1": 0.46563, "precision_at_3": 0.19808, "precision_at_5": 0.13038, "precision_at_10": 0.0714, "precision_at_20": 0.03914, "precision_at_100": 0.00916, "precision_at_1000": 0.001, "mrr_at_1": 0.4656319290465632, "mrr_at_3": 0.5229120473022912, "mrr_at_5": 0.5365484109386548, "mrr_at_10": 0.5447277654594725, "mrr_at_20": 0.5493058156436063, "mrr_at_100": 0.5524490255309318, "mrr_at_1000": 0.5528774197374761, "naucs_at_1_max": -0.1292959584935956, "naucs_at_1_std": -0.03510981553583776, "naucs_at_1_diff1": 0.7126693919953404, "naucs_at_3_max": -0.21561989661410058, "naucs_at_3_std": -0.014060839538205696, "naucs_at_3_diff1": 0.6019328085184139, "naucs_at_5_max": -0.16671887155888676, "naucs_at_5_std": 0.05018778556139546, "naucs_at_5_diff1": 0.5917975017550882, "naucs_at_10_max": -0.13542996456491116, "naucs_at_10_std": 0.11591288538108199, "naucs_at_10_diff1": 0.5825255382689951, "naucs_at_20_max": -0.1415677503942358, "naucs_at_20_std": 0.22303762425580564, "naucs_at_20_diff1": 0.553963541857152, "naucs_at_100_max": -0.25629451591291, "naucs_at_100_std": 0.5675008878796466, "naucs_at_100_diff1": 0.4369090393282568, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/infovqa_test_subsampled": {"ndcg_at_1": 0.80972, "ndcg_at_3": 0.86194, "ndcg_at_5": 0.8777, "ndcg_at_10": 0.8831, "ndcg_at_20": 0.88719, "ndcg_at_100": 0.89133, "ndcg_at_1000": 0.89239, "map_at_1": 0.80972, "map_at_3": 0.84919, "map_at_5": 0.85789, "map_at_10": 0.86022, "map_at_20": 0.86133, "map_at_100": 0.8619, "map_at_1000": 0.86194, "recall_at_1": 0.80972, "recall_at_3": 0.89879, "recall_at_5": 0.93725, "recall_at_10": 0.95344, "recall_at_20": 0.96964, "recall_at_100": 0.9919, "recall_at_1000": 1.0, "precision_at_1": 0.80972, "precision_at_3": 0.2996, "precision_at_5": 0.18745, "precision_at_10": 0.09534, "precision_at_20": 0.04848, "precision_at_100": 0.00992, "precision_at_1000": 0.001, "mrr_at_1": 0.8097165991902834, "mrr_at_3": 0.848515519568151, "mrr_at_5": 0.8577260458839403, "mrr_at_10": 0.860326296510507, "mrr_at_20": 0.8612999573525888, "mrr_at_100": 0.8618786331293616, "mrr_at_1000": 0.8619216766315373, "naucs_at_1_max": 0.33733350430585324, "naucs_at_1_std": -0.15038270562159095, "naucs_at_1_diff1": 0.8917076612158314, "naucs_at_3_max": 0.33015296696282664, "naucs_at_3_std": -0.1433967026981579, "naucs_at_3_diff1": 0.8358887326202742, "naucs_at_5_max": 0.45303441221019186, "naucs_at_5_std": 0.039306942809813596, "naucs_at_5_diff1": 0.8678211411133314, "naucs_at_10_max": 0.5059450246252336, "naucs_at_10_std": 0.20537063834609695, "naucs_at_10_diff1": 0.8719594745464425, "naucs_at_20_max": 0.6482879525826721, "naucs_at_20_std": 0.4910824957216253, "naucs_at_20_diff1": 0.8864405546443311, "naucs_at_100_max": 0.5671541640947649, "naucs_at_100_std": 0.9673496364838197, "naucs_at_100_diff1": 0.9673496364838197, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/tabfquad_test_subsampled": {"ndcg_at_1": 0.88214, "ndcg_at_3": 0.9244, "ndcg_at_5": 0.93056, "ndcg_at_10": 0.93787, "ndcg_at_20": 0.93967, "ndcg_at_100": 0.94029, "ndcg_at_1000": 0.94029, "map_at_1": 0.88214, "map_at_3": 0.91429, "map_at_5": 0.91786, "map_at_10": 0.92111, "map_at_20": 0.9216, "map_at_100": 0.92166, "map_at_1000": 0.92166, "recall_at_1": 0.88214, "recall_at_3": 0.95357, "recall_at_5": 0.96786, "recall_at_10": 0.98929, "recall_at_20": 0.99643, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.88214, "precision_at_3": 0.31786, "precision_at_5": 0.19357, "precision_at_10": 0.09893, "precision_at_20": 0.04982, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8857142857142857, "mrr_at_3": 0.9160714285714285, "mrr_at_5": 0.9196428571428571, "mrr_at_10": 0.9229761904761907, "mrr_at_20": 0.9234863945578232, "mrr_at_100": 0.9235501700680274, "mrr_at_1000": 0.9235501700680274, "naucs_at_1_max": 0.8153595242906209, "naucs_at_1_std": -0.08425944635279732, "naucs_at_1_diff1": 0.8695984758825744, "naucs_at_3_max": 0.8902894491129786, "naucs_at_3_std": -0.09850606909430229, "naucs_at_3_diff1": 0.8082309847015758, "naucs_at_5_max": 0.9419026870007242, "naucs_at_5_std": -0.4513435003631115, "naucs_at_5_diff1": 0.8137773627969682, "naucs_at_10_max": 0.9564270152505505, "naucs_at_10_std": -0.12729536258947582, "naucs_at_10_diff1": 0.7642390289449155, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 0.5541549953314753, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/tatdqa_test": {"ndcg_at_1": 0.53463, "ndcg_at_3": 0.66112, "ndcg_at_5": 0.69108, "ndcg_at_10": 0.71691, "ndcg_at_20": 0.72967, "ndcg_at_100": 0.7372, "ndcg_at_1000": 0.73831, "map_at_1": 0.53463, "map_at_3": 0.62971, "map_at_5": 0.64629, "map_at_10": 0.65725, "map_at_20": 0.66083, "map_at_100": 0.662, "map_at_1000": 0.66205, "recall_at_1": 0.53463, "recall_at_3": 0.75213, "recall_at_5": 0.82503, "recall_at_10": 0.9034, "recall_at_20": 0.95322, "recall_at_100": 0.9921, "recall_at_1000": 1.0, "precision_at_1": 0.53463, "precision_at_3": 0.25071, "precision_at_5": 0.16501, "precision_at_10": 0.09034, "precision_at_20": 0.04766, "precision_at_100": 0.00992, "precision_at_1000": 0.001, "mrr_at_1": 0.5321992709599028, "mrr_at_3": 0.6287970838396124, "mrr_at_5": 0.6452916160388834, "mrr_at_10": 0.6561307836216711, "mrr_at_20": 0.6598389266958162, "mrr_at_100": 0.66101941789965, "mrr_at_1000": 0.6610768683790884, "naucs_at_1_max": 0.07409527997140894, "naucs_at_1_std": -0.26047752640418215, "naucs_at_1_diff1": 0.72204494772842, "naucs_at_3_max": 0.04003305577617401, "naucs_at_3_std": -0.26947639508637483, "naucs_at_3_diff1": 0.5974129781453887, "naucs_at_5_max": 0.07754372151617528, "naucs_at_5_std": -0.20433194072434457, "naucs_at_5_diff1": 0.585370891111698, "naucs_at_10_max": 0.22439176753639506, "naucs_at_10_std": -0.013478263230933481, "naucs_at_10_diff1": 0.5327286719899367, "naucs_at_20_max": 0.19200373124090117, "naucs_at_20_std": 0.2687201803616548, "naucs_at_20_diff1": 0.5032897844192747, "naucs_at_100_max": 0.20865776577292058, "naucs_at_100_std": 0.9010980513802566, "naucs_at_100_diff1": 0.5088839877584361, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/shiftproject_test": {"ndcg_at_1": 0.68, "ndcg_at_3": 0.79964, "ndcg_at_5": 0.82803, "ndcg_at_10": 0.84065, "ndcg_at_20": 0.84305, "ndcg_at_100": 0.84305, "ndcg_at_1000": 0.84305, "map_at_1": 0.68, "map_at_3": 0.77167, "map_at_5": 0.78717, "map_at_10": 0.79219, "map_at_20": 0.79278, "map_at_100": 0.79278, "map_at_1000": 0.79278, "recall_at_1": 0.68, "recall_at_3": 0.88, "recall_at_5": 0.95, "recall_at_10": 0.99, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.68, "precision_at_3": 0.29333, "precision_at_5": 0.19, "precision_at_10": 0.099, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.71, "mrr_at_3": 0.7883333333333333, "mrr_at_5": 0.8043333333333333, "mrr_at_10": 0.8093611111111113, "mrr_at_20": 0.8099861111111113, "mrr_at_100": 0.8099861111111113, "mrr_at_1000": 0.8099861111111113, "naucs_at_1_max": 0.3441019970976431, "naucs_at_1_std": -0.18704650680671775, "naucs_at_1_diff1": 0.6837640798839055, "naucs_at_3_max": 0.4518374164810674, "naucs_at_3_std": -0.2021953547566022, "naucs_at_3_diff1": 0.4876312440343619, "naucs_at_5_max": 0.30214752567694186, "naucs_at_5_std": -0.19066293183939664, "naucs_at_5_diff1": 0.5589169000933724, "naucs_at_10_max": 0.5541549953314738, "naucs_at_10_std": 0.12278244631185926, "naucs_at_10_diff1": 0.8692810457516413, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.92, "ndcg_at_3": 0.94893, "ndcg_at_5": 0.95754, "ndcg_at_10": 0.95754, "ndcg_at_20": 0.95999, "ndcg_at_100": 0.95999, "ndcg_at_1000": 0.95999, "map_at_1": 0.92, "map_at_3": 0.94167, "map_at_5": 0.94667, "map_at_10": 0.94667, "map_at_20": 0.94729, "map_at_100": 0.94729, "map_at_1000": 0.94729, "recall_at_1": 0.92, "recall_at_3": 0.97, "recall_at_5": 0.99, "recall_at_10": 0.99, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32333, "precision_at_5": 0.198, "precision_at_10": 0.099, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.93, "mrr_at_3": 0.9466666666666668, "mrr_at_5": 0.9516666666666667, "mrr_at_10": 0.9516666666666667, "mrr_at_20": 0.9522916666666668, "mrr_at_100": 0.9522916666666668, "mrr_at_1000": 0.9522916666666668, "naucs_at_1_max": 0.6212651727357608, "naucs_at_1_std": -0.3262138188608764, "naucs_at_1_diff1": 0.8748249299719901, "naucs_at_3_max": 0.6374105197634555, "naucs_at_3_std": -0.8266417678182336, "naucs_at_3_diff1": 0.9074074074074082, "naucs_at_5_max": 0.5541549953314738, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.5541549953314738, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_energy_test": {"ndcg_at_1": 0.84, "ndcg_at_3": 0.89655, "ndcg_at_5": 0.90472, "ndcg_at_10": 0.91089, "ndcg_at_20": 0.91601, "ndcg_at_100": 0.91601, "ndcg_at_1000": 0.91601, "map_at_1": 0.84, "map_at_3": 0.88167, "map_at_5": 0.88617, "map_at_10": 0.88853, "map_at_20": 0.88996, "map_at_100": 0.88996, "map_at_1000": 0.88996, "recall_at_1": 0.84, "recall_at_3": 0.94, "recall_at_5": 0.96, "recall_at_10": 0.98, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.84, "precision_at_3": 0.31333, "precision_at_5": 0.192, "precision_at_10": 0.098, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.84, "mrr_at_3": 0.8833333333333334, "mrr_at_5": 0.8878333333333334, "mrr_at_10": 0.8905119047619049, "mrr_at_20": 0.8921352813852814, "mrr_at_100": 0.8921352813852814, "mrr_at_1000": 0.8921352813852814, "naucs_at_1_max": 0.5559339715267544, "naucs_at_1_std": -0.5277061855670094, "naucs_at_1_diff1": 0.9355670103092778, "naucs_at_3_max": 0.526221599751008, "naucs_at_3_std": -0.34920634920634924, "naucs_at_3_diff1": 0.9319172113289744, "naucs_at_5_max": 0.35877684407096394, "naucs_at_5_std": -0.511671335200739, "naucs_at_5_diff1": 0.9305555555555542, "naucs_at_10_max": 0.6381886087768404, "naucs_at_10_std": -0.21475256769374024, "naucs_at_10_diff1": 0.861111111111116, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.89, "ndcg_at_3": 0.92786, "ndcg_at_5": 0.94377, "ndcg_at_10": 0.94666, "ndcg_at_20": 0.94666, "ndcg_at_100": 0.94666, "ndcg_at_1000": 0.94666, "map_at_1": 0.89, "map_at_3": 0.92, "map_at_5": 0.9285, "map_at_10": 0.9295, "map_at_20": 0.9295, "map_at_100": 0.9295, "map_at_1000": 0.9295, "recall_at_1": 0.89, "recall_at_3": 0.95, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.89, "precision_at_3": 0.31667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.89, "mrr_at_3": 0.92, "mrr_at_5": 0.9285000000000001, "mrr_at_10": 0.9296111111111113, "mrr_at_20": 0.9296111111111113, "mrr_at_100": 0.9296111111111113, "mrr_at_1000": 0.9296111111111113, "naucs_at_1_max": 0.6429430461300578, "naucs_at_1_std": -0.4849239756034711, "naucs_at_1_diff1": 0.9007817197835238, "naucs_at_3_max": 0.6070028011204466, "naucs_at_3_std": -0.7213818860877673, "naucs_at_3_diff1": 0.9183006535947692, "naucs_at_5_max": 0.5541549953314738, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 0.7222222222222276, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.94809, "ndcg_at_5": 0.94809, "ndcg_at_10": 0.94809, "ndcg_at_20": 0.94809, "ndcg_at_100": 0.94809, "ndcg_at_1000": 0.94809, "map_at_1": 0.87, "map_at_3": 0.93, "map_at_5": 0.93, "map_at_10": 0.93, "map_at_20": 0.93, "map_at_100": 0.93, "map_at_1000": 0.93, "recall_at_1": 0.87, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.87, "mrr_at_3": 0.93, "mrr_at_5": 0.93, "mrr_at_10": 0.93, "mrr_at_20": 0.93, "mrr_at_100": 0.93, "mrr_at_1000": 0.93, "naucs_at_1_max": 0.6636303043768056, "naucs_at_1_std": 0.017144338295194392, "naucs_at_1_diff1": 0.8868769902984519, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}
 
1
+ {"vidore/arxivqa_test_subsampled": {"ndcg_at_1": 0.794, "ndcg_at_3": 0.84612, "ndcg_at_5": 0.85584, "ndcg_at_10": 0.86884, "ndcg_at_20": 0.87552, "ndcg_at_100": 0.88006, "ndcg_at_1000": 0.88061, "map_at_1": 0.794, "map_at_3": 0.83367, "map_at_5": 0.83897, "map_at_10": 0.84437, "map_at_20": 0.84626, "map_at_100": 0.84692, "map_at_1000": 0.84695, "recall_at_1": 0.794, "recall_at_3": 0.882, "recall_at_5": 0.906, "recall_at_10": 0.946, "recall_at_20": 0.972, "recall_at_100": 0.996, "recall_at_1000": 1.0, "precision_at_1": 0.794, "precision_at_3": 0.294, "precision_at_5": 0.1812, "precision_at_10": 0.0946, "precision_at_20": 0.0486, "precision_at_100": 0.00996, "precision_at_1000": 0.001, "mrr_at_1": 0.794, "mrr_at_3": 0.8333333333333334, "mrr_at_5": 0.8385333333333332, "mrr_at_10": 0.8440206349206347, "mrr_at_20": 0.8457624946948473, "mrr_at_100": 0.8464318523346234, "mrr_at_1000": 0.8464586755347477, "naucs_at_1_max": 0.0264204919913496, "naucs_at_1_std": -0.4432218159450506, "naucs_at_1_diff1": 0.8754399753955507, "naucs_at_3_max": -0.08298212469117683, "naucs_at_3_std": -0.5738264787094874, "naucs_at_3_diff1": 0.8379192300860663, "naucs_at_5_max": -0.07192522399030574, "naucs_at_5_std": -0.5486421518962216, "naucs_at_5_diff1": 0.8177285098436528, "naucs_at_10_max": -0.30577169139260696, "naucs_at_10_std": -0.7531728740879055, "naucs_at_10_diff1": 0.8244112459798737, "naucs_at_20_max": -0.2807456315859826, "naucs_at_20_std": -0.4205348806189298, "naucs_at_20_diff1": 0.882953181272511, "naucs_at_100_max": -1.7399626517274025, "naucs_at_100_std": 0.34897292250231704, "naucs_at_100_diff1": 0.8611111111111168, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/docvqa_test_subsampled": {"ndcg_at_1": 0.48337, "ndcg_at_3": 0.55403, "ndcg_at_5": 0.5713, "ndcg_at_10": 0.60081, "ndcg_at_20": 0.61311, "ndcg_at_100": 0.63676, "ndcg_at_1000": 0.64891, "map_at_1": 0.48337, "map_at_3": 0.53769, "map_at_5": 0.54723, "map_at_10": 0.55947, "map_at_20": 0.56283, "map_at_100": 0.56619, "map_at_1000": 0.56674, "recall_at_1": 0.48337, "recall_at_3": 0.60089, "recall_at_5": 0.64302, "recall_at_10": 0.73392, "recall_at_20": 0.78271, "recall_at_100": 0.90909, "recall_at_1000": 1.0, "precision_at_1": 0.48337, "precision_at_3": 0.2003, "precision_at_5": 0.1286, "precision_at_10": 0.07339, "precision_at_20": 0.03914, "precision_at_100": 0.00909, "precision_at_1000": 0.001, "mrr_at_1": 0.4878048780487805, "mrr_at_3": 0.5395417590539542, "mrr_at_5": 0.5487435328898742, "mrr_at_10": 0.5610539189807481, "mrr_at_20": 0.5648169761114504, "mrr_at_100": 0.5681408169734422, "mrr_at_1000": 0.5686621426973844, "naucs_at_1_max": -0.5212133012346877, "naucs_at_1_std": -0.09288577096523737, "naucs_at_1_diff1": 0.7503022682010474, "naucs_at_3_max": -0.6565880303875717, "naucs_at_3_std": -0.07460405832901369, "naucs_at_3_diff1": 0.6561082577501688, "naucs_at_5_max": -0.5943855392066872, "naucs_at_5_std": -0.05220343159264611, "naucs_at_5_diff1": 0.6238399743739096, "naucs_at_10_max": -0.6195908132701516, "naucs_at_10_std": 0.13900687126802166, "naucs_at_10_diff1": 0.558265195499329, "naucs_at_20_max": -0.6615869284465803, "naucs_at_20_std": 0.1387067044872028, "naucs_at_20_diff1": 0.5554270724438797, "naucs_at_100_max": -0.7684732285094673, "naucs_at_100_std": 0.45832384986805724, "naucs_at_100_diff1": 0.4903408194281384, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/infovqa_test_subsampled": {"ndcg_at_1": 0.81377, "ndcg_at_3": 0.86726, "ndcg_at_5": 0.88059, "ndcg_at_10": 0.89071, "ndcg_at_20": 0.89184, "ndcg_at_100": 0.89493, "ndcg_at_1000": 0.89651, "map_at_1": 0.81377, "map_at_3": 0.85425, "map_at_5": 0.86164, "map_at_10": 0.866, "map_at_20": 0.86637, "map_at_100": 0.86683, "map_at_1000": 0.86689, "recall_at_1": 0.81377, "recall_at_3": 0.90486, "recall_at_5": 0.93725, "recall_at_10": 0.96761, "recall_at_20": 0.97166, "recall_at_100": 0.98785, "recall_at_1000": 1.0, "precision_at_1": 0.81377, "precision_at_3": 0.30162, "precision_at_5": 0.18745, "precision_at_10": 0.09676, "precision_at_20": 0.04858, "precision_at_100": 0.00988, "precision_at_1000": 0.001, "mrr_at_1": 0.8137651821862348, "mrr_at_3": 0.8542510121457486, "mrr_at_5": 0.8617408906882587, "mrr_at_10": 0.866114002956108, "mrr_at_20": 0.8665832705306387, "mrr_at_100": 0.8669491238036686, "mrr_at_1000": 0.8670130381350266, "naucs_at_1_max": 0.08332768086513993, "naucs_at_1_std": -0.16693401490742565, "naucs_at_1_diff1": 0.9069340613475471, "naucs_at_3_max": 0.0011369168562785203, "naucs_at_3_std": -0.14976111225508304, "naucs_at_3_diff1": 0.8389370588773137, "naucs_at_5_max": -0.0749515972070914, "naucs_at_5_std": -0.08214044161222527, "naucs_at_5_diff1": 0.8372514892403252, "naucs_at_10_max": -0.1502345681826907, "naucs_at_10_std": 0.5637465376812698, "naucs_at_10_diff1": 0.7969878779414252, "naucs_at_20_max": -0.20287146290299748, "naucs_at_20_std": 0.6550388680835327, "naucs_at_20_diff1": 0.7878243417729163, "naucs_at_100_max": -0.6623458938523301, "naucs_at_100_std": 0.7639533363794858, "naucs_at_100_diff1": 0.8604151369738356, "naucs_at_1000_max": 1.0, "naucs_at_1000_std": 1.0, "naucs_at_1000_diff1": 1.0}, "vidore/tabfquad_test_subsampled": {"ndcg_at_1": 0.875, "ndcg_at_3": 0.92487, "ndcg_at_5": 0.93087, "ndcg_at_10": 0.93426, "ndcg_at_20": 0.93601, "ndcg_at_100": 0.93739, "ndcg_at_1000": 0.93739, "map_at_1": 0.875, "map_at_3": 0.9125, "map_at_5": 0.91589, "map_at_10": 0.91725, "map_at_20": 0.91769, "map_at_100": 0.91791, "map_at_1000": 0.91791, "recall_at_1": 0.875, "recall_at_3": 0.96071, "recall_at_5": 0.975, "recall_at_10": 0.98571, "recall_at_20": 0.99286, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.875, "precision_at_3": 0.32024, "precision_at_5": 0.195, "precision_at_10": 0.09857, "precision_at_20": 0.04964, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.8678571428571429, "mrr_at_3": 0.9101190476190477, "mrr_at_5": 0.9135119047619048, "mrr_at_10": 0.9148653628117914, "mrr_at_20": 0.9153266723356009, "mrr_at_100": 0.9155495426855899, "mrr_at_1000": 0.9155495426855899, "naucs_at_1_max": 0.6341186138885083, "naucs_at_1_std": -0.33273524174770575, "naucs_at_1_diff1": 0.8733735104780164, "naucs_at_3_max": 0.7835922247686966, "naucs_at_3_std": -0.27565571683218926, "naucs_at_3_diff1": 0.8767082590612036, "naucs_at_5_max": 0.8832866479925309, "naucs_at_5_std": -0.041149793250630305, "naucs_at_5_diff1": 0.885620915032675, "naucs_at_10_max": 0.8978758169934754, "naucs_at_10_std": 0.022175536881420224, "naucs_at_10_diff1": 0.8692810457516408, "naucs_at_20_max": 0.8611111111111035, "naucs_at_20_std": 0.41433239962653884, "naucs_at_20_diff1": 0.8692810457516309, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/tatdqa_test": {"ndcg_at_1": 0.54192, "ndcg_at_3": 0.65843, "ndcg_at_5": 0.69378, "ndcg_at_10": 0.71916, "ndcg_at_20": 0.73058, "ndcg_at_100": 0.73824, "ndcg_at_1000": 0.73959, "map_at_1": 0.54192, "map_at_3": 0.63001, "map_at_5": 0.64967, "map_at_10": 0.66004, "map_at_20": 0.6632, "map_at_100": 0.66435, "map_at_1000": 0.66442, "recall_at_1": 0.54192, "recall_at_3": 0.74058, "recall_at_5": 0.82625, "recall_at_10": 0.90522, "recall_at_20": 0.95018, "recall_at_100": 0.99028, "recall_at_1000": 1.0, "precision_at_1": 0.54192, "precision_at_3": 0.24686, "precision_at_5": 0.16525, "precision_at_10": 0.09052, "precision_at_20": 0.04751, "precision_at_100": 0.0099, "precision_at_1000": 0.001, "mrr_at_1": 0.540097205346294, "mrr_at_3": 0.6291008505467812, "mrr_at_5": 0.6485722964763079, "mrr_at_10": 0.6587733128893529, "mrr_at_20": 0.6620815452578555, "mrr_at_100": 0.6632579676072735, "mrr_at_1000": 0.6633336632341336, "naucs_at_1_max": -0.058378738250013384, "naucs_at_1_std": -0.22221425489777502, "naucs_at_1_diff1": 0.7291748780182709, "naucs_at_3_max": -0.07682812054313384, "naucs_at_3_std": -0.24718525281474638, "naucs_at_3_diff1": 0.6115470895642744, "naucs_at_5_max": -0.07888770379939952, "naucs_at_5_std": -0.19457655229648088, "naucs_at_5_diff1": 0.5914062739597068, "naucs_at_10_max": 0.0029404715257986057, "naucs_at_10_std": -0.05548744311414097, "naucs_at_10_diff1": 0.5356727090689206, "naucs_at_20_max": -0.004305759775130589, "naucs_at_20_std": 0.2822864075893588, "naucs_at_20_diff1": 0.5157529604916903, "naucs_at_100_max": -0.04774510029921774, "naucs_at_100_std": 0.8056862206490103, "naucs_at_100_diff1": 0.5126658383723813, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/shiftproject_test": {"ndcg_at_1": 0.68, "ndcg_at_3": 0.79595, "ndcg_at_5": 0.8196, "ndcg_at_10": 0.83596, "ndcg_at_20": 0.83866, "ndcg_at_100": 0.84072, "ndcg_at_1000": 0.84072, "map_at_1": 0.68, "map_at_3": 0.77, "map_at_5": 0.7825, "map_at_10": 0.78935, "map_at_20": 0.79018, "map_at_100": 0.79054, "map_at_1000": 0.79054, "recall_at_1": 0.68, "recall_at_3": 0.87, "recall_at_5": 0.93, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.68, "precision_at_3": 0.29, "precision_at_5": 0.186, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.7, "mrr_at_3": 0.775, "mrr_at_5": 0.7915000000000001, "mrr_at_10": 0.7985238095238096, "mrr_at_20": 0.799357142857143, "mrr_at_100": 0.7997142857142859, "mrr_at_1000": 0.7997142857142859, "naucs_at_1_max": 0.2058945477161214, "naucs_at_1_std": -0.3255476470181746, "naucs_at_1_diff1": 0.6658489392578257, "naucs_at_3_max": 0.26312671258238784, "naucs_at_3_std": -0.1116418573650315, "naucs_at_3_diff1": 0.5204769310523577, "naucs_at_5_max": 0.26463918900894084, "naucs_at_5_std": 0.08563425370148234, "naucs_at_5_diff1": 0.3650126717353644, "naucs_at_10_max": 0.7957516339869297, "naucs_at_10_std": 0.12278244631185727, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": 0.12278244631185926, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_artificial_intelligence_test": {"ndcg_at_1": 0.94, "ndcg_at_3": 0.97024, "ndcg_at_5": 0.97454, "ndcg_at_10": 0.97454, "ndcg_at_20": 0.97454, "ndcg_at_100": 0.97454, "ndcg_at_1000": 0.97454, "map_at_1": 0.94, "map_at_3": 0.96333, "map_at_5": 0.96583, "map_at_10": 0.96583, "map_at_20": 0.96583, "map_at_100": 0.96583, "map_at_1000": 0.96583, "recall_at_1": 0.94, "recall_at_3": 0.99, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.94, "precision_at_3": 0.33, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.94, "mrr_at_3": 0.9633333333333333, "mrr_at_5": 0.9658333333333333, "mrr_at_10": 0.9658333333333333, "mrr_at_20": 0.9658333333333333, "mrr_at_100": 0.9658333333333333, "mrr_at_1000": 0.9658333333333333, "naucs_at_1_max": 0.28003423591658866, "naucs_at_1_std": -0.3201058201058171, "naucs_at_1_diff1": 0.9564270152505444, "naucs_at_3_max": 1.0, "naucs_at_3_std": -0.5634920634921204, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_energy_test": {"ndcg_at_1": 0.87, "ndcg_at_3": 0.91655, "ndcg_at_5": 0.92859, "ndcg_at_10": 0.9316, "ndcg_at_20": 0.9316, "ndcg_at_100": 0.93378, "ndcg_at_1000": 0.93378, "map_at_1": 0.87, "map_at_3": 0.905, "map_at_5": 0.9115, "map_at_10": 0.91261, "map_at_20": 0.91261, "map_at_100": 0.91305, "map_at_1000": 0.91305, "recall_at_1": 0.87, "recall_at_3": 0.95, "recall_at_5": 0.98, "recall_at_10": 0.99, "recall_at_20": 0.99, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.87, "precision_at_3": 0.31667, "precision_at_5": 0.196, "precision_at_10": 0.099, "precision_at_20": 0.0495, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.87, "mrr_at_3": 0.91, "mrr_at_5": 0.9139999999999999, "mrr_at_10": 0.9152499999999999, "mrr_at_20": 0.9152499999999999, "mrr_at_100": 0.9157045454545454, "mrr_at_1000": 0.9157045454545454, "naucs_at_1_max": 0.2051766274161294, "naucs_at_1_std": -0.443160779086129, "naucs_at_1_diff1": 0.9218692142486855, "naucs_at_3_max": -0.14724556489262278, "naucs_at_3_std": -0.7007469654528471, "naucs_at_3_diff1": 0.9477124183006521, "naucs_at_5_max": -0.661531279178339, "naucs_at_5_std": -0.9556489262371534, "naucs_at_5_diff1": 0.9346405228758136, "naucs_at_10_max": -1.1517273576097316, "naucs_at_10_std": -1.7399626517273863, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": -1.1517273576097316, "naucs_at_20_std": -1.7399626517273863, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_government_reports_test": {"ndcg_at_1": 0.92, "ndcg_at_3": 0.95655, "ndcg_at_5": 0.96042, "ndcg_at_10": 0.96398, "ndcg_at_20": 0.96398, "ndcg_at_100": 0.96398, "ndcg_at_1000": 0.96398, "map_at_1": 0.92, "map_at_3": 0.94833, "map_at_5": 0.95033, "map_at_10": 0.952, "map_at_20": 0.952, "map_at_100": 0.952, "map_at_1000": 0.952, "recall_at_1": 0.92, "recall_at_3": 0.98, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.32667, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.94, "mrr_at_3": 0.9583333333333333, "mrr_at_5": 0.9603333333333333, "mrr_at_10": 0.9620000000000001, "mrr_at_20": 0.9620000000000001, "mrr_at_100": 0.9620000000000001, "mrr_at_1000": 0.9620000000000001, "naucs_at_1_max": 0.8082983193277313, "naucs_at_1_std": -0.12400793650793779, "naucs_at_1_diff1": 0.9325980392156855, "naucs_at_3_max": 0.7770774976657261, "naucs_at_3_std": -0.9556489262371661, "naucs_at_3_diff1": 0.8611111111111119, "naucs_at_5_max": 0.5541549953314738, "naucs_at_5_std": -1.7399626517273863, "naucs_at_5_diff1": 0.7222222222222276, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}, "vidore/syntheticDocQA_healthcare_industry_test": {"ndcg_at_1": 0.91, "ndcg_at_3": 0.96417, "ndcg_at_5": 0.96417, "ndcg_at_10": 0.96417, "ndcg_at_20": 0.96417, "ndcg_at_100": 0.96417, "ndcg_at_1000": 0.96417, "map_at_1": 0.91, "map_at_3": 0.95167, "map_at_5": 0.95167, "map_at_10": 0.95167, "map_at_20": 0.95167, "map_at_100": 0.95167, "map_at_1000": 0.95167, "recall_at_1": 0.91, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_100": 1.0, "recall_at_1000": 1.0, "precision_at_1": 0.91, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_100": 0.01, "precision_at_1000": 0.001, "mrr_at_1": 0.92, "mrr_at_3": 0.9583333333333335, "mrr_at_5": 0.9583333333333335, "mrr_at_10": 0.9583333333333335, "mrr_at_20": 0.9583333333333335, "mrr_at_100": 0.9583333333333335, "mrr_at_1000": 0.9583333333333335, "naucs_at_1_max": 0.6672891378773725, "naucs_at_1_std": -0.03304284676833719, "naucs_at_1_diff1": 0.9400871459694983, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_100_max": NaN, "naucs_at_100_std": NaN, "naucs_at_100_diff1": NaN, "naucs_at_1000_max": NaN, "naucs_at_1000_std": NaN, "naucs_at_1000_diff1": NaN}}