crumb committed on
Commit
e327d6a
·
verified ·
1 Parent(s): 52b299d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +66 -3
README.md CHANGED
@@ -11,9 +11,72 @@ tags: []
11
  | | |none | 25|acc_norm|0.2389|± |0.0125|
12
  |truthfulqa_mc2| 2|none | 0|acc |0.4297|± |0.0152|
13
  |winogrande| 1|none | 5|acc |0.5217|± | 0.014|
14
-
15
-
16
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  ## Model Details
18
 
19
  ### Model Description
 
11
  | | |none | 25|acc_norm|0.2389|± |0.0125|
12
  |truthfulqa_mc2| 2|none | 0|acc |0.4297|± |0.0152|
13
  |winogrande| 1|none | 5|acc |0.5217|± | 0.014|
14
+ |hellaswag| 1|none | 10|acc |0.2923|± |0.0045|
15
+ | | |none | 10|acc_norm|0.3198|± |0.0047|
16
+ |gsm8k| 3|strict-match | 5|exact_match|0.0068|± |0.0023|
17
+ | | |flexible-extract| 5|exact_match|0.0167|± |0.0035|
18
+
19
+ ### MMLU *(mean acc: 0.26727368421052633, stderr: 0.004481878288705264)*
20
+
21
+ | Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
22
+ |-----------------------------------|------:|------|-----:|------|-----:|---|-----:|
23
+ |world_religions | 0|none | 5|acc |0.2515|± |0.0333|
24
+ |virology | 0|none | 5|acc |0.2470|± |0.0336|
25
+ |us_foreign_policy | 0|none | 5|acc |0.2600|± |0.0441|
26
+ |sociology | 0|none | 5|acc |0.2090|± |0.0287|
27
+ |security_studies | 0|none | 5|acc |0.4041|± |0.0314|
28
+ |public_relations | 0|none | 5|acc |0.2182|± |0.0396|
29
+ |professional_psychology | 0|none | 5|acc |0.2386|± |0.0172|
30
+ |professional_medicine | 0|none | 5|acc |0.4338|± |0.0301|
31
+ |professional_law | 0|none | 5|acc |0.2464|± |0.0110|
32
+ |professional_accounting | 0|none | 5|acc |0.2482|± |0.0258|
33
+ |prehistory | 0|none | 5|acc |0.2284|± |0.0234|
34
+ |philosophy | 0|none | 5|acc |0.2733|± |0.0253|
35
+ |nutrition | 0|none | 5|acc |0.2810|± |0.0257|
36
+ |moral_scenarios | 0|none | 5|acc |0.2268|± |0.0140|
37
+ |moral_disputes | 0|none | 5|acc |0.2572|± |0.0235|
38
+ |miscellaneous | 0|none | 5|acc |0.2146|± |0.0147|
39
+ |medical_genetics | 0|none | 5|acc |0.3300|± |0.0473|
40
+ |marketing | 0|none | 5|acc |0.1880|± |0.0256|
41
+ |management | 0|none | 5|acc |0.3107|± |0.0458|
42
+ |machine_learning | 0|none | 5|acc |0.1339|± |0.0323|
43
+ |logical_fallacies | 0|none | 5|acc |0.2638|± |0.0346|
44
+ |jurisprudence | 0|none | 5|acc |0.2315|± |0.0408|
45
+ |international_law | 0|none | 5|acc |0.3636|± |0.0439|
46
+ |human_sexuality | 0|none | 5|acc |0.2290|± |0.0369|
47
+ |human_aging | 0|none | 5|acc |0.2242|± |0.0280|
48
+ |high_school_world_history | 0|none | 5|acc |0.2700|± |0.0289|
49
+ |high_school_us_history | 0|none | 5|acc |0.3039|± |0.0323|
50
+ |high_school_statistics | 0|none | 5|acc |0.4259|± |0.0337|
51
+ |high_school_psychology | 0|none | 5|acc |0.3138|± |0.0199|
52
+ |high_school_physics | 0|none | 5|acc |0.2384|± |0.0348|
53
+ |high_school_microeconomics | 0|none | 5|acc |0.2395|± |0.0277|
54
+ |high_school_mathematics | 0|none | 5|acc |0.2963|± |0.0278|
55
+ |high_school_macroeconomics | 0|none | 5|acc |0.3410|± |0.0240|
56
+ |high_school_government_and_politics| 0|none | 5|acc |0.3627|± |0.0347|
57
+ |high_school_geography | 0|none | 5|acc |0.3131|± |0.0330|
58
+ |high_school_european_history | 0|none | 5|acc |0.2848|± |0.0352|
59
+ |high_school_computer_science | 0|none | 5|acc |0.2400|± |0.0429|
60
+ |high_school_chemistry | 0|none | 5|acc |0.2611|± |0.0309|
61
+ |high_school_biology | 0|none | 5|acc |0.3097|± |0.0263|
62
+ |global_facts | 0|none | 5|acc |0.2800|± |0.0451|
63
+ |formal_logic | 0|none | 5|acc |0.1825|± |0.0346|
64
+ |elementary_mathematics | 0|none | 5|acc |0.2646|± |0.0227|
65
+ |electrical_engineering | 0|none | 5|acc |0.2690|± |0.0370|
66
+ |econometrics | 0|none | 5|acc |0.2368|± |0.0400|
67
+ |conceptual_physics | 0|none | 5|acc |0.2979|± |0.0299|
68
+ |computer_security | 0|none | 5|acc |0.1900|± |0.0394|
69
+ |college_physics | 0|none | 5|acc |0.2549|± |0.0434|
70
+ |college_medicine | 0|none | 5|acc |0.2197|± |0.0316|
71
+ |college_mathematics | 0|none | 5|acc |0.2700|± |0.0446|
72
+ |college_computer_science | 0|none | 5|acc |0.2200|± |0.0416|
73
+ |college_chemistry | 0|none | 5|acc |0.3000|± |0.0461|
74
+ |college_biology | 0|none | 5|acc |0.2778|± |0.0375|
75
+ |clinical_knowledge | 0|none | 5|acc |0.3094|± |0.0285|
76
+ |business_ethics | 0|none | 5|acc |0.1800|± |0.0386|
77
+ |astronomy | 0|none | 5|acc |0.2697|± |0.0361|
78
+ |anatomy | 0|none | 5|acc |0.2593|± |0.0379|
79
+ |abstract_algebra | 0|none | 5|acc |0.2400|± |0.0429|
80
  ## Model Details
81
 
82
  ### Model Description