sanjay920 committed on
Commit
f641683
·
verified ·
1 Parent(s): e7501d0

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -10,7 +10,7 @@ model-index:
10
  name: MMLU
11
  metrics:
12
  - type: 5-shot
13
- value: 58.9
14
  verified: false
15
  - task:
16
  type: text-generation
@@ -19,7 +19,7 @@ model-index:
19
  name: GPQA
20
  metrics:
21
  - type: 0-shot
22
- value: 29.91
23
  verified: false
24
  - task:
25
  type: text-generation
@@ -28,7 +28,7 @@ model-index:
28
  name: GSM-8K
29
  metrics:
30
  - type: 8-shot, CoT
31
- value: 34.12
32
  verified: false
33
  - task:
34
  type: text-generation
@@ -37,7 +37,7 @@ model-index:
37
  name: MATH
38
  metrics:
39
  - type: 4-shot, CoT
40
- value: 8.36
41
  verified: false
42
  - task:
43
  type: text-generation
@@ -46,7 +46,7 @@ model-index:
46
  name: MT-bench
47
  metrics:
48
  - type: GPT-4 as Judge
49
- value: 7.36
50
  verified: false
51
  tags:
52
  - function-calling
 
10
  name: MMLU
11
  metrics:
12
  - type: 5-shot
13
+ value: 64.39
14
  verified: false
15
  - task:
16
  type: text-generation
 
19
  name: GPQA
20
  metrics:
21
  - type: 0-shot
22
+ value: 31.70
23
  verified: false
24
  - task:
25
  type: text-generation
 
28
  name: GSM-8K
29
  metrics:
30
  - type: 8-shot, CoT
31
+ value: 68.99
32
  verified: false
33
  - task:
34
  type: text-generation
 
37
  name: MATH
38
  metrics:
39
  - type: 4-shot, CoT
40
+ value: 23.76
41
  verified: false
42
  - task:
43
  type: text-generation
 
46
  name: MT-bench
47
  metrics:
48
  - type: GPT-4 as Judge
49
+ value: 8.03
50
  verified: false
51
  tags:
52
  - function-calling