MariaOls commited on
Commit
ebd8bbb
·
verified ·
1 Parent(s): 5effa06

Upload folder using huggingface_hub

Browse files
checkpoint-1131/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec9b4e02309731a17a0e550199dfc0b653b8b2efb72a87cbe399f458df4d060a
3
  size 711449600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbfe56b9869f41724aeb21aff529b6fc717527dbece02e1d54b76e182981fe9d
3
  size 711449600
checkpoint-1131/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2cb20821891b7df7b9cffbd730ee591d833df2c136e22191a72e76023bd1592
3
  size 1423014650
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e09a0e36bd4fa895040b7ce89d9e58bec334afecef8e2cdd80c5b98483fbde5
3
  size 1423014650
checkpoint-1131/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9873248832555037,
3
  "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
@@ -10,192 +10,192 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
- "grad_norm": 8.819928169250488,
14
  "learning_rate": 6.622516556291392e-06,
15
- "loss": 0.6464,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
- "grad_norm": 6.598285675048828,
21
  "learning_rate": 1.3245033112582784e-05,
22
- "loss": 0.388,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
- "grad_norm": 0.30871227383613586,
28
  "learning_rate": 1.9867549668874173e-05,
29
- "loss": 0.1931,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
- "grad_norm": 6.666228294372559,
35
  "learning_rate": 1.9277818717759768e-05,
36
- "loss": 0.1591,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
- "grad_norm": 0.44178861379623413,
42
  "learning_rate": 1.8540899042004423e-05,
43
- "loss": 0.1984,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
- "grad_norm": 0.37462666630744934,
49
  "learning_rate": 1.780397936624908e-05,
50
- "loss": 0.1124,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
- "grad_norm": 0.0416572205722332,
56
  "learning_rate": 1.7067059690493736e-05,
57
- "loss": 0.0809,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
- "eval_accuracy": 0.9812206572769953,
63
- "eval_f1": 0.9865410497981157,
64
- "eval_loss": 0.0956883653998375,
65
- "eval_precision": 0.9932249322493225,
66
- "eval_recall": 0.9799465240641712,
67
- "eval_runtime": 60.5192,
68
- "eval_samples_per_second": 17.598,
69
- "eval_steps_per_second": 1.107,
70
  "step": 377
71
  },
72
  {
73
  "epoch": 1.0610079575596818,
74
- "grad_norm": 8.403841018676758,
75
  "learning_rate": 1.6330140014738394e-05,
76
- "loss": 0.0611,
77
  "step": 400
78
  },
79
  {
80
  "epoch": 1.193633952254642,
81
- "grad_norm": 0.022825542837381363,
82
  "learning_rate": 1.5593220338983053e-05,
83
- "loss": 0.0758,
84
  "step": 450
85
  },
86
  {
87
  "epoch": 1.3262599469496021,
88
- "grad_norm": 97.80863952636719,
89
  "learning_rate": 1.485630066322771e-05,
90
- "loss": 0.0747,
91
  "step": 500
92
  },
93
  {
94
  "epoch": 1.4588859416445623,
95
- "grad_norm": 0.03205716982483864,
96
  "learning_rate": 1.4119380987472366e-05,
97
- "loss": 0.0719,
98
  "step": 550
99
  },
100
  {
101
  "epoch": 1.5915119363395225,
102
- "grad_norm": 13.893011093139648,
103
  "learning_rate": 1.3382461311717023e-05,
104
- "loss": 0.1053,
105
  "step": 600
106
  },
107
  {
108
  "epoch": 1.7241379310344827,
109
- "grad_norm": 0.03504275158047676,
110
  "learning_rate": 1.2645541635961683e-05,
111
- "loss": 0.0494,
112
  "step": 650
113
  },
114
  {
115
  "epoch": 1.8567639257294428,
116
- "grad_norm": 0.11265891045331955,
117
  "learning_rate": 1.190862196020634e-05,
118
- "loss": 0.0142,
119
  "step": 700
120
  },
121
  {
122
  "epoch": 1.9893899204244032,
123
- "grad_norm": 0.06097806990146637,
124
  "learning_rate": 1.1171702284450996e-05,
125
- "loss": 0.048,
126
  "step": 750
127
  },
128
  {
129
  "epoch": 2.0,
130
- "eval_accuracy": 0.9784037558685446,
131
- "eval_f1": 0.984778292521509,
132
- "eval_loss": 0.17541147768497467,
133
- "eval_precision": 0.9750982961992136,
134
  "eval_recall": 0.9946524064171123,
135
- "eval_runtime": 65.725,
136
- "eval_samples_per_second": 16.204,
137
- "eval_steps_per_second": 1.019,
138
  "step": 754
139
  },
140
  {
141
  "epoch": 2.1220159151193636,
142
- "grad_norm": 0.010624129325151443,
143
  "learning_rate": 1.0434782608695653e-05,
144
- "loss": 0.0328,
145
  "step": 800
146
  },
147
  {
148
  "epoch": 2.2546419098143238,
149
- "grad_norm": 0.009882211685180664,
150
  "learning_rate": 9.697862932940311e-06,
151
- "loss": 0.0254,
152
  "step": 850
153
  },
154
  {
155
  "epoch": 2.387267904509284,
156
- "grad_norm": 0.006466939579695463,
157
  "learning_rate": 8.960943257184968e-06,
158
- "loss": 0.0412,
159
  "step": 900
160
  },
161
  {
162
  "epoch": 2.519893899204244,
163
- "grad_norm": 0.025009147822856903,
164
  "learning_rate": 8.224023581429625e-06,
165
- "loss": 0.0377,
166
  "step": 950
167
  },
168
  {
169
  "epoch": 2.6525198938992043,
170
- "grad_norm": 16.0838565826416,
171
  "learning_rate": 7.487103905674282e-06,
172
- "loss": 0.0263,
173
  "step": 1000
174
  },
175
  {
176
  "epoch": 2.7851458885941645,
177
- "grad_norm": 0.006907904986292124,
178
  "learning_rate": 6.750184229918939e-06,
179
- "loss": 0.0039,
180
  "step": 1050
181
  },
182
  {
183
  "epoch": 2.9177718832891246,
184
- "grad_norm": 0.03146808221936226,
185
  "learning_rate": 6.013264554163597e-06,
186
- "loss": 0.0266,
187
  "step": 1100
188
  },
189
  {
190
  "epoch": 3.0,
191
- "eval_accuracy": 0.9821596244131455,
192
- "eval_f1": 0.9873248832555037,
193
- "eval_loss": 0.12112097442150116,
194
- "eval_precision": 0.9853528628495339,
195
  "eval_recall": 0.9893048128342246,
196
- "eval_runtime": 65.4812,
197
- "eval_samples_per_second": 16.264,
198
- "eval_steps_per_second": 1.023,
199
  "step": 1131
200
  }
201
  ],
@@ -216,7 +216,7 @@
216
  "attributes": {}
217
  }
218
  },
219
- "total_flos": 639096753469440.0,
220
  "train_batch_size": 16,
221
  "trial_name": null,
222
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9899665551839465,
3
  "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
4
  "epoch": 3.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
+ "grad_norm": 6.313917636871338,
14
  "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6543,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
+ "grad_norm": 8.760651588439941,
21
  "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.3545,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
+ "grad_norm": 12.38838005065918,
28
  "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1951,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
+ "grad_norm": 13.237753868103027,
35
  "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1559,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
+ "grad_norm": 11.964133262634277,
42
  "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1602,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
+ "grad_norm": 27.106698989868164,
49
  "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1055,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.026046760380268097,
56
  "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.1148,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
+ "eval_accuracy": 0.9774647887323944,
63
+ "eval_f1": 0.9838492597577388,
64
+ "eval_loss": 0.08887767791748047,
65
+ "eval_precision": 0.9905149051490515,
66
+ "eval_recall": 0.9772727272727273,
67
+ "eval_runtime": 62.9416,
68
+ "eval_samples_per_second": 16.92,
69
+ "eval_steps_per_second": 1.064,
70
  "step": 377
71
  },
72
  {
73
  "epoch": 1.0610079575596818,
74
+ "grad_norm": 0.028772667050361633,
75
  "learning_rate": 1.6330140014738394e-05,
76
+ "loss": 0.0809,
77
  "step": 400
78
  },
79
  {
80
  "epoch": 1.193633952254642,
81
+ "grad_norm": 0.056088343262672424,
82
  "learning_rate": 1.5593220338983053e-05,
83
+ "loss": 0.0649,
84
  "step": 450
85
  },
86
  {
87
  "epoch": 1.3262599469496021,
88
+ "grad_norm": 6.098559379577637,
89
  "learning_rate": 1.485630066322771e-05,
90
+ "loss": 0.0768,
91
  "step": 500
92
  },
93
  {
94
  "epoch": 1.4588859416445623,
95
+ "grad_norm": 99.54315948486328,
96
  "learning_rate": 1.4119380987472366e-05,
97
+ "loss": 0.0453,
98
  "step": 550
99
  },
100
  {
101
  "epoch": 1.5915119363395225,
102
+ "grad_norm": 6.803869247436523,
103
  "learning_rate": 1.3382461311717023e-05,
104
+ "loss": 0.1294,
105
  "step": 600
106
  },
107
  {
108
  "epoch": 1.7241379310344827,
109
+ "grad_norm": 0.2954126298427582,
110
  "learning_rate": 1.2645541635961683e-05,
111
+ "loss": 0.0839,
112
  "step": 650
113
  },
114
  {
115
  "epoch": 1.8567639257294428,
116
+ "grad_norm": 0.047186098992824554,
117
  "learning_rate": 1.190862196020634e-05,
118
+ "loss": 0.0557,
119
  "step": 700
120
  },
121
  {
122
  "epoch": 1.9893899204244032,
123
+ "grad_norm": 0.12770341336727142,
124
  "learning_rate": 1.1171702284450996e-05,
125
+ "loss": 0.0466,
126
  "step": 750
127
  },
128
  {
129
  "epoch": 2.0,
130
+ "eval_accuracy": 0.9802816901408451,
131
+ "eval_f1": 0.986083499005964,
132
+ "eval_loss": 0.1403597742319107,
133
+ "eval_precision": 0.9776609724047306,
134
  "eval_recall": 0.9946524064171123,
135
+ "eval_runtime": 64.2771,
136
+ "eval_samples_per_second": 16.569,
137
+ "eval_steps_per_second": 1.042,
138
  "step": 754
139
  },
140
  {
141
  "epoch": 2.1220159151193636,
142
+ "grad_norm": 0.012713871896266937,
143
  "learning_rate": 1.0434782608695653e-05,
144
+ "loss": 0.0479,
145
  "step": 800
146
  },
147
  {
148
  "epoch": 2.2546419098143238,
149
+ "grad_norm": 0.013412756845355034,
150
  "learning_rate": 9.697862932940311e-06,
151
+ "loss": 0.0227,
152
  "step": 850
153
  },
154
  {
155
  "epoch": 2.387267904509284,
156
+ "grad_norm": 0.0069837020710110664,
157
  "learning_rate": 8.960943257184968e-06,
158
+ "loss": 0.024,
159
  "step": 900
160
  },
161
  {
162
  "epoch": 2.519893899204244,
163
+ "grad_norm": 0.006205807905644178,
164
  "learning_rate": 8.224023581429625e-06,
165
+ "loss": 0.0216,
166
  "step": 950
167
  },
168
  {
169
  "epoch": 2.6525198938992043,
170
+ "grad_norm": 0.013195905834436417,
171
  "learning_rate": 7.487103905674282e-06,
172
+ "loss": 0.0302,
173
  "step": 1000
174
  },
175
  {
176
  "epoch": 2.7851458885941645,
177
+ "grad_norm": 0.010757376439869404,
178
  "learning_rate": 6.750184229918939e-06,
179
+ "loss": 0.0021,
180
  "step": 1050
181
  },
182
  {
183
  "epoch": 2.9177718832891246,
184
+ "grad_norm": 25.593114852905273,
185
  "learning_rate": 6.013264554163597e-06,
186
+ "loss": 0.0222,
187
  "step": 1100
188
  },
189
  {
190
  "epoch": 3.0,
191
+ "eval_accuracy": 0.9859154929577465,
192
+ "eval_f1": 0.9899665551839465,
193
+ "eval_loss": 0.0968979001045227,
194
+ "eval_precision": 0.9906291834002677,
195
  "eval_recall": 0.9893048128342246,
196
+ "eval_runtime": 62.8419,
197
+ "eval_samples_per_second": 16.947,
198
+ "eval_steps_per_second": 1.066,
199
  "step": 1131
200
  }
201
  ],
 
216
  "attributes": {}
217
  }
218
  },
219
+ "total_flos": 628769644546560.0,
220
  "train_batch_size": 16,
221
  "trial_name": null,
222
  "trial_params": null
checkpoint-1131/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
3
  size 5304
checkpoint-1508/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b30d84210b336753e94b844397015ae6635e4a978e6b132eaca6da156c50aead
3
  size 711449600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e83f2f723e82f1966a8c36143d07eab2a1e2ee605a5f9037b01aee55dcf80a87
3
  size 711449600
checkpoint-1508/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9b811be240a41d4a804950f308a647956d67f40ae2709923fe949706ade9b7b
3
  size 1423014650
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fae5f2268ddbb577fba0afe39ecc58dafe67fe7c96fedb3ee8652afc0b77f68c
3
  size 1423014650
checkpoint-1508/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_metric": 0.9899665551839465,
3
- "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1508",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 1508,
@@ -10,260 +10,260 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
- "grad_norm": 8.819928169250488,
14
  "learning_rate": 6.622516556291392e-06,
15
- "loss": 0.6464,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
- "grad_norm": 6.598285675048828,
21
  "learning_rate": 1.3245033112582784e-05,
22
- "loss": 0.388,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
- "grad_norm": 0.30871227383613586,
28
  "learning_rate": 1.9867549668874173e-05,
29
- "loss": 0.1931,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
- "grad_norm": 6.666228294372559,
35
  "learning_rate": 1.9277818717759768e-05,
36
- "loss": 0.1591,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
- "grad_norm": 0.44178861379623413,
42
  "learning_rate": 1.8540899042004423e-05,
43
- "loss": 0.1984,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
- "grad_norm": 0.37462666630744934,
49
  "learning_rate": 1.780397936624908e-05,
50
- "loss": 0.1124,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
- "grad_norm": 0.0416572205722332,
56
  "learning_rate": 1.7067059690493736e-05,
57
- "loss": 0.0809,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
- "eval_accuracy": 0.9812206572769953,
63
- "eval_f1": 0.9865410497981157,
64
- "eval_loss": 0.0956883653998375,
65
- "eval_precision": 0.9932249322493225,
66
- "eval_recall": 0.9799465240641712,
67
- "eval_runtime": 60.5192,
68
- "eval_samples_per_second": 17.598,
69
- "eval_steps_per_second": 1.107,
70
  "step": 377
71
  },
72
  {
73
  "epoch": 1.0610079575596818,
74
- "grad_norm": 8.403841018676758,
75
  "learning_rate": 1.6330140014738394e-05,
76
- "loss": 0.0611,
77
  "step": 400
78
  },
79
  {
80
  "epoch": 1.193633952254642,
81
- "grad_norm": 0.022825542837381363,
82
  "learning_rate": 1.5593220338983053e-05,
83
- "loss": 0.0758,
84
  "step": 450
85
  },
86
  {
87
  "epoch": 1.3262599469496021,
88
- "grad_norm": 97.80863952636719,
89
  "learning_rate": 1.485630066322771e-05,
90
- "loss": 0.0747,
91
  "step": 500
92
  },
93
  {
94
  "epoch": 1.4588859416445623,
95
- "grad_norm": 0.03205716982483864,
96
  "learning_rate": 1.4119380987472366e-05,
97
- "loss": 0.0719,
98
  "step": 550
99
  },
100
  {
101
  "epoch": 1.5915119363395225,
102
- "grad_norm": 13.893011093139648,
103
  "learning_rate": 1.3382461311717023e-05,
104
- "loss": 0.1053,
105
  "step": 600
106
  },
107
  {
108
  "epoch": 1.7241379310344827,
109
- "grad_norm": 0.03504275158047676,
110
  "learning_rate": 1.2645541635961683e-05,
111
- "loss": 0.0494,
112
  "step": 650
113
  },
114
  {
115
  "epoch": 1.8567639257294428,
116
- "grad_norm": 0.11265891045331955,
117
  "learning_rate": 1.190862196020634e-05,
118
- "loss": 0.0142,
119
  "step": 700
120
  },
121
  {
122
  "epoch": 1.9893899204244032,
123
- "grad_norm": 0.06097806990146637,
124
  "learning_rate": 1.1171702284450996e-05,
125
- "loss": 0.048,
126
  "step": 750
127
  },
128
  {
129
  "epoch": 2.0,
130
- "eval_accuracy": 0.9784037558685446,
131
- "eval_f1": 0.984778292521509,
132
- "eval_loss": 0.17541147768497467,
133
- "eval_precision": 0.9750982961992136,
134
  "eval_recall": 0.9946524064171123,
135
- "eval_runtime": 65.725,
136
- "eval_samples_per_second": 16.204,
137
- "eval_steps_per_second": 1.019,
138
  "step": 754
139
  },
140
  {
141
  "epoch": 2.1220159151193636,
142
- "grad_norm": 0.010624129325151443,
143
  "learning_rate": 1.0434782608695653e-05,
144
- "loss": 0.0328,
145
  "step": 800
146
  },
147
  {
148
  "epoch": 2.2546419098143238,
149
- "grad_norm": 0.009882211685180664,
150
  "learning_rate": 9.697862932940311e-06,
151
- "loss": 0.0254,
152
  "step": 850
153
  },
154
  {
155
  "epoch": 2.387267904509284,
156
- "grad_norm": 0.006466939579695463,
157
  "learning_rate": 8.960943257184968e-06,
158
- "loss": 0.0412,
159
  "step": 900
160
  },
161
  {
162
  "epoch": 2.519893899204244,
163
- "grad_norm": 0.025009147822856903,
164
  "learning_rate": 8.224023581429625e-06,
165
- "loss": 0.0377,
166
  "step": 950
167
  },
168
  {
169
  "epoch": 2.6525198938992043,
170
- "grad_norm": 16.0838565826416,
171
  "learning_rate": 7.487103905674282e-06,
172
- "loss": 0.0263,
173
  "step": 1000
174
  },
175
  {
176
  "epoch": 2.7851458885941645,
177
- "grad_norm": 0.006907904986292124,
178
  "learning_rate": 6.750184229918939e-06,
179
- "loss": 0.0039,
180
  "step": 1050
181
  },
182
  {
183
  "epoch": 2.9177718832891246,
184
- "grad_norm": 0.03146808221936226,
185
  "learning_rate": 6.013264554163597e-06,
186
- "loss": 0.0266,
187
  "step": 1100
188
  },
189
  {
190
  "epoch": 3.0,
191
- "eval_accuracy": 0.9821596244131455,
192
- "eval_f1": 0.9873248832555037,
193
- "eval_loss": 0.12112097442150116,
194
- "eval_precision": 0.9853528628495339,
195
  "eval_recall": 0.9893048128342246,
196
- "eval_runtime": 65.4812,
197
- "eval_samples_per_second": 16.264,
198
- "eval_steps_per_second": 1.023,
199
  "step": 1131
200
  },
201
  {
202
  "epoch": 3.050397877984085,
203
- "grad_norm": 0.00711169233545661,
204
  "learning_rate": 5.276344878408254e-06,
205
- "loss": 0.0191,
206
  "step": 1150
207
  },
208
  {
209
  "epoch": 3.183023872679045,
210
- "grad_norm": 0.10712441056966782,
211
  "learning_rate": 4.5394252026529115e-06,
212
- "loss": 0.0079,
213
  "step": 1200
214
  },
215
  {
216
  "epoch": 3.315649867374005,
217
- "grad_norm": 0.014097067527472973,
218
  "learning_rate": 3.8025055268975686e-06,
219
- "loss": 0.0218,
220
  "step": 1250
221
  },
222
  {
223
  "epoch": 3.4482758620689653,
224
- "grad_norm": 0.08094095438718796,
225
  "learning_rate": 3.065585851142226e-06,
226
- "loss": 0.0053,
227
  "step": 1300
228
  },
229
  {
230
  "epoch": 3.5809018567639255,
231
- "grad_norm": 0.012457519769668579,
232
  "learning_rate": 2.328666175386883e-06,
233
- "loss": 0.0003,
234
  "step": 1350
235
  },
236
  {
237
  "epoch": 3.713527851458886,
238
- "grad_norm": 0.05693735554814339,
239
  "learning_rate": 1.59174649963154e-06,
240
- "loss": 0.0003,
241
  "step": 1400
242
  },
243
  {
244
  "epoch": 3.8461538461538463,
245
- "grad_norm": 0.004445453640073538,
246
  "learning_rate": 8.548268238761975e-07,
247
- "loss": 0.0246,
248
  "step": 1450
249
  },
250
  {
251
  "epoch": 3.9787798408488064,
252
- "grad_norm": 0.004754351451992989,
253
  "learning_rate": 1.1790714812085484e-07,
254
- "loss": 0.0111,
255
  "step": 1500
256
  },
257
  {
258
  "epoch": 4.0,
259
- "eval_accuracy": 0.9859154929577465,
260
- "eval_f1": 0.9899665551839465,
261
- "eval_loss": 0.10738077014684677,
262
- "eval_precision": 0.9906291834002677,
263
- "eval_recall": 0.9893048128342246,
264
- "eval_runtime": 65.4731,
265
- "eval_samples_per_second": 16.266,
266
- "eval_steps_per_second": 1.023,
267
  "step": 1508
268
  }
269
  ],
@@ -284,7 +284,7 @@
284
  "attributes": {}
285
  }
286
  },
287
- "total_flos": 850572264215040.0,
288
  "train_batch_size": 16,
289
  "trial_name": null,
290
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9899665551839465,
3
+ "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
4
  "epoch": 4.0,
5
  "eval_steps": 500,
6
  "global_step": 1508,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
+ "grad_norm": 6.313917636871338,
14
  "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6543,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
+ "grad_norm": 8.760651588439941,
21
  "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.3545,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
+ "grad_norm": 12.38838005065918,
28
  "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1951,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
+ "grad_norm": 13.237753868103027,
35
  "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1559,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
+ "grad_norm": 11.964133262634277,
42
  "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1602,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
+ "grad_norm": 27.106698989868164,
49
  "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1055,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.026046760380268097,
56
  "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.1148,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
+ "eval_accuracy": 0.9774647887323944,
63
+ "eval_f1": 0.9838492597577388,
64
+ "eval_loss": 0.08887767791748047,
65
+ "eval_precision": 0.9905149051490515,
66
+ "eval_recall": 0.9772727272727273,
67
+ "eval_runtime": 62.9416,
68
+ "eval_samples_per_second": 16.92,
69
+ "eval_steps_per_second": 1.064,
70
  "step": 377
71
  },
72
  {
73
  "epoch": 1.0610079575596818,
74
+ "grad_norm": 0.028772667050361633,
75
  "learning_rate": 1.6330140014738394e-05,
76
+ "loss": 0.0809,
77
  "step": 400
78
  },
79
  {
80
  "epoch": 1.193633952254642,
81
+ "grad_norm": 0.056088343262672424,
82
  "learning_rate": 1.5593220338983053e-05,
83
+ "loss": 0.0649,
84
  "step": 450
85
  },
86
  {
87
  "epoch": 1.3262599469496021,
88
+ "grad_norm": 6.098559379577637,
89
  "learning_rate": 1.485630066322771e-05,
90
+ "loss": 0.0768,
91
  "step": 500
92
  },
93
  {
94
  "epoch": 1.4588859416445623,
95
+ "grad_norm": 99.54315948486328,
96
  "learning_rate": 1.4119380987472366e-05,
97
+ "loss": 0.0453,
98
  "step": 550
99
  },
100
  {
101
  "epoch": 1.5915119363395225,
102
+ "grad_norm": 6.803869247436523,
103
  "learning_rate": 1.3382461311717023e-05,
104
+ "loss": 0.1294,
105
  "step": 600
106
  },
107
  {
108
  "epoch": 1.7241379310344827,
109
+ "grad_norm": 0.2954126298427582,
110
  "learning_rate": 1.2645541635961683e-05,
111
+ "loss": 0.0839,
112
  "step": 650
113
  },
114
  {
115
  "epoch": 1.8567639257294428,
116
+ "grad_norm": 0.047186098992824554,
117
  "learning_rate": 1.190862196020634e-05,
118
+ "loss": 0.0557,
119
  "step": 700
120
  },
121
  {
122
  "epoch": 1.9893899204244032,
123
+ "grad_norm": 0.12770341336727142,
124
  "learning_rate": 1.1171702284450996e-05,
125
+ "loss": 0.0466,
126
  "step": 750
127
  },
128
  {
129
  "epoch": 2.0,
130
+ "eval_accuracy": 0.9802816901408451,
131
+ "eval_f1": 0.986083499005964,
132
+ "eval_loss": 0.1403597742319107,
133
+ "eval_precision": 0.9776609724047306,
134
  "eval_recall": 0.9946524064171123,
135
+ "eval_runtime": 64.2771,
136
+ "eval_samples_per_second": 16.569,
137
+ "eval_steps_per_second": 1.042,
138
  "step": 754
139
  },
140
  {
141
  "epoch": 2.1220159151193636,
142
+ "grad_norm": 0.012713871896266937,
143
  "learning_rate": 1.0434782608695653e-05,
144
+ "loss": 0.0479,
145
  "step": 800
146
  },
147
  {
148
  "epoch": 2.2546419098143238,
149
+ "grad_norm": 0.013412756845355034,
150
  "learning_rate": 9.697862932940311e-06,
151
+ "loss": 0.0227,
152
  "step": 850
153
  },
154
  {
155
  "epoch": 2.387267904509284,
156
+ "grad_norm": 0.0069837020710110664,
157
  "learning_rate": 8.960943257184968e-06,
158
+ "loss": 0.024,
159
  "step": 900
160
  },
161
  {
162
  "epoch": 2.519893899204244,
163
+ "grad_norm": 0.006205807905644178,
164
  "learning_rate": 8.224023581429625e-06,
165
+ "loss": 0.0216,
166
  "step": 950
167
  },
168
  {
169
  "epoch": 2.6525198938992043,
170
+ "grad_norm": 0.013195905834436417,
171
  "learning_rate": 7.487103905674282e-06,
172
+ "loss": 0.0302,
173
  "step": 1000
174
  },
175
  {
176
  "epoch": 2.7851458885941645,
177
+ "grad_norm": 0.010757376439869404,
178
  "learning_rate": 6.750184229918939e-06,
179
+ "loss": 0.0021,
180
  "step": 1050
181
  },
182
  {
183
  "epoch": 2.9177718832891246,
184
+ "grad_norm": 25.593114852905273,
185
  "learning_rate": 6.013264554163597e-06,
186
+ "loss": 0.0222,
187
  "step": 1100
188
  },
189
  {
190
  "epoch": 3.0,
191
+ "eval_accuracy": 0.9859154929577465,
192
+ "eval_f1": 0.9899665551839465,
193
+ "eval_loss": 0.0968979001045227,
194
+ "eval_precision": 0.9906291834002677,
195
  "eval_recall": 0.9893048128342246,
196
+ "eval_runtime": 62.8419,
197
+ "eval_samples_per_second": 16.947,
198
+ "eval_steps_per_second": 1.066,
199
  "step": 1131
200
  },
201
  {
202
  "epoch": 3.050397877984085,
203
+ "grad_norm": 0.005515966564416885,
204
  "learning_rate": 5.276344878408254e-06,
205
+ "loss": 0.0211,
206
  "step": 1150
207
  },
208
  {
209
  "epoch": 3.183023872679045,
210
+ "grad_norm": 0.007331592496484518,
211
  "learning_rate": 4.5394252026529115e-06,
212
+ "loss": 0.0045,
213
  "step": 1200
214
  },
215
  {
216
  "epoch": 3.315649867374005,
217
+ "grad_norm": 0.0053366441279649734,
218
  "learning_rate": 3.8025055268975686e-06,
219
+ "loss": 0.0003,
220
  "step": 1250
221
  },
222
  {
223
  "epoch": 3.4482758620689653,
224
+ "grad_norm": 0.00485859764739871,
225
  "learning_rate": 3.065585851142226e-06,
226
+ "loss": 0.0056,
227
  "step": 1300
228
  },
229
  {
230
  "epoch": 3.5809018567639255,
231
+ "grad_norm": 0.005309904459863901,
232
  "learning_rate": 2.328666175386883e-06,
233
+ "loss": 0.0072,
234
  "step": 1350
235
  },
236
  {
237
  "epoch": 3.713527851458886,
238
+ "grad_norm": 0.003933363128453493,
239
  "learning_rate": 1.59174649963154e-06,
240
+ "loss": 0.005,
241
  "step": 1400
242
  },
243
  {
244
  "epoch": 3.8461538461538463,
245
+ "grad_norm": 0.0033942251466214657,
246
  "learning_rate": 8.548268238761975e-07,
247
+ "loss": 0.0002,
248
  "step": 1450
249
  },
250
  {
251
  "epoch": 3.9787798408488064,
252
+ "grad_norm": 0.0044485898688435555,
253
  "learning_rate": 1.1790714812085484e-07,
254
+ "loss": 0.0037,
255
  "step": 1500
256
  },
257
  {
258
  "epoch": 4.0,
259
+ "eval_accuracy": 0.984037558685446,
260
+ "eval_f1": 0.9886135298057601,
261
+ "eval_loss": 0.116817407310009,
262
+ "eval_precision": 0.9906040268456375,
263
+ "eval_recall": 0.9866310160427807,
264
+ "eval_runtime": 62.7013,
265
+ "eval_samples_per_second": 16.985,
266
+ "eval_steps_per_second": 1.069,
267
  "step": 1508
268
  }
269
  ],
 
284
  "attributes": {}
285
  }
286
  },
287
+ "total_flos": 839587377653760.0,
288
  "train_batch_size": 16,
289
  "trial_name": null,
290
  "trial_params": null
checkpoint-1508/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
3
  size 5304
checkpoint-377/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ded7c527bf4f9cf448e7a1f8c244f442ee35e8ddf0b77ce3ce54bb9f8e4ce263
3
  size 711449600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c901a19e6b36a140f7d29fba603543b97cd75e33362611614d8986e508beef
3
  size 711449600
checkpoint-377/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:155a5a5f11c545764eead711ae7536af829e153aa81aca1630679af82398d252
3
  size 1423014650
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80af33ce0ab52011f40575eac75a4c30cc4c58aacd2f7b923c777e87018db57c
3
  size 1423014650
checkpoint-377/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9865410497981157,
3
  "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,63 +10,63 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
- "grad_norm": 8.819928169250488,
14
  "learning_rate": 6.622516556291392e-06,
15
- "loss": 0.6464,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
- "grad_norm": 6.598285675048828,
21
  "learning_rate": 1.3245033112582784e-05,
22
- "loss": 0.388,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
- "grad_norm": 0.30871227383613586,
28
  "learning_rate": 1.9867549668874173e-05,
29
- "loss": 0.1931,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
- "grad_norm": 6.666228294372559,
35
  "learning_rate": 1.9277818717759768e-05,
36
- "loss": 0.1591,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
- "grad_norm": 0.44178861379623413,
42
  "learning_rate": 1.8540899042004423e-05,
43
- "loss": 0.1984,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
- "grad_norm": 0.37462666630744934,
49
  "learning_rate": 1.780397936624908e-05,
50
- "loss": 0.1124,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
- "grad_norm": 0.0416572205722332,
56
  "learning_rate": 1.7067059690493736e-05,
57
- "loss": 0.0809,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
- "eval_accuracy": 0.9812206572769953,
63
- "eval_f1": 0.9865410497981157,
64
- "eval_loss": 0.0956883653998375,
65
- "eval_precision": 0.9932249322493225,
66
- "eval_recall": 0.9799465240641712,
67
- "eval_runtime": 60.5192,
68
- "eval_samples_per_second": 17.598,
69
- "eval_steps_per_second": 1.107,
70
  "step": 377
71
  }
72
  ],
@@ -87,7 +87,7 @@
87
  "attributes": {}
88
  }
89
  },
90
- "total_flos": 213580399188480.0,
91
  "train_batch_size": 16,
92
  "trial_name": null,
93
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9838492597577388,
3
  "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
+ "grad_norm": 6.313917636871338,
14
  "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6543,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
+ "grad_norm": 8.760651588439941,
21
  "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.3545,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
+ "grad_norm": 12.38838005065918,
28
  "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1951,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
+ "grad_norm": 13.237753868103027,
35
  "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1559,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
+ "grad_norm": 11.964133262634277,
42
  "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1602,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
+ "grad_norm": 27.106698989868164,
49
  "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1055,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.026046760380268097,
56
  "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.1148,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
+ "eval_accuracy": 0.9774647887323944,
63
+ "eval_f1": 0.9838492597577388,
64
+ "eval_loss": 0.08887767791748047,
65
+ "eval_precision": 0.9905149051490515,
66
+ "eval_recall": 0.9772727272727273,
67
+ "eval_runtime": 62.9416,
68
+ "eval_samples_per_second": 16.92,
69
+ "eval_steps_per_second": 1.064,
70
  "step": 377
71
  }
72
  ],
 
87
  "attributes": {}
88
  }
89
  },
90
+ "total_flos": 207660400442880.0,
91
  "train_batch_size": 16,
92
  "trial_name": null,
93
  "trial_params": null
checkpoint-377/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
3
  size 5304
checkpoint-754/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe96bf17c5ab694697666cdeed273085c6e509493dc0d2f29322ae07db9ad68
3
  size 711449600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc75dfbec12aad2bf8a2060a9fdccaae09d8d8a4174df16224891cdaef4a061
3
  size 711449600
checkpoint-754/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44199bd2dbf22a5f947f048391a17d003c7e0d73ef60c43dffd44b21ea64cde3
3
  size 1423014650
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10b1cf27a5b28b159414ba7755016549a6483ac7dee7ad6c97641445afeaa50b
3
  size 1423014650
checkpoint-754/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9865410497981157,
3
- "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 754,
@@ -10,131 +10,131 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
- "grad_norm": 8.819928169250488,
14
  "learning_rate": 6.622516556291392e-06,
15
- "loss": 0.6464,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
- "grad_norm": 6.598285675048828,
21
  "learning_rate": 1.3245033112582784e-05,
22
- "loss": 0.388,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
- "grad_norm": 0.30871227383613586,
28
  "learning_rate": 1.9867549668874173e-05,
29
- "loss": 0.1931,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
- "grad_norm": 6.666228294372559,
35
  "learning_rate": 1.9277818717759768e-05,
36
- "loss": 0.1591,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
- "grad_norm": 0.44178861379623413,
42
  "learning_rate": 1.8540899042004423e-05,
43
- "loss": 0.1984,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
- "grad_norm": 0.37462666630744934,
49
  "learning_rate": 1.780397936624908e-05,
50
- "loss": 0.1124,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
- "grad_norm": 0.0416572205722332,
56
  "learning_rate": 1.7067059690493736e-05,
57
- "loss": 0.0809,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
- "eval_accuracy": 0.9812206572769953,
63
- "eval_f1": 0.9865410497981157,
64
- "eval_loss": 0.0956883653998375,
65
- "eval_precision": 0.9932249322493225,
66
- "eval_recall": 0.9799465240641712,
67
- "eval_runtime": 60.5192,
68
- "eval_samples_per_second": 17.598,
69
- "eval_steps_per_second": 1.107,
70
  "step": 377
71
  },
72
  {
73
  "epoch": 1.0610079575596818,
74
- "grad_norm": 8.403841018676758,
75
  "learning_rate": 1.6330140014738394e-05,
76
- "loss": 0.0611,
77
  "step": 400
78
  },
79
  {
80
  "epoch": 1.193633952254642,
81
- "grad_norm": 0.022825542837381363,
82
  "learning_rate": 1.5593220338983053e-05,
83
- "loss": 0.0758,
84
  "step": 450
85
  },
86
  {
87
  "epoch": 1.3262599469496021,
88
- "grad_norm": 97.80863952636719,
89
  "learning_rate": 1.485630066322771e-05,
90
- "loss": 0.0747,
91
  "step": 500
92
  },
93
  {
94
  "epoch": 1.4588859416445623,
95
- "grad_norm": 0.03205716982483864,
96
  "learning_rate": 1.4119380987472366e-05,
97
- "loss": 0.0719,
98
  "step": 550
99
  },
100
  {
101
  "epoch": 1.5915119363395225,
102
- "grad_norm": 13.893011093139648,
103
  "learning_rate": 1.3382461311717023e-05,
104
- "loss": 0.1053,
105
  "step": 600
106
  },
107
  {
108
  "epoch": 1.7241379310344827,
109
- "grad_norm": 0.03504275158047676,
110
  "learning_rate": 1.2645541635961683e-05,
111
- "loss": 0.0494,
112
  "step": 650
113
  },
114
  {
115
  "epoch": 1.8567639257294428,
116
- "grad_norm": 0.11265891045331955,
117
  "learning_rate": 1.190862196020634e-05,
118
- "loss": 0.0142,
119
  "step": 700
120
  },
121
  {
122
  "epoch": 1.9893899204244032,
123
- "grad_norm": 0.06097806990146637,
124
  "learning_rate": 1.1171702284450996e-05,
125
- "loss": 0.048,
126
  "step": 750
127
  },
128
  {
129
  "epoch": 2.0,
130
- "eval_accuracy": 0.9784037558685446,
131
- "eval_f1": 0.984778292521509,
132
- "eval_loss": 0.17541147768497467,
133
- "eval_precision": 0.9750982961992136,
134
  "eval_recall": 0.9946524064171123,
135
- "eval_runtime": 65.725,
136
- "eval_samples_per_second": 16.204,
137
- "eval_steps_per_second": 1.019,
138
  "step": 754
139
  }
140
  ],
@@ -155,7 +155,7 @@
155
  "attributes": {}
156
  }
157
  },
158
- "total_flos": 427095020613120.0,
159
  "train_batch_size": 16,
160
  "trial_name": null,
161
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.986083499005964,
3
+ "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-754",
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 754,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.13262599469496023,
13
+ "grad_norm": 6.313917636871338,
14
  "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6543,
16
  "step": 50
17
  },
18
  {
19
  "epoch": 0.26525198938992045,
20
+ "grad_norm": 8.760651588439941,
21
  "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.3545,
23
  "step": 100
24
  },
25
  {
26
  "epoch": 0.3978779840848806,
27
+ "grad_norm": 12.38838005065918,
28
  "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1951,
30
  "step": 150
31
  },
32
  {
33
  "epoch": 0.5305039787798409,
34
+ "grad_norm": 13.237753868103027,
35
  "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1559,
37
  "step": 200
38
  },
39
  {
40
  "epoch": 0.6631299734748011,
41
+ "grad_norm": 11.964133262634277,
42
  "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1602,
44
  "step": 250
45
  },
46
  {
47
  "epoch": 0.7957559681697612,
48
+ "grad_norm": 27.106698989868164,
49
  "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1055,
51
  "step": 300
52
  },
53
  {
54
  "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.026046760380268097,
56
  "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.1148,
58
  "step": 350
59
  },
60
  {
61
  "epoch": 1.0,
62
+ "eval_accuracy": 0.9774647887323944,
63
+ "eval_f1": 0.9838492597577388,
64
+ "eval_loss": 0.08887767791748047,
65
+ "eval_precision": 0.9905149051490515,
66
+ "eval_recall": 0.9772727272727273,
67
+ "eval_runtime": 62.9416,
68
+ "eval_samples_per_second": 16.92,
69
+ "eval_steps_per_second": 1.064,
70
  "step": 377
71
  },
72
  {
73
  "epoch": 1.0610079575596818,
74
+ "grad_norm": 0.028772667050361633,
75
  "learning_rate": 1.6330140014738394e-05,
76
+ "loss": 0.0809,
77
  "step": 400
78
  },
79
  {
80
  "epoch": 1.193633952254642,
81
+ "grad_norm": 0.056088343262672424,
82
  "learning_rate": 1.5593220338983053e-05,
83
+ "loss": 0.0649,
84
  "step": 450
85
  },
86
  {
87
  "epoch": 1.3262599469496021,
88
+ "grad_norm": 6.098559379577637,
89
  "learning_rate": 1.485630066322771e-05,
90
+ "loss": 0.0768,
91
  "step": 500
92
  },
93
  {
94
  "epoch": 1.4588859416445623,
95
+ "grad_norm": 99.54315948486328,
96
  "learning_rate": 1.4119380987472366e-05,
97
+ "loss": 0.0453,
98
  "step": 550
99
  },
100
  {
101
  "epoch": 1.5915119363395225,
102
+ "grad_norm": 6.803869247436523,
103
  "learning_rate": 1.3382461311717023e-05,
104
+ "loss": 0.1294,
105
  "step": 600
106
  },
107
  {
108
  "epoch": 1.7241379310344827,
109
+ "grad_norm": 0.2954126298427582,
110
  "learning_rate": 1.2645541635961683e-05,
111
+ "loss": 0.0839,
112
  "step": 650
113
  },
114
  {
115
  "epoch": 1.8567639257294428,
116
+ "grad_norm": 0.047186098992824554,
117
  "learning_rate": 1.190862196020634e-05,
118
+ "loss": 0.0557,
119
  "step": 700
120
  },
121
  {
122
  "epoch": 1.9893899204244032,
123
+ "grad_norm": 0.12770341336727142,
124
  "learning_rate": 1.1171702284450996e-05,
125
+ "loss": 0.0466,
126
  "step": 750
127
  },
128
  {
129
  "epoch": 2.0,
130
+ "eval_accuracy": 0.9802816901408451,
131
+ "eval_f1": 0.986083499005964,
132
+ "eval_loss": 0.1403597742319107,
133
+ "eval_precision": 0.9776609724047306,
134
  "eval_recall": 0.9946524064171123,
135
+ "eval_runtime": 64.2771,
136
+ "eval_samples_per_second": 16.569,
137
+ "eval_steps_per_second": 1.042,
138
  "step": 754
139
  }
140
  ],
 
155
  "attributes": {}
156
  }
157
  },
158
+ "total_flos": 417294133800960.0,
159
  "train_batch_size": 16,
160
  "trial_name": null,
161
  "trial_params": null
checkpoint-754/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
3
  size 5304
metrics.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "eval_loss": 0.10738077014684677,
3
  "eval_accuracy": 0.9859154929577465,
4
  "eval_precision": 0.9906291834002677,
5
  "eval_recall": 0.9893048128342246,
6
  "eval_f1": 0.9899665551839465,
7
- "eval_runtime": 64.2094,
8
- "eval_samples_per_second": 16.586,
9
- "eval_steps_per_second": 1.043,
10
  "epoch": 4.0
11
  }
 
1
  {
2
+ "eval_loss": 0.0968979001045227,
3
  "eval_accuracy": 0.9859154929577465,
4
  "eval_precision": 0.9906291834002677,
5
  "eval_recall": 0.9893048128342246,
6
  "eval_f1": 0.9899665551839465,
7
+ "eval_runtime": 62.9117,
8
+ "eval_samples_per_second": 16.928,
9
+ "eval_steps_per_second": 1.065,
10
  "epoch": 4.0
11
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b30d84210b336753e94b844397015ae6635e4a978e6b132eaca6da156c50aead
3
  size 711449600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbfe56b9869f41724aeb21aff529b6fc717527dbece02e1d54b76e182981fe9d
3
  size 711449600
test_records.json CHANGED
The diff for this file is too large to render. See raw diff
 
train_records.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
3
  size 5304