Upload folder using huggingface_hub
Browse files- checkpoint-1131/model.safetensors +1 -1
- checkpoint-1131/optimizer.pt +1 -1
- checkpoint-1131/trainer_state.json +68 -68
- checkpoint-1131/training_args.bin +1 -1
- checkpoint-1508/model.safetensors +1 -1
- checkpoint-1508/optimizer.pt +1 -1
- checkpoint-1508/trainer_state.json +92 -92
- checkpoint-1508/training_args.bin +1 -1
- checkpoint-377/model.safetensors +1 -1
- checkpoint-377/optimizer.pt +1 -1
- checkpoint-377/trainer_state.json +24 -24
- checkpoint-377/training_args.bin +1 -1
- checkpoint-754/model.safetensors +1 -1
- checkpoint-754/optimizer.pt +1 -1
- checkpoint-754/trainer_state.json +48 -48
- checkpoint-754/training_args.bin +1 -1
- metrics.json +4 -4
- model.safetensors +1 -1
- test_records.json +0 -0
- train_records.json +0 -0
- training_args.bin +1 -1
checkpoint-1131/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 711449600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbfe56b9869f41724aeb21aff529b6fc717527dbece02e1d54b76e182981fe9d
|
| 3 |
size 711449600
|
checkpoint-1131/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1423014650
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e09a0e36bd4fa895040b7ce89d9e58bec334afecef8e2cdd80c5b98483fbde5
|
| 3 |
size 1423014650
|
checkpoint-1131/trainer_state.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
|
| 4 |
"epoch": 3.0,
|
| 5 |
"eval_steps": 500,
|
|
@@ -10,192 +10,192 @@
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
-
"grad_norm":
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
-
"loss": 0.
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
-
"grad_norm":
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
-
"loss": 0.
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
-
"grad_norm":
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
-
"loss": 0.
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
-
"grad_norm":
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
-
"loss": 0.
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
-
"grad_norm":
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
-
"loss": 0.
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
-
"grad_norm":
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
-
"loss": 0.
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
-
"grad_norm": 0.
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
-
"loss": 0.
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
-
"eval_accuracy": 0.
|
| 63 |
-
"eval_f1": 0.
|
| 64 |
-
"eval_loss": 0.
|
| 65 |
-
"eval_precision": 0.
|
| 66 |
-
"eval_recall": 0.
|
| 67 |
-
"eval_runtime":
|
| 68 |
-
"eval_samples_per_second":
|
| 69 |
-
"eval_steps_per_second": 1.
|
| 70 |
"step": 377
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 1.0610079575596818,
|
| 74 |
-
"grad_norm":
|
| 75 |
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
-
"loss": 0.
|
| 77 |
"step": 400
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.193633952254642,
|
| 81 |
-
"grad_norm": 0.
|
| 82 |
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
-
"loss": 0.
|
| 84 |
"step": 450
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"epoch": 1.3262599469496021,
|
| 88 |
-
"grad_norm":
|
| 89 |
"learning_rate": 1.485630066322771e-05,
|
| 90 |
-
"loss": 0.
|
| 91 |
"step": 500
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"epoch": 1.4588859416445623,
|
| 95 |
-
"grad_norm":
|
| 96 |
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
-
"loss": 0.
|
| 98 |
"step": 550
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"epoch": 1.5915119363395225,
|
| 102 |
-
"grad_norm":
|
| 103 |
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
-
"loss": 0.
|
| 105 |
"step": 600
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"epoch": 1.7241379310344827,
|
| 109 |
-
"grad_norm": 0.
|
| 110 |
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
-
"loss": 0.
|
| 112 |
"step": 650
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"epoch": 1.8567639257294428,
|
| 116 |
-
"grad_norm": 0.
|
| 117 |
"learning_rate": 1.190862196020634e-05,
|
| 118 |
-
"loss": 0.
|
| 119 |
"step": 700
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.9893899204244032,
|
| 123 |
-
"grad_norm": 0.
|
| 124 |
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
-
"loss": 0.
|
| 126 |
"step": 750
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"epoch": 2.0,
|
| 130 |
-
"eval_accuracy": 0.
|
| 131 |
-
"eval_f1": 0.
|
| 132 |
-
"eval_loss": 0.
|
| 133 |
-
"eval_precision": 0.
|
| 134 |
"eval_recall": 0.9946524064171123,
|
| 135 |
-
"eval_runtime":
|
| 136 |
-
"eval_samples_per_second": 16.
|
| 137 |
-
"eval_steps_per_second": 1.
|
| 138 |
"step": 754
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"epoch": 2.1220159151193636,
|
| 142 |
-
"grad_norm": 0.
|
| 143 |
"learning_rate": 1.0434782608695653e-05,
|
| 144 |
-
"loss": 0.
|
| 145 |
"step": 800
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 2.2546419098143238,
|
| 149 |
-
"grad_norm": 0.
|
| 150 |
"learning_rate": 9.697862932940311e-06,
|
| 151 |
-
"loss": 0.
|
| 152 |
"step": 850
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"epoch": 2.387267904509284,
|
| 156 |
-
"grad_norm": 0.
|
| 157 |
"learning_rate": 8.960943257184968e-06,
|
| 158 |
-
"loss": 0.
|
| 159 |
"step": 900
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"epoch": 2.519893899204244,
|
| 163 |
-
"grad_norm": 0.
|
| 164 |
"learning_rate": 8.224023581429625e-06,
|
| 165 |
-
"loss": 0.
|
| 166 |
"step": 950
|
| 167 |
},
|
| 168 |
{
|
| 169 |
"epoch": 2.6525198938992043,
|
| 170 |
-
"grad_norm":
|
| 171 |
"learning_rate": 7.487103905674282e-06,
|
| 172 |
-
"loss": 0.
|
| 173 |
"step": 1000
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"epoch": 2.7851458885941645,
|
| 177 |
-
"grad_norm": 0.
|
| 178 |
"learning_rate": 6.750184229918939e-06,
|
| 179 |
-
"loss": 0.
|
| 180 |
"step": 1050
|
| 181 |
},
|
| 182 |
{
|
| 183 |
"epoch": 2.9177718832891246,
|
| 184 |
-
"grad_norm":
|
| 185 |
"learning_rate": 6.013264554163597e-06,
|
| 186 |
-
"loss": 0.
|
| 187 |
"step": 1100
|
| 188 |
},
|
| 189 |
{
|
| 190 |
"epoch": 3.0,
|
| 191 |
-
"eval_accuracy": 0.
|
| 192 |
-
"eval_f1": 0.
|
| 193 |
-
"eval_loss": 0.
|
| 194 |
-
"eval_precision": 0.
|
| 195 |
"eval_recall": 0.9893048128342246,
|
| 196 |
-
"eval_runtime":
|
| 197 |
-
"eval_samples_per_second": 16.
|
| 198 |
-
"eval_steps_per_second": 1.
|
| 199 |
"step": 1131
|
| 200 |
}
|
| 201 |
],
|
|
@@ -216,7 +216,7 @@
|
|
| 216 |
"attributes": {}
|
| 217 |
}
|
| 218 |
},
|
| 219 |
-
"total_flos":
|
| 220 |
"train_batch_size": 16,
|
| 221 |
"trial_name": null,
|
| 222 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.9899665551839465,
|
| 3 |
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
|
| 4 |
"epoch": 3.0,
|
| 5 |
"eval_steps": 500,
|
|
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 6.313917636871338,
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6543,
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 8.760651588439941,
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.3545,
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 12.38838005065918,
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1951,
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 13.237753868103027,
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1559,
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 11.964133262634277,
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1602,
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 27.106698989868164,
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1055,
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.026046760380268097,
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.1148,
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9774647887323944,
|
| 63 |
+
"eval_f1": 0.9838492597577388,
|
| 64 |
+
"eval_loss": 0.08887767791748047,
|
| 65 |
+
"eval_precision": 0.9905149051490515,
|
| 66 |
+
"eval_recall": 0.9772727272727273,
|
| 67 |
+
"eval_runtime": 62.9416,
|
| 68 |
+
"eval_samples_per_second": 16.92,
|
| 69 |
+
"eval_steps_per_second": 1.064,
|
| 70 |
"step": 377
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 1.0610079575596818,
|
| 74 |
+
"grad_norm": 0.028772667050361633,
|
| 75 |
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
+
"loss": 0.0809,
|
| 77 |
"step": 400
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.193633952254642,
|
| 81 |
+
"grad_norm": 0.056088343262672424,
|
| 82 |
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
+
"loss": 0.0649,
|
| 84 |
"step": 450
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"epoch": 1.3262599469496021,
|
| 88 |
+
"grad_norm": 6.098559379577637,
|
| 89 |
"learning_rate": 1.485630066322771e-05,
|
| 90 |
+
"loss": 0.0768,
|
| 91 |
"step": 500
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"epoch": 1.4588859416445623,
|
| 95 |
+
"grad_norm": 99.54315948486328,
|
| 96 |
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
+
"loss": 0.0453,
|
| 98 |
"step": 550
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"epoch": 1.5915119363395225,
|
| 102 |
+
"grad_norm": 6.803869247436523,
|
| 103 |
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
+
"loss": 0.1294,
|
| 105 |
"step": 600
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"epoch": 1.7241379310344827,
|
| 109 |
+
"grad_norm": 0.2954126298427582,
|
| 110 |
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
+
"loss": 0.0839,
|
| 112 |
"step": 650
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"epoch": 1.8567639257294428,
|
| 116 |
+
"grad_norm": 0.047186098992824554,
|
| 117 |
"learning_rate": 1.190862196020634e-05,
|
| 118 |
+
"loss": 0.0557,
|
| 119 |
"step": 700
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.9893899204244032,
|
| 123 |
+
"grad_norm": 0.12770341336727142,
|
| 124 |
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
+
"loss": 0.0466,
|
| 126 |
"step": 750
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"epoch": 2.0,
|
| 130 |
+
"eval_accuracy": 0.9802816901408451,
|
| 131 |
+
"eval_f1": 0.986083499005964,
|
| 132 |
+
"eval_loss": 0.1403597742319107,
|
| 133 |
+
"eval_precision": 0.9776609724047306,
|
| 134 |
"eval_recall": 0.9946524064171123,
|
| 135 |
+
"eval_runtime": 64.2771,
|
| 136 |
+
"eval_samples_per_second": 16.569,
|
| 137 |
+
"eval_steps_per_second": 1.042,
|
| 138 |
"step": 754
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"epoch": 2.1220159151193636,
|
| 142 |
+
"grad_norm": 0.012713871896266937,
|
| 143 |
"learning_rate": 1.0434782608695653e-05,
|
| 144 |
+
"loss": 0.0479,
|
| 145 |
"step": 800
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 2.2546419098143238,
|
| 149 |
+
"grad_norm": 0.013412756845355034,
|
| 150 |
"learning_rate": 9.697862932940311e-06,
|
| 151 |
+
"loss": 0.0227,
|
| 152 |
"step": 850
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"epoch": 2.387267904509284,
|
| 156 |
+
"grad_norm": 0.0069837020710110664,
|
| 157 |
"learning_rate": 8.960943257184968e-06,
|
| 158 |
+
"loss": 0.024,
|
| 159 |
"step": 900
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"epoch": 2.519893899204244,
|
| 163 |
+
"grad_norm": 0.006205807905644178,
|
| 164 |
"learning_rate": 8.224023581429625e-06,
|
| 165 |
+
"loss": 0.0216,
|
| 166 |
"step": 950
|
| 167 |
},
|
| 168 |
{
|
| 169 |
"epoch": 2.6525198938992043,
|
| 170 |
+
"grad_norm": 0.013195905834436417,
|
| 171 |
"learning_rate": 7.487103905674282e-06,
|
| 172 |
+
"loss": 0.0302,
|
| 173 |
"step": 1000
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"epoch": 2.7851458885941645,
|
| 177 |
+
"grad_norm": 0.010757376439869404,
|
| 178 |
"learning_rate": 6.750184229918939e-06,
|
| 179 |
+
"loss": 0.0021,
|
| 180 |
"step": 1050
|
| 181 |
},
|
| 182 |
{
|
| 183 |
"epoch": 2.9177718832891246,
|
| 184 |
+
"grad_norm": 25.593114852905273,
|
| 185 |
"learning_rate": 6.013264554163597e-06,
|
| 186 |
+
"loss": 0.0222,
|
| 187 |
"step": 1100
|
| 188 |
},
|
| 189 |
{
|
| 190 |
"epoch": 3.0,
|
| 191 |
+
"eval_accuracy": 0.9859154929577465,
|
| 192 |
+
"eval_f1": 0.9899665551839465,
|
| 193 |
+
"eval_loss": 0.0968979001045227,
|
| 194 |
+
"eval_precision": 0.9906291834002677,
|
| 195 |
"eval_recall": 0.9893048128342246,
|
| 196 |
+
"eval_runtime": 62.8419,
|
| 197 |
+
"eval_samples_per_second": 16.947,
|
| 198 |
+
"eval_steps_per_second": 1.066,
|
| 199 |
"step": 1131
|
| 200 |
}
|
| 201 |
],
|
|
|
|
| 216 |
"attributes": {}
|
| 217 |
}
|
| 218 |
},
|
| 219 |
+
"total_flos": 628769644546560.0,
|
| 220 |
"train_batch_size": 16,
|
| 221 |
"trial_name": null,
|
| 222 |
"trial_params": null
|
checkpoint-1131/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
|
| 3 |
size 5304
|
checkpoint-1508/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 711449600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e83f2f723e82f1966a8c36143d07eab2a1e2ee605a5f9037b01aee55dcf80a87
|
| 3 |
size 711449600
|
checkpoint-1508/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1423014650
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fae5f2268ddbb577fba0afe39ecc58dafe67fe7c96fedb3ee8652afc0b77f68c
|
| 3 |
size 1423014650
|
checkpoint-1508/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.9899665551839465,
|
| 3 |
-
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-
|
| 4 |
"epoch": 4.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 1508,
|
|
@@ -10,260 +10,260 @@
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
-
"grad_norm":
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
-
"loss": 0.
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
-
"grad_norm":
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
-
"loss": 0.
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
-
"grad_norm":
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
-
"loss": 0.
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
-
"grad_norm":
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
-
"loss": 0.
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
-
"grad_norm":
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
-
"loss": 0.
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
-
"grad_norm":
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
-
"loss": 0.
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
-
"grad_norm": 0.
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
-
"loss": 0.
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
-
"eval_accuracy": 0.
|
| 63 |
-
"eval_f1": 0.
|
| 64 |
-
"eval_loss": 0.
|
| 65 |
-
"eval_precision": 0.
|
| 66 |
-
"eval_recall": 0.
|
| 67 |
-
"eval_runtime":
|
| 68 |
-
"eval_samples_per_second":
|
| 69 |
-
"eval_steps_per_second": 1.
|
| 70 |
"step": 377
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 1.0610079575596818,
|
| 74 |
-
"grad_norm":
|
| 75 |
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
-
"loss": 0.
|
| 77 |
"step": 400
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.193633952254642,
|
| 81 |
-
"grad_norm": 0.
|
| 82 |
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
-
"loss": 0.
|
| 84 |
"step": 450
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"epoch": 1.3262599469496021,
|
| 88 |
-
"grad_norm":
|
| 89 |
"learning_rate": 1.485630066322771e-05,
|
| 90 |
-
"loss": 0.
|
| 91 |
"step": 500
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"epoch": 1.4588859416445623,
|
| 95 |
-
"grad_norm":
|
| 96 |
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
-
"loss": 0.
|
| 98 |
"step": 550
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"epoch": 1.5915119363395225,
|
| 102 |
-
"grad_norm":
|
| 103 |
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
-
"loss": 0.
|
| 105 |
"step": 600
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"epoch": 1.7241379310344827,
|
| 109 |
-
"grad_norm": 0.
|
| 110 |
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
-
"loss": 0.
|
| 112 |
"step": 650
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"epoch": 1.8567639257294428,
|
| 116 |
-
"grad_norm": 0.
|
| 117 |
"learning_rate": 1.190862196020634e-05,
|
| 118 |
-
"loss": 0.
|
| 119 |
"step": 700
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.9893899204244032,
|
| 123 |
-
"grad_norm": 0.
|
| 124 |
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
-
"loss": 0.
|
| 126 |
"step": 750
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"epoch": 2.0,
|
| 130 |
-
"eval_accuracy": 0.
|
| 131 |
-
"eval_f1": 0.
|
| 132 |
-
"eval_loss": 0.
|
| 133 |
-
"eval_precision": 0.
|
| 134 |
"eval_recall": 0.9946524064171123,
|
| 135 |
-
"eval_runtime":
|
| 136 |
-
"eval_samples_per_second": 16.
|
| 137 |
-
"eval_steps_per_second": 1.
|
| 138 |
"step": 754
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"epoch": 2.1220159151193636,
|
| 142 |
-
"grad_norm": 0.
|
| 143 |
"learning_rate": 1.0434782608695653e-05,
|
| 144 |
-
"loss": 0.
|
| 145 |
"step": 800
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 2.2546419098143238,
|
| 149 |
-
"grad_norm": 0.
|
| 150 |
"learning_rate": 9.697862932940311e-06,
|
| 151 |
-
"loss": 0.
|
| 152 |
"step": 850
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"epoch": 2.387267904509284,
|
| 156 |
-
"grad_norm": 0.
|
| 157 |
"learning_rate": 8.960943257184968e-06,
|
| 158 |
-
"loss": 0.
|
| 159 |
"step": 900
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"epoch": 2.519893899204244,
|
| 163 |
-
"grad_norm": 0.
|
| 164 |
"learning_rate": 8.224023581429625e-06,
|
| 165 |
-
"loss": 0.
|
| 166 |
"step": 950
|
| 167 |
},
|
| 168 |
{
|
| 169 |
"epoch": 2.6525198938992043,
|
| 170 |
-
"grad_norm":
|
| 171 |
"learning_rate": 7.487103905674282e-06,
|
| 172 |
-
"loss": 0.
|
| 173 |
"step": 1000
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"epoch": 2.7851458885941645,
|
| 177 |
-
"grad_norm": 0.
|
| 178 |
"learning_rate": 6.750184229918939e-06,
|
| 179 |
-
"loss": 0.
|
| 180 |
"step": 1050
|
| 181 |
},
|
| 182 |
{
|
| 183 |
"epoch": 2.9177718832891246,
|
| 184 |
-
"grad_norm":
|
| 185 |
"learning_rate": 6.013264554163597e-06,
|
| 186 |
-
"loss": 0.
|
| 187 |
"step": 1100
|
| 188 |
},
|
| 189 |
{
|
| 190 |
"epoch": 3.0,
|
| 191 |
-
"eval_accuracy": 0.
|
| 192 |
-
"eval_f1": 0.
|
| 193 |
-
"eval_loss": 0.
|
| 194 |
-
"eval_precision": 0.
|
| 195 |
"eval_recall": 0.9893048128342246,
|
| 196 |
-
"eval_runtime":
|
| 197 |
-
"eval_samples_per_second": 16.
|
| 198 |
-
"eval_steps_per_second": 1.
|
| 199 |
"step": 1131
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 3.050397877984085,
|
| 203 |
-
"grad_norm": 0.
|
| 204 |
"learning_rate": 5.276344878408254e-06,
|
| 205 |
-
"loss": 0.
|
| 206 |
"step": 1150
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 3.183023872679045,
|
| 210 |
-
"grad_norm": 0.
|
| 211 |
"learning_rate": 4.5394252026529115e-06,
|
| 212 |
-
"loss": 0.
|
| 213 |
"step": 1200
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 3.315649867374005,
|
| 217 |
-
"grad_norm": 0.
|
| 218 |
"learning_rate": 3.8025055268975686e-06,
|
| 219 |
-
"loss": 0.
|
| 220 |
"step": 1250
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 3.4482758620689653,
|
| 224 |
-
"grad_norm": 0.
|
| 225 |
"learning_rate": 3.065585851142226e-06,
|
| 226 |
-
"loss": 0.
|
| 227 |
"step": 1300
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 3.5809018567639255,
|
| 231 |
-
"grad_norm": 0.
|
| 232 |
"learning_rate": 2.328666175386883e-06,
|
| 233 |
-
"loss": 0.
|
| 234 |
"step": 1350
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 3.713527851458886,
|
| 238 |
-
"grad_norm": 0.
|
| 239 |
"learning_rate": 1.59174649963154e-06,
|
| 240 |
-
"loss": 0.
|
| 241 |
"step": 1400
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 3.8461538461538463,
|
| 245 |
-
"grad_norm": 0.
|
| 246 |
"learning_rate": 8.548268238761975e-07,
|
| 247 |
-
"loss": 0.
|
| 248 |
"step": 1450
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 3.9787798408488064,
|
| 252 |
-
"grad_norm": 0.
|
| 253 |
"learning_rate": 1.1790714812085484e-07,
|
| 254 |
-
"loss": 0.
|
| 255 |
"step": 1500
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 4.0,
|
| 259 |
-
"eval_accuracy": 0.
|
| 260 |
-
"eval_f1": 0.
|
| 261 |
-
"eval_loss": 0.
|
| 262 |
-
"eval_precision": 0.
|
| 263 |
-
"eval_recall": 0.
|
| 264 |
-
"eval_runtime":
|
| 265 |
-
"eval_samples_per_second": 16.
|
| 266 |
-
"eval_steps_per_second": 1.
|
| 267 |
"step": 1508
|
| 268 |
}
|
| 269 |
],
|
|
@@ -284,7 +284,7 @@
|
|
| 284 |
"attributes": {}
|
| 285 |
}
|
| 286 |
},
|
| 287 |
-
"total_flos":
|
| 288 |
"train_batch_size": 16,
|
| 289 |
"trial_name": null,
|
| 290 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.9899665551839465,
|
| 3 |
+
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
|
| 4 |
"epoch": 4.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 1508,
|
|
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 6.313917636871338,
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6543,
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 8.760651588439941,
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.3545,
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 12.38838005065918,
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1951,
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 13.237753868103027,
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1559,
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 11.964133262634277,
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1602,
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 27.106698989868164,
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1055,
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.026046760380268097,
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.1148,
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9774647887323944,
|
| 63 |
+
"eval_f1": 0.9838492597577388,
|
| 64 |
+
"eval_loss": 0.08887767791748047,
|
| 65 |
+
"eval_precision": 0.9905149051490515,
|
| 66 |
+
"eval_recall": 0.9772727272727273,
|
| 67 |
+
"eval_runtime": 62.9416,
|
| 68 |
+
"eval_samples_per_second": 16.92,
|
| 69 |
+
"eval_steps_per_second": 1.064,
|
| 70 |
"step": 377
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 1.0610079575596818,
|
| 74 |
+
"grad_norm": 0.028772667050361633,
|
| 75 |
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
+
"loss": 0.0809,
|
| 77 |
"step": 400
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.193633952254642,
|
| 81 |
+
"grad_norm": 0.056088343262672424,
|
| 82 |
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
+
"loss": 0.0649,
|
| 84 |
"step": 450
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"epoch": 1.3262599469496021,
|
| 88 |
+
"grad_norm": 6.098559379577637,
|
| 89 |
"learning_rate": 1.485630066322771e-05,
|
| 90 |
+
"loss": 0.0768,
|
| 91 |
"step": 500
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"epoch": 1.4588859416445623,
|
| 95 |
+
"grad_norm": 99.54315948486328,
|
| 96 |
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
+
"loss": 0.0453,
|
| 98 |
"step": 550
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"epoch": 1.5915119363395225,
|
| 102 |
+
"grad_norm": 6.803869247436523,
|
| 103 |
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
+
"loss": 0.1294,
|
| 105 |
"step": 600
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"epoch": 1.7241379310344827,
|
| 109 |
+
"grad_norm": 0.2954126298427582,
|
| 110 |
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
+
"loss": 0.0839,
|
| 112 |
"step": 650
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"epoch": 1.8567639257294428,
|
| 116 |
+
"grad_norm": 0.047186098992824554,
|
| 117 |
"learning_rate": 1.190862196020634e-05,
|
| 118 |
+
"loss": 0.0557,
|
| 119 |
"step": 700
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.9893899204244032,
|
| 123 |
+
"grad_norm": 0.12770341336727142,
|
| 124 |
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
+
"loss": 0.0466,
|
| 126 |
"step": 750
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"epoch": 2.0,
|
| 130 |
+
"eval_accuracy": 0.9802816901408451,
|
| 131 |
+
"eval_f1": 0.986083499005964,
|
| 132 |
+
"eval_loss": 0.1403597742319107,
|
| 133 |
+
"eval_precision": 0.9776609724047306,
|
| 134 |
"eval_recall": 0.9946524064171123,
|
| 135 |
+
"eval_runtime": 64.2771,
|
| 136 |
+
"eval_samples_per_second": 16.569,
|
| 137 |
+
"eval_steps_per_second": 1.042,
|
| 138 |
"step": 754
|
| 139 |
},
|
| 140 |
{
|
| 141 |
"epoch": 2.1220159151193636,
|
| 142 |
+
"grad_norm": 0.012713871896266937,
|
| 143 |
"learning_rate": 1.0434782608695653e-05,
|
| 144 |
+
"loss": 0.0479,
|
| 145 |
"step": 800
|
| 146 |
},
|
| 147 |
{
|
| 148 |
"epoch": 2.2546419098143238,
|
| 149 |
+
"grad_norm": 0.013412756845355034,
|
| 150 |
"learning_rate": 9.697862932940311e-06,
|
| 151 |
+
"loss": 0.0227,
|
| 152 |
"step": 850
|
| 153 |
},
|
| 154 |
{
|
| 155 |
"epoch": 2.387267904509284,
|
| 156 |
+
"grad_norm": 0.0069837020710110664,
|
| 157 |
"learning_rate": 8.960943257184968e-06,
|
| 158 |
+
"loss": 0.024,
|
| 159 |
"step": 900
|
| 160 |
},
|
| 161 |
{
|
| 162 |
"epoch": 2.519893899204244,
|
| 163 |
+
"grad_norm": 0.006205807905644178,
|
| 164 |
"learning_rate": 8.224023581429625e-06,
|
| 165 |
+
"loss": 0.0216,
|
| 166 |
"step": 950
|
| 167 |
},
|
| 168 |
{
|
| 169 |
"epoch": 2.6525198938992043,
|
| 170 |
+
"grad_norm": 0.013195905834436417,
|
| 171 |
"learning_rate": 7.487103905674282e-06,
|
| 172 |
+
"loss": 0.0302,
|
| 173 |
"step": 1000
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"epoch": 2.7851458885941645,
|
| 177 |
+
"grad_norm": 0.010757376439869404,
|
| 178 |
"learning_rate": 6.750184229918939e-06,
|
| 179 |
+
"loss": 0.0021,
|
| 180 |
"step": 1050
|
| 181 |
},
|
| 182 |
{
|
| 183 |
"epoch": 2.9177718832891246,
|
| 184 |
+
"grad_norm": 25.593114852905273,
|
| 185 |
"learning_rate": 6.013264554163597e-06,
|
| 186 |
+
"loss": 0.0222,
|
| 187 |
"step": 1100
|
| 188 |
},
|
| 189 |
{
|
| 190 |
"epoch": 3.0,
|
| 191 |
+
"eval_accuracy": 0.9859154929577465,
|
| 192 |
+
"eval_f1": 0.9899665551839465,
|
| 193 |
+
"eval_loss": 0.0968979001045227,
|
| 194 |
+
"eval_precision": 0.9906291834002677,
|
| 195 |
"eval_recall": 0.9893048128342246,
|
| 196 |
+
"eval_runtime": 62.8419,
|
| 197 |
+
"eval_samples_per_second": 16.947,
|
| 198 |
+
"eval_steps_per_second": 1.066,
|
| 199 |
"step": 1131
|
| 200 |
},
|
| 201 |
{
|
| 202 |
"epoch": 3.050397877984085,
|
| 203 |
+
"grad_norm": 0.005515966564416885,
|
| 204 |
"learning_rate": 5.276344878408254e-06,
|
| 205 |
+
"loss": 0.0211,
|
| 206 |
"step": 1150
|
| 207 |
},
|
| 208 |
{
|
| 209 |
"epoch": 3.183023872679045,
|
| 210 |
+
"grad_norm": 0.007331592496484518,
|
| 211 |
"learning_rate": 4.5394252026529115e-06,
|
| 212 |
+
"loss": 0.0045,
|
| 213 |
"step": 1200
|
| 214 |
},
|
| 215 |
{
|
| 216 |
"epoch": 3.315649867374005,
|
| 217 |
+
"grad_norm": 0.0053366441279649734,
|
| 218 |
"learning_rate": 3.8025055268975686e-06,
|
| 219 |
+
"loss": 0.0003,
|
| 220 |
"step": 1250
|
| 221 |
},
|
| 222 |
{
|
| 223 |
"epoch": 3.4482758620689653,
|
| 224 |
+
"grad_norm": 0.00485859764739871,
|
| 225 |
"learning_rate": 3.065585851142226e-06,
|
| 226 |
+
"loss": 0.0056,
|
| 227 |
"step": 1300
|
| 228 |
},
|
| 229 |
{
|
| 230 |
"epoch": 3.5809018567639255,
|
| 231 |
+
"grad_norm": 0.005309904459863901,
|
| 232 |
"learning_rate": 2.328666175386883e-06,
|
| 233 |
+
"loss": 0.0072,
|
| 234 |
"step": 1350
|
| 235 |
},
|
| 236 |
{
|
| 237 |
"epoch": 3.713527851458886,
|
| 238 |
+
"grad_norm": 0.003933363128453493,
|
| 239 |
"learning_rate": 1.59174649963154e-06,
|
| 240 |
+
"loss": 0.005,
|
| 241 |
"step": 1400
|
| 242 |
},
|
| 243 |
{
|
| 244 |
"epoch": 3.8461538461538463,
|
| 245 |
+
"grad_norm": 0.0033942251466214657,
|
| 246 |
"learning_rate": 8.548268238761975e-07,
|
| 247 |
+
"loss": 0.0002,
|
| 248 |
"step": 1450
|
| 249 |
},
|
| 250 |
{
|
| 251 |
"epoch": 3.9787798408488064,
|
| 252 |
+
"grad_norm": 0.0044485898688435555,
|
| 253 |
"learning_rate": 1.1790714812085484e-07,
|
| 254 |
+
"loss": 0.0037,
|
| 255 |
"step": 1500
|
| 256 |
},
|
| 257 |
{
|
| 258 |
"epoch": 4.0,
|
| 259 |
+
"eval_accuracy": 0.984037558685446,
|
| 260 |
+
"eval_f1": 0.9886135298057601,
|
| 261 |
+
"eval_loss": 0.116817407310009,
|
| 262 |
+
"eval_precision": 0.9906040268456375,
|
| 263 |
+
"eval_recall": 0.9866310160427807,
|
| 264 |
+
"eval_runtime": 62.7013,
|
| 265 |
+
"eval_samples_per_second": 16.985,
|
| 266 |
+
"eval_steps_per_second": 1.069,
|
| 267 |
"step": 1508
|
| 268 |
}
|
| 269 |
],
|
|
|
|
| 284 |
"attributes": {}
|
| 285 |
}
|
| 286 |
},
|
| 287 |
+
"total_flos": 839587377653760.0,
|
| 288 |
"train_batch_size": 16,
|
| 289 |
"trial_name": null,
|
| 290 |
"trial_params": null
|
checkpoint-1508/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
|
| 3 |
size 5304
|
checkpoint-377/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 711449600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88c901a19e6b36a140f7d29fba603543b97cd75e33362611614d8986e508beef
|
| 3 |
size 711449600
|
checkpoint-377/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1423014650
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80af33ce0ab52011f40575eac75a4c30cc4c58aacd2f7b923c777e87018db57c
|
| 3 |
size 1423014650
|
checkpoint-377/trainer_state.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
|
| 4 |
"epoch": 1.0,
|
| 5 |
"eval_steps": 500,
|
|
@@ -10,63 +10,63 @@
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
-
"grad_norm":
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
-
"loss": 0.
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
-
"grad_norm":
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
-
"loss": 0.
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
-
"grad_norm":
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
-
"loss": 0.
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
-
"grad_norm":
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
-
"loss": 0.
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
-
"grad_norm":
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
-
"loss": 0.
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
-
"grad_norm":
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
-
"loss": 0.
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
-
"grad_norm": 0.
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
-
"loss": 0.
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
-
"eval_accuracy": 0.
|
| 63 |
-
"eval_f1": 0.
|
| 64 |
-
"eval_loss": 0.
|
| 65 |
-
"eval_precision": 0.
|
| 66 |
-
"eval_recall": 0.
|
| 67 |
-
"eval_runtime":
|
| 68 |
-
"eval_samples_per_second":
|
| 69 |
-
"eval_steps_per_second": 1.
|
| 70 |
"step": 377
|
| 71 |
}
|
| 72 |
],
|
|
@@ -87,7 +87,7 @@
|
|
| 87 |
"attributes": {}
|
| 88 |
}
|
| 89 |
},
|
| 90 |
-
"total_flos":
|
| 91 |
"train_batch_size": 16,
|
| 92 |
"trial_name": null,
|
| 93 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.9838492597577388,
|
| 3 |
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
|
| 4 |
"epoch": 1.0,
|
| 5 |
"eval_steps": 500,
|
|
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 6.313917636871338,
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6543,
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 8.760651588439941,
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.3545,
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 12.38838005065918,
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1951,
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 13.237753868103027,
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1559,
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 11.964133262634277,
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1602,
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 27.106698989868164,
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1055,
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.026046760380268097,
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.1148,
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9774647887323944,
|
| 63 |
+
"eval_f1": 0.9838492597577388,
|
| 64 |
+
"eval_loss": 0.08887767791748047,
|
| 65 |
+
"eval_precision": 0.9905149051490515,
|
| 66 |
+
"eval_recall": 0.9772727272727273,
|
| 67 |
+
"eval_runtime": 62.9416,
|
| 68 |
+
"eval_samples_per_second": 16.92,
|
| 69 |
+
"eval_steps_per_second": 1.064,
|
| 70 |
"step": 377
|
| 71 |
}
|
| 72 |
],
|
|
|
|
| 87 |
"attributes": {}
|
| 88 |
}
|
| 89 |
},
|
| 90 |
+
"total_flos": 207660400442880.0,
|
| 91 |
"train_batch_size": 16,
|
| 92 |
"trial_name": null,
|
| 93 |
"trial_params": null
|
checkpoint-377/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
|
| 3 |
size 5304
|
checkpoint-754/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 711449600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dc75dfbec12aad2bf8a2060a9fdccaae09d8d8a4174df16224891cdaef4a061
|
| 3 |
size 711449600
|
checkpoint-754/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1423014650
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10b1cf27a5b28b159414ba7755016549a6483ac7dee7ad6c97641445afeaa50b
|
| 3 |
size 1423014650
|
checkpoint-754/trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-
|
| 4 |
"epoch": 2.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 754,
|
|
@@ -10,131 +10,131 @@
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
-
"grad_norm":
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
-
"loss": 0.
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
-
"grad_norm":
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
-
"loss": 0.
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
-
"grad_norm":
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
-
"loss": 0.
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
-
"grad_norm":
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
-
"loss": 0.
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
-
"grad_norm":
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
-
"loss": 0.
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
-
"grad_norm":
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
-
"loss": 0.
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
-
"grad_norm": 0.
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
-
"loss": 0.
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
-
"eval_accuracy": 0.
|
| 63 |
-
"eval_f1": 0.
|
| 64 |
-
"eval_loss": 0.
|
| 65 |
-
"eval_precision": 0.
|
| 66 |
-
"eval_recall": 0.
|
| 67 |
-
"eval_runtime":
|
| 68 |
-
"eval_samples_per_second":
|
| 69 |
-
"eval_steps_per_second": 1.
|
| 70 |
"step": 377
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 1.0610079575596818,
|
| 74 |
-
"grad_norm":
|
| 75 |
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
-
"loss": 0.
|
| 77 |
"step": 400
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.193633952254642,
|
| 81 |
-
"grad_norm": 0.
|
| 82 |
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
-
"loss": 0.
|
| 84 |
"step": 450
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"epoch": 1.3262599469496021,
|
| 88 |
-
"grad_norm":
|
| 89 |
"learning_rate": 1.485630066322771e-05,
|
| 90 |
-
"loss": 0.
|
| 91 |
"step": 500
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"epoch": 1.4588859416445623,
|
| 95 |
-
"grad_norm":
|
| 96 |
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
-
"loss": 0.
|
| 98 |
"step": 550
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"epoch": 1.5915119363395225,
|
| 102 |
-
"grad_norm":
|
| 103 |
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
-
"loss": 0.
|
| 105 |
"step": 600
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"epoch": 1.7241379310344827,
|
| 109 |
-
"grad_norm": 0.
|
| 110 |
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
-
"loss": 0.
|
| 112 |
"step": 650
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"epoch": 1.8567639257294428,
|
| 116 |
-
"grad_norm": 0.
|
| 117 |
"learning_rate": 1.190862196020634e-05,
|
| 118 |
-
"loss": 0.
|
| 119 |
"step": 700
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.9893899204244032,
|
| 123 |
-
"grad_norm": 0.
|
| 124 |
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
-
"loss": 0.
|
| 126 |
"step": 750
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"epoch": 2.0,
|
| 130 |
-
"eval_accuracy": 0.
|
| 131 |
-
"eval_f1": 0.
|
| 132 |
-
"eval_loss": 0.
|
| 133 |
-
"eval_precision": 0.
|
| 134 |
"eval_recall": 0.9946524064171123,
|
| 135 |
-
"eval_runtime":
|
| 136 |
-
"eval_samples_per_second": 16.
|
| 137 |
-
"eval_steps_per_second": 1.
|
| 138 |
"step": 754
|
| 139 |
}
|
| 140 |
],
|
|
@@ -155,7 +155,7 @@
|
|
| 155 |
"attributes": {}
|
| 156 |
}
|
| 157 |
},
|
| 158 |
-
"total_flos":
|
| 159 |
"train_batch_size": 16,
|
| 160 |
"trial_name": null,
|
| 161 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.986083499005964,
|
| 3 |
+
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-754",
|
| 4 |
"epoch": 2.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 754,
|
|
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 6.313917636871338,
|
| 14 |
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6543,
|
| 16 |
"step": 50
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 8.760651588439941,
|
| 21 |
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.3545,
|
| 23 |
"step": 100
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 12.38838005065918,
|
| 28 |
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1951,
|
| 30 |
"step": 150
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 13.237753868103027,
|
| 35 |
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1559,
|
| 37 |
"step": 200
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 11.964133262634277,
|
| 42 |
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1602,
|
| 44 |
"step": 250
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 27.106698989868164,
|
| 49 |
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1055,
|
| 51 |
"step": 300
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.026046760380268097,
|
| 56 |
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.1148,
|
| 58 |
"step": 350
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9774647887323944,
|
| 63 |
+
"eval_f1": 0.9838492597577388,
|
| 64 |
+
"eval_loss": 0.08887767791748047,
|
| 65 |
+
"eval_precision": 0.9905149051490515,
|
| 66 |
+
"eval_recall": 0.9772727272727273,
|
| 67 |
+
"eval_runtime": 62.9416,
|
| 68 |
+
"eval_samples_per_second": 16.92,
|
| 69 |
+
"eval_steps_per_second": 1.064,
|
| 70 |
"step": 377
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 1.0610079575596818,
|
| 74 |
+
"grad_norm": 0.028772667050361633,
|
| 75 |
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
+
"loss": 0.0809,
|
| 77 |
"step": 400
|
| 78 |
},
|
| 79 |
{
|
| 80 |
"epoch": 1.193633952254642,
|
| 81 |
+
"grad_norm": 0.056088343262672424,
|
| 82 |
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
+
"loss": 0.0649,
|
| 84 |
"step": 450
|
| 85 |
},
|
| 86 |
{
|
| 87 |
"epoch": 1.3262599469496021,
|
| 88 |
+
"grad_norm": 6.098559379577637,
|
| 89 |
"learning_rate": 1.485630066322771e-05,
|
| 90 |
+
"loss": 0.0768,
|
| 91 |
"step": 500
|
| 92 |
},
|
| 93 |
{
|
| 94 |
"epoch": 1.4588859416445623,
|
| 95 |
+
"grad_norm": 99.54315948486328,
|
| 96 |
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
+
"loss": 0.0453,
|
| 98 |
"step": 550
|
| 99 |
},
|
| 100 |
{
|
| 101 |
"epoch": 1.5915119363395225,
|
| 102 |
+
"grad_norm": 6.803869247436523,
|
| 103 |
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
+
"loss": 0.1294,
|
| 105 |
"step": 600
|
| 106 |
},
|
| 107 |
{
|
| 108 |
"epoch": 1.7241379310344827,
|
| 109 |
+
"grad_norm": 0.2954126298427582,
|
| 110 |
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
+
"loss": 0.0839,
|
| 112 |
"step": 650
|
| 113 |
},
|
| 114 |
{
|
| 115 |
"epoch": 1.8567639257294428,
|
| 116 |
+
"grad_norm": 0.047186098992824554,
|
| 117 |
"learning_rate": 1.190862196020634e-05,
|
| 118 |
+
"loss": 0.0557,
|
| 119 |
"step": 700
|
| 120 |
},
|
| 121 |
{
|
| 122 |
"epoch": 1.9893899204244032,
|
| 123 |
+
"grad_norm": 0.12770341336727142,
|
| 124 |
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
+
"loss": 0.0466,
|
| 126 |
"step": 750
|
| 127 |
},
|
| 128 |
{
|
| 129 |
"epoch": 2.0,
|
| 130 |
+
"eval_accuracy": 0.9802816901408451,
|
| 131 |
+
"eval_f1": 0.986083499005964,
|
| 132 |
+
"eval_loss": 0.1403597742319107,
|
| 133 |
+
"eval_precision": 0.9776609724047306,
|
| 134 |
"eval_recall": 0.9946524064171123,
|
| 135 |
+
"eval_runtime": 64.2771,
|
| 136 |
+
"eval_samples_per_second": 16.569,
|
| 137 |
+
"eval_steps_per_second": 1.042,
|
| 138 |
"step": 754
|
| 139 |
}
|
| 140 |
],
|
|
|
|
| 155 |
"attributes": {}
|
| 156 |
}
|
| 157 |
},
|
| 158 |
+
"total_flos": 417294133800960.0,
|
| 159 |
"train_batch_size": 16,
|
| 160 |
"trial_name": null,
|
| 161 |
"trial_params": null
|
checkpoint-754/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
|
| 3 |
size 5304
|
metrics.json
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
{
|
| 2 |
-
"eval_loss": 0.
|
| 3 |
"eval_accuracy": 0.9859154929577465,
|
| 4 |
"eval_precision": 0.9906291834002677,
|
| 5 |
"eval_recall": 0.9893048128342246,
|
| 6 |
"eval_f1": 0.9899665551839465,
|
| 7 |
-
"eval_runtime":
|
| 8 |
-
"eval_samples_per_second": 16.
|
| 9 |
-
"eval_steps_per_second": 1.
|
| 10 |
"epoch": 4.0
|
| 11 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"eval_loss": 0.0968979001045227,
|
| 3 |
"eval_accuracy": 0.9859154929577465,
|
| 4 |
"eval_precision": 0.9906291834002677,
|
| 5 |
"eval_recall": 0.9893048128342246,
|
| 6 |
"eval_f1": 0.9899665551839465,
|
| 7 |
+
"eval_runtime": 62.9117,
|
| 8 |
+
"eval_samples_per_second": 16.928,
|
| 9 |
+
"eval_steps_per_second": 1.065,
|
| 10 |
"epoch": 4.0
|
| 11 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 711449600
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbfe56b9869f41724aeb21aff529b6fc717527dbece02e1d54b76e182981fe9d
|
| 3 |
size 711449600
|
test_records.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
train_records.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:495a3cc45f1033c780ed08b02ec5466e255ca6a4bc480ecf9586486920684433
|
| 3 |
size 5304
|