End of training

Browse files

Files changed (6) hide show

README.md +16 -4
all_results.json +16 -0
eval_results.json +10 -0
logs/events.out.tfevents.1750880481.ki-g0008.1143286.1 +3 -0
train_results.json +9 -0
trainer_state.json +511 -0

README.md CHANGED Viewed

@@ -4,11 +4,23 @@ license: apache-2.0
 base_model: google-bert/bert-base-uncased
 tags:
 - generated_from_trainer
 metrics:
 - accuracy
 model-index:
 - name: bert_base_code_uml
-  results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,10 +28,10 @@ should probably proofread and complete it, then remove this comment. -->
 # bert_base_code_uml
-This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8292
-- Accuracy: 0.8286
 ## Model description

 base_model: google-bert/bert-base-uncased
 tags:
 - generated_from_trainer
+datasets:
+- devgpt-aimotion/the-stack-v2_PlantUML_filtered
 metrics:
 - accuracy
 model-index:
 - name: bert_base_code_uml
+  results:
+  - task:
+      name: Masked Language Modeling
+      type: fill-mask
+    dataset:
+      name: devgpt-aimotion/the-stack-v2_PlantUML_filtered
+      type: devgpt-aimotion/the-stack-v2_PlantUML_filtered
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.829663160408593
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # bert_base_code_uml
+This model is a fine-tuned version of [google-bert/bert-base-uncased](https://huggingface.co/google-bert/bert-base-uncased) on the devgpt-aimotion/the-stack-v2_PlantUML_filtered dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8230
+- Accuracy: 0.8297
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 25.0,
+    "eval_accuracy": 0.829663160408593,
+    "eval_loss": 0.822982132434845,
+    "eval_runtime": 31.502,
+    "eval_samples": 6151,
+    "eval_samples_per_second": 195.258,
+    "eval_steps_per_second": 2.063,
+    "perplexity": 2.2772808746912707,
+    "total_flos": 8.0444602960128e+17,
+    "train_loss": 1.908989332549426,
+    "train_runtime": 23433.9018,
+    "train_samples": 122254,
+    "train_samples_per_second": 130.424,
+    "train_steps_per_second": 1.359
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "epoch": 25.0,
+    "eval_accuracy": 0.829663160408593,
+    "eval_loss": 0.822982132434845,
+    "eval_runtime": 31.502,
+    "eval_samples": 6151,
+    "eval_samples_per_second": 195.258,
+    "eval_steps_per_second": 2.063,
+    "perplexity": 2.2772808746912707
+}

logs/events.out.tfevents.1750880481.ki-g0008.1143286.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e84d5fe95d0d8246931c473747cad9ab697fb1f27b0174608f57b860d69413c0
+size 417

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 25.0,
+    "total_flos": 8.0444602960128e+17,
+    "train_loss": 1.908989332549426,
+    "train_runtime": 23433.9018,
+    "train_samples": 122254,
+    "train_samples_per_second": 130.424,
+    "train_steps_per_second": 1.359
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,511 @@

+{
+  "best_global_step": 30000,
+  "best_metric": 0.8292354941368103,
+  "best_model_checkpoint": "bert_base_code_uml/checkpoint-30000",
+  "epoch": 25.0,
+  "eval_steps": 10000,
+  "global_step": 31850,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.3924646781789639,
+      "grad_norm": 2.2605140209198,
+      "learning_rate": 4.9900000000000005e-06,
+      "loss": 8.1534,
+      "step": 500
+    },
+    {
+      "epoch": 0.7849293563579278,
+      "grad_norm": 1.4277119636535645,
+      "learning_rate": 9.990000000000001e-06,
+      "loss": 5.4981,
+      "step": 1000
+    },
+    {
+      "epoch": 1.1773940345368916,
+      "grad_norm": 1.1594833135604858,
+      "learning_rate": 1.499e-05,
+      "loss": 4.6668,
+      "step": 1500
+    },
+    {
+      "epoch": 1.5698587127158556,
+      "grad_norm": 1.3609659671783447,
+      "learning_rate": 1.999e-05,
+      "loss": 4.4569,
+      "step": 2000
+    },
+    {
+      "epoch": 1.9623233908948194,
+      "grad_norm": 1.4516750574111938,
+      "learning_rate": 2.4990000000000003e-05,
+      "loss": 4.3236,
+      "step": 2500
+    },
+    {
+      "epoch": 2.3547880690737832,
+      "grad_norm": 1.307254672050476,
+      "learning_rate": 2.9990000000000003e-05,
+      "loss": 4.2234,
+      "step": 3000
+    },
+    {
+      "epoch": 2.7472527472527473,
+      "grad_norm": 1.1777299642562866,
+      "learning_rate": 3.499e-05,
+      "loss": 4.1369,
+      "step": 3500
+    },
+    {
+      "epoch": 3.1397174254317113,
+      "grad_norm": 1.277431607246399,
+      "learning_rate": 3.999e-05,
+      "loss": 4.0883,
+      "step": 4000
+    },
+    {
+      "epoch": 3.5321821036106753,
+      "grad_norm": 1.136020302772522,
+      "learning_rate": 4.499e-05,
+      "loss": 4.0251,
+      "step": 4500
+    },
+    {
+      "epoch": 3.924646781789639,
+      "grad_norm": 1.5430645942687988,
+      "learning_rate": 4.999e-05,
+      "loss": 3.7435,
+      "step": 5000
+    },
+    {
+      "epoch": 4.3171114599686025,
+      "grad_norm": 1.1859745979309082,
+      "learning_rate": 5.499000000000001e-05,
+      "loss": 3.5562,
+      "step": 5500
+    },
+    {
+      "epoch": 4.7095761381475665,
+      "grad_norm": 1.1602009534835815,
+      "learning_rate": 5.999e-05,
+      "loss": 3.4409,
+      "step": 6000
+    },
+    {
+      "epoch": 5.1020408163265305,
+      "grad_norm": 1.5617371797561646,
+      "learning_rate": 6.499000000000001e-05,
+      "loss": 3.3426,
+      "step": 6500
+    },
+    {
+      "epoch": 5.4945054945054945,
+      "grad_norm": 1.3554491996765137,
+      "learning_rate": 6.999e-05,
+      "loss": 3.2194,
+      "step": 7000
+    },
+    {
+      "epoch": 5.8869701726844585,
+      "grad_norm": 2.1539087295532227,
+      "learning_rate": 7.499e-05,
+      "loss": 3.1264,
+      "step": 7500
+    },
+    {
+      "epoch": 6.279434850863423,
+      "grad_norm": 1.4375736713409424,
+      "learning_rate": 7.999000000000001e-05,
+      "loss": 3.0421,
+      "step": 8000
+    },
+    {
+      "epoch": 6.671899529042387,
+      "grad_norm": 1.8041514158248901,
+      "learning_rate": 8.499e-05,
+      "loss": 2.9334,
+      "step": 8500
+    },
+    {
+      "epoch": 7.06436420722135,
+      "grad_norm": 2.089439868927002,
+      "learning_rate": 8.999000000000001e-05,
+      "loss": 2.8356,
+      "step": 9000
+    },
+    {
+      "epoch": 7.456828885400314,
+      "grad_norm": 1.8236392736434937,
+      "learning_rate": 9.499e-05,
+      "loss": 2.6914,
+      "step": 9500
+    },
+    {
+      "epoch": 7.849293563579278,
+      "grad_norm": 1.8073580265045166,
+      "learning_rate": 9.999000000000001e-05,
+      "loss": 2.4929,
+      "step": 10000
+    },
+    {
+      "epoch": 7.849293563579278,
+      "eval_accuracy": 0.5692341405099076,
+      "eval_loss": 2.151398181915283,
+      "eval_runtime": 38.2798,
+      "eval_samples_per_second": 160.685,
+      "eval_steps_per_second": 1.698,
+      "step": 10000
+    },
+    {
+      "epoch": 8.241758241758241,
+      "grad_norm": 1.849391222000122,
+      "learning_rate": 9.77162471395881e-05,
+      "loss": 2.1576,
+      "step": 10500
+    },
+    {
+      "epoch": 8.634222919937205,
+      "grad_norm": 1.2290756702423096,
+      "learning_rate": 9.542791762013731e-05,
+      "loss": 1.7337,
+      "step": 11000
+    },
+    {
+      "epoch": 9.026687598116169,
+      "grad_norm": 1.1669484376907349,
+      "learning_rate": 9.313958810068651e-05,
+      "loss": 1.4375,
+      "step": 11500
+    },
+    {
+      "epoch": 9.419152276295133,
+      "grad_norm": 1.0519758462905884,
+      "learning_rate": 9.08512585812357e-05,
+      "loss": 1.3162,
+      "step": 12000
+    },
+    {
+      "epoch": 9.811616954474097,
+      "grad_norm": 1.0862187147140503,
+      "learning_rate": 8.85629290617849e-05,
+      "loss": 1.2368,
+      "step": 12500
+    },
+    {
+      "epoch": 10.204081632653061,
+      "grad_norm": 0.9377219676971436,
+      "learning_rate": 8.62745995423341e-05,
+      "loss": 1.1784,
+      "step": 13000
+    },
+    {
+      "epoch": 10.596546310832025,
+      "grad_norm": 0.9312331676483154,
+      "learning_rate": 8.398627002288329e-05,
+      "loss": 1.1388,
+      "step": 13500
+    },
+    {
+      "epoch": 10.989010989010989,
+      "grad_norm": 0.9040568470954895,
+      "learning_rate": 8.16979405034325e-05,
+      "loss": 1.1097,
+      "step": 14000
+    },
+    {
+      "epoch": 11.381475667189953,
+      "grad_norm": 0.8583242297172546,
+      "learning_rate": 7.94096109839817e-05,
+      "loss": 1.0736,
+      "step": 14500
+    },
+    {
+      "epoch": 11.773940345368917,
+      "grad_norm": 0.8321512937545776,
+      "learning_rate": 7.712128146453089e-05,
+      "loss": 1.0626,
+      "step": 15000
+    },
+    {
+      "epoch": 12.166405023547881,
+      "grad_norm": 0.9143489003181458,
+      "learning_rate": 7.48329519450801e-05,
+      "loss": 1.0358,
+      "step": 15500
+    },
+    {
+      "epoch": 12.558869701726845,
+      "grad_norm": 0.8196631669998169,
+      "learning_rate": 7.25446224256293e-05,
+      "loss": 1.0207,
+      "step": 16000
+    },
+    {
+      "epoch": 12.95133437990581,
+      "grad_norm": 0.7631738781929016,
+      "learning_rate": 7.025629290617849e-05,
+      "loss": 1.004,
+      "step": 16500
+    },
+    {
+      "epoch": 13.343799058084773,
+      "grad_norm": 0.8194634914398193,
+      "learning_rate": 6.79679633867277e-05,
+      "loss": 0.9921,
+      "step": 17000
+    },
+    {
+      "epoch": 13.736263736263737,
+      "grad_norm": 0.7670016884803772,
+      "learning_rate": 6.56796338672769e-05,
+      "loss": 0.9779,
+      "step": 17500
+    },
+    {
+      "epoch": 14.1287284144427,
+      "grad_norm": 0.7673987746238708,
+      "learning_rate": 6.339130434782609e-05,
+      "loss": 0.9608,
+      "step": 18000
+    },
+    {
+      "epoch": 14.521193092621663,
+      "grad_norm": 0.7936846613883972,
+      "learning_rate": 6.110297482837529e-05,
+      "loss": 0.9558,
+      "step": 18500
+    },
+    {
+      "epoch": 14.913657770800627,
+      "grad_norm": 0.7623568177223206,
+      "learning_rate": 5.881464530892449e-05,
+      "loss": 0.9505,
+      "step": 19000
+    },
+    {
+      "epoch": 15.306122448979592,
+      "grad_norm": 0.7214558720588684,
+      "learning_rate": 5.652631578947368e-05,
+      "loss": 0.9402,
+      "step": 19500
+    },
+    {
+      "epoch": 15.698587127158556,
+      "grad_norm": 0.827078640460968,
+      "learning_rate": 5.423798627002289e-05,
+      "loss": 0.9263,
+      "step": 20000
+    },
+    {
+      "epoch": 15.698587127158556,
+      "eval_accuracy": 0.8142541052951258,
+      "eval_loss": 0.9068173170089722,
+      "eval_runtime": 31.5271,
+      "eval_samples_per_second": 195.102,
+      "eval_steps_per_second": 2.062,
+      "step": 20000
+    },
+    {
+      "epoch": 16.09105180533752,
+      "grad_norm": 0.7756440043449402,
+      "learning_rate": 5.1949656750572084e-05,
+      "loss": 0.9185,
+      "step": 20500
+    },
+    {
+      "epoch": 16.483516483516482,
+      "grad_norm": 0.7866923809051514,
+      "learning_rate": 4.966132723112129e-05,
+      "loss": 0.9115,
+      "step": 21000
+    },
+    {
+      "epoch": 16.875981161695446,
+      "grad_norm": 0.7449353337287903,
+      "learning_rate": 4.737299771167048e-05,
+      "loss": 0.9021,
+      "step": 21500
+    },
+    {
+      "epoch": 17.26844583987441,
+      "grad_norm": 0.7738542556762695,
+      "learning_rate": 4.508466819221968e-05,
+      "loss": 0.9021,
+      "step": 22000
+    },
+    {
+      "epoch": 17.660910518053374,
+      "grad_norm": 0.7117587924003601,
+      "learning_rate": 4.279633867276888e-05,
+      "loss": 0.8932,
+      "step": 22500
+    },
+    {
+      "epoch": 18.053375196232338,
+      "grad_norm": 0.6952142715454102,
+      "learning_rate": 4.0508009153318077e-05,
+      "loss": 0.8866,
+      "step": 23000
+    },
+    {
+      "epoch": 18.445839874411302,
+      "grad_norm": 0.6748417615890503,
+      "learning_rate": 3.821967963386728e-05,
+      "loss": 0.8831,
+      "step": 23500
+    },
+    {
+      "epoch": 18.838304552590266,
+      "grad_norm": 0.7013327479362488,
+      "learning_rate": 3.593135011441648e-05,
+      "loss": 0.8714,
+      "step": 24000
+    },
+    {
+      "epoch": 19.23076923076923,
+      "grad_norm": 0.629546046257019,
+      "learning_rate": 3.364302059496568e-05,
+      "loss": 0.8684,
+      "step": 24500
+    },
+    {
+      "epoch": 19.623233908948194,
+      "grad_norm": 0.6739959120750427,
+      "learning_rate": 3.135469107551487e-05,
+      "loss": 0.8664,
+      "step": 25000
+    },
+    {
+      "epoch": 20.015698587127158,
+      "grad_norm": 0.6923867464065552,
+      "learning_rate": 2.9066361556064075e-05,
+      "loss": 0.8613,
+      "step": 25500
+    },
+    {
+      "epoch": 20.408163265306122,
+      "grad_norm": 0.7043192386627197,
+      "learning_rate": 2.677803203661327e-05,
+      "loss": 0.8541,
+      "step": 26000
+    },
+    {
+      "epoch": 20.800627943485086,
+      "grad_norm": 0.6633190512657166,
+      "learning_rate": 2.448970251716247e-05,
+      "loss": 0.8558,
+      "step": 26500
+    },
+    {
+      "epoch": 21.19309262166405,
+      "grad_norm": 0.6382936239242554,
+      "learning_rate": 2.2201372997711673e-05,
+      "loss": 0.8486,
+      "step": 27000
+    },
+    {
+      "epoch": 21.585557299843014,
+      "grad_norm": 0.7126407623291016,
+      "learning_rate": 1.9913043478260872e-05,
+      "loss": 0.8455,
+      "step": 27500
+    },
+    {
+      "epoch": 21.978021978021978,
+      "grad_norm": 0.6809006929397583,
+      "learning_rate": 1.7624713958810068e-05,
+      "loss": 0.8382,
+      "step": 28000
+    },
+    {
+      "epoch": 22.370486656200942,
+      "grad_norm": 0.6693772077560425,
+      "learning_rate": 1.533638443935927e-05,
+      "loss": 0.8377,
+      "step": 28500
+    },
+    {
+      "epoch": 22.762951334379906,
+      "grad_norm": 0.7368608117103577,
+      "learning_rate": 1.3048054919908468e-05,
+      "loss": 0.8346,
+      "step": 29000
+    },
+    {
+      "epoch": 23.15541601255887,
+      "grad_norm": 0.6541247963905334,
+      "learning_rate": 1.0759725400457667e-05,
+      "loss": 0.8298,
+      "step": 29500
+    },
+    {
+      "epoch": 23.547880690737834,
+      "grad_norm": 0.6780161261558533,
+      "learning_rate": 8.471395881006864e-06,
+      "loss": 0.8293,
+      "step": 30000
+    },
+    {
+      "epoch": 23.547880690737834,
+      "eval_accuracy": 0.8285928159953133,
+      "eval_loss": 0.8292354941368103,
+      "eval_runtime": 31.6198,
+      "eval_samples_per_second": 194.53,
+      "eval_steps_per_second": 2.056,
+      "step": 30000
+    },
+    {
+      "epoch": 23.940345368916798,
+      "grad_norm": 0.6645314693450928,
+      "learning_rate": 6.183066361556064e-06,
+      "loss": 0.8306,
+      "step": 30500
+    },
+    {
+      "epoch": 24.332810047095762,
+      "grad_norm": 0.7622667551040649,
+      "learning_rate": 3.894736842105264e-06,
+      "loss": 0.8225,
+      "step": 31000
+    },
+    {
+      "epoch": 24.725274725274726,
+      "grad_norm": 0.6563706398010254,
+      "learning_rate": 1.6064073226544622e-06,
+      "loss": 0.8275,
+      "step": 31500
+    },
+    {
+      "epoch": 25.0,
+      "step": 31850,
+      "total_flos": 8.0444602960128e+17,
+      "train_loss": 1.908989332549426,
+      "train_runtime": 23433.9018,
+      "train_samples_per_second": 130.424,
+      "train_steps_per_second": 1.359
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 31850,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 25,
+  "save_steps": 10000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 8.0444602960128e+17,
+  "train_batch_size": 96,
+  "trial_name": null,
+  "trial_params": null
+}