Upload folder using huggingface_hub

Files changed (4)

README.md CHANGED Viewed

@@ -7,14 +7,14 @@ tags:
 - lora
 - generated_from_trainer
 model-index:
-- name: train_2024-11-18_128_256
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# train_2024-11-18_128_256
 This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mydata14 dataset.
@@ -37,12 +37,12 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
 - train_batch_size: 8
-- eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 2
 - total_train_batch_size: 16
-- total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - num_epochs: 2.0

 - lora
 - generated_from_trainer
 model-index:
+- name: train_2024-11-18_128_256_01
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# train_2024-11-18_128_256_01
 This model is a fine-tuned version of [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) on the mydata14 dataset.
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
 - train_batch_size: 8
+- eval_batch_size: 2
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 2
 - total_train_batch_size: 16
+- total_eval_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - num_epochs: 2.0

adapter_config.json CHANGED Viewed

@@ -16,17 +16,17 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "k_proj",
     "down_proj",
     "v_proj",
-    "q_proj",
     "o_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 32,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
     "v_proj",
     "o_proj",
+    "gate_proj",
+    "up_proj",
+    "q_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:298438fccacd33cb626710a24dc5e2d0887f8fb6b6b853e02ae81e5ae7f46b8c
-size 1291899160

 version https://git-lfs.github.com/spec/v1
+oid sha256:9656e23481392f126b5fa7d62e4de2c9a5df0745c9e9698b242a68d041c1bb78
+size 323014168

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38a5ace2077e3640cdc5439fd4fdaab140ba6ffcf99e8fe819903a6bc4511a15
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa6f30cd6047c953e7f7a17e09f5ba2377c59718c3e7c643095a6ffce6cd5041
 size 5432