ItsMaxNorm committed on
Commit
d504ab6
·
verified ·
1 Parent(s): 134e73e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full list.
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. Abalation/No_decompo/checkpoints/0000400/knowledge_injection_config.bin +3 -0
  3. Abalation/No_decompo/checkpoints/0000400/knowledge_injection_config.json +773 -0
  4. Abalation/No_decompo/checkpoints/0000400/knowledge_injection_state.bin +3 -0
  5. Abalation/No_decompo/log.txt +0 -0
  6. Abalation/No_decompo/tensorboard_log/1753892021.956854/events.out.tfevents.1753892021.mbzuaiser-desktop.531264.1 +3 -0
  7. Abalation/No_decompo/tensorboard_log/1753892021.9594307/hparams.yml +30 -0
  8. Abalation/No_decompo/tensorboard_log/1753892508.3819938/events.out.tfevents.1753892508.mbzuaiser-desktop.538312.1 +3 -0
  9. Abalation/No_decompo/tensorboard_log/1753892508.3843977/hparams.yml +30 -0
  10. Abalation/No_decompo/tensorboard_log/1753892773.181688/events.out.tfevents.1753892773.mbzuaiser-desktop.541446.1 +3 -0
  11. Abalation/No_decompo/tensorboard_log/1753892773.1841798/hparams.yml +30 -0
  12. Abalation/No_decompo/tensorboard_log/1753893265.4539661/events.out.tfevents.1753893265.mbzuaiser-desktop.551681.1 +3 -0
  13. Abalation/No_decompo/tensorboard_log/1753893265.455913/hparams.yml +30 -0
  14. Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753892021.mbzuaiser-desktop.531264.0 +3 -0
  15. Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753892508.mbzuaiser-desktop.538312.0 +3 -0
  16. Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753892773.mbzuaiser-desktop.541446.0 +3 -0
  17. Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753893265.mbzuaiser-desktop.551681.0 +3 -0
  18. Abalation/No_decompo/train_args.json +1 -0
  19. Abalation/No_decompoMinus/checkpoints/0000100/knowledge_injection_config.bin +3 -0
  20. Abalation/No_decompoMinus/checkpoints/0000100/knowledge_injection_config.json +772 -0
  21. Abalation/No_decompoMinus/checkpoints/0000100/knowledge_injection_state.bin +3 -0
  22. Abalation/No_decompoMinus/checkpoints/0000200/knowledge_injection_config.bin +3 -0
  23. Abalation/No_decompoMinus/checkpoints/0000200/knowledge_injection_config.json +772 -0
  24. Abalation/No_decompoMinus/checkpoints/0000200/knowledge_injection_state.bin +3 -0
  25. Abalation/No_decompoMinus/checkpoints/0000300/knowledge_injection_config.bin +3 -0
  26. Abalation/No_decompoMinus/checkpoints/0000300/knowledge_injection_config.json +772 -0
  27. Abalation/No_decompoMinus/checkpoints/0000300/knowledge_injection_state.bin +3 -0
  28. Abalation/No_decompoMinus/checkpoints/0000400/knowledge_injection_config.bin +3 -0
  29. Abalation/No_decompoMinus/checkpoints/0000400/knowledge_injection_config.json +772 -0
  30. Abalation/No_decompoMinus/checkpoints/0000400/knowledge_injection_state.bin +3 -0
  31. Abalation/No_decompoMinus/log.txt +0 -0
  32. Abalation/No_decompoMinus/tensorboard_log/1753904604.5828323/events.out.tfevents.1753904604.mbzuaiser-desktop.915432.1 +3 -0
  33. Abalation/No_decompoMinus/tensorboard_log/1753904604.5853636/hparams.yml +30 -0
  34. Abalation/No_decompoMinus/tensorboard_log/events.out.tfevents.1753904604.mbzuaiser-desktop.915432.0 +3 -0
  35. Abalation/No_decompoMinus/train_args.json +1 -0
  36. Abalation/PP^T/checkpoints/0000100/knowledge_injection_config.bin +3 -0
  37. Abalation/PP^T/checkpoints/0000100/knowledge_injection_config.json +772 -0
  38. Abalation/PP^T/checkpoints/0000100/knowledge_injection_state.bin +3 -0
  39. Abalation/PP^T/checkpoints/0000200/knowledge_injection_config.bin +3 -0
  40. Abalation/PP^T/checkpoints/0000200/knowledge_injection_config.json +772 -0
  41. Abalation/PP^T/checkpoints/0000200/knowledge_injection_state.bin +3 -0
  42. Abalation/PP^T/checkpoints/0000300/knowledge_injection_config.bin +3 -0
  43. Abalation/PP^T/checkpoints/0000300/knowledge_injection_config.json +772 -0
  44. Abalation/PP^T/checkpoints/0000300/knowledge_injection_state.bin +3 -0
  45. Abalation/PP^T/checkpoints/0000400/knowledge_injection_config.bin +3 -0
  46. Abalation/PP^T/checkpoints/0000400/knowledge_injection_config.json +772 -0
  47. Abalation/PP^T/checkpoints/0000400/knowledge_injection_state.bin +3 -0
  48. Abalation/PP^T/log.txt +0 -0
  49. Abalation/PP^T/tensorboard_log/1753912670.641676/events.out.tfevents.1753912670.mbzuaiser-desktop.1014837.1 +3 -0
  50. Abalation/PP^T/tensorboard_log/1753912670.643952/hparams.yml +30 -0
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ INJpara/SKSdog/QLr_R4_PP^T/text2img_cond/output_0.png filter=lfs diff=lfs merge=lfs -text
37
+ INJpara/SKSdog/QLr_R4_PP^T/text2img_cond/output_1.png filter=lfs diff=lfs merge=lfs -text
38
+ INJpara/SKSdog/QLr_R4_PP^T/text2img_cond/output_2.png filter=lfs diff=lfs merge=lfs -text
39
+ INJpara/SKSdog/QLr_R4_PP^T/text2img_cond/output_3.png filter=lfs diff=lfs merge=lfs -text
40
+ INJpara/SKSdog/QLr_R4_PP^T/text2img_cond/output_4.png filter=lfs diff=lfs merge=lfs -text
41
+ INJpara/SKSdog/QLr_R4_PP^T/text2img_cond/output_5.png filter=lfs diff=lfs merge=lfs -text
Abalation/No_decompo/checkpoints/0000400/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c4e75277eafe700012dd6ac4e5bf32b86daeded16838dee31ea9526d5b26e7
3
+ size 7916
Abalation/No_decompo/checkpoints/0000400/knowledge_injection_config.json ADDED
@@ -0,0 +1,773 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8.0,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8.0,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8.0,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8.0,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8.0,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8.0,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8.0,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8.0,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8.0,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8.0,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8.0,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8.0,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8.0,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8.0,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8.0,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8.0,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8.0,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8.0,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8.0,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8.0,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8.0,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8.0,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8.0,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8.0,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8.0,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8.0,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8.0,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8.0,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8.0,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8.0,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8.0,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8.0,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8.0,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8.0,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8.0,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8.0,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8.0,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8.0,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8.0,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8.0,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8.0,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8.0,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8.0,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8.0,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8.0,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8.0,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8.0,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8.0,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8.0,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8.0,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8.0,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8.0,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8.0,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8.0,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8.0,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8.0,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8.0,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8.0,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8.0,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8.0,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8.0,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8.0,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8.0,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8.0,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ },
772
+ "config": {}
773
+ }
Abalation/No_decompo/checkpoints/0000400/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc04d39a149ec2b3983dfbf1f939fa8934b539bf8eec2d03c60424ad868027f9
3
+ size 15792890
Abalation/No_decompo/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
Abalation/No_decompo/tensorboard_log/1753892021.956854/events.out.tfevents.1753892021.mbzuaiser-desktop.531264.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2718501177dfe0087483e675c74d51408710c32ac6d4e8d7d9c702d42406ce7e
3
+ size 1774
Abalation/No_decompo/tensorboard_log/1753892021.9594307/hparams.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_weight_decay: 0.0
2
+ batch_size_per_device: 2
3
+ ckpt_every: 10
4
+ condition_dropout_prob: 0.01
5
+ decomposition_method: None
6
+ epochs: 200
7
+ gradient_accumulation_steps: 1
8
+ image_path: ./toy_data/images
9
+ json_file: ./toy_data/toy_subject_data.jsonl
10
+ keep_raw_resolution: true
11
+ log_every: 1
12
+ lora_rank: 8
13
+ lr: 0.001
14
+ lr_scheduler: constant
15
+ lr_warmup_steps: 1000
16
+ max_grad_norm: 1.0
17
+ max_image_size: 1024
18
+ max_input_length_limit: 18000
19
+ mixed_precision: bf16
20
+ model_name_or_path: Shitao/OmniGen-v1
21
+ num_workers: 4
22
+ report_to: tensorboard
23
+ results_dir: /nvme-data/Komal/documents/results/Abalation/No_decompo
24
+ use_ema: false
25
+ use_injection: true
26
+ use_lora: false
27
+ use_lorapara: false
28
+ use_para: false
29
+ use_svd: false
30
+ vae_path: null
Abalation/No_decompo/tensorboard_log/1753892508.3819938/events.out.tfevents.1753892508.mbzuaiser-desktop.538312.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea8a865f64d8760195122243d0e349e81d5dad5586a0bc9a376a09c1770fdc3a
3
+ size 1774
Abalation/No_decompo/tensorboard_log/1753892508.3843977/hparams.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_weight_decay: 0.0
2
+ batch_size_per_device: 2
3
+ ckpt_every: 10
4
+ condition_dropout_prob: 0.01
5
+ decomposition_method: None
6
+ epochs: 200
7
+ gradient_accumulation_steps: 1
8
+ image_path: ./toy_data/images
9
+ json_file: ./toy_data/toy_subject_data.jsonl
10
+ keep_raw_resolution: true
11
+ log_every: 1
12
+ lora_rank: 8
13
+ lr: 0.001
14
+ lr_scheduler: constant
15
+ lr_warmup_steps: 1000
16
+ max_grad_norm: 1.0
17
+ max_image_size: 1024
18
+ max_input_length_limit: 18000
19
+ mixed_precision: bf16
20
+ model_name_or_path: Shitao/OmniGen-v1
21
+ num_workers: 4
22
+ report_to: tensorboard
23
+ results_dir: /nvme-data/Komal/documents/results/Abalation/No_decompo
24
+ use_ema: false
25
+ use_injection: true
26
+ use_lora: false
27
+ use_lorapara: false
28
+ use_para: false
29
+ use_svd: false
30
+ vae_path: null
Abalation/No_decompo/tensorboard_log/1753892773.181688/events.out.tfevents.1753892773.mbzuaiser-desktop.541446.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3b30d1ccc2e6ada1a232087851526f238a6eafdd9f5eb1882e8a441a742de68
3
+ size 1774
Abalation/No_decompo/tensorboard_log/1753892773.1841798/hparams.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_weight_decay: 0.0
2
+ batch_size_per_device: 2
3
+ ckpt_every: 10
4
+ condition_dropout_prob: 0.01
5
+ decomposition_method: None
6
+ epochs: 200
7
+ gradient_accumulation_steps: 1
8
+ image_path: ./toy_data/images
9
+ json_file: ./toy_data/toy_subject_data.jsonl
10
+ keep_raw_resolution: true
11
+ log_every: 1
12
+ lora_rank: 8
13
+ lr: 0.001
14
+ lr_scheduler: constant
15
+ lr_warmup_steps: 1000
16
+ max_grad_norm: 1.0
17
+ max_image_size: 1024
18
+ max_input_length_limit: 18000
19
+ mixed_precision: bf16
20
+ model_name_or_path: Shitao/OmniGen-v1
21
+ num_workers: 4
22
+ report_to: tensorboard
23
+ results_dir: /nvme-data/Komal/documents/results/Abalation/No_decompo
24
+ use_ema: false
25
+ use_injection: true
26
+ use_lora: false
27
+ use_lorapara: false
28
+ use_para: false
29
+ use_svd: false
30
+ vae_path: null
Abalation/No_decompo/tensorboard_log/1753893265.4539661/events.out.tfevents.1753893265.mbzuaiser-desktop.551681.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d684f762e97ccbb980cfe67e0528c104f443d20e55536fc02df2e4cdcb9bf375
3
+ size 1774
Abalation/No_decompo/tensorboard_log/1753893265.455913/hparams.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_weight_decay: 0.0
2
+ batch_size_per_device: 2
3
+ ckpt_every: 10
4
+ condition_dropout_prob: 0.01
5
+ decomposition_method: None
6
+ epochs: 200
7
+ gradient_accumulation_steps: 1
8
+ image_path: ./toy_data/images
9
+ json_file: ./toy_data/toy_subject_data.jsonl
10
+ keep_raw_resolution: true
11
+ log_every: 1
12
+ lora_rank: 8
13
+ lr: 0.001
14
+ lr_scheduler: constant
15
+ lr_warmup_steps: 1000
16
+ max_grad_norm: 1.0
17
+ max_image_size: 1024
18
+ max_input_length_limit: 18000
19
+ mixed_precision: bf16
20
+ model_name_or_path: Shitao/OmniGen-v1
21
+ num_workers: 4
22
+ report_to: tensorboard
23
+ results_dir: /nvme-data/Komal/documents/results/Abalation/No_decompo
24
+ use_ema: false
25
+ use_injection: true
26
+ use_lora: false
27
+ use_lorapara: false
28
+ use_para: false
29
+ use_svd: false
30
+ vae_path: null
Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753892021.mbzuaiser-desktop.531264.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5c760c59c605598d412f645e45b74e2982224f200a6454d869de9abf0b616b
3
+ size 751
Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753892508.mbzuaiser-desktop.538312.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed4931a307fd50657f85c255f4eed6ffe39c311419fc9a0c7758e0d5b32b8fb
3
+ size 88
Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753892773.mbzuaiser-desktop.541446.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe6c4c0abfc4ae6c7eb918b9c5da92b2707bc6fd100cce0c666f3c6f447c181
3
+ size 88
Abalation/No_decompo/tensorboard_log/events.out.tfevents.1753893265.mbzuaiser-desktop.551681.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3e78a27a7f85f4f47a13f3cfb641cfe599573ff605a25dcf259f9576fff8dd
3
+ size 20761
Abalation/No_decompo/train_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"results_dir": "/nvme-data/Komal/documents/results/Abalation/No_decompo", "model_name_or_path": "Shitao/OmniGen-v1", "json_file": "./toy_data/toy_subject_data.jsonl", "image_path": "./toy_data/images", "epochs": 200, "batch_size_per_device": 2, "vae_path": null, "num_workers": 4, "log_every": 1, "ckpt_every": 10, "max_grad_norm": 1.0, "lr": 0.001, "max_input_length_limit": 18000, "condition_dropout_prob": 0.01, "adam_weight_decay": 0.0, "keep_raw_resolution": true, "max_image_size": 1024, "use_lora": false, "use_para": false, "use_injection": true, "use_lorapara": false, "lora_rank": 8, "use_svd": false, "use_ema": false, "lr_scheduler": "constant", "decomposition_method": "None", "lr_warmup_steps": 1000, "report_to": "tensorboard", "mixed_precision": "bf16", "gradient_accumulation_steps": 1}
Abalation/No_decompoMinus/checkpoints/0000100/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/No_decompoMinus/checkpoints/0000100/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/No_decompoMinus/checkpoints/0000100/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6709536e44e9d288ae2d85046f022a7d7f362bfa725c6b78aa6b2d30878897ad
3
+ size 9479738
Abalation/No_decompoMinus/checkpoints/0000200/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/No_decompoMinus/checkpoints/0000200/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/No_decompoMinus/checkpoints/0000200/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:764dc153ef44a1d3420cbda604557aeb5d13397e8ebc8f56a6027875cad44b18
3
+ size 9479738
Abalation/No_decompoMinus/checkpoints/0000300/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/No_decompoMinus/checkpoints/0000300/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/No_decompoMinus/checkpoints/0000300/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c07fe28114391d809c31b1a4c6c05598c0d486f9922b0d6539278709ea464c
3
+ size 9479738
Abalation/No_decompoMinus/checkpoints/0000400/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/No_decompoMinus/checkpoints/0000400/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/No_decompoMinus/checkpoints/0000400/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8537bd0c0b1ab7ab24a16c393d0875b1831ed2158aa9c0d2c7f97eb8375dec39
3
+ size 9479738
Abalation/No_decompoMinus/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
Abalation/No_decompoMinus/tensorboard_log/1753904604.5828323/events.out.tfevents.1753904604.mbzuaiser-desktop.915432.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:836efb590f1e0f33c9b311d3e50bd6d0108ec6dd64b6d1855d6e451ea7764f16
3
+ size 1779
Abalation/No_decompoMinus/tensorboard_log/1753904604.5853636/hparams.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_weight_decay: 0.0
2
+ batch_size_per_device: 2
3
+ ckpt_every: 100
4
+ condition_dropout_prob: 0.01
5
+ decomposition_method: None
6
+ epochs: 200
7
+ gradient_accumulation_steps: 1
8
+ image_path: ./toy_data/images
9
+ json_file: ./toy_data/toy_subject_data.jsonl
10
+ keep_raw_resolution: true
11
+ log_every: 1
12
+ lora_rank: 8
13
+ lr: 0.001
14
+ lr_scheduler: constant
15
+ lr_warmup_steps: 1000
16
+ max_grad_norm: 1.0
17
+ max_image_size: 1024
18
+ max_input_length_limit: 18000
19
+ mixed_precision: bf16
20
+ model_name_or_path: Shitao/OmniGen-v1
21
+ num_workers: 4
22
+ report_to: tensorboard
23
+ results_dir: /nvme-data/Komal/documents/results/Abalation/No_decompoMinus
24
+ use_ema: false
25
+ use_injection: true
26
+ use_lora: false
27
+ use_lorapara: false
28
+ use_para: false
29
+ use_svd: false
30
+ vae_path: null
Abalation/No_decompoMinus/tensorboard_log/events.out.tfevents.1753904604.mbzuaiser-desktop.915432.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8954aacfd5983a6767310e9d4e69b2d44987659dd41d822915eccd7431ae31e5
3
+ size 20761
Abalation/No_decompoMinus/train_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"results_dir": "/nvme-data/Komal/documents/results/Abalation/No_decompoMinus", "model_name_or_path": "Shitao/OmniGen-v1", "json_file": "./toy_data/toy_subject_data.jsonl", "image_path": "./toy_data/images", "epochs": 200, "batch_size_per_device": 2, "vae_path": null, "num_workers": 4, "log_every": 1, "ckpt_every": 100, "max_grad_norm": 1.0, "lr": 0.001, "max_input_length_limit": 18000, "condition_dropout_prob": 0.01, "adam_weight_decay": 0.0, "keep_raw_resolution": true, "max_image_size": 1024, "use_lora": false, "use_para": false, "use_injection": true, "use_lorapara": false, "lora_rank": 8, "use_svd": false, "use_ema": false, "lr_scheduler": "constant", "decomposition_method": "None", "lr_warmup_steps": 1000, "report_to": "tensorboard", "mixed_precision": "bf16", "gradient_accumulation_steps": 1}
Abalation/PP^T/checkpoints/0000100/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/PP^T/checkpoints/0000100/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/PP^T/checkpoints/0000100/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c9b67035cba3779cb9610cf4688e877cf17ff112b0fd1aae2c81d8dcedc2fd
3
+ size 9479738
Abalation/PP^T/checkpoints/0000200/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/PP^T/checkpoints/0000200/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/PP^T/checkpoints/0000200/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b83d9c79577752b337fb2113e98c8e7d6463d1cc26e98ffe1a273db497ecc3df
3
+ size 9479738
Abalation/PP^T/checkpoints/0000300/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/PP^T/checkpoints/0000300/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/PP^T/checkpoints/0000300/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb911820821cbe8d7076e92ea7ea4a8949b4da1d8292ae65693a4c4cfd57654
3
+ size 9479738
Abalation/PP^T/checkpoints/0000400/knowledge_injection_config.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee7404bc354d58873063daebf4dd70ed0ca65b3688c29b078d80a04f5d7e182f
3
+ size 7404
Abalation/PP^T/checkpoints/0000400/knowledge_injection_config.json ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "layer_configs": {
3
+ "llm.layers.0.self_attn.o_proj": {
4
+ "r": 8,
5
+ "injection_method": "residual_projection",
6
+ "lora_alpha": 8,
7
+ "lora_dropout": 0.0,
8
+ "in_features": 3072,
9
+ "out_features": 3072,
10
+ "init_scale": 1.0,
11
+ "use_gating": false,
12
+ "decomposition_method": "None",
13
+ "compute_svd_each_forward": false
14
+ },
15
+ "llm.layers.0.self_attn.qkv_proj": {
16
+ "r": 8,
17
+ "injection_method": "residual_projection",
18
+ "lora_alpha": 8,
19
+ "lora_dropout": 0.0,
20
+ "in_features": 3072,
21
+ "out_features": 9216,
22
+ "init_scale": 1.0,
23
+ "use_gating": false,
24
+ "decomposition_method": "None",
25
+ "compute_svd_each_forward": false
26
+ },
27
+ "llm.layers.1.self_attn.o_proj": {
28
+ "r": 8,
29
+ "injection_method": "residual_projection",
30
+ "lora_alpha": 8,
31
+ "lora_dropout": 0.0,
32
+ "in_features": 3072,
33
+ "out_features": 3072,
34
+ "init_scale": 1.0,
35
+ "use_gating": false,
36
+ "decomposition_method": "None",
37
+ "compute_svd_each_forward": false
38
+ },
39
+ "llm.layers.1.self_attn.qkv_proj": {
40
+ "r": 8,
41
+ "injection_method": "residual_projection",
42
+ "lora_alpha": 8,
43
+ "lora_dropout": 0.0,
44
+ "in_features": 3072,
45
+ "out_features": 9216,
46
+ "init_scale": 1.0,
47
+ "use_gating": false,
48
+ "decomposition_method": "None",
49
+ "compute_svd_each_forward": false
50
+ },
51
+ "llm.layers.2.self_attn.o_proj": {
52
+ "r": 8,
53
+ "injection_method": "residual_projection",
54
+ "lora_alpha": 8,
55
+ "lora_dropout": 0.0,
56
+ "in_features": 3072,
57
+ "out_features": 3072,
58
+ "init_scale": 1.0,
59
+ "use_gating": false,
60
+ "decomposition_method": "None",
61
+ "compute_svd_each_forward": false
62
+ },
63
+ "llm.layers.2.self_attn.qkv_proj": {
64
+ "r": 8,
65
+ "injection_method": "residual_projection",
66
+ "lora_alpha": 8,
67
+ "lora_dropout": 0.0,
68
+ "in_features": 3072,
69
+ "out_features": 9216,
70
+ "init_scale": 1.0,
71
+ "use_gating": false,
72
+ "decomposition_method": "None",
73
+ "compute_svd_each_forward": false
74
+ },
75
+ "llm.layers.3.self_attn.o_proj": {
76
+ "r": 8,
77
+ "injection_method": "residual_projection",
78
+ "lora_alpha": 8,
79
+ "lora_dropout": 0.0,
80
+ "in_features": 3072,
81
+ "out_features": 3072,
82
+ "init_scale": 1.0,
83
+ "use_gating": false,
84
+ "decomposition_method": "None",
85
+ "compute_svd_each_forward": false
86
+ },
87
+ "llm.layers.3.self_attn.qkv_proj": {
88
+ "r": 8,
89
+ "injection_method": "residual_projection",
90
+ "lora_alpha": 8,
91
+ "lora_dropout": 0.0,
92
+ "in_features": 3072,
93
+ "out_features": 9216,
94
+ "init_scale": 1.0,
95
+ "use_gating": false,
96
+ "decomposition_method": "None",
97
+ "compute_svd_each_forward": false
98
+ },
99
+ "llm.layers.4.self_attn.o_proj": {
100
+ "r": 8,
101
+ "injection_method": "residual_projection",
102
+ "lora_alpha": 8,
103
+ "lora_dropout": 0.0,
104
+ "in_features": 3072,
105
+ "out_features": 3072,
106
+ "init_scale": 1.0,
107
+ "use_gating": false,
108
+ "decomposition_method": "None",
109
+ "compute_svd_each_forward": false
110
+ },
111
+ "llm.layers.4.self_attn.qkv_proj": {
112
+ "r": 8,
113
+ "injection_method": "residual_projection",
114
+ "lora_alpha": 8,
115
+ "lora_dropout": 0.0,
116
+ "in_features": 3072,
117
+ "out_features": 9216,
118
+ "init_scale": 1.0,
119
+ "use_gating": false,
120
+ "decomposition_method": "None",
121
+ "compute_svd_each_forward": false
122
+ },
123
+ "llm.layers.5.self_attn.o_proj": {
124
+ "r": 8,
125
+ "injection_method": "residual_projection",
126
+ "lora_alpha": 8,
127
+ "lora_dropout": 0.0,
128
+ "in_features": 3072,
129
+ "out_features": 3072,
130
+ "init_scale": 1.0,
131
+ "use_gating": false,
132
+ "decomposition_method": "None",
133
+ "compute_svd_each_forward": false
134
+ },
135
+ "llm.layers.5.self_attn.qkv_proj": {
136
+ "r": 8,
137
+ "injection_method": "residual_projection",
138
+ "lora_alpha": 8,
139
+ "lora_dropout": 0.0,
140
+ "in_features": 3072,
141
+ "out_features": 9216,
142
+ "init_scale": 1.0,
143
+ "use_gating": false,
144
+ "decomposition_method": "None",
145
+ "compute_svd_each_forward": false
146
+ },
147
+ "llm.layers.6.self_attn.o_proj": {
148
+ "r": 8,
149
+ "injection_method": "residual_projection",
150
+ "lora_alpha": 8,
151
+ "lora_dropout": 0.0,
152
+ "in_features": 3072,
153
+ "out_features": 3072,
154
+ "init_scale": 1.0,
155
+ "use_gating": false,
156
+ "decomposition_method": "None",
157
+ "compute_svd_each_forward": false
158
+ },
159
+ "llm.layers.6.self_attn.qkv_proj": {
160
+ "r": 8,
161
+ "injection_method": "residual_projection",
162
+ "lora_alpha": 8,
163
+ "lora_dropout": 0.0,
164
+ "in_features": 3072,
165
+ "out_features": 9216,
166
+ "init_scale": 1.0,
167
+ "use_gating": false,
168
+ "decomposition_method": "None",
169
+ "compute_svd_each_forward": false
170
+ },
171
+ "llm.layers.7.self_attn.o_proj": {
172
+ "r": 8,
173
+ "injection_method": "residual_projection",
174
+ "lora_alpha": 8,
175
+ "lora_dropout": 0.0,
176
+ "in_features": 3072,
177
+ "out_features": 3072,
178
+ "init_scale": 1.0,
179
+ "use_gating": false,
180
+ "decomposition_method": "None",
181
+ "compute_svd_each_forward": false
182
+ },
183
+ "llm.layers.7.self_attn.qkv_proj": {
184
+ "r": 8,
185
+ "injection_method": "residual_projection",
186
+ "lora_alpha": 8,
187
+ "lora_dropout": 0.0,
188
+ "in_features": 3072,
189
+ "out_features": 9216,
190
+ "init_scale": 1.0,
191
+ "use_gating": false,
192
+ "decomposition_method": "None",
193
+ "compute_svd_each_forward": false
194
+ },
195
+ "llm.layers.8.self_attn.o_proj": {
196
+ "r": 8,
197
+ "injection_method": "residual_projection",
198
+ "lora_alpha": 8,
199
+ "lora_dropout": 0.0,
200
+ "in_features": 3072,
201
+ "out_features": 3072,
202
+ "init_scale": 1.0,
203
+ "use_gating": false,
204
+ "decomposition_method": "None",
205
+ "compute_svd_each_forward": false
206
+ },
207
+ "llm.layers.8.self_attn.qkv_proj": {
208
+ "r": 8,
209
+ "injection_method": "residual_projection",
210
+ "lora_alpha": 8,
211
+ "lora_dropout": 0.0,
212
+ "in_features": 3072,
213
+ "out_features": 9216,
214
+ "init_scale": 1.0,
215
+ "use_gating": false,
216
+ "decomposition_method": "None",
217
+ "compute_svd_each_forward": false
218
+ },
219
+ "llm.layers.9.self_attn.o_proj": {
220
+ "r": 8,
221
+ "injection_method": "residual_projection",
222
+ "lora_alpha": 8,
223
+ "lora_dropout": 0.0,
224
+ "in_features": 3072,
225
+ "out_features": 3072,
226
+ "init_scale": 1.0,
227
+ "use_gating": false,
228
+ "decomposition_method": "None",
229
+ "compute_svd_each_forward": false
230
+ },
231
+ "llm.layers.9.self_attn.qkv_proj": {
232
+ "r": 8,
233
+ "injection_method": "residual_projection",
234
+ "lora_alpha": 8,
235
+ "lora_dropout": 0.0,
236
+ "in_features": 3072,
237
+ "out_features": 9216,
238
+ "init_scale": 1.0,
239
+ "use_gating": false,
240
+ "decomposition_method": "None",
241
+ "compute_svd_each_forward": false
242
+ },
243
+ "llm.layers.10.self_attn.o_proj": {
244
+ "r": 8,
245
+ "injection_method": "residual_projection",
246
+ "lora_alpha": 8,
247
+ "lora_dropout": 0.0,
248
+ "in_features": 3072,
249
+ "out_features": 3072,
250
+ "init_scale": 1.0,
251
+ "use_gating": false,
252
+ "decomposition_method": "None",
253
+ "compute_svd_each_forward": false
254
+ },
255
+ "llm.layers.10.self_attn.qkv_proj": {
256
+ "r": 8,
257
+ "injection_method": "residual_projection",
258
+ "lora_alpha": 8,
259
+ "lora_dropout": 0.0,
260
+ "in_features": 3072,
261
+ "out_features": 9216,
262
+ "init_scale": 1.0,
263
+ "use_gating": false,
264
+ "decomposition_method": "None",
265
+ "compute_svd_each_forward": false
266
+ },
267
+ "llm.layers.11.self_attn.o_proj": {
268
+ "r": 8,
269
+ "injection_method": "residual_projection",
270
+ "lora_alpha": 8,
271
+ "lora_dropout": 0.0,
272
+ "in_features": 3072,
273
+ "out_features": 3072,
274
+ "init_scale": 1.0,
275
+ "use_gating": false,
276
+ "decomposition_method": "None",
277
+ "compute_svd_each_forward": false
278
+ },
279
+ "llm.layers.11.self_attn.qkv_proj": {
280
+ "r": 8,
281
+ "injection_method": "residual_projection",
282
+ "lora_alpha": 8,
283
+ "lora_dropout": 0.0,
284
+ "in_features": 3072,
285
+ "out_features": 9216,
286
+ "init_scale": 1.0,
287
+ "use_gating": false,
288
+ "decomposition_method": "None",
289
+ "compute_svd_each_forward": false
290
+ },
291
+ "llm.layers.12.self_attn.o_proj": {
292
+ "r": 8,
293
+ "injection_method": "residual_projection",
294
+ "lora_alpha": 8,
295
+ "lora_dropout": 0.0,
296
+ "in_features": 3072,
297
+ "out_features": 3072,
298
+ "init_scale": 1.0,
299
+ "use_gating": false,
300
+ "decomposition_method": "None",
301
+ "compute_svd_each_forward": false
302
+ },
303
+ "llm.layers.12.self_attn.qkv_proj": {
304
+ "r": 8,
305
+ "injection_method": "residual_projection",
306
+ "lora_alpha": 8,
307
+ "lora_dropout": 0.0,
308
+ "in_features": 3072,
309
+ "out_features": 9216,
310
+ "init_scale": 1.0,
311
+ "use_gating": false,
312
+ "decomposition_method": "None",
313
+ "compute_svd_each_forward": false
314
+ },
315
+ "llm.layers.13.self_attn.o_proj": {
316
+ "r": 8,
317
+ "injection_method": "residual_projection",
318
+ "lora_alpha": 8,
319
+ "lora_dropout": 0.0,
320
+ "in_features": 3072,
321
+ "out_features": 3072,
322
+ "init_scale": 1.0,
323
+ "use_gating": false,
324
+ "decomposition_method": "None",
325
+ "compute_svd_each_forward": false
326
+ },
327
+ "llm.layers.13.self_attn.qkv_proj": {
328
+ "r": 8,
329
+ "injection_method": "residual_projection",
330
+ "lora_alpha": 8,
331
+ "lora_dropout": 0.0,
332
+ "in_features": 3072,
333
+ "out_features": 9216,
334
+ "init_scale": 1.0,
335
+ "use_gating": false,
336
+ "decomposition_method": "None",
337
+ "compute_svd_each_forward": false
338
+ },
339
+ "llm.layers.14.self_attn.o_proj": {
340
+ "r": 8,
341
+ "injection_method": "residual_projection",
342
+ "lora_alpha": 8,
343
+ "lora_dropout": 0.0,
344
+ "in_features": 3072,
345
+ "out_features": 3072,
346
+ "init_scale": 1.0,
347
+ "use_gating": false,
348
+ "decomposition_method": "None",
349
+ "compute_svd_each_forward": false
350
+ },
351
+ "llm.layers.14.self_attn.qkv_proj": {
352
+ "r": 8,
353
+ "injection_method": "residual_projection",
354
+ "lora_alpha": 8,
355
+ "lora_dropout": 0.0,
356
+ "in_features": 3072,
357
+ "out_features": 9216,
358
+ "init_scale": 1.0,
359
+ "use_gating": false,
360
+ "decomposition_method": "None",
361
+ "compute_svd_each_forward": false
362
+ },
363
+ "llm.layers.15.self_attn.o_proj": {
364
+ "r": 8,
365
+ "injection_method": "residual_projection",
366
+ "lora_alpha": 8,
367
+ "lora_dropout": 0.0,
368
+ "in_features": 3072,
369
+ "out_features": 3072,
370
+ "init_scale": 1.0,
371
+ "use_gating": false,
372
+ "decomposition_method": "None",
373
+ "compute_svd_each_forward": false
374
+ },
375
+ "llm.layers.15.self_attn.qkv_proj": {
376
+ "r": 8,
377
+ "injection_method": "residual_projection",
378
+ "lora_alpha": 8,
379
+ "lora_dropout": 0.0,
380
+ "in_features": 3072,
381
+ "out_features": 9216,
382
+ "init_scale": 1.0,
383
+ "use_gating": false,
384
+ "decomposition_method": "None",
385
+ "compute_svd_each_forward": false
386
+ },
387
+ "llm.layers.16.self_attn.o_proj": {
388
+ "r": 8,
389
+ "injection_method": "residual_projection",
390
+ "lora_alpha": 8,
391
+ "lora_dropout": 0.0,
392
+ "in_features": 3072,
393
+ "out_features": 3072,
394
+ "init_scale": 1.0,
395
+ "use_gating": false,
396
+ "decomposition_method": "None",
397
+ "compute_svd_each_forward": false
398
+ },
399
+ "llm.layers.16.self_attn.qkv_proj": {
400
+ "r": 8,
401
+ "injection_method": "residual_projection",
402
+ "lora_alpha": 8,
403
+ "lora_dropout": 0.0,
404
+ "in_features": 3072,
405
+ "out_features": 9216,
406
+ "init_scale": 1.0,
407
+ "use_gating": false,
408
+ "decomposition_method": "None",
409
+ "compute_svd_each_forward": false
410
+ },
411
+ "llm.layers.17.self_attn.o_proj": {
412
+ "r": 8,
413
+ "injection_method": "residual_projection",
414
+ "lora_alpha": 8,
415
+ "lora_dropout": 0.0,
416
+ "in_features": 3072,
417
+ "out_features": 3072,
418
+ "init_scale": 1.0,
419
+ "use_gating": false,
420
+ "decomposition_method": "None",
421
+ "compute_svd_each_forward": false
422
+ },
423
+ "llm.layers.17.self_attn.qkv_proj": {
424
+ "r": 8,
425
+ "injection_method": "residual_projection",
426
+ "lora_alpha": 8,
427
+ "lora_dropout": 0.0,
428
+ "in_features": 3072,
429
+ "out_features": 9216,
430
+ "init_scale": 1.0,
431
+ "use_gating": false,
432
+ "decomposition_method": "None",
433
+ "compute_svd_each_forward": false
434
+ },
435
+ "llm.layers.18.self_attn.o_proj": {
436
+ "r": 8,
437
+ "injection_method": "residual_projection",
438
+ "lora_alpha": 8,
439
+ "lora_dropout": 0.0,
440
+ "in_features": 3072,
441
+ "out_features": 3072,
442
+ "init_scale": 1.0,
443
+ "use_gating": false,
444
+ "decomposition_method": "None",
445
+ "compute_svd_each_forward": false
446
+ },
447
+ "llm.layers.18.self_attn.qkv_proj": {
448
+ "r": 8,
449
+ "injection_method": "residual_projection",
450
+ "lora_alpha": 8,
451
+ "lora_dropout": 0.0,
452
+ "in_features": 3072,
453
+ "out_features": 9216,
454
+ "init_scale": 1.0,
455
+ "use_gating": false,
456
+ "decomposition_method": "None",
457
+ "compute_svd_each_forward": false
458
+ },
459
+ "llm.layers.19.self_attn.o_proj": {
460
+ "r": 8,
461
+ "injection_method": "residual_projection",
462
+ "lora_alpha": 8,
463
+ "lora_dropout": 0.0,
464
+ "in_features": 3072,
465
+ "out_features": 3072,
466
+ "init_scale": 1.0,
467
+ "use_gating": false,
468
+ "decomposition_method": "None",
469
+ "compute_svd_each_forward": false
470
+ },
471
+ "llm.layers.19.self_attn.qkv_proj": {
472
+ "r": 8,
473
+ "injection_method": "residual_projection",
474
+ "lora_alpha": 8,
475
+ "lora_dropout": 0.0,
476
+ "in_features": 3072,
477
+ "out_features": 9216,
478
+ "init_scale": 1.0,
479
+ "use_gating": false,
480
+ "decomposition_method": "None",
481
+ "compute_svd_each_forward": false
482
+ },
483
+ "llm.layers.20.self_attn.o_proj": {
484
+ "r": 8,
485
+ "injection_method": "residual_projection",
486
+ "lora_alpha": 8,
487
+ "lora_dropout": 0.0,
488
+ "in_features": 3072,
489
+ "out_features": 3072,
490
+ "init_scale": 1.0,
491
+ "use_gating": false,
492
+ "decomposition_method": "None",
493
+ "compute_svd_each_forward": false
494
+ },
495
+ "llm.layers.20.self_attn.qkv_proj": {
496
+ "r": 8,
497
+ "injection_method": "residual_projection",
498
+ "lora_alpha": 8,
499
+ "lora_dropout": 0.0,
500
+ "in_features": 3072,
501
+ "out_features": 9216,
502
+ "init_scale": 1.0,
503
+ "use_gating": false,
504
+ "decomposition_method": "None",
505
+ "compute_svd_each_forward": false
506
+ },
507
+ "llm.layers.21.self_attn.o_proj": {
508
+ "r": 8,
509
+ "injection_method": "residual_projection",
510
+ "lora_alpha": 8,
511
+ "lora_dropout": 0.0,
512
+ "in_features": 3072,
513
+ "out_features": 3072,
514
+ "init_scale": 1.0,
515
+ "use_gating": false,
516
+ "decomposition_method": "None",
517
+ "compute_svd_each_forward": false
518
+ },
519
+ "llm.layers.21.self_attn.qkv_proj": {
520
+ "r": 8,
521
+ "injection_method": "residual_projection",
522
+ "lora_alpha": 8,
523
+ "lora_dropout": 0.0,
524
+ "in_features": 3072,
525
+ "out_features": 9216,
526
+ "init_scale": 1.0,
527
+ "use_gating": false,
528
+ "decomposition_method": "None",
529
+ "compute_svd_each_forward": false
530
+ },
531
+ "llm.layers.22.self_attn.o_proj": {
532
+ "r": 8,
533
+ "injection_method": "residual_projection",
534
+ "lora_alpha": 8,
535
+ "lora_dropout": 0.0,
536
+ "in_features": 3072,
537
+ "out_features": 3072,
538
+ "init_scale": 1.0,
539
+ "use_gating": false,
540
+ "decomposition_method": "None",
541
+ "compute_svd_each_forward": false
542
+ },
543
+ "llm.layers.22.self_attn.qkv_proj": {
544
+ "r": 8,
545
+ "injection_method": "residual_projection",
546
+ "lora_alpha": 8,
547
+ "lora_dropout": 0.0,
548
+ "in_features": 3072,
549
+ "out_features": 9216,
550
+ "init_scale": 1.0,
551
+ "use_gating": false,
552
+ "decomposition_method": "None",
553
+ "compute_svd_each_forward": false
554
+ },
555
+ "llm.layers.23.self_attn.o_proj": {
556
+ "r": 8,
557
+ "injection_method": "residual_projection",
558
+ "lora_alpha": 8,
559
+ "lora_dropout": 0.0,
560
+ "in_features": 3072,
561
+ "out_features": 3072,
562
+ "init_scale": 1.0,
563
+ "use_gating": false,
564
+ "decomposition_method": "None",
565
+ "compute_svd_each_forward": false
566
+ },
567
+ "llm.layers.23.self_attn.qkv_proj": {
568
+ "r": 8,
569
+ "injection_method": "residual_projection",
570
+ "lora_alpha": 8,
571
+ "lora_dropout": 0.0,
572
+ "in_features": 3072,
573
+ "out_features": 9216,
574
+ "init_scale": 1.0,
575
+ "use_gating": false,
576
+ "decomposition_method": "None",
577
+ "compute_svd_each_forward": false
578
+ },
579
+ "llm.layers.24.self_attn.o_proj": {
580
+ "r": 8,
581
+ "injection_method": "residual_projection",
582
+ "lora_alpha": 8,
583
+ "lora_dropout": 0.0,
584
+ "in_features": 3072,
585
+ "out_features": 3072,
586
+ "init_scale": 1.0,
587
+ "use_gating": false,
588
+ "decomposition_method": "None",
589
+ "compute_svd_each_forward": false
590
+ },
591
+ "llm.layers.24.self_attn.qkv_proj": {
592
+ "r": 8,
593
+ "injection_method": "residual_projection",
594
+ "lora_alpha": 8,
595
+ "lora_dropout": 0.0,
596
+ "in_features": 3072,
597
+ "out_features": 9216,
598
+ "init_scale": 1.0,
599
+ "use_gating": false,
600
+ "decomposition_method": "None",
601
+ "compute_svd_each_forward": false
602
+ },
603
+ "llm.layers.25.self_attn.o_proj": {
604
+ "r": 8,
605
+ "injection_method": "residual_projection",
606
+ "lora_alpha": 8,
607
+ "lora_dropout": 0.0,
608
+ "in_features": 3072,
609
+ "out_features": 3072,
610
+ "init_scale": 1.0,
611
+ "use_gating": false,
612
+ "decomposition_method": "None",
613
+ "compute_svd_each_forward": false
614
+ },
615
+ "llm.layers.25.self_attn.qkv_proj": {
616
+ "r": 8,
617
+ "injection_method": "residual_projection",
618
+ "lora_alpha": 8,
619
+ "lora_dropout": 0.0,
620
+ "in_features": 3072,
621
+ "out_features": 9216,
622
+ "init_scale": 1.0,
623
+ "use_gating": false,
624
+ "decomposition_method": "None",
625
+ "compute_svd_each_forward": false
626
+ },
627
+ "llm.layers.26.self_attn.o_proj": {
628
+ "r": 8,
629
+ "injection_method": "residual_projection",
630
+ "lora_alpha": 8,
631
+ "lora_dropout": 0.0,
632
+ "in_features": 3072,
633
+ "out_features": 3072,
634
+ "init_scale": 1.0,
635
+ "use_gating": false,
636
+ "decomposition_method": "None",
637
+ "compute_svd_each_forward": false
638
+ },
639
+ "llm.layers.26.self_attn.qkv_proj": {
640
+ "r": 8,
641
+ "injection_method": "residual_projection",
642
+ "lora_alpha": 8,
643
+ "lora_dropout": 0.0,
644
+ "in_features": 3072,
645
+ "out_features": 9216,
646
+ "init_scale": 1.0,
647
+ "use_gating": false,
648
+ "decomposition_method": "None",
649
+ "compute_svd_each_forward": false
650
+ },
651
+ "llm.layers.27.self_attn.o_proj": {
652
+ "r": 8,
653
+ "injection_method": "residual_projection",
654
+ "lora_alpha": 8,
655
+ "lora_dropout": 0.0,
656
+ "in_features": 3072,
657
+ "out_features": 3072,
658
+ "init_scale": 1.0,
659
+ "use_gating": false,
660
+ "decomposition_method": "None",
661
+ "compute_svd_each_forward": false
662
+ },
663
+ "llm.layers.27.self_attn.qkv_proj": {
664
+ "r": 8,
665
+ "injection_method": "residual_projection",
666
+ "lora_alpha": 8,
667
+ "lora_dropout": 0.0,
668
+ "in_features": 3072,
669
+ "out_features": 9216,
670
+ "init_scale": 1.0,
671
+ "use_gating": false,
672
+ "decomposition_method": "None",
673
+ "compute_svd_each_forward": false
674
+ },
675
+ "llm.layers.28.self_attn.o_proj": {
676
+ "r": 8,
677
+ "injection_method": "residual_projection",
678
+ "lora_alpha": 8,
679
+ "lora_dropout": 0.0,
680
+ "in_features": 3072,
681
+ "out_features": 3072,
682
+ "init_scale": 1.0,
683
+ "use_gating": false,
684
+ "decomposition_method": "None",
685
+ "compute_svd_each_forward": false
686
+ },
687
+ "llm.layers.28.self_attn.qkv_proj": {
688
+ "r": 8,
689
+ "injection_method": "residual_projection",
690
+ "lora_alpha": 8,
691
+ "lora_dropout": 0.0,
692
+ "in_features": 3072,
693
+ "out_features": 9216,
694
+ "init_scale": 1.0,
695
+ "use_gating": false,
696
+ "decomposition_method": "None",
697
+ "compute_svd_each_forward": false
698
+ },
699
+ "llm.layers.29.self_attn.o_proj": {
700
+ "r": 8,
701
+ "injection_method": "residual_projection",
702
+ "lora_alpha": 8,
703
+ "lora_dropout": 0.0,
704
+ "in_features": 3072,
705
+ "out_features": 3072,
706
+ "init_scale": 1.0,
707
+ "use_gating": false,
708
+ "decomposition_method": "None",
709
+ "compute_svd_each_forward": false
710
+ },
711
+ "llm.layers.29.self_attn.qkv_proj": {
712
+ "r": 8,
713
+ "injection_method": "residual_projection",
714
+ "lora_alpha": 8,
715
+ "lora_dropout": 0.0,
716
+ "in_features": 3072,
717
+ "out_features": 9216,
718
+ "init_scale": 1.0,
719
+ "use_gating": false,
720
+ "decomposition_method": "None",
721
+ "compute_svd_each_forward": false
722
+ },
723
+ "llm.layers.30.self_attn.o_proj": {
724
+ "r": 8,
725
+ "injection_method": "residual_projection",
726
+ "lora_alpha": 8,
727
+ "lora_dropout": 0.0,
728
+ "in_features": 3072,
729
+ "out_features": 3072,
730
+ "init_scale": 1.0,
731
+ "use_gating": false,
732
+ "decomposition_method": "None",
733
+ "compute_svd_each_forward": false
734
+ },
735
+ "llm.layers.30.self_attn.qkv_proj": {
736
+ "r": 8,
737
+ "injection_method": "residual_projection",
738
+ "lora_alpha": 8,
739
+ "lora_dropout": 0.0,
740
+ "in_features": 3072,
741
+ "out_features": 9216,
742
+ "init_scale": 1.0,
743
+ "use_gating": false,
744
+ "decomposition_method": "None",
745
+ "compute_svd_each_forward": false
746
+ },
747
+ "llm.layers.31.self_attn.o_proj": {
748
+ "r": 8,
749
+ "injection_method": "residual_projection",
750
+ "lora_alpha": 8,
751
+ "lora_dropout": 0.0,
752
+ "in_features": 3072,
753
+ "out_features": 3072,
754
+ "init_scale": 1.0,
755
+ "use_gating": false,
756
+ "decomposition_method": "None",
757
+ "compute_svd_each_forward": false
758
+ },
759
+ "llm.layers.31.self_attn.qkv_proj": {
760
+ "r": 8,
761
+ "injection_method": "residual_projection",
762
+ "lora_alpha": 8,
763
+ "lora_dropout": 0.0,
764
+ "in_features": 3072,
765
+ "out_features": 9216,
766
+ "init_scale": 1.0,
767
+ "use_gating": false,
768
+ "decomposition_method": "None",
769
+ "compute_svd_each_forward": false
770
+ }
771
+ }
772
+ }
Abalation/PP^T/checkpoints/0000400/knowledge_injection_state.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c81f516525ed4b82f47ebdc42c0e1e116c7562e1078dc1cf15cf0d2ae230990
3
+ size 9479738
Abalation/PP^T/log.txt ADDED
The diff for this file is too large to render. See raw diff
 
Abalation/PP^T/tensorboard_log/1753912670.641676/events.out.tfevents.1753912670.mbzuaiser-desktop.1014837.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59658dea34c5d86f8ec4422bab033b42d36faac492415f43b60034ce3629833d
3
+ size 1768
Abalation/PP^T/tensorboard_log/1753912670.643952/hparams.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ adam_weight_decay: 0.0
2
+ batch_size_per_device: 2
3
+ ckpt_every: 100
4
+ condition_dropout_prob: 0.01
5
+ decomposition_method: None
6
+ epochs: 200
7
+ gradient_accumulation_steps: 1
8
+ image_path: ./toy_data/images
9
+ json_file: ./toy_data/toy_subject_data.jsonl
10
+ keep_raw_resolution: true
11
+ log_every: 1
12
+ lora_rank: 8
13
+ lr: 0.001
14
+ lr_scheduler: constant
15
+ lr_warmup_steps: 1000
16
+ max_grad_norm: 1.0
17
+ max_image_size: 1024
18
+ max_input_length_limit: 18000
19
+ mixed_precision: bf16
20
+ model_name_or_path: Shitao/OmniGen-v1
21
+ num_workers: 4
22
+ report_to: tensorboard
23
+ results_dir: /nvme-data/Komal/documents/results/Abalation/PP^T
24
+ use_ema: false
25
+ use_injection: true
26
+ use_lora: false
27
+ use_lorapara: false
28
+ use_para: false
29
+ use_svd: false
30
+ vae_path: null