syedMohib44 committed
Commit 97dddb8 · Parent: 75d5bcd
Files changed (36)
  1. .gitignore +165 -0
  2. Dockerfile +29 -0
  3. LICENSE +21 -0
  4. README.md +2 -12
  5. app.py +142 -0
  6. audit-model/README.md +202 -0
  7. audit-model/adapter_config.json +29 -0
  8. audit-model/adapter_model.safetensors +3 -0
  9. audit-model/checkpoint-900/README.md +202 -0
  10. audit-model/checkpoint-900/adapter_config.json +29 -0
  11. audit-model/checkpoint-900/adapter_model.safetensors +3 -0
  12. audit-model/checkpoint-900/optimizer.pt +3 -0
  13. audit-model/checkpoint-900/rng_state.pth +3 -0
  14. audit-model/checkpoint-900/scheduler.pt +3 -0
  15. audit-model/checkpoint-900/special_tokens_map.json +30 -0
  16. audit-model/checkpoint-900/tokenizer.json +0 -0
  17. audit-model/checkpoint-900/tokenizer_config.json +43 -0
  18. audit-model/checkpoint-900/trainer_state.json +663 -0
  19. audit-model/checkpoint-900/training_args.bin +3 -0
  20. audit-model/checkpoint-927/README.md +202 -0
  21. audit-model/checkpoint-927/adapter_config.json +29 -0
  22. audit-model/checkpoint-927/adapter_model.safetensors +3 -0
  23. audit-model/checkpoint-927/optimizer.pt +3 -0
  24. audit-model/checkpoint-927/rng_state.pth +3 -0
  25. audit-model/checkpoint-927/scheduler.pt +3 -0
  26. audit-model/checkpoint-927/special_tokens_map.json +30 -0
  27. audit-model/checkpoint-927/tokenizer.json +0 -0
  28. audit-model/checkpoint-927/tokenizer_config.json +43 -0
  29. audit-model/checkpoint-927/trainer_state.json +677 -0
  30. audit-model/checkpoint-927/training_args.bin +3 -0
  31. audit-model/special_tokens_map.json +30 -0
  32. audit-model/tokenizer.json +0 -0
  33. audit-model/tokenizer_config.json +43 -0
  34. audit-model/training_args.bin +3 -0
  35. requirements.txt +71 -0
  36. space.yaml +3 -0
.gitignore ADDED
@@ -0,0 +1,165 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ huggingface/
+ venv/
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
Dockerfile ADDED
@@ -0,0 +1,29 @@
+ # Base image
+ FROM python:3.10-slim
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set work directory
+ WORKDIR /app
+
+ # Copy requirements
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --upgrade pip && pip install -r requirements.txt
+
+ # Copy the application code
+ COPY . .
+
+ # Expose the port Gradio runs on
+ EXPOSE 7860
+
+ # Run the Gradio app
+ CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Syed Mohib Uddin
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,2 @@
- ---
- title: AI Auditor
- emoji: 🦀
- colorFrom: pink
- colorTo: blue
- sdk: docker
- pinned: false
- license: mit
- short_description: AI auditor for smart contracts
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # ML-Auditor
+ Smart Contract testing and auditing using ML
app.py ADDED
@@ -0,0 +1,142 @@
+ # Install necessary packages first (uncomment if not already installed)
+ # !pip install transformers datasets peft accelerate gradio
+
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, pipeline
+ from peft import LoraConfig, get_peft_model, TaskType
+ from datasets import load_dataset
+ import gradio as gr
+
+ # ================== MODEL TRAINING PART ==================
+
+ # Load the dataset
+ dataset = load_dataset("msc-smart-contract-auditing/audits-with-reasons", split="train")
+
+ # Load model and tokenizer
+ model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+ model_dir = "./huggingface/hub"
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=model_dir)
+
+ # Format example for instruction-tuned model
+ def format_example(example):
+     return f"""
+ ### Instruction:
+ Please audit the following smart contract and provide a recommendation.
+
+ ### Code:
+ {example['code']}
+
+ ### Description:
+ {example['description']}
+
+ ### Recommendation:
+ {example['recommendation']}
+ """
+
+ # Tokenization function
+ def tokenize(example):
+     text = format_example(example)
+     tokenized = tokenizer(
+         text,
+         truncation=True,
+         padding="max_length",
+         max_length=512,
+         return_tensors="pt"
+     )
+     return {
+         "input_ids": tokenized["input_ids"][0],
+         "attention_mask": tokenized["attention_mask"][0],
+         "labels": tokenized["input_ids"][0],
+     }
+
+ # Tokenize dataset
+ tokenized_dataset = dataset.map(tokenize, batched=False, remove_columns=dataset.column_names)
+
+ # Load model
+ model = AutoModelForCausalLM.from_pretrained(model_name)
+
+ # Apply LoRA
+ lora_config = LoraConfig(
+     r=8,
+     lora_alpha=16,
+     target_modules=["q_proj", "v_proj"],
+     lora_dropout=0.1,
+     bias="none",
+     task_type=TaskType.CAUSAL_LM,
+ )
+ model = get_peft_model(model, lora_config)
+ model.print_trainable_parameters()
+
+ # Training config
+ training_args = TrainingArguments(
+     output_dir="./audit-model",
+     per_device_train_batch_size=1,
+     gradient_accumulation_steps=8,
+     num_train_epochs=3,
+     learning_rate=5e-5,
+     logging_dir="./logs",
+     logging_steps=10,
+     save_steps=100,
+     save_total_limit=2,
+     report_to="none",
+     fp16=False,
+     remove_unused_columns=False,
+ )
+
+ # Trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_dataset,
+     tokenizer=tokenizer
+ )
+
+ # Train
+ trainer.train()
+
+ # Save model
+ trainer.save_model("./audit-model")
+ tokenizer.save_pretrained("./audit-model")
+
+ # ================== INFERENCE + GRADIO UI ==================
+
+ # Load inference pipeline
+ pipe = pipeline("text-generation", model="./audit-model", tokenizer="./audit-model")
+
+ # Function to audit uploaded contract
+ def audit_contract(file, description):
+     if file is None:
+         return "Please upload a Solidity contract file."
+
+     # Read code
+     with open(file.name, "r") as f:
+         contract_code = f.read()
+
+     # Build prompt
+     prompt = f"""### Instruction:
+ Please audit the following smart contract and provide a recommendation.
+
+ ### Code:
+ {contract_code}
+
+ ### Description:
+ {description}
+
+ ### Recommendation:
+ """
+
+     # Generate response
+     output = pipe(prompt, max_new_tokens=100)[0]["generated_text"]
+     return output
+
+ # Gradio interface
+ gr.Interface(
+     fn=audit_contract,
+     inputs=[
+         gr.File(label="Upload Smart Contract (.sol)"),
+         gr.Textbox(label="Contract Description", placeholder="E.g., This contract handles fund withdrawals...")
+     ],
+     outputs=gr.Textbox(label="Audit Recommendation"),
+     title="Smart Contract Auditor",
+     description="Upload a Solidity contract and get audit recommendations from the TinyLlama-powered model."
+ ).launch()
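The pipeline() call in app.py depends on transformers resolving the PEFT adapter saved in ./audit-model automatically (which requires a transformers build with PEFT integration and peft installed). A minimal sketch of the equivalent explicit loading path — not part of this commit, with paths assumed from the code above:

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Load the frozen base model, then attach the fine-tuned LoRA adapter weights.
base = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
model = PeftModel.from_pretrained(base, "./audit-model")
tokenizer = AutoTokenizer.from_pretrained("./audit-model")

prompt = "### Instruction:\nPlease audit the following smart contract and provide a recommendation.\n"
inputs = tokenizer(prompt, return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=100)[0], skip_special_tokens=True))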
audit-model/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+ library_name: peft
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.12.0
audit-model/adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0.1,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
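This config mirrors the LoraConfig built in app.py (r=8, lora_alpha=16, q_proj/v_proj targets). A small sketch, not from this repo, of reading it back programmatically with peft, assuming the audit-model directory above:

from peft import PeftConfig

# from_pretrained dispatches on "peft_type" and returns a LoraConfig here.
cfg = PeftConfig.from_pretrained("./audit-model")
print(cfg.base_model_name_or_path, cfg.r, cfg.lora_alpha, cfg.target_modules)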
audit-model/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d033b586daf2a7319110bf104c08a9dfbef330c7508114de1b17b210971a51e2
+ size 4517152
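The three lines above are a Git LFS pointer, not the adapter weights themselves; the ~4.5 MB safetensors file is only materialized on checkout with git lfs pull. A quick integrity check, assuming the file has been pulled locally:

import hashlib

with open("audit-model/adapter_model.safetensors", "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
print(digest)  # should equal the oid sha256 recorded in the pointer above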
audit-model/checkpoint-900/README.md ADDED
File content is identical to audit-model/README.md above (202 lines).
audit-model/checkpoint-900/adapter_config.json ADDED
File content is identical to audit-model/adapter_config.json above (29 lines).
audit-model/checkpoint-900/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d4c6f8de7780dabc7f654a09ca82c7ea1211489e30e9430c36d29e6dabe64cf
+ size 4517152
audit-model/checkpoint-900/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bedf44f55757566c97e5de360665f0d2743bc8297684273a5d376ac634019a35
+ size 9085370
audit-model/checkpoint-900/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91a18bb82dce417c46eaccbf3fba32dec2caff0f32011c9040a8cdf814e396ea
+ size 14244
audit-model/checkpoint-900/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee90a154b01533f0c339534ce958394e611633b977afe8daa52834a1ad2ee838
+ size 1064
audit-model/checkpoint-900/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
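A short check, not part of the commit, that the saved tokenizer picks up these special tokens (note pad_token is reused as "</s>", matching tokenizer_config.json below), assuming the checkpoint path above:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./audit-model/checkpoint-900")
print(tok.bos_token, tok.eos_token, tok.pad_token, tok.unk_token)  # <s> </s> </s> <unk>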
audit-model/checkpoint-900/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
audit-model/checkpoint-900/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": false,
+   "model_max_length": 2048,
+   "pad_token": "</s>",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
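The chat_template above is TinyLlama's Zephyr-style format. A sketch of rendering it, not from this repo, assuming the checkpoint path above:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./audit-model/checkpoint-900")
text = tok.apply_chat_template(
    [{"role": "user", "content": "Audit this contract"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(text)  # roughly: "<|user|>\nAudit this contract</s>\n<|assistant|>"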
audit-model/checkpoint-900/trainer_state.json ADDED
@@ -0,0 +1,663 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.912621359223301,
+   "eval_steps": 500,
+   "global_step": 900,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {"epoch": 0.032362459546925564, "grad_norm": 11.313185691833496, "learning_rate": 4.946062567421791e-05, "loss": 4.4527, "step": 10},
+     {"epoch": 0.06472491909385113, "grad_norm": 10.617666244506836, "learning_rate": 4.892125134843581e-05, "loss": 3.9824, "step": 20},
+     {"epoch": 0.0970873786407767, "grad_norm": 19.003602981567383, "learning_rate": 4.838187702265373e-05, "loss": 3.6846, "step": 30},
+     {"epoch": 0.12944983818770225, "grad_norm": 18.550548553466797, "learning_rate": 4.784250269687163e-05, "loss": 2.351, "step": 40},
+     {"epoch": 0.16181229773462782, "grad_norm": 5.793719291687012, "learning_rate": 4.730312837108954e-05, "loss": 1.9381, "step": 50},
+     {"epoch": 0.1941747572815534, "grad_norm": 1.2957172393798828, "learning_rate": 4.676375404530744e-05, "loss": 1.3053, "step": 60},
+     {"epoch": 0.22653721682847897, "grad_norm": 0.6527612209320068, "learning_rate": 4.622437971952535e-05, "loss": 1.2609, "step": 70},
+     {"epoch": 0.2588996763754045, "grad_norm": 0.49805423617362976, "learning_rate": 4.568500539374326e-05, "loss": 1.0638, "step": 80},
+     {"epoch": 0.2912621359223301, "grad_norm": 0.4786827564239502, "learning_rate": 4.514563106796117e-05, "loss": 1.0933, "step": 90},
+     {"epoch": 0.32362459546925565, "grad_norm": 0.462676465511322, "learning_rate": 4.460625674217908e-05, "loss": 1.0909, "step": 100},
+     {"epoch": 0.3559870550161812, "grad_norm": 0.5132231116294861, "learning_rate": 4.406688241639698e-05, "loss": 1.1627, "step": 110},
+     {"epoch": 0.3883495145631068, "grad_norm": 0.7181512713432312, "learning_rate": 4.352750809061489e-05, "loss": 1.058, "step": 120},
+     {"epoch": 0.42071197411003236, "grad_norm": 0.6864293813705444, "learning_rate": 4.29881337648328e-05, "loss": 0.974, "step": 130},
+     {"epoch": 0.45307443365695793, "grad_norm": 0.5242889523506165, "learning_rate": 4.2448759439050706e-05, "loss": 1.0578, "step": 140},
+     {"epoch": 0.4854368932038835, "grad_norm": 0.43969759345054626, "learning_rate": 4.1909385113268615e-05, "loss": 0.9293, "step": 150},
+     {"epoch": 0.517799352750809, "grad_norm": 0.41124609112739563, "learning_rate": 4.137001078748652e-05, "loss": 0.7604, "step": 160},
+     {"epoch": 0.5501618122977346, "grad_norm": 0.4797498285770416, "learning_rate": 4.0830636461704426e-05, "loss": 1.0593, "step": 170},
+     {"epoch": 0.5825242718446602, "grad_norm": 0.395654559135437, "learning_rate": 4.029126213592233e-05, "loss": 0.8929, "step": 180},
+     {"epoch": 0.6148867313915858, "grad_norm": 0.5006524920463562, "learning_rate": 3.9751887810140237e-05, "loss": 0.9012, "step": 190},
+     {"epoch": 0.6472491909385113, "grad_norm": 0.43280333280563354, "learning_rate": 3.9212513484358145e-05, "loss": 1.0251, "step": 200},
+     {"epoch": 0.6796116504854369, "grad_norm": 0.39595478773117065, "learning_rate": 3.8673139158576054e-05, "loss": 0.9151, "step": 210},
+     {"epoch": 0.7119741100323624, "grad_norm": 0.44638216495513916, "learning_rate": 3.813376483279396e-05, "loss": 0.8771, "step": 220},
+     {"epoch": 0.7443365695792881, "grad_norm": 0.4109503924846649, "learning_rate": 3.7594390507011865e-05, "loss": 0.8672, "step": 230},
+     {"epoch": 0.7766990291262136, "grad_norm": 0.438164085149765, "learning_rate": 3.7055016181229774e-05, "loss": 0.8855, "step": 240},
+     {"epoch": 0.8090614886731392, "grad_norm": 0.4250308871269226, "learning_rate": 3.651564185544768e-05, "loss": 0.953, "step": 250},
+     {"epoch": 0.8414239482200647, "grad_norm": 0.4708111882209778, "learning_rate": 3.597626752966559e-05, "loss": 0.9524, "step": 260},
+     {"epoch": 0.8737864077669902, "grad_norm": 0.3774057626724243, "learning_rate": 3.54368932038835e-05, "loss": 0.8109, "step": 270},
+     {"epoch": 0.9061488673139159, "grad_norm": 0.4693313539028168, "learning_rate": 3.48975188781014e-05, "loss": 0.9636, "step": 280},
+     {"epoch": 0.9385113268608414, "grad_norm": 0.46089011430740356, "learning_rate": 3.435814455231931e-05, "loss": 0.798, "step": 290},
+     {"epoch": 0.970873786407767, "grad_norm": 0.38023099303245544, "learning_rate": 3.3818770226537214e-05, "loss": 0.9137, "step": 300},
+     {"epoch": 1.0032362459546926, "grad_norm": 0.40833815932273865, "learning_rate": 3.327939590075513e-05, "loss": 0.9773, "step": 310},
+     {"epoch": 1.035598705501618, "grad_norm": 0.3781765103340149, "learning_rate": 3.274002157497303e-05, "loss": 0.8947, "step": 320},
+     {"epoch": 1.0679611650485437, "grad_norm": 0.5418028235435486, "learning_rate": 3.220064724919094e-05, "loss": 0.8948, "step": 330},
+     {"epoch": 1.1003236245954693, "grad_norm": 0.4426631033420563, "learning_rate": 3.166127292340885e-05, "loss": 0.8272, "step": 340},
+     {"epoch": 1.132686084142395, "grad_norm": 0.462091863155365, "learning_rate": 3.112189859762675e-05, "loss": 0.8894, "step": 350},
+     {"epoch": 1.1650485436893203, "grad_norm": 0.3814358413219452, "learning_rate": 3.058252427184466e-05, "loss": 0.9384, "step": 360},
+     {"epoch": 1.197411003236246, "grad_norm": 0.4682667553424835, "learning_rate": 3.0043149946062572e-05, "loss": 0.9448, "step": 370},
+     {"epoch": 1.2297734627831716, "grad_norm": 0.44027456641197205, "learning_rate": 2.9503775620280478e-05, "loss": 0.882, "step": 380},
+     {"epoch": 1.262135922330097, "grad_norm": 0.33765849471092224, "learning_rate": 2.8964401294498383e-05, "loss": 0.8233, "step": 390},
+     {"epoch": 1.2944983818770226, "grad_norm": 0.41791439056396484, "learning_rate": 2.842502696871629e-05, "loss": 0.8723, "step": 400},
+     {"epoch": 1.3268608414239482, "grad_norm": 0.5905632376670837, "learning_rate": 2.7885652642934197e-05, "loss": 0.9817, "step": 410},
+     {"epoch": 1.3592233009708738, "grad_norm": 0.43448638916015625, "learning_rate": 2.7346278317152106e-05, "loss": 0.8918, "step": 420},
+     {"epoch": 1.3915857605177995, "grad_norm": 0.5770216584205627, "learning_rate": 2.6806903991370015e-05, "loss": 0.8559, "step": 430},
+     {"epoch": 1.4239482200647249, "grad_norm": 0.4389037787914276, "learning_rate": 2.626752966558792e-05, "loss": 0.8687, "step": 440},
+     {"epoch": 1.4563106796116505, "grad_norm": 0.5336058735847473, "learning_rate": 2.5728155339805826e-05, "loss": 0.9136, "step": 450},
+     {"epoch": 1.4886731391585761, "grad_norm": 0.5232294797897339, "learning_rate": 2.518878101402373e-05, "loss": 0.9582, "step": 460},
+     {"epoch": 1.5210355987055015, "grad_norm": 0.42966699600219727, "learning_rate": 2.464940668824164e-05, "loss": 0.9858, "step": 470},
+     {"epoch": 1.5533980582524272, "grad_norm": 0.5163992643356323, "learning_rate": 2.411003236245955e-05, "loss": 0.916, "step": 480},
+     {"epoch": 1.5857605177993528, "grad_norm": 0.47947341203689575, "learning_rate": 2.3570658036677458e-05, "loss": 0.8943, "step": 490},
+     {"epoch": 1.6181229773462782, "grad_norm": 0.42414242029190063, "learning_rate": 2.3031283710895363e-05, "loss": 0.901, "step": 500},
+     {"epoch": 1.650485436893204, "grad_norm": 0.4730052947998047, "learning_rate": 2.249190938511327e-05, "loss": 0.7823, "step": 510},
+     {"epoch": 1.6828478964401294, "grad_norm": 0.4772244393825531, "learning_rate": 2.1952535059331178e-05, "loss": 0.8497, "step": 520},
+     {"epoch": 1.715210355987055, "grad_norm": 0.5551290512084961, "learning_rate": 2.1413160733549083e-05, "loss": 0.8933, "step": 530},
+     {"epoch": 1.7475728155339807, "grad_norm": 0.4729432761669159, "learning_rate": 2.0873786407766992e-05, "loss": 0.9068, "step": 540},
+     {"epoch": 1.779935275080906, "grad_norm": 0.4728154242038727, "learning_rate": 2.03344120819849e-05, "loss": 0.8927, "step": 550},
+     {"epoch": 1.8122977346278317, "grad_norm": 0.468170166015625, "learning_rate": 1.9795037756202806e-05, "loss": 0.8018, "step": 560},
+     {"epoch": 1.8446601941747574, "grad_norm": 0.4387604892253876, "learning_rate": 1.9255663430420712e-05, "loss": 0.9354, "step": 570},
+     {"epoch": 1.8770226537216828, "grad_norm": 0.4523237943649292, "learning_rate": 1.871628910463862e-05, "loss": 0.97, "step": 580},
+     {"epoch": 1.9093851132686084, "grad_norm": 0.35041147470474243, "learning_rate": 1.8176914778856526e-05, "loss": 0.7746, "step": 590},
+     {"epoch": 1.941747572815534, "grad_norm": 0.415325790643692, "learning_rate": 1.763754045307443e-05, "loss": 0.9324, "step": 600},
+     {"epoch": 1.9741100323624594, "grad_norm": 0.4483698010444641, "learning_rate": 1.7098166127292344e-05, "loss": 0.9117, "step": 610},
+     {"epoch": 2.0064724919093853, "grad_norm": 0.5569983720779419, "learning_rate": 1.655879180151025e-05, "loss": 0.7934, "step": 620},
+     {"epoch": 2.0388349514563107, "grad_norm": 0.5829730033874512, "learning_rate": 1.6019417475728158e-05, "loss": 0.8655, "step": 630},
+     {"epoch": 2.071197411003236, "grad_norm": 0.5252796411514282, "learning_rate": 1.5480043149946064e-05, "loss": 0.8609, "step": 640},
+     {"epoch": 2.103559870550162, "grad_norm": 0.48137661814689636, "learning_rate": 1.4940668824163969e-05, "loss": 0.8301, "step": 650},
+     {"epoch": 2.1359223300970873, "grad_norm": 0.5046062469482422, "learning_rate": 1.440129449838188e-05, "loss": 0.829, "step": 660},
+     {"epoch": 2.168284789644013, "grad_norm": 0.4735497236251831, "learning_rate": 1.3861920172599785e-05, "loss": 0.9003, "step": 670},
+     {"epoch": 2.2006472491909386, "grad_norm": 0.5104610323905945, "learning_rate": 1.332254584681769e-05, "loss": 0.9015, "step": 680},
+     {"epoch": 2.233009708737864, "grad_norm": 0.5547719597816467, "learning_rate": 1.27831715210356e-05, "loss": 0.9294, "step": 690},
+     {"epoch": 2.26537216828479, "grad_norm": 0.5612460970878601, "learning_rate": 1.2243797195253506e-05, "loss": 1.0278, "step": 700},
+     {"epoch": 2.2977346278317152, "grad_norm": 0.5788902044296265, "learning_rate": 1.1704422869471414e-05, "loss": 0.9947, "step": 710},
+     {"epoch": 2.3300970873786406, "grad_norm": 0.4961574375629425, "learning_rate": 1.116504854368932e-05, "loss": 0.8979, "step": 720},
+     {"epoch": 2.3624595469255665, "grad_norm": 0.475431889295578, "learning_rate": 1.0625674217907228e-05, "loss": 0.7793, "step": 730},
+     {"epoch": 2.394822006472492, "grad_norm": 0.44658321142196655, "learning_rate": 1.0086299892125135e-05, "loss": 0.8751, "step": 740},
+     {"epoch": 2.4271844660194173, "grad_norm": 0.4376422166824341, "learning_rate": 9.546925566343042e-06, "loss": 0.8552, "step": 750},
+     {"epoch": 2.459546925566343, "grad_norm": 0.49501657485961914, "learning_rate": 9.00755124056095e-06, "loss": 0.9324, "step": 760},
+     {"epoch": 2.4919093851132685, "grad_norm": 0.5048530697822571, "learning_rate": 8.468176914778857e-06, "loss": 0.9295, "step": 770},
+     {"epoch": 2.524271844660194, "grad_norm": 0.42359548807144165, "learning_rate": 7.928802588996764e-06, "loss": 0.7242, "step": 780},
+     {"epoch": 2.55663430420712, "grad_norm": 0.5107340812683105, "learning_rate": 7.389428263214672e-06, "loss": 0.8196, "step": 790},
+     {"epoch": 2.588996763754045, "grad_norm": 0.5559754967689514, "learning_rate": 6.850053937432578e-06, "loss": 0.8591, "step": 800},
+     {"epoch": 2.6213592233009706, "grad_norm": 0.6188119053840637, "learning_rate": 6.310679611650486e-06, "loss": 0.8992, "step": 810},
+     {"epoch": 2.6537216828478964, "grad_norm": 0.4786069691181183, "learning_rate": 5.771305285868392e-06, "loss": 0.8756, "step": 820},
+     {"epoch": 2.686084142394822, "grad_norm": 0.4293358623981476, "learning_rate": 5.2319309600863e-06, "loss": 0.7657, "step": 830},
+     {"epoch": 2.7184466019417477, "grad_norm": 0.5650451183319092, "learning_rate": 4.6925566343042074e-06, "loss": 0.8836, "step": 840},
+     {"epoch": 2.750809061488673, "grad_norm": 0.5144199728965759, "learning_rate": 4.153182308522115e-06, "loss": 0.9088, "step": 850},
+     {"epoch": 2.783171521035599, "grad_norm": 0.4247129261493683, "learning_rate": 3.6138079827400217e-06, "loss": 0.8326, "step": 860},
+     {"epoch": 2.8155339805825244, "grad_norm": 0.5197082161903381, "learning_rate": 3.074433656957929e-06, "loss": 0.9088, "step": 870},
+     {"epoch": 2.8478964401294498, "grad_norm": 0.5575164556503296, "learning_rate": 2.535059331175836e-06, "loss": 0.8786, "step": 880},
+     {"epoch": 2.8802588996763756, "grad_norm": 0.49595561623573303, "learning_rate": 1.995685005393743e-06, "loss": 0.8106, "step": 890},
+     {"epoch": 2.912621359223301, "grad_norm": 0.5974913239479065, "learning_rate": 1.4563106796116506e-06, "loss": 0.9802, "step": 900}
+   ],
+   "logging_steps": 10,
+   "max_steps": 927,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 100,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 2.29066728800256e+16,
+   "train_batch_size": 1,
+   "trial_name": null,
+   "trial_params": null
+ }
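The log above shows the training loss falling from about 4.45 at step 10 to roughly 0.8-1.0 by step 900. A small sketch, not part of the commit, for inspecting this log, assuming the checkpoint path above:

import json

with open("audit-model/checkpoint-900/trainer_state.json") as f:
    state = json.load(f)

# Print the loss curve and learning-rate schedule recorded every 10 steps.
for rec in state["log_history"]:
    print(f"step {rec['step']:>4}  loss {rec['loss']:.4f}  lr {rec['learning_rate']:.2e}")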
audit-model/checkpoint-900/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:644c0428fb02cac178615829c2997bb8df92212208f0ad1949508909926dcc04
+ size 5112
audit-model/checkpoint-927/README.md ADDED
File content is identical to audit-model/README.md above (202 lines).
audit-model/checkpoint-927/adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.1,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 8,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
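This adapter_config.json describes a rank-8 LoRA adapter (lora_alpha 16, dropout 0.1) injected into the q_proj and v_proj attention projections of TinyLlama/TinyLlama-1.1B-Chat-v1.0. A minimal sketch of loading it with PEFT, assuming this repo is checked out locally so that `audit-model/checkpoint-927` is a valid path:

```python
# Sketch: attach the LoRA adapter from this checkpoint to its base model.
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
# Reads adapter_config.json and adapter_model.safetensors from the directory.
model = PeftModel.from_pretrained(base, "audit-model/checkpoint-927")
tokenizer = AutoTokenizer.from_pretrained("audit-model/checkpoint-927")
model.eval()
```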
audit-model/checkpoint-927/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d033b586daf2a7319110bf104c08a9dfbef330c7508114de1b17b210971a51e2
+ size 4517152
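This three-line stanza (and the similar ones below) is a Git LFS pointer: the repo tracks only a sha256 oid and a byte size, while the binary itself lives in LFS storage. A hedged sketch of checking a fetched file against its pointer, using the values from the pointer above:

```python
# Sketch: verify a Git LFS-managed file against its pointer's oid and size.
import hashlib
from pathlib import Path

def matches_pointer(path: str, oid: str, size: int) -> bool:
    data = Path(path).read_bytes()  # fine for small adapters; stream for large files
    return len(data) == size and hashlib.sha256(data).hexdigest() == oid

print(matches_pointer(
    "audit-model/checkpoint-927/adapter_model.safetensors",
    "d033b586daf2a7319110bf104c08a9dfbef330c7508114de1b17b210971a51e2",
    4517152,
))
```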
audit-model/checkpoint-927/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdfa2ce00f71638b29e5879250128fa6fb7a4a28f15ad7f7cd1dd63809fccb00
+ size 9085370
audit-model/checkpoint-927/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3496cd2f876804a3137aaed890f2a03b91460a7697939915c950dcd989f11f67
+ size 14244
audit-model/checkpoint-927/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a513404a883f83b0bc3719fda4769a722cfd4ece54fc322c4e3e78974f03b25f
+ size 1064
audit-model/checkpoint-927/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
audit-model/checkpoint-927/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
audit-model/checkpoint-927/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": null,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": false,
+ "model_max_length": 2048,
+ "pad_token": "</s>",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
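The chat_template above is the <|system|>/<|user|>/<|assistant|> format TinyLlama-1.1B-Chat ships with, and pad_token is mapped to the eos token </s> with right padding, a common setup for causal-LM fine-tuning. A short sketch of rendering a conversation with it (the example messages are invented for illustration):

```python
# Sketch: render messages with the chat_template defined in this config.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("audit-model/checkpoint-927")
messages = [
    {"role": "system", "content": "You are a helpful audit assistant."},      # illustrative
    {"role": "user", "content": "Summarize the findings for this contract."}, # illustrative
]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # <|system|> ... </s> <|user|> ... </s> <|assistant|>
```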
audit-model/checkpoint-927/trainer_state.json ADDED
@@ -0,0 +1,677 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 927,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.032362459546925564,
+ "grad_norm": 11.313185691833496,
+ "learning_rate": 4.946062567421791e-05,
+ "loss": 4.4527,
+ "step": 10
+ },
+ {
+ "epoch": 0.06472491909385113,
+ "grad_norm": 10.617666244506836,
+ "learning_rate": 4.892125134843581e-05,
+ "loss": 3.9824,
+ "step": 20
+ },
+ {
+ "epoch": 0.0970873786407767,
+ "grad_norm": 19.003602981567383,
+ "learning_rate": 4.838187702265373e-05,
+ "loss": 3.6846,
+ "step": 30
+ },
+ {
+ "epoch": 0.12944983818770225,
+ "grad_norm": 18.550548553466797,
+ "learning_rate": 4.784250269687163e-05,
+ "loss": 2.351,
+ "step": 40
+ },
+ {
+ "epoch": 0.16181229773462782,
+ "grad_norm": 5.793719291687012,
+ "learning_rate": 4.730312837108954e-05,
+ "loss": 1.9381,
+ "step": 50
+ },
+ {
+ "epoch": 0.1941747572815534,
+ "grad_norm": 1.2957172393798828,
+ "learning_rate": 4.676375404530744e-05,
+ "loss": 1.3053,
+ "step": 60
+ },
+ {
+ "epoch": 0.22653721682847897,
+ "grad_norm": 0.6527612209320068,
+ "learning_rate": 4.622437971952535e-05,
+ "loss": 1.2609,
+ "step": 70
+ },
+ {
+ "epoch": 0.2588996763754045,
+ "grad_norm": 0.49805423617362976,
+ "learning_rate": 4.568500539374326e-05,
+ "loss": 1.0638,
+ "step": 80
+ },
+ {
+ "epoch": 0.2912621359223301,
+ "grad_norm": 0.4786827564239502,
+ "learning_rate": 4.514563106796117e-05,
+ "loss": 1.0933,
+ "step": 90
+ },
+ {
+ "epoch": 0.32362459546925565,
+ "grad_norm": 0.462676465511322,
+ "learning_rate": 4.460625674217908e-05,
+ "loss": 1.0909,
+ "step": 100
+ },
+ {
+ "epoch": 0.3559870550161812,
+ "grad_norm": 0.5132231116294861,
+ "learning_rate": 4.406688241639698e-05,
+ "loss": 1.1627,
+ "step": 110
+ },
+ {
+ "epoch": 0.3883495145631068,
+ "grad_norm": 0.7181512713432312,
+ "learning_rate": 4.352750809061489e-05,
+ "loss": 1.058,
+ "step": 120
+ },
+ {
+ "epoch": 0.42071197411003236,
+ "grad_norm": 0.6864293813705444,
+ "learning_rate": 4.29881337648328e-05,
+ "loss": 0.974,
+ "step": 130
+ },
+ {
+ "epoch": 0.45307443365695793,
+ "grad_norm": 0.5242889523506165,
+ "learning_rate": 4.2448759439050706e-05,
+ "loss": 1.0578,
+ "step": 140
+ },
+ {
+ "epoch": 0.4854368932038835,
+ "grad_norm": 0.43969759345054626,
+ "learning_rate": 4.1909385113268615e-05,
+ "loss": 0.9293,
+ "step": 150
+ },
+ {
+ "epoch": 0.517799352750809,
+ "grad_norm": 0.41124609112739563,
+ "learning_rate": 4.137001078748652e-05,
+ "loss": 0.7604,
+ "step": 160
+ },
+ {
+ "epoch": 0.5501618122977346,
+ "grad_norm": 0.4797498285770416,
+ "learning_rate": 4.0830636461704426e-05,
+ "loss": 1.0593,
+ "step": 170
+ },
+ {
+ "epoch": 0.5825242718446602,
+ "grad_norm": 0.395654559135437,
+ "learning_rate": 4.029126213592233e-05,
+ "loss": 0.8929,
+ "step": 180
+ },
+ {
+ "epoch": 0.6148867313915858,
+ "grad_norm": 0.5006524920463562,
+ "learning_rate": 3.9751887810140237e-05,
+ "loss": 0.9012,
+ "step": 190
+ },
+ {
+ "epoch": 0.6472491909385113,
+ "grad_norm": 0.43280333280563354,
+ "learning_rate": 3.9212513484358145e-05,
+ "loss": 1.0251,
+ "step": 200
+ },
+ {
+ "epoch": 0.6796116504854369,
+ "grad_norm": 0.39595478773117065,
+ "learning_rate": 3.8673139158576054e-05,
+ "loss": 0.9151,
+ "step": 210
+ },
+ {
+ "epoch": 0.7119741100323624,
+ "grad_norm": 0.44638216495513916,
+ "learning_rate": 3.813376483279396e-05,
+ "loss": 0.8771,
+ "step": 220
+ },
+ {
+ "epoch": 0.7443365695792881,
+ "grad_norm": 0.4109503924846649,
+ "learning_rate": 3.7594390507011865e-05,
+ "loss": 0.8672,
+ "step": 230
+ },
+ {
+ "epoch": 0.7766990291262136,
+ "grad_norm": 0.438164085149765,
+ "learning_rate": 3.7055016181229774e-05,
+ "loss": 0.8855,
+ "step": 240
+ },
+ {
+ "epoch": 0.8090614886731392,
+ "grad_norm": 0.4250308871269226,
+ "learning_rate": 3.651564185544768e-05,
+ "loss": 0.953,
+ "step": 250
+ },
+ {
+ "epoch": 0.8414239482200647,
+ "grad_norm": 0.4708111882209778,
+ "learning_rate": 3.597626752966559e-05,
+ "loss": 0.9524,
+ "step": 260
+ },
+ {
+ "epoch": 0.8737864077669902,
+ "grad_norm": 0.3774057626724243,
+ "learning_rate": 3.54368932038835e-05,
+ "loss": 0.8109,
+ "step": 270
+ },
+ {
+ "epoch": 0.9061488673139159,
+ "grad_norm": 0.4693313539028168,
+ "learning_rate": 3.48975188781014e-05,
+ "loss": 0.9636,
+ "step": 280
+ },
+ {
+ "epoch": 0.9385113268608414,
+ "grad_norm": 0.46089011430740356,
+ "learning_rate": 3.435814455231931e-05,
+ "loss": 0.798,
+ "step": 290
+ },
+ {
+ "epoch": 0.970873786407767,
+ "grad_norm": 0.38023099303245544,
+ "learning_rate": 3.3818770226537214e-05,
+ "loss": 0.9137,
+ "step": 300
+ },
+ {
+ "epoch": 1.0032362459546926,
+ "grad_norm": 0.40833815932273865,
+ "learning_rate": 3.327939590075513e-05,
+ "loss": 0.9773,
+ "step": 310
+ },
+ {
+ "epoch": 1.035598705501618,
+ "grad_norm": 0.3781765103340149,
+ "learning_rate": 3.274002157497303e-05,
+ "loss": 0.8947,
+ "step": 320
+ },
+ {
+ "epoch": 1.0679611650485437,
+ "grad_norm": 0.5418028235435486,
+ "learning_rate": 3.220064724919094e-05,
+ "loss": 0.8948,
+ "step": 330
+ },
+ {
+ "epoch": 1.1003236245954693,
+ "grad_norm": 0.4426631033420563,
+ "learning_rate": 3.166127292340885e-05,
+ "loss": 0.8272,
+ "step": 340
+ },
+ {
+ "epoch": 1.132686084142395,
+ "grad_norm": 0.462091863155365,
+ "learning_rate": 3.112189859762675e-05,
+ "loss": 0.8894,
+ "step": 350
+ },
+ {
+ "epoch": 1.1650485436893203,
+ "grad_norm": 0.3814358413219452,
+ "learning_rate": 3.058252427184466e-05,
+ "loss": 0.9384,
+ "step": 360
+ },
+ {
+ "epoch": 1.197411003236246,
+ "grad_norm": 0.4682667553424835,
+ "learning_rate": 3.0043149946062572e-05,
+ "loss": 0.9448,
+ "step": 370
+ },
+ {
+ "epoch": 1.2297734627831716,
+ "grad_norm": 0.44027456641197205,
+ "learning_rate": 2.9503775620280478e-05,
+ "loss": 0.882,
+ "step": 380
+ },
+ {
+ "epoch": 1.262135922330097,
+ "grad_norm": 0.33765849471092224,
+ "learning_rate": 2.8964401294498383e-05,
+ "loss": 0.8233,
+ "step": 390
+ },
+ {
+ "epoch": 1.2944983818770226,
+ "grad_norm": 0.41791439056396484,
+ "learning_rate": 2.842502696871629e-05,
+ "loss": 0.8723,
+ "step": 400
+ },
+ {
+ "epoch": 1.3268608414239482,
+ "grad_norm": 0.5905632376670837,
+ "learning_rate": 2.7885652642934197e-05,
+ "loss": 0.9817,
+ "step": 410
+ },
+ {
+ "epoch": 1.3592233009708738,
+ "grad_norm": 0.43448638916015625,
+ "learning_rate": 2.7346278317152106e-05,
+ "loss": 0.8918,
+ "step": 420
+ },
+ {
+ "epoch": 1.3915857605177995,
+ "grad_norm": 0.5770216584205627,
+ "learning_rate": 2.6806903991370015e-05,
+ "loss": 0.8559,
+ "step": 430
+ },
+ {
+ "epoch": 1.4239482200647249,
+ "grad_norm": 0.4389037787914276,
+ "learning_rate": 2.626752966558792e-05,
+ "loss": 0.8687,
+ "step": 440
+ },
+ {
+ "epoch": 1.4563106796116505,
+ "grad_norm": 0.5336058735847473,
+ "learning_rate": 2.5728155339805826e-05,
+ "loss": 0.9136,
+ "step": 450
+ },
+ {
+ "epoch": 1.4886731391585761,
+ "grad_norm": 0.5232294797897339,
+ "learning_rate": 2.518878101402373e-05,
+ "loss": 0.9582,
+ "step": 460
+ },
+ {
+ "epoch": 1.5210355987055015,
+ "grad_norm": 0.42966699600219727,
+ "learning_rate": 2.464940668824164e-05,
+ "loss": 0.9858,
+ "step": 470
+ },
+ {
+ "epoch": 1.5533980582524272,
+ "grad_norm": 0.5163992643356323,
+ "learning_rate": 2.411003236245955e-05,
+ "loss": 0.916,
+ "step": 480
+ },
+ {
+ "epoch": 1.5857605177993528,
+ "grad_norm": 0.47947341203689575,
+ "learning_rate": 2.3570658036677458e-05,
+ "loss": 0.8943,
+ "step": 490
+ },
+ {
+ "epoch": 1.6181229773462782,
+ "grad_norm": 0.42414242029190063,
+ "learning_rate": 2.3031283710895363e-05,
+ "loss": 0.901,
+ "step": 500
+ },
+ {
+ "epoch": 1.650485436893204,
+ "grad_norm": 0.4730052947998047,
+ "learning_rate": 2.249190938511327e-05,
+ "loss": 0.7823,
+ "step": 510
+ },
+ {
+ "epoch": 1.6828478964401294,
+ "grad_norm": 0.4772244393825531,
+ "learning_rate": 2.1952535059331178e-05,
+ "loss": 0.8497,
+ "step": 520
+ },
+ {
+ "epoch": 1.715210355987055,
+ "grad_norm": 0.5551290512084961,
+ "learning_rate": 2.1413160733549083e-05,
+ "loss": 0.8933,
+ "step": 530
+ },
+ {
+ "epoch": 1.7475728155339807,
+ "grad_norm": 0.4729432761669159,
+ "learning_rate": 2.0873786407766992e-05,
+ "loss": 0.9068,
+ "step": 540
+ },
+ {
+ "epoch": 1.779935275080906,
+ "grad_norm": 0.4728154242038727,
+ "learning_rate": 2.03344120819849e-05,
+ "loss": 0.8927,
+ "step": 550
+ },
+ {
+ "epoch": 1.8122977346278317,
+ "grad_norm": 0.468170166015625,
+ "learning_rate": 1.9795037756202806e-05,
+ "loss": 0.8018,
+ "step": 560
+ },
+ {
+ "epoch": 1.8446601941747574,
+ "grad_norm": 0.4387604892253876,
+ "learning_rate": 1.9255663430420712e-05,
+ "loss": 0.9354,
+ "step": 570
+ },
+ {
+ "epoch": 1.8770226537216828,
+ "grad_norm": 0.4523237943649292,
+ "learning_rate": 1.871628910463862e-05,
+ "loss": 0.97,
+ "step": 580
+ },
+ {
+ "epoch": 1.9093851132686084,
+ "grad_norm": 0.35041147470474243,
+ "learning_rate": 1.8176914778856526e-05,
+ "loss": 0.7746,
+ "step": 590
+ },
+ {
+ "epoch": 1.941747572815534,
+ "grad_norm": 0.415325790643692,
+ "learning_rate": 1.763754045307443e-05,
+ "loss": 0.9324,
+ "step": 600
+ },
+ {
+ "epoch": 1.9741100323624594,
+ "grad_norm": 0.4483698010444641,
+ "learning_rate": 1.7098166127292344e-05,
+ "loss": 0.9117,
+ "step": 610
+ },
+ {
+ "epoch": 2.0064724919093853,
+ "grad_norm": 0.5569983720779419,
+ "learning_rate": 1.655879180151025e-05,
+ "loss": 0.7934,
+ "step": 620
+ },
+ {
+ "epoch": 2.0388349514563107,
+ "grad_norm": 0.5829730033874512,
+ "learning_rate": 1.6019417475728158e-05,
+ "loss": 0.8655,
+ "step": 630
+ },
+ {
+ "epoch": 2.071197411003236,
+ "grad_norm": 0.5252796411514282,
+ "learning_rate": 1.5480043149946064e-05,
+ "loss": 0.8609,
+ "step": 640
+ },
+ {
+ "epoch": 2.103559870550162,
+ "grad_norm": 0.48137661814689636,
+ "learning_rate": 1.4940668824163969e-05,
+ "loss": 0.8301,
+ "step": 650
+ },
+ {
+ "epoch": 2.1359223300970873,
+ "grad_norm": 0.5046062469482422,
+ "learning_rate": 1.440129449838188e-05,
+ "loss": 0.829,
+ "step": 660
+ },
+ {
+ "epoch": 2.168284789644013,
+ "grad_norm": 0.4735497236251831,
+ "learning_rate": 1.3861920172599785e-05,
+ "loss": 0.9003,
+ "step": 670
+ },
+ {
+ "epoch": 2.2006472491909386,
+ "grad_norm": 0.5104610323905945,
+ "learning_rate": 1.332254584681769e-05,
+ "loss": 0.9015,
+ "step": 680
+ },
+ {
+ "epoch": 2.233009708737864,
+ "grad_norm": 0.5547719597816467,
+ "learning_rate": 1.27831715210356e-05,
+ "loss": 0.9294,
+ "step": 690
+ },
+ {
+ "epoch": 2.26537216828479,
+ "grad_norm": 0.5612460970878601,
+ "learning_rate": 1.2243797195253506e-05,
+ "loss": 1.0278,
+ "step": 700
+ },
+ {
+ "epoch": 2.2977346278317152,
+ "grad_norm": 0.5788902044296265,
+ "learning_rate": 1.1704422869471414e-05,
+ "loss": 0.9947,
+ "step": 710
+ },
+ {
+ "epoch": 2.3300970873786406,
+ "grad_norm": 0.4961574375629425,
+ "learning_rate": 1.116504854368932e-05,
+ "loss": 0.8979,
+ "step": 720
+ },
+ {
+ "epoch": 2.3624595469255665,
+ "grad_norm": 0.475431889295578,
+ "learning_rate": 1.0625674217907228e-05,
+ "loss": 0.7793,
+ "step": 730
+ },
+ {
+ "epoch": 2.394822006472492,
+ "grad_norm": 0.44658321142196655,
+ "learning_rate": 1.0086299892125135e-05,
+ "loss": 0.8751,
+ "step": 740
+ },
+ {
+ "epoch": 2.4271844660194173,
+ "grad_norm": 0.4376422166824341,
+ "learning_rate": 9.546925566343042e-06,
+ "loss": 0.8552,
+ "step": 750
+ },
+ {
+ "epoch": 2.459546925566343,
+ "grad_norm": 0.49501657485961914,
+ "learning_rate": 9.00755124056095e-06,
+ "loss": 0.9324,
+ "step": 760
+ },
+ {
+ "epoch": 2.4919093851132685,
+ "grad_norm": 0.5048530697822571,
+ "learning_rate": 8.468176914778857e-06,
+ "loss": 0.9295,
+ "step": 770
+ },
+ {
+ "epoch": 2.524271844660194,
+ "grad_norm": 0.42359548807144165,
+ "learning_rate": 7.928802588996764e-06,
+ "loss": 0.7242,
+ "step": 780
+ },
+ {
+ "epoch": 2.55663430420712,
+ "grad_norm": 0.5107340812683105,
+ "learning_rate": 7.389428263214672e-06,
+ "loss": 0.8196,
+ "step": 790
+ },
+ {
+ "epoch": 2.588996763754045,
+ "grad_norm": 0.5559754967689514,
+ "learning_rate": 6.850053937432578e-06,
+ "loss": 0.8591,
+ "step": 800
+ },
+ {
+ "epoch": 2.6213592233009706,
+ "grad_norm": 0.6188119053840637,
+ "learning_rate": 6.310679611650486e-06,
+ "loss": 0.8992,
+ "step": 810
+ },
+ {
+ "epoch": 2.6537216828478964,
+ "grad_norm": 0.4786069691181183,
+ "learning_rate": 5.771305285868392e-06,
+ "loss": 0.8756,
+ "step": 820
+ },
+ {
+ "epoch": 2.686084142394822,
+ "grad_norm": 0.4293358623981476,
+ "learning_rate": 5.2319309600863e-06,
+ "loss": 0.7657,
+ "step": 830
+ },
+ {
+ "epoch": 2.7184466019417477,
+ "grad_norm": 0.5650451183319092,
+ "learning_rate": 4.6925566343042074e-06,
+ "loss": 0.8836,
+ "step": 840
+ },
+ {
+ "epoch": 2.750809061488673,
+ "grad_norm": 0.5144199728965759,
+ "learning_rate": 4.153182308522115e-06,
+ "loss": 0.9088,
+ "step": 850
+ },
+ {
+ "epoch": 2.783171521035599,
+ "grad_norm": 0.4247129261493683,
+ "learning_rate": 3.6138079827400217e-06,
+ "loss": 0.8326,
+ "step": 860
+ },
+ {
+ "epoch": 2.8155339805825244,
+ "grad_norm": 0.5197082161903381,
+ "learning_rate": 3.074433656957929e-06,
+ "loss": 0.9088,
+ "step": 870
+ },
+ {
+ "epoch": 2.8478964401294498,
+ "grad_norm": 0.5575164556503296,
+ "learning_rate": 2.535059331175836e-06,
+ "loss": 0.8786,
+ "step": 880
+ },
+ {
+ "epoch": 2.8802588996763756,
+ "grad_norm": 0.49595561623573303,
+ "learning_rate": 1.995685005393743e-06,
+ "loss": 0.8106,
+ "step": 890
+ },
+ {
+ "epoch": 2.912621359223301,
+ "grad_norm": 0.5974913239479065,
+ "learning_rate": 1.4563106796116506e-06,
+ "loss": 0.9802,
+ "step": 900
+ },
+ {
+ "epoch": 2.9449838187702264,
+ "grad_norm": 0.4777052104473114,
+ "learning_rate": 9.169363538295577e-07,
+ "loss": 0.9028,
+ "step": 910
+ },
+ {
+ "epoch": 2.9773462783171523,
+ "grad_norm": 0.4917181730270386,
+ "learning_rate": 3.7756202804746497e-07,
+ "loss": 0.889,
+ "step": 920
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 927,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 100,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 2.359387306642637e+16,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
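In summary, this state file records 927 optimizer steps over 3 epochs (309 steps per epoch at train_batch_size 1), logging every 10 steps and saving every 100, with the learning rate decaying linearly from 5e-5 toward 0 and training loss dropping from about 4.45 to about 0.89. A small sketch for pulling the loss curve back out of log_history (standard library only; the path assumes this repo's layout):

```python
# Sketch: extract (step, loss) pairs from this trainer_state.json.
import json

with open("audit-model/checkpoint-927/trainer_state.json") as f:
    state = json.load(f)

points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
first_step, first_loss = points[0]
last_step, last_loss = points[-1]
print(f"{len(points)} log points: loss {first_loss} @ step {first_step} "
      f"-> {last_loss} @ step {last_step}")
```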
audit-model/checkpoint-927/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:644c0428fb02cac178615829c2997bb8df92212208f0ad1949508909926dcc04
+ size 5112
audit-model/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
audit-model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
audit-model/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": null,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": false,
+ "model_max_length": 2048,
+ "pad_token": "</s>",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
audit-model/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:644c0428fb02cac178615829c2997bb8df92212208f0ad1949508909926dcc04
+ size 5112
requirements.txt ADDED
@@ -0,0 +1,71 @@
+ accelerate==0.34.2
+ aiohappyeyeballs==2.4.0
+ aiohttp==3.10.5
+ aiosignal==1.3.1
+ attrs==24.2.0
+ bitsandbytes==0.43.3
+ certifi==2024.8.30
+ charset-normalizer==3.3.2
+ colorama==0.4.6
+ datasets==3.0.0
+ dill==0.3.8
+ docstring_parser==0.16
+ filelock==3.16.0
+ frozenlist==1.4.1
+ fsspec==2024.6.1
+ huggingface-hub==0.24.7
+ idna==3.10
+ Jinja2==3.1.4
+ joblib==1.4.2
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ mdurl==0.1.2
+ mpmath==1.3.0
+ multidict==6.1.0
+ multiprocess==0.70.16
+ neat-python==0.92
+ networkx==3.3
+ numpy==2.1.1
+ packaging==24.1
+ pandas==2.2.2
+ peft==0.12.0
+ pillow==10.2.0
+ psutil==6.0.0
+ pyarrow==17.0.0
+ pygame==2.6.0
+ Pygments==2.18.0
+ python-dateutil==2.9.0.post0
+ pytz==2024.2
+ PyYAML==6.0.2
+ regex==2024.9.11
+ requests==2.32.3
+ rich==13.8.1
+ safetensors==0.4.5
+ scikit-learn==1.5.2
+ scipy==1.14.1
+ setuptools==75.1.0
+ shtab==1.7.1
+ six==1.16.0
+ sympy==1.13.2
+ threadpoolctl==3.5.0
+ tokenizers==0.19.1
+ torch==2.4.1+cu118
+ torchaudio==2.4.1+cu118
+ torchvision==0.19.1+cu118
+ tqdm==4.66.5
+ transformers==4.44.2
+ trl==0.10.1
+ typing_extensions==4.12.2
+ tyro==0.8.10
+ tzdata==2024.1
+ unsloth==2024.8
+ urllib3==2.2.3
+ xxhash==3.5.0
+ yarl==1.11.1
+ streamlit
+ python-dotenv
+ PyPDF2
+ langchain
+ huggingface-hub
+ faiss-cpu
+ sentence-transformers==2.2.2
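Two details of this pin list worth noting: the +cu118 torch/torchaudio/torchvision wheels resolve only from the PyTorch CUDA index (e.g. pip's --extra-index-url https://download.pytorch.org/whl/cu118), and huggingface-hub appears twice, once pinned to 0.24.7 and once unpinned near the bottom. A hedged sketch for checking that the pinned entries actually installed at the expected versions:

```python
# Sketch: compare installed versions against the pins in requirements.txt.
from importlib.metadata import PackageNotFoundError, version

for raw in open("requirements.txt"):
    line = raw.strip()
    if not line or line.startswith("#") or "==" not in line:
        continue  # skip blanks, comments, and unpinned entries
    name, _, pinned = line.partition("==")
    try:
        installed = version(name)
    except PackageNotFoundError:
        installed = "MISSING"
    if installed != pinned:
        print(f"{name}: pinned {pinned}, installed {installed}")
```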
space.yaml ADDED
@@ -0,0 +1,3 @@
+ title: FastAPI Space
+ sdk: docker
+ app_port: 7860
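space.yaml declares a Docker-based Space that serves on port 7860, matching the Dockerfile and FastAPI app.py added in this commit. All the container has to do is expose an HTTP server on that port; a minimal sketch (the endpoint is illustrative, not taken from app.py):

```python
# Sketch: smallest FastAPI app compatible with sdk: docker / app_port: 7860.
import uvicorn
from fastapi import FastAPI

app = FastAPI(title="FastAPI Space")

@app.get("/health")
def health() -> dict:
    # Illustrative endpoint only; the real routes live in app.py.
    return {"status": "ok"}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)  # must match app_port
```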