from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    pipeline,
)
from peft import LoraConfig, get_peft_model, TaskType
from datasets import load_dataset
import gradio as gr

# ================== MODEL TRAINING PART ==================

# Load the first 100 training examples of the audit dataset
dataset = load_dataset(
    "msc-smart-contract-auditing/audits-with-reasons", split="train[:100]"
)

# Load model and tokenizer
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model_dir = "./huggingface/hub"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=model_dir)

# Llama-family tokenizers ship without a pad token; reuse EOS so that
# padding="max_length" below does not raise an error.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


# Format an example for the instruction-tuned model
def format_example(example):
    return f"""### Instruction:
Please audit the following smart contract and provide a recommendation.

### Code:
{example['code']}

### Description:
{example['description']}

### Recommendation:
{example['recommendation']}"""


# Tokenization function
def tokenize(example):
    text = format_example(example)
    tokenized = tokenizer(
        text,
        truncation=True,
        padding="max_length",
        max_length=512,
    )
    input_ids = tokenized["input_ids"]
    attention_mask = tokenized["attention_mask"]
    # Mask padding positions with -100 so the loss ignores them;
    # otherwise the model is also trained to predict pad tokens.
    labels = [
        tok if mask == 1 else -100
        for tok, mask in zip(input_ids, attention_mask)
    ]
    return {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "labels": labels,
    }


# Tokenize dataset
tokenized_dataset = dataset.map(
    tokenize, batched=False, remove_columns=dataset.column_names
)

# Load model
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=model_dir)

# Apply LoRA to the attention query/value projections
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# Training config
training_args = TrainingArguments(
    output_dir="./audit-model",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=5e-5,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=100,
    save_total_limit=2,
    report_to="none",
    fp16=False,
    remove_unused_columns=False,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

# Train
trainer.train()

# Save the LoRA adapter and tokenizer
trainer.save_model("./audit-model")
tokenizer.save_pretrained("./audit-model")

# ================== INFERENCE + GRADIO UI ==================

# Load inference pipeline. ./audit-model holds a LoRA adapter; recent
# transformers versions (with peft installed) detect adapter_config.json
# and pull in the base model automatically.
pipe = pipeline("text-generation", model="./audit-model", tokenizer="./audit-model")


# Audit an uploaded contract
def audit_contract(file, description):
    if file is None:
        return "Please upload a Solidity contract file."

    # gr.File may hand back a filepath string or a tempfile wrapper,
    # depending on the Gradio version; handle both.
    path = file if isinstance(file, str) else file.name
    with open(path, "r") as f:
        contract_code = f.read()

    # Build the prompt in the same format used for training
    prompt = f"""### Instruction:
Please audit the following smart contract and provide a recommendation.

### Code:
{contract_code}

### Description:
{description}

### Recommendation:
"""

    # Generate only the continuation, not the echoed prompt
    output = pipe(prompt, max_new_tokens=100, return_full_text=False)
    return output[0]["generated_text"]


# Gradio interface
gr.Interface(
    fn=audit_contract,
    inputs=[
        gr.File(label="Upload Smart Contract (.sol)"),
        gr.Textbox(
            label="Contract Description",
            placeholder="E.g., This contract handles fund withdrawals...",
        ),
    ],
    outputs=gr.Textbox(label="Audit Recommendation"),
    title="Smart Contract Auditor",
    description="Upload a Solidity contract and get audit recommendations from the TinyLlama-powered model.",
).launch()
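

# ---------------------------------------------------------------------------
# Optional helper: a minimal sketch (an addition, not part of the pipeline
# above) of loading the saved adapter explicitly with peft, for transformers
# versions whose pipeline() cannot resolve adapter-only checkpoints on its
# own. merge_and_unload() folds the LoRA weights into the base model so the
# result behaves like a plain causal LM and can be passed to pipeline(...)
# via its `model=` argument.
def load_merged_model(adapter_dir="./audit-model"):
    from peft import PeftModel

    base = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=model_dir)
    return PeftModel.from_pretrained(base, adapter_dir).merge_and_unload()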