"""
Simplified Handler for Huseyin/qwen3-turkish-model
Hugging Face Inference Endpoints için optimize edilmiş handler
"""
import torch
import logging
from typing import Dict, List, Any
# Basic logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class EndpointHandler:
    def __init__(self, path: str = ""):
        """Initialize the handler with the model"""
        try:
            logger.info(f"Starting handler initialization for path: {path}")

            # Import required libraries
            from transformers import AutoModelForCausalLM, AutoTokenizer
            from peft import PeftModel

            # Model paths
            self.model_path = path if path else "Huseyin/qwen3-turkish-model"
            self.base_model_path = "Qwen/Qwen3-8B"

            logger.info(f"Loading tokenizer from {self.base_model_path}")
            # Load the tokenizer
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.base_model_path,
                trust_remote_code=True
            )

            # Set the padding token if the tokenizer does not define one
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                logger.info("Set pad_token to eos_token")

            logger.info(f"Loading base model from {self.base_model_path}")
            # Load the base model - simple configuration
            self.model = AutoModelForCausalLM.from_pretrained(
                self.base_model_path,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True,
                low_cpu_mem_usage=True
            )

            logger.info(f"Loading LoRA adapter from {self.model_path}")
            # Load the LoRA adapter on top of the base model
            self.model = PeftModel.from_pretrained(
                self.model,
                self.model_path
            )

            # Put the model in eval mode
            self.model.eval()
            logger.info("Model successfully loaded and set to eval mode")

            # Record which device the model landed on
            self.device = next(self.model.parameters()).device
            logger.info(f"Model loaded on device: {self.device}")
        except Exception as e:
            logger.error(f"Error during initialization: {str(e)}")
            logger.error(f"Error type: {type(e).__name__}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            raise
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Handle the inference request

        Args:
            data: Dictionary containing 'inputs' and optional 'parameters'

        Returns:
            List containing the generated text
        """
        try:
            logger.info("Processing inference request")

            # Read the input data
            inputs = data.get("inputs", "")
            parameters = data.get("parameters", {})

            # Check for empty input
            if not inputs:
                logger.warning("Empty input received")
                return [{"generated_text": ""}]

            # If a list was passed, take its first element
            if isinstance(inputs, list):
                inputs = inputs[0] if inputs else ""

            logger.info(f"Input text length: {len(inputs)} chars")

            # Simple prompt format
            prompt = f"User: {inputs}\nAssistant:"

            # Tokenize
            encoded = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024,
                padding=False
            )

            # Move tensors to the model device
            input_ids = encoded["input_ids"].to(self.device)
            attention_mask = encoded.get("attention_mask")
            if attention_mask is not None:
                attention_mask = attention_mask.to(self.device)

            logger.info(f"Input tokenized: {input_ids.shape}")

            # Generation parameters
            gen_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 256),
                "temperature": parameters.get("temperature", 0.7),
                "top_p": parameters.get("top_p", 0.9),
                "do_sample": parameters.get("do_sample", True),
                "pad_token_id": self.tokenizer.pad_token_id,
                "eos_token_id": self.tokenizer.eos_token_id,
            }
            logger.info(f"Generation params: {gen_params}")

            # Generate
            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    **gen_params
                )

            # Decode only the newly generated tokens (skip the prompt)
            generated_text = self.tokenizer.decode(
                outputs[0][input_ids.shape[1]:],
                skip_special_tokens=True
            )

            logger.info(f"Generated {len(generated_text)} chars")
            return [{"generated_text": generated_text.strip()}]
        except Exception as e:
            logger.error(f"Error during inference: {str(e)}")
            logger.error(f"Error type: {type(e).__name__}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return [{
                "generated_text": "",
                "error": str(e)
            }]
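
# Minimal local smoke test - a sketch only; it assumes the base model and
# LoRA adapter can be downloaded and that enough GPU/CPU memory is available.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({
        "inputs": "Merhaba, nasılsın?",
        "parameters": {"max_new_tokens": 32},
    })
    print(result)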