""" Simplified Handler for Huseyin/qwen3-turkish-model Hugging Face Inference Endpoints için optimize edilmiş handler """ import torch import logging from typing import Dict, List, Any # Temel logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) class EndpointHandler: def __init__(self, path: str = ""): """Initialize the handler with the model""" try: logger.info(f"Starting handler initialization for path: {path}") # Import gerekli kütüphaneler from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel # Model paths self.model_path = path if path else "Huseyin/qwen3-turkish-model" self.base_model_path = "Qwen/Qwen3-8B" logger.info(f"Loading tokenizer from {self.base_model_path}") # Tokenizer yükle self.tokenizer = AutoTokenizer.from_pretrained( self.base_model_path, trust_remote_code=True ) # Padding token ayarla if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token logger.info("Set pad_token to eos_token") logger.info(f"Loading base model from {self.base_model_path}") # Base model yükle - basit konfigürasyon self.model = AutoModelForCausalLM.from_pretrained( self.base_model_path, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" if torch.cuda.is_available() else None, trust_remote_code=True, low_cpu_mem_usage=True ) logger.info(f"Loading LoRA adapter from {self.model_path}") # LoRA adapter yükle self.model = PeftModel.from_pretrained( self.model, self.model_path ) # Model'i eval moduna al self.model.eval() logger.info("Model successfully loaded and set to eval mode") # Device bilgisi self.device = next(self.model.parameters()).device logger.info(f"Model loaded on device: {self.device}") except Exception as e: logger.error(f"Error during initialization: {str(e)}") logger.error(f"Error type: {type(e).__name__}") import traceback logger.error(f"Traceback: {traceback.format_exc()}") raise def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]: """ Handle the inference request Args: data: Dictionary containing 'inputs' and optional 'parameters' Returns: List containing generated text """ try: logger.info("Processing inference request") # Input verisini al inputs = data.get("inputs", "") parameters = data.get("parameters", {}) # String'i kontrol et if not inputs: logger.warning("Empty input received") return [{"generated_text": ""}] # Eğer liste ise ilk elemanı al if isinstance(inputs, list): inputs = inputs[0] if inputs else "" logger.info(f"Input text length: {len(inputs)} chars") # Basit prompt formatı prompt = f"User: {inputs}\nAssistant:" # Tokenize encoded = self.tokenizer( prompt, return_tensors="pt", truncation=True, max_length=1024, padding=False ) # Device'a taşı input_ids = encoded["input_ids"].to(self.device) attention_mask = encoded.get("attention_mask") if attention_mask is not None: attention_mask = attention_mask.to(self.device) logger.info(f"Input tokenized: {input_ids.shape}") # Generation parametreleri gen_params = { "max_new_tokens": parameters.get("max_new_tokens", 256), "temperature": parameters.get("temperature", 0.7), "top_p": parameters.get("top_p", 0.9), "do_sample": parameters.get("do_sample", True), "pad_token_id": self.tokenizer.pad_token_id, "eos_token_id": self.tokenizer.eos_token_id, } logger.info(f"Generation params: {gen_params}") # Generate with torch.no_grad(): outputs = self.model.generate( input_ids=input_ids, attention_mask=attention_mask, **gen_params ) # Decode generated_text = self.tokenizer.decode( 
outputs[0][input_ids.shape[1]:], skip_special_tokens=True ) logger.info(f"Generated {len(generated_text)} chars") return [{"generated_text": generated_text.strip()}] except Exception as e: logger.error(f"Error during inference: {str(e)}") logger.error(f"Error type: {type(e).__name__}") import traceback logger.error(f"Traceback: {traceback.format_exc()}") return [{ "generated_text": "", "error": str(e) }]
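

# ---------------------------------------------------------------------------
# Minimal local smoke test (illustrative sketch, not part of the endpoint
# contract). Hugging Face Inference Endpoints instantiate EndpointHandler and
# call it with a payload of the form {"inputs": ..., "parameters": {...}};
# this block mimics that call locally. Running it assumes the base model and
# LoRA adapter can actually be downloaded and fit in memory, so treat it as
# an example invocation only. The sample prompt text is illustrative.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    handler = EndpointHandler()  # empty path falls back to "Huseyin/qwen3-turkish-model"
    payload = {
        "inputs": "Merhaba, nasılsın?",
        "parameters": {"max_new_tokens": 64, "temperature": 0.7},
    }
    result = handler(payload)
    print(result[0]["generated_text"])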