| | """ |
| | Simplified Handler for Huseyin/qwen3-turkish-model |
| | Hugging Face Inference Endpoints için optimize edilmiş handler |
| | """ |
| |
|
| | import torch |
| | import logging |
| | from typing import Dict, List, Any |
| |
|
| | |
| | logging.basicConfig(level=logging.INFO) |
| | logger = logging.getLogger(__name__) |
| |
|
| |
|
class EndpointHandler:
    def __init__(self, path: str = ""):
        """Initialize the handler with the model."""
        try:
            logger.info(f"Starting handler initialization for path: {path}")

            # Deferred imports so that a missing dependency is caught and
            # logged by the except block below.
            from peft import PeftModel
            from transformers import AutoModelForCausalLM, AutoTokenizer

            self.model_path = path if path else "Huseyin/qwen3-turkish-model"
            self.base_model_path = "Qwen/Qwen3-8B"

            logger.info(f"Loading tokenizer from {self.base_model_path}")
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.base_model_path,
                trust_remote_code=True,
            )

            # generate() needs a pad token; fall back to the EOS token.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token
                logger.info("Set pad_token to eos_token")

            logger.info(f"Loading base model from {self.base_model_path}")
            self.model = AutoModelForCausalLM.from_pretrained(
                self.base_model_path,
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
            )
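
            # Optional (assumption, not in the original handler): on
            # memory-constrained GPUs the base model could instead be loaded
            # in 4-bit, at some cost in output quality, e.g.
            #
            #     from transformers import BitsAndBytesConfig
            #     quantization_config=BitsAndBytesConfig(load_in_4bit=True)
            #
            # passed as an extra argument to from_pretrained above.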

            logger.info(f"Loading LoRA adapter from {self.model_path}")
            self.model = PeftModel.from_pretrained(
                self.model,
                self.model_path,
            )
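
            # Optional (untested sketch): merging the LoRA weights into the
            # base model removes the adapter indirection at inference time,
            # assuming enough memory for the merged weights:
            #
            #     self.model = self.model.merge_and_unload()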

            self.model.eval()
            logger.info("Model successfully loaded and set to eval mode")

            self.device = next(self.model.parameters()).device
            logger.info(f"Model loaded on device: {self.device}")

        except Exception as e:
            logger.error(f"Error during initialization: {str(e)}")
            logger.error(f"Error type: {type(e).__name__}")
            logger.error(f"Traceback: {traceback.format_exc()}")
            raise

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Handle the inference request.

        Args:
            data: Dictionary containing 'inputs' and optional 'parameters'.

        Returns:
            List containing the generated text.
        """
        try:
            logger.info("Processing inference request")

            inputs = data.get("inputs", "")
            parameters = data.get("parameters", {})

            if not inputs:
                logger.warning("Empty input received")
                return [{"generated_text": ""}]

            # Some clients send a list of inputs; use only the first entry.
            if isinstance(inputs, list):
                inputs = inputs[0] if inputs else ""

            logger.info(f"Input text length: {len(inputs)} chars")

            # Simple single-turn prompt format.
            prompt = f"User: {inputs}\nAssistant:"
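
            # Alternative (assumption, not from the original handler): Qwen
            # chat checkpoints ship a chat template, so the tokenizer could
            # build the prompt instead of the hand-written format above:
            #
            #     prompt = self.tokenizer.apply_chat_template(
            #         [{"role": "user", "content": inputs}],
            #         tokenize=False,
            #         add_generation_prompt=True,
            #     )
            #
            # This only helps if the adapter was trained with the same template.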

            encoded = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=1024,
                padding=False,
            )

            input_ids = encoded["input_ids"].to(self.device)
            attention_mask = encoded.get("attention_mask")
            if attention_mask is not None:
                attention_mask = attention_mask.to(self.device)

            logger.info(f"Input tokenized: {input_ids.shape}")

            # Request-supplied parameters override these defaults.
            gen_params = {
                "max_new_tokens": parameters.get("max_new_tokens", 256),
                "temperature": parameters.get("temperature", 0.7),
                "top_p": parameters.get("top_p", 0.9),
                "do_sample": parameters.get("do_sample", True),
                "pad_token_id": self.tokenizer.pad_token_id,
                "eos_token_id": self.tokenizer.eos_token_id,
            }
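
            # Note: temperature and top_p only take effect while sampling;
            # a request with "do_sample": false makes decoding greedy.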

            logger.info(f"Generation params: {gen_params}")

            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    **gen_params,
                )

            # Decode only the newly generated tokens, skipping the prompt.
            generated_text = self.tokenizer.decode(
                outputs[0][input_ids.shape[1]:],
                skip_special_tokens=True,
            )

            logger.info(f"Generated {len(generated_text)} chars")

            return [{"generated_text": generated_text.strip()}]

        except Exception as e:
            logger.error(f"Error during inference: {str(e)}")
            logger.error(f"Error type: {type(e).__name__}")
            logger.error(f"Traceback: {traceback.format_exc()}")

            return [{
                "generated_text": "",
                "error": str(e),
            }]
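

# Local smoke test: an illustrative sketch, not part of the deployed handler.
# Inference Endpoints import EndpointHandler and invoke it with the request
# payload shown below; running this directly downloads the 8B base model, so
# it needs a GPU with sufficient memory.
if __name__ == "__main__":
    handler = EndpointHandler()
    result = handler({
        "inputs": "Merhaba, nasılsın?",  # example Turkish prompt
        "parameters": {"max_new_tokens": 64, "temperature": 0.7},
    })
    print(result)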