Mark-Lasfar committed on
Commit
f2cc606
·
1 Parent(s): 9aa52ab

Update Model

Browse files
Files changed (3) hide show
  1. api/endpoints.py +8 -18
  2. utils/generation.py +50 -90
  3. utils/web_search.py +7 -6
api/endpoints.py CHANGED
@@ -1,4 +1,3 @@
1
- # api/endpoints.py
2
  import os
3
  import uuid
4
  from fastapi import APIRouter, Depends, HTTPException, Request, status, UploadFile, File
@@ -31,11 +30,11 @@ if not BACKUP_HF_TOKEN:
31
  logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")
32
 
33
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
34
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api.cerebras.ai/v1") # تغيير الافتراضي لـ Cerebras
35
- FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co")
36
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b") # النموذج الرئيسي
37
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
38
- TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "Qwen/Qwen2.5-0.5B-Instruct")
39
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
40
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
41
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
@@ -88,7 +87,6 @@ async def handle_session(request: Request):
88
  # Helper function to enhance system prompt for Arabic language
89
  def enhance_system_prompt(system_prompt: str, message: str, user: Optional[User] = None) -> str:
90
  enhanced_prompt = system_prompt
91
- # Check if the message is in Arabic
92
  if any(0x0600 <= ord(char) <= 0x06FF for char in message):
93
  enhanced_prompt += "\nRespond in Arabic with clear, concise, and accurate information tailored to the user's query."
94
  if user and user.additional_info:
@@ -129,7 +127,7 @@ async def model_info():
129
  {"alias": "audio", "description": "Audio transcription model (default)"},
130
  {"alias": "tts", "description": "Text-to-speech model (default)"}
131
  ],
132
- "api_base": ROUTER_API_URL,
133
  "fallback_api_base": FALLBACK_API_ENDPOINT,
134
  "status": "online"
135
  }
@@ -173,11 +171,9 @@ async def chat_endpoint(
173
  db.add(user_msg)
174
  db.commit()
175
 
176
- # Use user's preferred model if set
177
  preferred_model = user.preferred_model if user else None
178
  model_name, api_endpoint = select_model(req.message, input_type="text", preferred_model=preferred_model)
179
 
180
- # Check model availability
181
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
182
  if not is_available:
183
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -227,7 +223,7 @@ async def chat_endpoint(
227
  if not response.strip():
228
  logger.error("Empty response generated.")
229
  raise HTTPException(status_code=500, detail="Empty response generated from model.")
230
- logger.info(f"Chat response: {response[:100]}...") # Log first 100 chars
231
  except Exception as e:
232
  logger.error(f"Chat generation failed: {e}")
233
  raise HTTPException(status_code=500, detail=f"Chat generation failed: {str(e)}")
@@ -280,7 +276,6 @@ async def audio_transcription_endpoint(
280
 
281
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
282
 
283
- # Check model availability
284
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
285
  if not is_available:
286
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -346,7 +341,6 @@ async def text_to_speech_endpoint(
346
 
347
  model_name, api_endpoint = select_model("text to speech", input_type="tts")
348
 
349
- # Check model availability
350
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
351
  if not is_available:
352
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -400,7 +394,6 @@ async def code_endpoint(
400
  preferred_model = user.preferred_model if user else None
401
  model_name, api_endpoint = select_model(prompt, input_type="text", preferred_model=preferred_model)
402
 
403
- # Check model availability
404
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
405
  if not is_available:
406
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -473,7 +466,6 @@ async def analysis_endpoint(
473
  preferred_model = user.preferred_model if user else None
474
  model_name, api_endpoint = select_model(message, input_type="text", preferred_model=preferred_model)
475
 
476
- # Check model availability
477
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
478
  if not is_available:
479
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -561,7 +553,6 @@ async def image_analysis_endpoint(
561
  preferred_model = user.preferred_model if user else None
562
  model_name, api_endpoint = select_model("analyze image", input_type="image", preferred_model=preferred_model)
563
 
564
- # Check model availability
565
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
566
  if not is_available:
567
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -633,7 +624,7 @@ async def image_analysis_endpoint(
633
  raise HTTPException(status_code=500, detail=f"Image analysis failed: {str(e)}")
634
 
635
  @router.get("/api/test-model")
636
- async def test_model(model: str = MODEL_NAME, endpoint: str = ROUTER_API_URL):
637
  try:
638
  is_available, api_key, selected_endpoint = check_model_availability(model, HF_TOKEN)
639
  if not is_available:
@@ -766,11 +757,9 @@ async def update_user_settings(
766
  if not user:
767
  raise HTTPException(status_code=401, detail="Login required")
768
 
769
- # Validate preferred_model
770
  if settings.preferred_model and settings.preferred_model not in MODEL_ALIASES:
771
  raise HTTPException(status_code=400, detail="Invalid model alias")
772
 
773
- # Update user settings
774
  if settings.display_name is not None:
775
  user.display_name = settings.display_name
776
  if settings.preferred_model is not None:
@@ -801,3 +790,4 @@ async def update_user_settings(
801
  "is_active": user.is_active,
802
  "is_superuser": user.is_superuser
803
  }}
 
 
 
1
  import os
2
  import uuid
3
  from fastapi import APIRouter, Depends, HTTPException, Request, status, UploadFile, File
 
30
  logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")
31
 
32
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
33
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
34
+ FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
35
+ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
36
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
37
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
38
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
39
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
40
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
 
87
  # Helper function to enhance system prompt for Arabic language
88
  def enhance_system_prompt(system_prompt: str, message: str, user: Optional[User] = None) -> str:
89
  enhanced_prompt = system_prompt
 
90
  if any(0x0600 <= ord(char) <= 0x06FF for char in message):
91
  enhanced_prompt += "\nRespond in Arabic with clear, concise, and accurate information tailored to the user's query."
92
  if user and user.additional_info:
 
127
  {"alias": "audio", "description": "Audio transcription model (default)"},
128
  {"alias": "tts", "description": "Text-to-speech model (default)"}
129
  ],
130
+ "api_base": API_ENDPOINT,
131
  "fallback_api_base": FALLBACK_API_ENDPOINT,
132
  "status": "online"
133
  }
 
171
  db.add(user_msg)
172
  db.commit()
173
 
 
174
  preferred_model = user.preferred_model if user else None
175
  model_name, api_endpoint = select_model(req.message, input_type="text", preferred_model=preferred_model)
176
 
 
177
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
178
  if not is_available:
179
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
223
  if not response.strip():
224
  logger.error("Empty response generated.")
225
  raise HTTPException(status_code=500, detail="Empty response generated from model.")
226
+ logger.info(f"Chat response: {response[:100]}...")
227
  except Exception as e:
228
  logger.error(f"Chat generation failed: {e}")
229
  raise HTTPException(status_code=500, detail=f"Chat generation failed: {str(e)}")
 
276
 
277
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
278
 
 
279
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
280
  if not is_available:
281
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
341
 
342
  model_name, api_endpoint = select_model("text to speech", input_type="tts")
343
 
 
344
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
345
  if not is_available:
346
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
394
  preferred_model = user.preferred_model if user else None
395
  model_name, api_endpoint = select_model(prompt, input_type="text", preferred_model=preferred_model)
396
 
 
397
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
398
  if not is_available:
399
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
466
  preferred_model = user.preferred_model if user else None
467
  model_name, api_endpoint = select_model(message, input_type="text", preferred_model=preferred_model)
468
 
 
469
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
470
  if not is_available:
471
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
553
  preferred_model = user.preferred_model if user else None
554
  model_name, api_endpoint = select_model("analyze image", input_type="image", preferred_model=preferred_model)
555
 
 
556
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
557
  if not is_available:
558
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
624
  raise HTTPException(status_code=500, detail=f"Image analysis failed: {str(e)}")
625
 
626
  @router.get("/api/test-model")
627
+ async def test_model(model: str = MODEL_NAME, endpoint: str = API_ENDPOINT):
628
  try:
629
  is_available, api_key, selected_endpoint = check_model_availability(model, HF_TOKEN)
630
  if not is_available:
 
757
  if not user:
758
  raise HTTPException(status_code=401, detail="Login required")
759
 
 
760
  if settings.preferred_model and settings.preferred_model not in MODEL_ALIASES:
761
  raise HTTPException(status_code=400, detail="Invalid model alias")
762
 
 
763
  if settings.display_name is not None:
764
  user.display_name = settings.display_name
765
  if settings.preferred_model is not None:
 
790
  "is_active": user.is_active,
791
  "is_superuser": user.is_superuser
792
  }}
793
+
utils/generation.py CHANGED
@@ -1,4 +1,3 @@
1
- # utils/generation.py
2
  import os
3
  import re
4
  import json
@@ -31,40 +30,23 @@ LATEX_DELIMS = [
31
  {"left": "\\(", "right": "\\)", "display": False},
32
  ]
33
 
34
- # إعداد العميل لـ Hugging Face Router API
35
  HF_TOKEN = os.getenv("HF_TOKEN")
36
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
37
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
38
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
39
- FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co")
40
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b") # النموذج الرئيسي
41
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
42
- TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "Qwen/Qwen2.5-0.5B-Instruct")
43
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
44
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
45
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
46
  TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
47
 
48
- # Provider endpoints (بدون together)
49
  PROVIDER_ENDPOINTS = {
50
- "fireworks-ai": "https://api.fireworks.ai/inference/v1",
51
- "nebius": "https://api.nebius.ai/v1",
52
- "novita": "https://api.novita.ai/v1",
53
- "groq": "https://api.groq.com/openai/v1",
54
- "cerebras": "https://api.cerebras.ai/v1",
55
- "hyperbolic": "https://api.hyperbolic.xyz/v1",
56
- "nscale": "https://api.nscale.ai/v1"
57
- }
58
-
59
- # Model alias mapping
60
- MODEL_ALIASES = {
61
- "advanced": MODEL_NAME,
62
- "standard": SECONDARY_MODEL_NAME,
63
- "light": TERTIARY_MODEL_NAME,
64
- "image_base": CLIP_BASE_MODEL,
65
- "image_advanced": CLIP_LARGE_MODEL,
66
- "audio": ASR_MODEL,
67
- "tts": TTS_MODEL
68
  }
69
 
70
  def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str, str]:
@@ -75,23 +57,8 @@ def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str,
75
  timeout=30
76
  )
77
  if response.status_code == 200:
78
- data = response.json().get("data", {})
79
- providers = data.get("providers", [])
80
- # Prefer "cerebras" if available
81
- for provider in providers:
82
- if provider.get("provider") == "cerebras" and provider.get("status") == "live":
83
- endpoint = PROVIDER_ENDPOINTS.get("cerebras", API_ENDPOINT)
84
- logger.info(f"Model {model_name} is available via preferred provider cerebras at {endpoint}")
85
- return True, api_key, endpoint
86
- # Fallback to first live provider if cerebras not available
87
- for provider in providers:
88
- if provider.get("status") == "live":
89
- provider_name = provider.get("provider")
90
- endpoint = PROVIDER_ENDPOINTS.get(provider_name, API_ENDPOINT)
91
- logger.info(f"Model {model_name} is available via provider {provider_name} at {endpoint}")
92
- return True, api_key, endpoint
93
- logger.error(f"No live providers found for model {model_name}")
94
- return False, api_key, API_ENDPOINT
95
  elif response.status_code == 429 and BACKUP_HF_TOKEN and api_key != BACKUP_HF_TOKEN:
96
  logger.warning(f"Rate limit reached for token {api_key}. Switching to backup token.")
97
  return check_model_availability(model_name, BACKUP_HF_TOKEN)
@@ -105,7 +72,6 @@ def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str,
105
  return False, api_key, API_ENDPOINT
106
 
107
  def select_model(query: str, input_type: str = "text", preferred_model: Optional[str] = None) -> tuple[str, str]:
108
- # If user has a preferred model, use it unless the input type requires a specific model
109
  if preferred_model and preferred_model in MODEL_ALIASES:
110
  model_name = MODEL_ALIASES[preferred_model]
111
  is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
@@ -114,15 +80,12 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
114
  return model_name, endpoint
115
 
116
  query_lower = query.lower()
117
- # دعم الصوت
118
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
119
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
120
  return ASR_MODEL, FALLBACK_API_ENDPOINT
121
- # دعم تحويل النص إلى صوت
122
  if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]) or input_type == "tts":
123
  logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
124
  return TTS_MODEL, FALLBACK_API_ENDPOINT
125
- # نماذج CLIP للصور
126
  image_patterns = [
127
  r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
128
  r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
@@ -131,7 +94,6 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
131
  if re.search(pattern, query_lower, re.IGNORECASE):
132
  logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
133
  return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
134
- # اختيار النموذج بناءً على توفره
135
  available_models = [
136
  (MODEL_NAME, API_ENDPOINT),
137
  (SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT),
@@ -188,8 +150,8 @@ def request_generation(
188
  client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
189
  task_type = "general"
190
  enhanced_system_prompt = system_prompt
 
191
 
192
- # معالجة الصوت (ASR)
193
  if model_name == ASR_MODEL and audio_data:
194
  task_type = "audio_transcription"
195
  try:
@@ -212,7 +174,6 @@ def request_generation(
212
  yield f"Error: Audio transcription failed: {e}"
213
  return
214
 
215
- # معالجة تحويل النص إلى صوت (TTS)
216
  if model_name == TTS_MODEL or output_format == "audio":
217
  task_type = "text_to_speech"
218
  try:
@@ -232,7 +193,6 @@ def request_generation(
232
  yield f"Error: Text-to-speech failed: {e}"
233
  return
234
 
235
- # معالجة الصور
236
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
237
  task_type = "image_analysis"
238
  try:
@@ -263,27 +223,26 @@ def request_generation(
263
  yield f"Error: Image analysis failed: {e}"
264
  return
265
 
266
- # تحسين system_prompt بناءً على نوع المهمة
267
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
268
  task_type = "image"
269
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
270
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
271
  task_type = "code"
272
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
273
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
274
  task_type = "analysis"
275
- enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
276
  elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
277
  task_type = "review"
278
- enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
279
  elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
280
  task_type = "publish"
281
- enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
282
  else:
283
- enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."
284
 
285
  if len(message.split()) < 5:
286
- enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."
287
 
288
  logger.info(f"Task type detected: {task_type}")
289
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
@@ -323,7 +282,6 @@ def request_generation(
323
  saw_visible_output = False
324
  last_tool_name = None
325
  last_tool_args = None
326
- buffer = ""
327
 
328
  for chunk in stream:
329
  if chunk.choices[0].delta.content:
@@ -372,7 +330,7 @@ def request_generation(
372
  reasoning_closed = True
373
 
374
  if not saw_visible_output:
375
- msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
376
  if last_tool_name:
377
  try:
378
  args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
@@ -386,15 +344,15 @@ def request_generation(
386
  cached_chunks.append(f"Error: Unknown error")
387
  yield f"Error: Unknown error"
388
  elif chunk.choices[0].finish_reason == "length":
389
- cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
390
- yield "Response truncated due to token limit. Please refine your query or request continuation."
391
  break
392
 
393
  if buffer:
394
  cached_chunks.append(buffer)
395
  yield buffer
396
 
397
- if output_format == "audio" and buffer:
398
  try:
399
  model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
400
  processor = AutoProcessor.from_pretrained(TTS_MODEL)
@@ -413,7 +371,7 @@ def request_generation(
413
  cache[cache_key] = cached_chunks
414
 
415
  except Exception as e:
416
- logger.exception(f"[Gateway] Streaming failed for model {model_name}: {e}")
417
  if selected_api_key != BACKUP_HF_TOKEN and BACKUP_HF_TOKEN:
418
  logger.warning(f"Retrying with backup token for {model_name}")
419
  for chunk in request_generation(
@@ -455,6 +413,7 @@ def request_generation(
455
  tools=[],
456
  tool_choice="none",
457
  )
 
458
  for chunk in stream:
459
  if chunk.choices[0].delta.content:
460
  content = chunk.choices[0].delta.content
@@ -480,27 +439,27 @@ def request_generation(
480
  buffer = ""
481
  continue
482
 
483
- if chunk.choices[0].finish_reason in ("stop", "error", "length"):
484
- if buffer:
485
- cached_chunks.append(buffer)
486
- yield buffer
487
- buffer = ""
488
-
489
- if reasoning_started and not reasoning_closed:
490
- cached_chunks.append("assistantfinal")
491
- yield "assistantfinal"
492
- reasoning_closed = True
493
 
494
- if not saw_visible_output:
495
- cached_chunks.append("No visible output produced.")
496
- yield "No visible output produced."
497
- if chunk.choices[0].finish_reason == "error":
498
- cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
499
- yield f"Error: Unknown error with fallback model {fallback_model}"
500
- elif chunk.choices[0].finish_reason == "length":
501
- cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
502
- yield "Response truncated due to token limit. Please refine your query or request continuation."
503
- break
 
 
 
 
 
504
 
505
  if buffer and output_format == "audio":
506
  try:
@@ -521,7 +480,7 @@ def request_generation(
521
  cache[cache_key] = cached_chunks
522
 
523
  except Exception as e2:
524
- logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
525
  try:
526
  is_available, selected_api_key, selected_endpoint = check_model_availability(TERTIARY_MODEL_NAME, selected_api_key)
527
  if not is_available:
@@ -537,6 +496,7 @@ def request_generation(
537
  tools=[],
538
  tool_choice="none",
539
  )
 
540
  for chunk in stream:
541
  if chunk.choices[0].delta.content:
542
  content = chunk.choices[0].delta.content
@@ -559,8 +519,8 @@ def request_generation(
559
  cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
560
  yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
561
  elif chunk.choices[0].finish_reason == "length":
562
- cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
563
- yield "Response truncated due to token limit. Please refine your query or request continuation."
564
  break
565
  if buffer and output_format == "audio":
566
  try:
@@ -579,8 +539,8 @@ def request_generation(
579
  yield f"Error: Text-to-speech conversion failed: {e}"
580
  cache[cache_key] = cached_chunks
581
  except Exception as e3:
582
- logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
583
- yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
584
  return
585
  else:
586
  yield f"Error: Failed to load model {model_name}: {e}"
@@ -634,7 +594,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
634
  "type": "function",
635
  "function": {
636
  "name": "code_generation",
637
- "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
638
  "parameters": {
639
  "type": "object",
640
  "properties": {
 
 
1
  import os
2
  import re
3
  import json
 
30
  {"left": "\\(", "right": "\\)", "display": False},
31
  ]
32
 
33
+ # إعداد العميل لـ Hugging Face API
34
  HF_TOKEN = os.getenv("HF_TOKEN")
35
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
36
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
37
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
38
+ FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
39
+ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
40
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
41
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf") # استبدال Qwen بنموذج متاح
42
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
43
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
44
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
45
  TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
46
 
47
+ # تعطيل PROVIDER_ENDPOINTS لأننا بنستخدم Hugging Face فقط
48
  PROVIDER_ENDPOINTS = {
49
+ "huggingface": API_ENDPOINT # استخدام Hugging Face فقط
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
 
52
  def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str, str]:
 
57
  timeout=30
58
  )
59
  if response.status_code == 200:
60
+ logger.info(f"Model {model_name} is available at {API_ENDPOINT}")
61
+ return True, api_key, API_ENDPOINT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  elif response.status_code == 429 and BACKUP_HF_TOKEN and api_key != BACKUP_HF_TOKEN:
63
  logger.warning(f"Rate limit reached for token {api_key}. Switching to backup token.")
64
  return check_model_availability(model_name, BACKUP_HF_TOKEN)
 
72
  return False, api_key, API_ENDPOINT
73
 
74
  def select_model(query: str, input_type: str = "text", preferred_model: Optional[str] = None) -> tuple[str, str]:
 
75
  if preferred_model and preferred_model in MODEL_ALIASES:
76
  model_name = MODEL_ALIASES[preferred_model]
77
  is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
 
80
  return model_name, endpoint
81
 
82
  query_lower = query.lower()
 
83
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
84
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
85
  return ASR_MODEL, FALLBACK_API_ENDPOINT
 
86
  if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]) or input_type == "tts":
87
  logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
88
  return TTS_MODEL, FALLBACK_API_ENDPOINT
 
89
  image_patterns = [
90
  r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
91
  r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
 
94
  if re.search(pattern, query_lower, re.IGNORECASE):
95
  logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
96
  return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
 
97
  available_models = [
98
  (MODEL_NAME, API_ENDPOINT),
99
  (SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT),
 
150
  client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
151
  task_type = "general"
152
  enhanced_system_prompt = system_prompt
153
+ buffer = "" # تعريف buffer هنا لتجنب UnboundLocalError
154
 
 
155
  if model_name == ASR_MODEL and audio_data:
156
  task_type = "audio_transcription"
157
  try:
 
174
  yield f"Error: Audio transcription failed: {e}"
175
  return
176
 
 
177
  if model_name == TTS_MODEL or output_format == "audio":
178
  task_type = "text_to_speech"
179
  try:
 
193
  yield f"Error: Text-to-speech failed: {e}"
194
  return
195
 
 
196
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
197
  task_type = "image_analysis"
198
  try:
 
223
  yield f"Error: Image analysis failed: {e}"
224
  return
225
 
 
226
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
227
  task_type = "image"
228
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
229
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
230
  task_type = "code"
231
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
232
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
233
  task_type = "analysis"
234
+ enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
235
  elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
236
  task_type = "review"
237
+ enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations."
238
  elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
239
  task_type = "publish"
240
+ enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
241
  else:
242
+ enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
243
 
244
  if len(message.split()) < 5:
245
+ enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response."
246
 
247
  logger.info(f"Task type detected: {task_type}")
248
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
 
282
  saw_visible_output = False
283
  last_tool_name = None
284
  last_tool_args = None
 
285
 
286
  for chunk in stream:
287
  if chunk.choices[0].delta.content:
 
330
  reasoning_closed = True
331
 
332
  if not saw_visible_output:
333
+ msg = "I attempted to call a tool, but tools aren't executed in this environment."
334
  if last_tool_name:
335
  try:
336
  args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
 
344
  cached_chunks.append(f"Error: Unknown error")
345
  yield f"Error: Unknown error"
346
  elif chunk.choices[0].finish_reason == "length":
347
+ cached_chunks.append("Response truncated due to token limit.")
348
+ yield "Response truncated due to token limit."
349
  break
350
 
351
  if buffer:
352
  cached_chunks.append(buffer)
353
  yield buffer
354
 
355
+ if output_format == "audio":
356
  try:
357
  model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
358
  processor = AutoProcessor.from_pretrained(TTS_MODEL)
 
371
  cache[cache_key] = cached_chunks
372
 
373
  except Exception as e:
374
+ logger.error(f"[Gateway] Streaming failed for model {model_name}: {e}")
375
  if selected_api_key != BACKUP_HF_TOKEN and BACKUP_HF_TOKEN:
376
  logger.warning(f"Retrying with backup token for {model_name}")
377
  for chunk in request_generation(
 
413
  tools=[],
414
  tool_choice="none",
415
  )
416
+ buffer = "" # تعريف buffer للنموذج البديل
417
  for chunk in stream:
418
  if chunk.choices[0].delta.content:
419
  content = chunk.choices[0].delta.content
 
439
  buffer = ""
440
  continue
441
 
442
+ if chunk.choices[0].finish_reason in ("stop", "error", "length"):
443
+ if buffer:
444
+ cached_chunks.append(buffer)
445
+ yield buffer
446
+ buffer = ""
 
 
 
 
 
447
 
448
+ if reasoning_started and not reasoning_closed:
449
+ cached_chunks.append("assistantfinal")
450
+ yield "assistantfinal"
451
+ reasoning_closed = True
452
+
453
+ if not saw_visible_output:
454
+ cached_chunks.append("No visible output produced.")
455
+ yield "No visible output produced."
456
+ if chunk.choices[0].finish_reason == "error":
457
+ cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
458
+ yield f"Error: Unknown error with fallback model {fallback_model}"
459
+ elif chunk.choices[0].finish_reason == "length":
460
+ cached_chunks.append("Response truncated due to token limit.")
461
+ yield "Response truncated due to token limit."
462
+ break
463
 
464
  if buffer and output_format == "audio":
465
  try:
 
480
  cache[cache_key] = cached_chunks
481
 
482
  except Exception as e2:
483
+ logger.error(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
484
  try:
485
  is_available, selected_api_key, selected_endpoint = check_model_availability(TERTIARY_MODEL_NAME, selected_api_key)
486
  if not is_available:
 
496
  tools=[],
497
  tool_choice="none",
498
  )
499
+ buffer = "" # تعريف buffer للنموذج الثالث
500
  for chunk in stream:
501
  if chunk.choices[0].delta.content:
502
  content = chunk.choices[0].delta.content
 
519
  cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
520
  yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
521
  elif chunk.choices[0].finish_reason == "length":
522
+ cached_chunks.append("Response truncated due to token limit.")
523
+ yield "Response truncated due to token limit."
524
  break
525
  if buffer and output_format == "audio":
526
  try:
 
539
  yield f"Error: Text-to-speech conversion failed: {e}"
540
  cache[cache_key] = cached_chunks
541
  except Exception as e3:
542
+ logger.error(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
543
+ yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME})."
544
  return
545
  else:
546
  yield f"Error: Failed to load model {model_name}: {e}"
 
594
  "type": "function",
595
  "function": {
596
  "name": "code_generation",
597
+ "description": "Generate or modify code for various frameworks",
598
  "parameters": {
599
  "type": "object",
600
  "properties": {
utils/web_search.py CHANGED
@@ -1,9 +1,8 @@
1
- #web_search.py
2
  import os
3
  import requests
4
  from bs4 import BeautifulSoup
5
  import logging
6
- import time # لإضافة التأخير
7
 
8
  logger = logging.getLogger(__name__)
9
 
@@ -14,7 +13,10 @@ def web_search(query: str) -> str:
14
  if not google_api_key or not google_cse_id:
15
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
16
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
17
- response = requests.get(url, timeout=10)
 
 
 
18
  response.raise_for_status()
19
  results = response.json().get("items", [])
20
  if not results:
@@ -25,8 +27,8 @@ def web_search(query: str) -> str:
25
  snippet = item.get("snippet", "")
26
  link = item.get("link", "")
27
  try:
28
- time.sleep(2) # إضافة تأخير 2 ثواني بين كل طلب
29
- page_response = requests.get(link, timeout=10)
30
  page_response.raise_for_status()
31
  soup = BeautifulSoup(page_response.text, "html.parser")
32
  paragraphs = soup.find_all("p")
@@ -39,4 +41,3 @@ def web_search(query: str) -> str:
39
  except Exception as e:
40
  logger.exception("Web search failed")
41
  return f"Web search error: {e}"
42
-
 
 
1
  import os
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import logging
5
+ import time
6
 
7
  logger = logging.getLogger(__name__)
8
 
 
13
  if not google_api_key or not google_cse_id:
14
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
15
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
16
+ headers = {
17
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
18
+ }
19
+ response = requests.get(url, headers=headers, timeout=10)
20
  response.raise_for_status()
21
  results = response.json().get("items", [])
22
  if not results:
 
27
  snippet = item.get("snippet", "")
28
  link = item.get("link", "")
29
  try:
30
+ time.sleep(2)
31
+ page_response = requests.get(link, headers=headers, timeout=10)
32
  page_response.raise_for_status()
33
  soup = BeautifulSoup(page_response.text, "html.parser")
34
  paragraphs = soup.find_all("p")
 
41
  except Exception as e:
42
  logger.exception("Web search failed")
43
  return f"Web search error: {e}"