Mark-Lasfar committed on
Commit
f2cc606
·
1 Parent(s): 9aa52ab

Update Model

Browse files
Files changed (3) hide show
  1. api/endpoints.py +8 -18
  2. utils/generation.py +50 -90
  3. utils/web_search.py +7 -6
api/endpoints.py CHANGED
@@ -1,4 +1,3 @@
1
- # api/endpoints.py
2
  import os
3
  import uuid
4
  from fastapi import APIRouter, Depends, HTTPException, Request, status, UploadFile, File
@@ -31,11 +30,11 @@ if not BACKUP_HF_TOKEN:
31
  logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")
32
 
33
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
34
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api.cerebras.ai/v1") # تغيير الافتراضي لـ Cerebras
35
- FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co")
36
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b") # النموذج الرئيسي
37
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
38
- TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "Qwen/Qwen2.5-0.5B-Instruct")
39
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
40
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
41
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
@@ -88,7 +87,6 @@ async def handle_session(request: Request):
88
  # Helper function to enhance system prompt for Arabic language
89
  def enhance_system_prompt(system_prompt: str, message: str, user: Optional[User] = None) -> str:
90
  enhanced_prompt = system_prompt
91
- # Check if the message is in Arabic
92
  if any(0x0600 <= ord(char) <= 0x06FF for char in message):
93
  enhanced_prompt += "\nRespond in Arabic with clear, concise, and accurate information tailored to the user's query."
94
  if user and user.additional_info:
@@ -129,7 +127,7 @@ async def model_info():
129
  {"alias": "audio", "description": "Audio transcription model (default)"},
130
  {"alias": "tts", "description": "Text-to-speech model (default)"}
131
  ],
132
- "api_base": ROUTER_API_URL,
133
  "fallback_api_base": FALLBACK_API_ENDPOINT,
134
  "status": "online"
135
  }
@@ -173,11 +171,9 @@ async def chat_endpoint(
173
  db.add(user_msg)
174
  db.commit()
175
 
176
- # Use user's preferred model if set
177
  preferred_model = user.preferred_model if user else None
178
  model_name, api_endpoint = select_model(req.message, input_type="text", preferred_model=preferred_model)
179
 
180
- # Check model availability
181
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
182
  if not is_available:
183
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -227,7 +223,7 @@ async def chat_endpoint(
227
  if not response.strip():
228
  logger.error("Empty response generated.")
229
  raise HTTPException(status_code=500, detail="Empty response generated from model.")
230
- logger.info(f"Chat response: {response[:100]}...") # Log first 100 chars
231
  except Exception as e:
232
  logger.error(f"Chat generation failed: {e}")
233
  raise HTTPException(status_code=500, detail=f"Chat generation failed: {str(e)}")
@@ -280,7 +276,6 @@ async def audio_transcription_endpoint(
280
 
281
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
282
 
283
- # Check model availability
284
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
285
  if not is_available:
286
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -346,7 +341,6 @@ async def text_to_speech_endpoint(
346
 
347
  model_name, api_endpoint = select_model("text to speech", input_type="tts")
348
 
349
- # Check model availability
350
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
351
  if not is_available:
352
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -400,7 +394,6 @@ async def code_endpoint(
400
  preferred_model = user.preferred_model if user else None
401
  model_name, api_endpoint = select_model(prompt, input_type="text", preferred_model=preferred_model)
402
 
403
- # Check model availability
404
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
405
  if not is_available:
406
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -473,7 +466,6 @@ async def analysis_endpoint(
473
  preferred_model = user.preferred_model if user else None
474
  model_name, api_endpoint = select_model(message, input_type="text", preferred_model=preferred_model)
475
 
476
- # Check model availability
477
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
478
  if not is_available:
479
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -561,7 +553,6 @@ async def image_analysis_endpoint(
561
  preferred_model = user.preferred_model if user else None
562
  model_name, api_endpoint = select_model("analyze image", input_type="image", preferred_model=preferred_model)
563
 
564
- # Check model availability
565
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
566
  if not is_available:
567
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
@@ -633,7 +624,7 @@ async def image_analysis_endpoint(
633
  raise HTTPException(status_code=500, detail=f"Image analysis failed: {str(e)}")
634
 
635
  @router.get("/api/test-model")
636
- async def test_model(model: str = MODEL_NAME, endpoint: str = ROUTER_API_URL):
637
  try:
638
  is_available, api_key, selected_endpoint = check_model_availability(model, HF_TOKEN)
639
  if not is_available:
@@ -766,11 +757,9 @@ async def update_user_settings(
766
  if not user:
767
  raise HTTPException(status_code=401, detail="Login required")
768
 
769
- # Validate preferred_model
770
  if settings.preferred_model and settings.preferred_model not in MODEL_ALIASES:
771
  raise HTTPException(status_code=400, detail="Invalid model alias")
772
 
773
- # Update user settings
774
  if settings.display_name is not None:
775
  user.display_name = settings.display_name
776
  if settings.preferred_model is not None:
@@ -801,3 +790,4 @@ async def update_user_settings(
801
  "is_active": user.is_active,
802
  "is_superuser": user.is_superuser
803
  }}
 
 
 
1
  import os
2
  import uuid
3
  from fastapi import APIRouter, Depends, HTTPException, Request, status, UploadFile, File
 
30
  logger.warning("BACKUP_HF_TOKEN is not set. Fallback to secondary model will not work if primary token fails.")
31
 
32
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
33
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
34
+ FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
35
+ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
36
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
37
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf")
38
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
39
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
40
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
 
87
  # Helper function to enhance system prompt for Arabic language
88
  def enhance_system_prompt(system_prompt: str, message: str, user: Optional[User] = None) -> str:
89
  enhanced_prompt = system_prompt
 
90
  if any(0x0600 <= ord(char) <= 0x06FF for char in message):
91
  enhanced_prompt += "\nRespond in Arabic with clear, concise, and accurate information tailored to the user's query."
92
  if user and user.additional_info:
 
127
  {"alias": "audio", "description": "Audio transcription model (default)"},
128
  {"alias": "tts", "description": "Text-to-speech model (default)"}
129
  ],
130
+ "api_base": API_ENDPOINT,
131
  "fallback_api_base": FALLBACK_API_ENDPOINT,
132
  "status": "online"
133
  }
 
171
  db.add(user_msg)
172
  db.commit()
173
 
 
174
  preferred_model = user.preferred_model if user else None
175
  model_name, api_endpoint = select_model(req.message, input_type="text", preferred_model=preferred_model)
176
 
 
177
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
178
  if not is_available:
179
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
223
  if not response.strip():
224
  logger.error("Empty response generated.")
225
  raise HTTPException(status_code=500, detail="Empty response generated from model.")
226
+ logger.info(f"Chat response: {response[:100]}...")
227
  except Exception as e:
228
  logger.error(f"Chat generation failed: {e}")
229
  raise HTTPException(status_code=500, detail=f"Chat generation failed: {str(e)}")
 
276
 
277
  model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
278
 
 
279
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
280
  if not is_available:
281
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
341
 
342
  model_name, api_endpoint = select_model("text to speech", input_type="tts")
343
 
 
344
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
345
  if not is_available:
346
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
394
  preferred_model = user.preferred_model if user else None
395
  model_name, api_endpoint = select_model(prompt, input_type="text", preferred_model=preferred_model)
396
 
 
397
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
398
  if not is_available:
399
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
466
  preferred_model = user.preferred_model if user else None
467
  model_name, api_endpoint = select_model(message, input_type="text", preferred_model=preferred_model)
468
 
 
469
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
470
  if not is_available:
471
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
553
  preferred_model = user.preferred_model if user else None
554
  model_name, api_endpoint = select_model("analyze image", input_type="image", preferred_model=preferred_model)
555
 
 
556
  is_available, api_key, selected_endpoint = check_model_availability(model_name, HF_TOKEN)
557
  if not is_available:
558
  logger.error(f"Model {model_name} is not available at {api_endpoint}")
 
624
  raise HTTPException(status_code=500, detail=f"Image analysis failed: {str(e)}")
625
 
626
  @router.get("/api/test-model")
627
+ async def test_model(model: str = MODEL_NAME, endpoint: str = API_ENDPOINT):
628
  try:
629
  is_available, api_key, selected_endpoint = check_model_availability(model, HF_TOKEN)
630
  if not is_available:
 
757
  if not user:
758
  raise HTTPException(status_code=401, detail="Login required")
759
 
 
760
  if settings.preferred_model and settings.preferred_model not in MODEL_ALIASES:
761
  raise HTTPException(status_code=400, detail="Invalid model alias")
762
 
 
763
  if settings.display_name is not None:
764
  user.display_name = settings.display_name
765
  if settings.preferred_model is not None:
 
790
  "is_active": user.is_active,
791
  "is_superuser": user.is_superuser
792
  }}
793
+
utils/generation.py CHANGED
@@ -1,4 +1,3 @@
1
- # utils/generation.py
2
  import os
3
  import re
4
  import json
@@ -31,40 +30,23 @@ LATEX_DELIMS = [
31
  {"left": "\\(", "right": "\\)", "display": False},
32
  ]
33
 
34
- # إعداد العميل لـ Hugging Face Router API
35
  HF_TOKEN = os.getenv("HF_TOKEN")
36
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
37
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
38
- API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
39
- FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co")
40
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b") # النموذج الرئيسي
41
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
42
- TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "Qwen/Qwen2.5-0.5B-Instruct")
43
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
44
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
45
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
46
  TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
47
 
48
- # Provider endpoints (بدون together)
49
  PROVIDER_ENDPOINTS = {
50
- "fireworks-ai": "https://api.fireworks.ai/inference/v1",
51
- "nebius": "https://api.nebius.ai/v1",
52
- "novita": "https://api.novita.ai/v1",
53
- "groq": "https://api.groq.com/openai/v1",
54
- "cerebras": "https://api.cerebras.ai/v1",
55
- "hyperbolic": "https://api.hyperbolic.xyz/v1",
56
- "nscale": "https://api.nscale.ai/v1"
57
- }
58
-
59
- # Model alias mapping
60
- MODEL_ALIASES = {
61
- "advanced": MODEL_NAME,
62
- "standard": SECONDARY_MODEL_NAME,
63
- "light": TERTIARY_MODEL_NAME,
64
- "image_base": CLIP_BASE_MODEL,
65
- "image_advanced": CLIP_LARGE_MODEL,
66
- "audio": ASR_MODEL,
67
- "tts": TTS_MODEL
68
  }
69
 
70
  def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str, str]:
@@ -75,23 +57,8 @@ def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str,
75
  timeout=30
76
  )
77
  if response.status_code == 200:
78
- data = response.json().get("data", {})
79
- providers = data.get("providers", [])
80
- # Prefer "cerebras" if available
81
- for provider in providers:
82
- if provider.get("provider") == "cerebras" and provider.get("status") == "live":
83
- endpoint = PROVIDER_ENDPOINTS.get("cerebras", API_ENDPOINT)
84
- logger.info(f"Model {model_name} is available via preferred provider cerebras at {endpoint}")
85
- return True, api_key, endpoint
86
- # Fallback to first live provider if cerebras not available
87
- for provider in providers:
88
- if provider.get("status") == "live":
89
- provider_name = provider.get("provider")
90
- endpoint = PROVIDER_ENDPOINTS.get(provider_name, API_ENDPOINT)
91
- logger.info(f"Model {model_name} is available via provider {provider_name} at {endpoint}")
92
- return True, api_key, endpoint
93
- logger.error(f"No live providers found for model {model_name}")
94
- return False, api_key, API_ENDPOINT
95
  elif response.status_code == 429 and BACKUP_HF_TOKEN and api_key != BACKUP_HF_TOKEN:
96
  logger.warning(f"Rate limit reached for token {api_key}. Switching to backup token.")
97
  return check_model_availability(model_name, BACKUP_HF_TOKEN)
@@ -105,7 +72,6 @@ def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str,
105
  return False, api_key, API_ENDPOINT
106
 
107
  def select_model(query: str, input_type: str = "text", preferred_model: Optional[str] = None) -> tuple[str, str]:
108
- # If user has a preferred model, use it unless the input type requires a specific model
109
  if preferred_model and preferred_model in MODEL_ALIASES:
110
  model_name = MODEL_ALIASES[preferred_model]
111
  is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
@@ -114,15 +80,12 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
114
  return model_name, endpoint
115
 
116
  query_lower = query.lower()
117
- # دعم الصوت
118
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
119
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
120
  return ASR_MODEL, FALLBACK_API_ENDPOINT
121
- # دعم تحويل النص إلى صوت
122
  if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]) or input_type == "tts":
123
  logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
124
  return TTS_MODEL, FALLBACK_API_ENDPOINT
125
- # نماذج CLIP للصور
126
  image_patterns = [
127
  r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
128
  r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
@@ -131,7 +94,6 @@ def select_model(query: str, input_type: str = "text", preferred_model: Optional
131
  if re.search(pattern, query_lower, re.IGNORECASE):
132
  logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
133
  return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
134
- # اختيار النموذج بناءً على توفره
135
  available_models = [
136
  (MODEL_NAME, API_ENDPOINT),
137
  (SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT),
@@ -188,8 +150,8 @@ def request_generation(
188
  client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
189
  task_type = "general"
190
  enhanced_system_prompt = system_prompt
 
191
 
192
- # معالجة الصوت (ASR)
193
  if model_name == ASR_MODEL and audio_data:
194
  task_type = "audio_transcription"
195
  try:
@@ -212,7 +174,6 @@ def request_generation(
212
  yield f"Error: Audio transcription failed: {e}"
213
  return
214
 
215
- # معالجة تحويل النص إلى صوت (TTS)
216
  if model_name == TTS_MODEL or output_format == "audio":
217
  task_type = "text_to_speech"
218
  try:
@@ -232,7 +193,6 @@ def request_generation(
232
  yield f"Error: Text-to-speech failed: {e}"
233
  return
234
 
235
- # معالجة الصور
236
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
237
  task_type = "image_analysis"
238
  try:
@@ -263,27 +223,26 @@ def request_generation(
263
  yield f"Error: Image analysis failed: {e}"
264
  return
265
 
266
- # تحسين system_prompt بناءً على نوع المهمة
267
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
268
  task_type = "image"
269
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query. Continue until the query is fully addressed."
270
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
271
  task_type = "code"
272
- enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations. Support frameworks like React, Django, Flask, and others. Format code with triple backticks (```) and specify the language. Continue until the task is fully addressed."
273
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
274
  task_type = "analysis"
275
- enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights. Continue until all aspects of the query are thoroughly covered."
276
  elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
277
  task_type = "review"
278
- enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations. Ensure the response is complete and detailed."
279
  elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
280
  task_type = "publish"
281
- enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices. Provide a complete and detailed response."
282
  else:
283
- enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable. Continue generating content until the query is fully answered, leveraging the full capacity of the model."
284
 
285
  if len(message.split()) < 5:
286
- enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response with examples, explanations, and additional context to ensure completeness."
287
 
288
  logger.info(f"Task type detected: {task_type}")
289
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
@@ -323,7 +282,6 @@ def request_generation(
323
  saw_visible_output = False
324
  last_tool_name = None
325
  last_tool_args = None
326
- buffer = ""
327
 
328
  for chunk in stream:
329
  if chunk.choices[0].delta.content:
@@ -372,7 +330,7 @@ def request_generation(
372
  reasoning_closed = True
373
 
374
  if not saw_visible_output:
375
- msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
376
  if last_tool_name:
377
  try:
378
  args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
@@ -386,15 +344,15 @@ def request_generation(
386
  cached_chunks.append(f"Error: Unknown error")
387
  yield f"Error: Unknown error"
388
  elif chunk.choices[0].finish_reason == "length":
389
- cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
390
- yield "Response truncated due to token limit. Please refine your query or request continuation."
391
  break
392
 
393
  if buffer:
394
  cached_chunks.append(buffer)
395
  yield buffer
396
 
397
- if output_format == "audio" and buffer:
398
  try:
399
  model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
400
  processor = AutoProcessor.from_pretrained(TTS_MODEL)
@@ -413,7 +371,7 @@ def request_generation(
413
  cache[cache_key] = cached_chunks
414
 
415
  except Exception as e:
416
- logger.exception(f"[Gateway] Streaming failed for model {model_name}: {e}")
417
  if selected_api_key != BACKUP_HF_TOKEN and BACKUP_HF_TOKEN:
418
  logger.warning(f"Retrying with backup token for {model_name}")
419
  for chunk in request_generation(
@@ -455,6 +413,7 @@ def request_generation(
455
  tools=[],
456
  tool_choice="none",
457
  )
 
458
  for chunk in stream:
459
  if chunk.choices[0].delta.content:
460
  content = chunk.choices[0].delta.content
@@ -480,27 +439,27 @@ def request_generation(
480
  buffer = ""
481
  continue
482
 
483
- if chunk.choices[0].finish_reason in ("stop", "error", "length"):
484
- if buffer:
485
- cached_chunks.append(buffer)
486
- yield buffer
487
- buffer = ""
488
-
489
- if reasoning_started and not reasoning_closed:
490
- cached_chunks.append("assistantfinal")
491
- yield "assistantfinal"
492
- reasoning_closed = True
493
 
494
- if not saw_visible_output:
495
- cached_chunks.append("No visible output produced.")
496
- yield "No visible output produced."
497
- if chunk.choices[0].finish_reason == "error":
498
- cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
499
- yield f"Error: Unknown error with fallback model {fallback_model}"
500
- elif chunk.choices[0].finish_reason == "length":
501
- cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
502
- yield "Response truncated due to token limit. Please refine your query or request continuation."
503
- break
 
 
 
 
 
504
 
505
  if buffer and output_format == "audio":
506
  try:
@@ -521,7 +480,7 @@ def request_generation(
521
  cache[cache_key] = cached_chunks
522
 
523
  except Exception as e2:
524
- logger.exception(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
525
  try:
526
  is_available, selected_api_key, selected_endpoint = check_model_availability(TERTIARY_MODEL_NAME, selected_api_key)
527
  if not is_available:
@@ -537,6 +496,7 @@ def request_generation(
537
  tools=[],
538
  tool_choice="none",
539
  )
 
540
  for chunk in stream:
541
  if chunk.choices[0].delta.content:
542
  content = chunk.choices[0].delta.content
@@ -559,8 +519,8 @@ def request_generation(
559
  cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
560
  yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
561
  elif chunk.choices[0].finish_reason == "length":
562
- cached_chunks.append("Response truncated due to token limit. Please refine your query or request continuation.")
563
- yield "Response truncated due to token limit. Please refine your query or request continuation."
564
  break
565
  if buffer and output_format == "audio":
566
  try:
@@ -579,8 +539,8 @@ def request_generation(
579
  yield f"Error: Text-to-speech conversion failed: {e}"
580
  cache[cache_key] = cached_chunks
581
  except Exception as e3:
582
- logger.exception(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
583
- yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME}). Please check your model configurations."
584
  return
585
  else:
586
  yield f"Error: Failed to load model {model_name}: {e}"
@@ -634,7 +594,7 @@ def generate(message, history, system_prompt, temperature, reasoning_effort, ena
634
  "type": "function",
635
  "function": {
636
  "name": "code_generation",
637
- "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
638
  "parameters": {
639
  "type": "object",
640
  "properties": {
 
 
1
  import os
2
  import re
3
  import json
 
30
  {"left": "\\(", "right": "\\)", "display": False},
31
  ]
32
 
33
+ # إعداد العميل لـ Hugging Face API
34
  HF_TOKEN = os.getenv("HF_TOKEN")
35
  BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
36
  ROUTER_API_URL = os.getenv("ROUTER_API_URL", "https://router.huggingface.co")
37
+ API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
38
+ FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co/v1")
39
+ MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
40
  SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
41
+ TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "meta-llama/Llama-3-8b-chat-hf") # استبدال Qwen بنموذج متاح
42
  CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
43
  CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
44
  ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
45
  TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
46
 
47
+ # تعطيل PROVIDER_ENDPOINTS لأننا بنستخدم Hugging Face فقط
48
  PROVIDER_ENDPOINTS = {
49
+ "huggingface": API_ENDPOINT # استخدام Hugging Face فقط
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
 
52
  def check_model_availability(model_name: str, api_key: str) -> tuple[bool, str, str]:
 
57
  timeout=30
58
  )
59
  if response.status_code == 200:
60
+ logger.info(f"Model {model_name} is available at {API_ENDPOINT}")
61
+ return True, api_key, API_ENDPOINT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  elif response.status_code == 429 and BACKUP_HF_TOKEN and api_key != BACKUP_HF_TOKEN:
63
  logger.warning(f"Rate limit reached for token {api_key}. Switching to backup token.")
64
  return check_model_availability(model_name, BACKUP_HF_TOKEN)
 
72
  return False, api_key, API_ENDPOINT
73
 
74
  def select_model(query: str, input_type: str = "text", preferred_model: Optional[str] = None) -> tuple[str, str]:
 
75
  if preferred_model and preferred_model in MODEL_ALIASES:
76
  model_name = MODEL_ALIASES[preferred_model]
77
  is_available, _, endpoint = check_model_availability(model_name, HF_TOKEN)
 
80
  return model_name, endpoint
81
 
82
  query_lower = query.lower()
 
83
  if input_type == "audio" or any(keyword in query_lower for keyword in ["voice", "audio", "speech", "صوت", "تحويل صوت"]):
84
  logger.info(f"Selected {ASR_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for audio input")
85
  return ASR_MODEL, FALLBACK_API_ENDPOINT
 
86
  if any(keyword in query_lower for keyword in ["text-to-speech", "tts", "تحويل نص إلى صوت"]) or input_type == "tts":
87
  logger.info(f"Selected {TTS_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for text-to-speech")
88
  return TTS_MODEL, FALLBACK_API_ENDPOINT
 
89
  image_patterns = [
90
  r"\bimage\b", r"\bpicture\b", r"\bphoto\b", r"\bvisual\b", r"\bصورة\b", r"\bتحليل\s+صورة\b",
91
  r"\bimage\s+analysis\b", r"\bimage\s+classification\b", r"\bimage\s+description\b"
 
94
  if re.search(pattern, query_lower, re.IGNORECASE):
95
  logger.info(f"Selected {CLIP_BASE_MODEL} with endpoint {FALLBACK_API_ENDPOINT} for image-related query: {query}")
96
  return CLIP_BASE_MODEL, FALLBACK_API_ENDPOINT
 
97
  available_models = [
98
  (MODEL_NAME, API_ENDPOINT),
99
  (SECONDARY_MODEL_NAME, FALLBACK_API_ENDPOINT),
 
150
  client = OpenAI(api_key=selected_api_key, base_url=selected_endpoint, timeout=120.0)
151
  task_type = "general"
152
  enhanced_system_prompt = system_prompt
153
+ buffer = "" # تعريف buffer هنا لتجنب UnboundLocalError
154
 
 
155
  if model_name == ASR_MODEL and audio_data:
156
  task_type = "audio_transcription"
157
  try:
 
174
  yield f"Error: Audio transcription failed: {e}"
175
  return
176
 
 
177
  if model_name == TTS_MODEL or output_format == "audio":
178
  task_type = "text_to_speech"
179
  try:
 
193
  yield f"Error: Text-to-speech failed: {e}"
194
  return
195
 
 
196
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL] and image_data:
197
  task_type = "image_analysis"
198
  try:
 
223
  yield f"Error: Image analysis failed: {e}"
224
  return
225
 
 
226
  if model_name in [CLIP_BASE_MODEL, CLIP_LARGE_MODEL]:
227
  task_type = "image"
228
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert in image analysis and description. Provide detailed descriptions, classifications, or analysis of images based on the query."
229
  elif any(keyword in message.lower() for keyword in ["code", "programming", "python", "javascript", "react", "django", "flask"]):
230
  task_type = "code"
231
+ enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with comprehensive examples and detailed explanations."
232
  elif any(keyword in message.lower() for keyword in ["analyze", "analysis", "تحليل"]):
233
  task_type = "analysis"
234
+ enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
235
  elif any(keyword in message.lower() for keyword in ["review", "مراجعة"]):
236
  task_type = "review"
237
+ enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations."
238
  elif any(keyword in message.lower() for keyword in ["publish", "نشر"]):
239
  task_type = "publish"
240
+ enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
241
  else:
242
+ enhanced_system_prompt = f"{system_prompt}\nFor general queries, provide comprehensive, detailed responses with examples and explanations where applicable."
243
 
244
  if len(message.split()) < 5:
245
+ enhanced_system_prompt += "\nEven for short or general queries, provide a detailed, in-depth response."
246
 
247
  logger.info(f"Task type detected: {task_type}")
248
  input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
 
282
  saw_visible_output = False
283
  last_tool_name = None
284
  last_tool_args = None
 
285
 
286
  for chunk in stream:
287
  if chunk.choices[0].delta.content:
 
330
  reasoning_closed = True
331
 
332
  if not saw_visible_output:
333
+ msg = "I attempted to call a tool, but tools aren't executed in this environment."
334
  if last_tool_name:
335
  try:
336
  args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
 
344
  cached_chunks.append(f"Error: Unknown error")
345
  yield f"Error: Unknown error"
346
  elif chunk.choices[0].finish_reason == "length":
347
+ cached_chunks.append("Response truncated due to token limit.")
348
+ yield "Response truncated due to token limit."
349
  break
350
 
351
  if buffer:
352
  cached_chunks.append(buffer)
353
  yield buffer
354
 
355
+ if output_format == "audio":
356
  try:
357
  model = ParlerTTSForConditionalGeneration.from_pretrained(TTS_MODEL)
358
  processor = AutoProcessor.from_pretrained(TTS_MODEL)
 
371
  cache[cache_key] = cached_chunks
372
 
373
  except Exception as e:
374
+ logger.error(f"[Gateway] Streaming failed for model {model_name}: {e}")
375
  if selected_api_key != BACKUP_HF_TOKEN and BACKUP_HF_TOKEN:
376
  logger.warning(f"Retrying with backup token for {model_name}")
377
  for chunk in request_generation(
 
413
  tools=[],
414
  tool_choice="none",
415
  )
416
+ buffer = "" # تعريف buffer للنموذج البديل
417
  for chunk in stream:
418
  if chunk.choices[0].delta.content:
419
  content = chunk.choices[0].delta.content
 
439
  buffer = ""
440
  continue
441
 
442
+ if chunk.choices[0].finish_reason in ("stop", "error", "length"):
443
+ if buffer:
444
+ cached_chunks.append(buffer)
445
+ yield buffer
446
+ buffer = ""
 
 
 
 
 
447
 
448
+ if reasoning_started and not reasoning_closed:
449
+ cached_chunks.append("assistantfinal")
450
+ yield "assistantfinal"
451
+ reasoning_closed = True
452
+
453
+ if not saw_visible_output:
454
+ cached_chunks.append("No visible output produced.")
455
+ yield "No visible output produced."
456
+ if chunk.choices[0].finish_reason == "error":
457
+ cached_chunks.append(f"Error: Unknown error with fallback model {fallback_model}")
458
+ yield f"Error: Unknown error with fallback model {fallback_model}"
459
+ elif chunk.choices[0].finish_reason == "length":
460
+ cached_chunks.append("Response truncated due to token limit.")
461
+ yield "Response truncated due to token limit."
462
+ break
463
 
464
  if buffer and output_format == "audio":
465
  try:
 
480
  cache[cache_key] = cached_chunks
481
 
482
  except Exception as e2:
483
+ logger.error(f"[Gateway] Streaming failed for fallback model {fallback_model}: {e2}")
484
  try:
485
  is_available, selected_api_key, selected_endpoint = check_model_availability(TERTIARY_MODEL_NAME, selected_api_key)
486
  if not is_available:
 
496
  tools=[],
497
  tool_choice="none",
498
  )
499
+ buffer = "" # تعريف buffer للنموذج الثالث
500
  for chunk in stream:
501
  if chunk.choices[0].delta.content:
502
  content = chunk.choices[0].delta.content
 
519
  cached_chunks.append(f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}")
520
  yield f"Error: Unknown error with tertiary model {TERTIARY_MODEL_NAME}"
521
  elif chunk.choices[0].finish_reason == "length":
522
+ cached_chunks.append("Response truncated due to token limit.")
523
+ yield "Response truncated due to token limit."
524
  break
525
  if buffer and output_format == "audio":
526
  try:
 
539
  yield f"Error: Text-to-speech conversion failed: {e}"
540
  cache[cache_key] = cached_chunks
541
  except Exception as e3:
542
+ logger.error(f"[Gateway] Streaming failed for tertiary model {TERTIARY_MODEL_NAME}: {e3}")
543
+ yield f"Error: Failed to load all models: Primary ({model_name}), Secondary ({fallback_model}), Tertiary ({TERTIARY_MODEL_NAME})."
544
  return
545
  else:
546
  yield f"Error: Failed to load model {model_name}: {e}"
 
594
  "type": "function",
595
  "function": {
596
  "name": "code_generation",
597
+ "description": "Generate or modify code for various frameworks",
598
  "parameters": {
599
  "type": "object",
600
  "properties": {
utils/web_search.py CHANGED
@@ -1,9 +1,8 @@
1
- #web_search.py
2
  import os
3
  import requests
4
  from bs4 import BeautifulSoup
5
  import logging
6
- import time # لإضافة التأخير
7
 
8
  logger = logging.getLogger(__name__)
9
 
@@ -14,7 +13,10 @@ def web_search(query: str) -> str:
14
  if not google_api_key or not google_cse_id:
15
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
16
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
17
- response = requests.get(url, timeout=10)
 
 
 
18
  response.raise_for_status()
19
  results = response.json().get("items", [])
20
  if not results:
@@ -25,8 +27,8 @@ def web_search(query: str) -> str:
25
  snippet = item.get("snippet", "")
26
  link = item.get("link", "")
27
  try:
28
- time.sleep(2) # إضافة تأخير 2 ثواني بين كل طلب
29
- page_response = requests.get(link, timeout=10)
30
  page_response.raise_for_status()
31
  soup = BeautifulSoup(page_response.text, "html.parser")
32
  paragraphs = soup.find_all("p")
@@ -39,4 +41,3 @@ def web_search(query: str) -> str:
39
  except Exception as e:
40
  logger.exception("Web search failed")
41
  return f"Web search error: {e}"
42
-
 
 
1
  import os
2
  import requests
3
  from bs4 import BeautifulSoup
4
  import logging
5
+ import time
6
 
7
  logger = logging.getLogger(__name__)
8
 
 
13
  if not google_api_key or not google_cse_id:
14
  return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."
15
  url = f"https://www.googleapis.com/customsearch/v1?key={google_api_key}&cx={google_cse_id}&q={query}"
16
+ headers = {
17
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
18
+ }
19
+ response = requests.get(url, headers=headers, timeout=10)
20
  response.raise_for_status()
21
  results = response.json().get("items", [])
22
  if not results:
 
27
  snippet = item.get("snippet", "")
28
  link = item.get("link", "")
29
  try:
30
+ time.sleep(2)
31
+ page_response = requests.get(link, headers=headers, timeout=10)
32
  page_response.raise_for_status()
33
  soup = BeautifulSoup(page_response.text, "html.parser")
34
  paragraphs = soup.find_all("p")
 
41
  except Exception as e:
42
  logger.exception("Web search failed")
43
  return f"Web search error: {e}"