MGZON committed on
Commit
705031b
·
1 Parent(s): ece4f32

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +426 -0
main.py ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import json
import logging
import gradio as gr
from openai import OpenAI
from pydoc import html
from typing import List, Generator, Optional
import requests
from bs4 import BeautifulSoup
import re

# LaTeX delimiter configuration for Gradio's Markdown renderer.
LATEX_DELIMS = [
    {"left": "$$", "right": "$$", "display": True},
    {"left": "$", "right": "$", "display": False},
    {"left": "\\[", "right": "\\]", "display": True},
    {"left": "\\(", "right": "\\)", "display": False},
]

# Logging setup.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Debug aid: list the files shipped in the container image.
# Guarded so running the module outside the /app container (local dev,
# tests) does not crash at import time with FileNotFoundError.
if os.path.isdir("/app"):
    logger.info("Files in /app/: %s", os.listdir("/app"))

# Hugging Face Inference API client configuration (OpenAI-compatible endpoint).
HF_TOKEN = os.getenv("HF_TOKEN")
API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:cerebras")
SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "MGZON/mgzon-flan-t5-base")
if not HF_TOKEN:
    logger.error("HF_TOKEN is not set in environment variables.")
    raise ValueError("HF_TOKEN is required for Inference API.")
client = OpenAI(api_key=HF_TOKEN, base_url=API_ENDPOINT)

# Queue settings (tunable via environment).
QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", 80))
CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", 20))

# Keywords that route a query to the MGZon-specific model.
MGZON_KEYWORDS = [
    "mgzon", "mgzon products", "mgzon services", "mgzon data", "mgzon platform",
    "mgzon features", "mgzon mission", "mgzon technology", "mgzon solutions",
]
# Automatic model routing.
def select_model(query: str) -> str:
    """Return the model name best suited to *query*.

    MGZon-related queries (matched against MGZON_KEYWORDS) go to the
    fine-tuned secondary model; everything else uses the general model.
    """
    lowered = query.lower()
    if any(keyword in lowered for keyword in MGZON_KEYWORDS):
        logger.info(f"Selected {SECONDARY_MODEL_NAME} for MGZon-related query: {query}")
        return SECONDARY_MODEL_NAME
    logger.info(f"Selected {MODEL_NAME} for general query: {query}")
    return MODEL_NAME
# Web search helper (Google Custom Search + page scraping).
def web_search(query: str) -> str:
    """Search Google Custom Search for *query* and return a text summary.

    For each of the top 3 hits, attempts to fetch the page and extract its
    paragraph text (capped at 500 characters), falling back to the search
    snippet on failure. Returns a human-readable error string instead of
    raising, so callers can feed the result straight into a prompt.
    """
    try:
        google_api_key = os.getenv("GOOGLE_API_KEY")
        google_cse_id = os.getenv("GOOGLE_CSE_ID")
        if not google_api_key or not google_cse_id:
            return "Web search requires GOOGLE_API_KEY and GOOGLE_CSE_ID to be set."

        # Pass the query via `params` so requests URL-encodes it; the
        # original interpolated it raw into the URL, breaking on spaces,
        # '&', '#', and non-ASCII text. Also add a timeout so a stalled
        # API call cannot hang the worker.
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params={"key": google_api_key, "cx": google_cse_id, "q": query},
            timeout=10,
        )
        response.raise_for_status()
        results = response.json().get("items", [])
        if not results:
            return "No web results found."

        search_results = []
        for i, item in enumerate(results[:3]):  # top 3 hits only
            title = item.get("title", "")
            snippet = item.get("snippet", "")
            link = item.get("link", "")
            # Try to pull real page content; fall back to the snippet.
            try:
                page_response = requests.get(link, timeout=5)
                page_response.raise_for_status()
                soup = BeautifulSoup(page_response.text, "html.parser")
                paragraphs = soup.find_all("p")
                # Join the paragraph text, then cap at 500 *characters*.
                # (The original sliced the paragraph list with [:500],
                # taking up to 500 paragraphs instead of 500 chars.)
                page_content = " ".join(p.get_text() for p in paragraphs)[:500]
            except Exception as e:
                logger.warning(f"Failed to fetch page content for {link}: {e}")
                page_content = snippet
            search_results.append(
                f"Result {i+1}:\nTitle: {title}\nLink: {link}\nContent: {page_content}\n"
            )

        return "\n".join(search_results)
    except Exception as e:
        logger.exception("Web search failed")
        return f"Web search error: {e}"
# Streaming generation against the OpenAI-compatible endpoint.
def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.9,
    max_new_tokens: int = 2048,
    reasoning_effort: str = "off",
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
    deep_search: bool = False,
) -> Generator[str, None, None]:
    """Stream chat-completion deltas for *message*. Emits:
    - the sentinel "analysis" once, then raw reasoning deltas;
    - the sentinel "assistantfinal" once, then visible output deltas.
    If no visible deltas arrive, emits a tool-call fallback message.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

    # Detect the task type once and augment the system prompt accordingly.
    # (message.lower() is hoisted; the original recomputed it per check.)
    msg_lower = message.lower()
    task_type = "general"
    if "code" in msg_lower or "programming" in msg_lower or any(
        ext in msg_lower for ext in ["python", "javascript", "react", "django", "flask"]
    ):
        task_type = "code"
        enhanced_system_prompt = f"{system_prompt}\nYou are an expert programmer. Provide accurate, well-commented code with examples and explanations. Support frameworks like React, Django, Flask, and others as needed."
    elif any(keyword in msg_lower for keyword in ["analyze", "analysis", "تحليل"]):
        task_type = "analysis"
        enhanced_system_prompt = f"{system_prompt}\nProvide detailed analysis with step-by-step reasoning, examples, and data-driven insights."
    elif any(keyword in msg_lower for keyword in ["review", "مراجعة"]):
        task_type = "review"
        enhanced_system_prompt = f"{system_prompt}\nReview the provided content thoroughly, identify issues, and suggest improvements with detailed explanations."
    elif any(keyword in msg_lower for keyword in ["publish", "نشر"]):
        task_type = "publish"
        enhanced_system_prompt = f"{system_prompt}\nPrepare content for publishing, ensuring clarity, professionalism, and adherence to best practices."
    else:
        enhanced_system_prompt = f"{system_prompt}\nPlease provide detailed and comprehensive responses, including explanations, examples, and relevant details where applicable."

    logger.info(f"Task type detected: {task_type}")

    # Strip metadata from history messages; keep only role/content.
    input_messages: List[dict] = [{"role": "system", "content": enhanced_system_prompt}]
    if chat_history:
        for msg in chat_history:
            clean_msg = {"role": msg.get("role"), "content": msg.get("content")}
            if clean_msg["content"]:
                input_messages.append(clean_msg)

    # With DeepSearch on (or when using the general model), prepend web context.
    if deep_search or model_name == MODEL_NAME:
        search_result = web_search(message)
        input_messages.append({"role": "user", "content": f"User query: {message}\nWeb search context: {search_result}"})
    else:
        input_messages.append({"role": "user", "content": message})

    # Tools are only honored by the GPT-OSS models.
    tools = tools if tools and "gpt-oss" in model_name else []
    tool_choice = tool_choice if tool_choice in ["auto", "none", "any", "required"] and "gpt-oss" in model_name else "none"

    try:
        stream = client.chat.completions.create(
            model=model_name,
            messages=input_messages,
            temperature=temperature,
            max_tokens=max_new_tokens,
            stream=True,
            tools=tools,
            tool_choice=tool_choice,
        )

        reasoning_started = False
        reasoning_closed = False
        saw_visible_output = False
        last_tool_name = None
        last_tool_args = None
        buffer = ""

        for chunk in stream:
            if chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                # Channel sentinels mark the reasoning/final boundaries.
                if content == "<|channel|>analysis<|message|>":
                    if not reasoning_started:
                        yield "analysis"
                        reasoning_started = True
                    continue
                if content == "<|channel|>final<|message|>":
                    if reasoning_started and not reasoning_closed:
                        yield "assistantfinal"
                        reasoning_closed = True
                    continue

                saw_visible_output = True
                buffer += content

                # Flush on newline or once the buffer exceeds ~150 chars.
                if "\n" in buffer or len(buffer) > 150:
                    yield buffer
                    buffer = ""
                continue

            if chunk.choices[0].delta.tool_calls and "gpt-oss" in model_name:
                tool_call = chunk.choices[0].delta.tool_calls[0]
                # BUG FIX: the streamed `function` field is an object with
                # .name/.arguments attributes, not a dict — the original
                # called .get() on it, which raised AttributeError whenever
                # a tool call was streamed.
                func = getattr(tool_call, "function", None)
                name = getattr(func, "name", None)
                args = getattr(func, "arguments", None)
                if name:
                    last_tool_name = name
                if args:
                    last_tool_args = args
                continue

            if chunk.choices[0].finish_reason in ("stop", "tool_calls", "error"):
                if buffer:
                    yield buffer
                    buffer = ""

                if reasoning_started and not reasoning_closed:
                    yield "assistantfinal"
                    reasoning_closed = True

                if not saw_visible_output:
                    msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
                    if last_tool_name:
                        try:
                            args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
                        except Exception:
                            args_text = str(last_tool_args)
                        msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
                    yield msg

                if chunk.choices[0].finish_reason == "error":
                    yield "Error: Unknown error"  # constant message; f-prefix dropped
                break

        if buffer:
            yield buffer

    except Exception as e:
        logger.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"
# Final message formatting.
def format_final(analysis_text: str, visible_text: str) -> str:
    """Render the final chat message: a collapsible HTML analysis section
    followed by the visible Markdown answer.

    Both arguments may be None/empty; they are stripped and the analysis
    text is HTML-escaped before being placed inside the <pre> block.
    """
    # Use the stdlib `html` module; the original relied on `pydoc.html`,
    # an undocumented internal of pydoc. quote=False matches the original
    # behavior of escaping only &, <, and >.
    from html import escape

    reasoning_safe = escape((analysis_text or "").strip(), quote=False)
    response = (visible_text or "").strip()
    return (
        "<details><summary><strong>🤔 Analysis</strong></summary>\n"
        "<pre style='white-space:pre-wrap;'>"
        f"{reasoning_safe}"
        "</pre>\n</details>\n\n"
        "**💬 Response:**\n\n"
        f"{response}"
    )
# Generation handler with simulated streaming for the Gradio UI.
def generate(message, history, system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens):
    """Gradio handler: stream a reply for *message*.

    Yields a live ```text``` preview (analysis + draft response) while
    tokens arrive, then one final formatted Markdown message. Gradio
    replaces the displayed message with each yielded string.
    """
    if not message.strip():
        yield "Please enter a prompt."
        return

    # Route to the MGZon model or the general model automatically.
    model_name = select_model(message)

    # Flatten Gradio history into clean {role, content} dicts, accepting
    # both messages-style dicts and legacy (user, assistant) pairs.
    chat_history = []
    for h in history:
        if isinstance(h, dict):
            clean_msg = {"role": h.get("role"), "content": h.get("content")}
            if clean_msg["content"]:
                chat_history.append(clean_msg)
        elif isinstance(h, (list, tuple)) and len(h) == 2:
            u, a = h
            if u:
                chat_history.append({"role": "user", "content": u})
            if a:
                chat_history.append({"role": "assistant", "content": a})

    # Tool definitions (only honored by the GPT-OSS models).
    tools = [
        {
            "type": "function",
            "function": {
                "name": "web_search_preview",
                "description": "Perform a web search to gather additional context",
                "parameters": {
                    "type": "object",
                    "properties": {"query": {"type": "string", "description": "Search query"}},
                    "required": ["query"],
                },
            },
        },
        {
            "type": "function",
            "function": {
                "name": "code_generation",
                "description": "Generate or modify code for various frameworks (React, Django, Flask, etc.)",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "code": {"type": "string", "description": "Existing code to modify or empty for new code"},
                        "framework": {"type": "string", "description": "Framework (e.g., React, Django, Flask)"},
                        "task": {"type": "string", "description": "Task description (e.g., create a component, fix a bug)"},
                    },
                    "required": ["task"],
                },
            },
        },
    ] if "gpt-oss" in model_name else []
    tool_choice = "auto" if "gpt-oss" in model_name else "none"

    in_analysis = False
    in_visible = False
    raw_analysis = ""
    raw_visible = ""
    raw_started = False
    last_flush_len = 0

    def make_raw_preview() -> str:
        # Live preview shown while streaming.
        return (
            "```text\n"
            "Analysis (live):\n"
            f"{raw_analysis}\n\n"
            "Response (draft):\n"
            f"{raw_visible}\n"
            "```"
        )

    try:
        stream = request_generation(
            api_key=HF_TOKEN,
            api_base=API_ENDPOINT,
            message=message,
            system_prompt=system_prompt,
            model_name=model_name,
            chat_history=chat_history,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            # BUG FIX: the UI's reasoning-effort setting was accepted by this
            # handler but never forwarded; pass it through to the backend.
            reasoning_effort=reasoning_effort,
            tools=tools,
            tool_choice=tool_choice,
            deep_search=enable_browsing or model_name == MODEL_NAME,
        )

        for chunk in stream:
            # Sentinels switch between analysis and visible phases.
            if chunk == "analysis":
                in_analysis, in_visible = True, False
                if not raw_started:
                    raw_started = True
                    yield make_raw_preview()
                continue
            if chunk == "assistantfinal":
                in_analysis, in_visible = False, True
                if not raw_started:
                    raw_started = True
                    yield make_raw_preview()
                continue

            if in_analysis:
                raw_analysis += chunk
            else:
                # Visible phase and pre-sentinel chunks both accumulate
                # into the draft response.
                raw_visible += chunk

            # Re-render the preview every ~120 chars or on a newline.
            total_len = len(raw_analysis) + len(raw_visible)
            if total_len - last_flush_len >= 120 or "\n" in chunk:
                last_flush_len = total_len
                yield make_raw_preview()

        final_markdown = format_final(raw_analysis, raw_visible)
        # Balance an odd number of `$` so the LaTeX renderer does not
        # treat the remainder of the message as math.
        if final_markdown.count("$") % 2:
            final_markdown += "$"
        yield final_markdown

    except Exception as e:
        logger.exception("Stream failed")
        yield f"❌ Error: {e}"
# Minimal styling for the chat UI.
css = """
.gradio-container { max-width: 800px; margin: auto; }
.chatbot { border: 1px solid #ccc; border-radius: 10px; }
.input-textbox { font-size: 16px; }
"""

# Gradio chat interface wired to the streaming `generate` handler.
# The additional_inputs order must match generate()'s extra parameters:
# system_prompt, temperature, reasoning_effort, enable_browsing, max_new_tokens.
chatbot_ui = gr.ChatInterface(
    fn=generate,
    type="messages",
    chatbot=gr.Chatbot(
        label="MGZon Chatbot",
        type="messages",
        height=600,
        latex_delimiters=LATEX_DELIMS,
    ),
    additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=True),
    additional_inputs=[
        gr.Textbox(label="System prompt", value="You are a helpful assistant capable of code generation, analysis, review, and more.", lines=2),
        gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.9),
        gr.Radio(label="Reasoning Effort", choices=["low", "medium", "high"], value="medium"),
        gr.Checkbox(label="Enable DeepSearch (web browsing)", value=True),
        gr.Slider(label="Max New Tokens", minimum=50, maximum=2048, step=50, value=2048),
    ],
    stop_btn="Stop",
    examples=[
        ["Explain the difference between supervised and unsupervised learning."],
        ["Generate a React component for a login form."],
        ["Review this Python code: print('Hello World')"],
        ["Analyze the performance of a Django REST API."],
        ["Tell me about MGZon products and services."],
        ["Create a Flask route for user authentication."],
        ["What are the latest trends in AI?"],
        ["Provide guidelines for publishing a technical blog post."],
    ],
    title="MGZon Chatbot",
    description="A versatile chatbot powered by GPT-OSS-120B and MGZon-Flan-T5-Base (auto-selected based on query). Supports code generation, analysis, review, web search, and MGZon-specific queries. Licensed under Apache 2.0. ***DISCLAIMER:*** Analysis may contain internal thoughts not suitable for final response.",
    theme="gradio/soft",
    css=css,
)
# Mount the Gradio UI inside a FastAPI application.
from fastapi import FastAPI
from gradio import mount_gradio_app

app = FastAPI(title="MGZon Chatbot API")

# Enable queuing BEFORE mounting so it also applies when the app is served
# by an external process manager (e.g. `uvicorn main:app`); the original
# only enabled the queue under __main__. NOTE: Gradio 4.x renamed
# `concurrency_count` to `default_concurrency_limit`; the old keyword
# raises TypeError on the Gradio versions that support type="messages".
chatbot_ui.queue(max_size=QUEUE_SIZE, default_concurrency_limit=CONCURRENCY_LIMIT)
app = mount_gradio_app(app, chatbot_ui, path="/")

# Run the server.
if __name__ == "__main__":
    import uvicorn

    # Serve the FastAPI app (with Gradio mounted) via uvicorn — the
    # original imported uvicorn but launched Gradio directly, leaving the
    # FastAPI wrapper unreachable.
    uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))