ibrahimlasfar committed
Commit 41c40b7 · 1 Parent(s): ae2582f

Update chatbot with audio/image support and fixed models

Files changed (4)
  1. Dockerfile +1 -2
  2. README.md +1 -1
  3. api/endpoints.py +22 -23
  4. requirements.txt +3 -3
Dockerfile CHANGED
@@ -3,13 +3,12 @@ FROM python:3.10-slim
 # Set working directory
 WORKDIR /app
 
-# Install system dependencies
+# Install chromium-driver and build dependencies
 RUN apt-get update && apt-get install -y \
     chromium-driver \
     git \
     gcc \
     libc-dev \
-    ffmpeg \
     && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # Update pip
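One thing worth flagging: this commit drops ffmpeg from the apt-get install list, but requirements.txt (below) still pins pydub==0.25.1 and ffmpeg-python==0.2.0, both of which shell out to the ffmpeg binary at runtime. If the audio endpoints run inside this image, they will likely fail until the package is restored. A minimal startup guard, as a hypothetical sketch that is not part of this commit:

# Hypothetical startup check, not in the repo: fail fast when the ffmpeg
# binary is absent, since pydub and ffmpeg-python invoke it as a subprocess.
import shutil

if shutil.which("ffmpeg") is None:
    raise RuntimeError(
        "ffmpeg binary not found; audio processing via pydub/ffmpeg-python "
        "will fail. Re-add 'ffmpeg' to the Dockerfile's apt-get install list."
    )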
README.md CHANGED
@@ -3,7 +3,7 @@ title: MGZON Chat
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
-sdk: gradio
+sdk: docker
 app_file: main.py
 pinned: false
 ---
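With sdk: docker, the Space is built from the repository's Dockerfile rather than the managed Gradio runtime. As far as the Spaces config goes, app_file is only consulted by the gradio/streamlit SDKs; a docker Space serves whatever the container exposes (port 7860 by default, configurable via app_port in this front matter), so the leftover app_file line is harmless but inert.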
api/endpoints.py CHANGED
@@ -1,25 +1,22 @@
 import os
 from fastapi import APIRouter, HTTPException, UploadFile, File
-from fastapi.responses import StreamingResponse
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
 from utils.web_search import web_search
-import io
 
 router = APIRouter()
 
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
-API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
-MODEL_NAME = os.getenv("MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
+API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
 
 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct"),
-        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x22B-Instruct-v0.1"),
+        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"),
+        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
         "api_base": API_ENDPOINT,
@@ -48,14 +45,16 @@ async def chat_endpoint(req: QueryRequest):
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
     )
-    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
+    response = "".join(list(stream))
     return {"response": response}
 
+
+# In api/endpoints.py
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",
@@ -65,7 +64,7 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    ) if isinstance(chunk, str)])
+    )))
     return {"transcription": response}
 
 @router.post("/api/text-to-speech")
@@ -82,7 +81,7 @@ async def text_to_speech_endpoint(req: dict):
         max_new_tokens=128000,
         input_type="text",
     )
-    audio_data = b"".join([chunk for chunk in response if isinstance(chunk, bytes)])
+    audio_data = b"".join(list(response))
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
 
 @router.post("/api/code")
@@ -92,7 +91,7 @@ async def code_endpoint(req: dict):
     code = req.get("code", "")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,
@@ -100,14 +99,14 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+    )))
     return {"generated_code": response}
 
 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
     model_name, api_endpoint = select_model(message)
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,
@@ -115,24 +114,24 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+    )))
     return {"analysis": response}
 
 @router.post("/api/image-analysis")
-async def image_analysis_endpoint(file: UploadFile = File(...)):
-    model_name, api_endpoint = select_model("image analysis", input_type="image")
-    image_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+async def image_analysis_endpoint(req: dict):
+    image_url = req.get("image_url", "")
+    task = req.get("task", "describe")
+    prompt = f"Perform the following task on the image at {image_url}: {task}"
+    model_name, api_endpoint = select_model(prompt)
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
-        message="Analyze this image",
+        message=prompt,
         system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-        input_type="image",
-        image_data=image_data,
-    ) if isinstance(chunk, str)])
+    )))
     return {"image_analysis": response}
 
 @router.get("/api/test-model")
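Since /api/image-analysis now takes a JSON body (image_url, task) instead of a multipart file upload, existing clients that POST files will break. A minimal client sketch against the new contract; the host, port, and example image URL are assumptions:

# Minimal client sketch; host/port and the image URL are assumptions.
import requests

resp = requests.post(
    "http://localhost:7860/api/image-analysis",
    json={"image_url": "https://example.com/photo.jpg", "task": "describe"},
)
resp.raise_for_status()
print(resp.json()["image_analysis"])

Note that the new handler only interpolates the URL into a text prompt (input_type="image" and image_data were dropped), so the model never receives the image bytes; results depend on whether the backing model can fetch URLs itself.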
requirements.txt CHANGED
@@ -5,7 +5,7 @@ openai==1.42.0
 httpx==0.27.0
 python-dotenv==1.0.1
 pydocstyle==6.3.0
-requests==2.32.3
+requests==2.32.5
 beautifulsoup4==4.12.3
 tenacity==8.5.0
 selenium==4.25.0
@@ -15,10 +15,10 @@ cachetools==5.5.0
 pydub==0.25.1
 ffmpeg-python==0.2.0
 numpy==1.26.4
-parler-tts==0.2.0
+parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
 torch==2.4.1
 torchaudio==2.4.1
-transformers==4.43.3
+transformers==4.43.3
 webrtcvad==2.0.10
 Pillow==10.4.0
 urllib3==2.0.7
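The parler-tts line is a PEP 508 direct reference: pip clones the repository and builds from that exact commit, which makes the install reproducible but slower and dependent on GitHub availability. The transformers pin moving back from 4.45.1 to 4.43.3 presumably tracks what that parler-tts revision supports; that is an inference from the pairing of the two changes, not something the commit message states.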