Commit · 41c40b7
1 Parent(s): ae2582f
Update chatbot with audio/image support and fixed models

Files changed:
- Dockerfile +1 -2
- README.md +1 -1
- api/endpoints.py +22 -23
- requirements.txt +3 -3
Dockerfile
CHANGED

@@ -3,13 +3,12 @@ FROM python:3.10-slim
 # Set working directory
 WORKDIR /app

-# Install
+# Install chromium-driver and build dependencies
 RUN apt-get update && apt-get install -y \
     chromium-driver \
     git \
     gcc \
     libc-dev \
-    ffmpeg \
     && apt-get clean && rm -rf /var/lib/apt/lists/*

 # Update pip
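Note that this commit drops ffmpeg from the apt-get install while requirements.txt below still pins pydub and ffmpeg-python, both of which invoke the ffmpeg binary at runtime. A minimal sketch of a startup check that would surface a missing binary early; the function name and its placement are hypothetical, not part of this commit:

import shutil

def assert_ffmpeg_available() -> None:
    # shutil.which returns the executable's path, or None when it is absent.
    if shutil.which("ffmpeg") is None:
        raise RuntimeError(
            "ffmpeg binary not found in the image; pydub/ffmpeg-python "
            "audio processing will fail at runtime"
        )

assert_ffmpeg_available()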
README.md
CHANGED

@@ -3,7 +3,7 @@ title: MGZON Chat
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
-sdk:
+sdk: docker
 app_file: main.py
 pinned: false
 ---
api/endpoints.py
CHANGED

@@ -1,25 +1,22 @@
 import os
 from fastapi import APIRouter, HTTPException, UploadFile, File
-from fastapi.responses import StreamingResponse
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
 from utils.web_search import web_search
-import io

 router = APIRouter()

 HF_TOKEN = os.getenv("HF_TOKEN")
-
-
-MODEL_NAME = os.getenv("MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
+API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")

 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "
-        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-
+        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"),
+        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
         "api_base": API_ENDPOINT,

@@ -48,14 +45,16 @@ async def chat_endpoint(req: QueryRequest):
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
     )
-    response = "".join(
+    response = "".join(list(stream))
     return {"response": response}

+
+# in api/endpoints.py
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",

@@ -65,7 +64,7 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    )
+    )))
     return {"transcription": response}

 @router.post("/api/text-to-speech")

@@ -82,7 +81,7 @@ async def text_to_speech_endpoint(req: dict):
         max_new_tokens=128000,
         input_type="text",
     )
-    audio_data = b"".join(
+    audio_data = b"".join(list(response))
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")

 @router.post("/api/code")

@@ -92,7 +91,7 @@ async def code_endpoint(req: dict):
     code = req.get("code", "")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,

@@ -100,14 +99,14 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )
+    )))
     return {"generated_code": response}

 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
     model_name, api_endpoint = select_model(message)
-    response = "".join(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,

@@ -115,24 +114,24 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    )
+    )))
     return {"analysis": response}

 @router.post("/api/image-analysis")
-async def image_analysis_endpoint(
-
-
-
+async def image_analysis_endpoint(req: dict):
+    image_url = req.get("image_url", "")
+    task = req.get("task", "describe")
+    prompt = f"Perform the following task on the image at {image_url}: {task}"
+    model_name, api_endpoint = select_model(prompt)
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
-        message=
+        message=prompt,
         system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-
-        image_data=image_data,
-    ) if isinstance(chunk, str)])
+    )))
     return {"image_analysis": response}

 @router.get("/api/test-model")
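The /api/image-analysis endpoint is new in this commit: it reads image_url and task from a JSON body, builds a prompt, and drains the request_generation stream with "".join(list(...)) before returning the result under the image_analysis key. A minimal smoke test against a running instance; the base URL and the placeholder image are assumptions, not part of the commit:

import requests  # pinned as requests==2.32.5 in requirements.txt below

BASE_URL = "http://localhost:7860"  # assumed local dev address

# Request keys (image_url, task) and the response key (image_analysis)
# match the endpoint definition in the diff above.
resp = requests.post(
    f"{BASE_URL}/api/image-analysis",
    json={"image_url": "https://example.com/photo.jpg", "task": "describe"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["image_analysis"])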
requirements.txt
CHANGED

@@ -5,7 +5,7 @@ openai==1.42.0
 httpx==0.27.0
 python-dotenv==1.0.1
 pydocstyle==6.3.0
-requests==2.32.
+requests==2.32.5
 beautifulsoup4==4.12.3
 tenacity==8.5.0
 selenium==4.25.0

@@ -15,10 +15,10 @@ cachetools==5.5.0
 pydub==0.25.1
 ffmpeg-python==0.2.0
 numpy==1.26.4
-parler-tts
+parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
 torch==2.4.1
 torchaudio==2.4.1
-transformers==4.
+transformers==4.43.3
 webrtcvad==2.0.10
 Pillow==10.4.0
 urllib3==2.0.7
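To confirm the bumped pins resolve correctly after pip install -r requirements.txt, a minimal sketch using the standard-library importlib.metadata; the choice of packages to check is illustrative:

from importlib.metadata import version

# These are the exact pins introduced by this commit.
assert version("requests") == "2.32.5"
assert version("transformers") == "4.43.3"
print("pinned versions OK")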