ibrahimlasfar committed
Commit 41c40b7 · 1 Parent(s): ae2582f

Update chatbot with audio/image support and fixed models

Files changed (4)
  1. Dockerfile +1 -2
  2. README.md +1 -1
  3. api/endpoints.py +22 -23
  4. requirements.txt +3 -3
Dockerfile CHANGED
@@ -3,13 +3,12 @@ FROM python:3.10-slim
 # Set working directory
 WORKDIR /app
 
-# Install system dependencies
+# Install chromium-driver and build dependencies
 RUN apt-get update && apt-get install -y \
     chromium-driver \
     git \
     gcc \
     libc-dev \
-    ffmpeg \
     && apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # Update pip
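One thing worth flagging: this commit drops ffmpeg from the apt-get install list, but requirements.txt (below) still pins pydub==0.25.1 and ffmpeg-python==0.2.0, both of which shell out to the ffmpeg binary at runtime. If the audio endpoints run inside this image, they will likely fail until the package is restored. A minimal startup guard, as a hypothetical sketch that is not part of this commit:

# Hypothetical startup check, not in the repo: fail fast when the ffmpeg
# binary is absent, since pydub and ffmpeg-python invoke it as a subprocess.
import shutil

if shutil.which("ffmpeg") is None:
    raise RuntimeError(
        "ffmpeg binary not found; audio processing via pydub/ffmpeg-python "
        "will fail. Re-add 'ffmpeg' to the Dockerfile's apt-get install list."
    )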
README.md CHANGED
@@ -3,7 +3,7 @@ title: MGZON Chat
 emoji: "🤖"
 colorFrom: "blue"
 colorTo: "green"
-sdk: gradio
+sdk: docker
 app_file: main.py
 pinned: false
 ---
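With sdk: docker, the Space is built from the repository's Dockerfile rather than the managed Gradio runtime. As far as the Spaces config goes, app_file is only consulted by the gradio/streamlit SDKs; a docker Space serves whatever the container exposes (port 7860 by default, configurable via app_port in this front matter), so the leftover app_file line is harmless but inert.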
api/endpoints.py CHANGED
@@ -1,25 +1,22 @@
 import os
 from fastapi import APIRouter, HTTPException, UploadFile, File
-from fastapi.responses import StreamingResponse
 from openai import OpenAI
 from api.models import QueryRequest
 from utils.generation import request_generation, select_model
 from utils.web_search import web_search
-import io
 
 router = APIRouter()
 
 HF_TOKEN = os.getenv("HF_TOKEN")
-BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
-API_ENDPOINT = os.getenv("API_ENDPOINT", "https://api-inference.huggingface.co")
-MODEL_NAME = os.getenv("MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1")
+API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
+MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-20b:fireworks-ai")
 
 @router.get("/api/model-info")
 def model_info():
     return {
         "model_name": MODEL_NAME,
-        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "meta-llama/Meta-Llama-3-8B-Instruct"),
-        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x22B-Instruct-v0.1"),
+        "secondary_model": os.getenv("SECONDARY_MODEL_NAME", "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"),
+        "tertiary_model": os.getenv("TERTIARY_MODEL_NAME", "mistralai/Mixtral-8x7B-Instruct-v0.1"),
         "clip_base_model": os.getenv("CLIP_BASE_MODEL", "openai/clip-vit-base-patch32"),
         "clip_large_model": os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14"),
         "api_base": API_ENDPOINT,
@@ -48,14 +45,16 @@ async def chat_endpoint(req: QueryRequest):
         max_new_tokens=req.max_new_tokens,
         deep_search=req.enable_browsing,
     )
-    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
+    response = "".join(list(stream))
     return {"response": response}
 
+
+# In api/endpoints.py
 @router.post("/api/audio-transcription")
 async def audio_transcription_endpoint(file: UploadFile = File(...)):
     model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
     audio_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message="Transcribe audio",
@@ -65,7 +64,7 @@ async def audio_transcription_endpoint(file: UploadFile = File(...)):
         max_new_tokens=128000,
         input_type="audio",
         audio_data=audio_data,
-    ) if isinstance(chunk, str)])
+    )))
     return {"transcription": response}
 
 @router.post("/api/text-to-speech")
@@ -82,7 +81,7 @@ async def text_to_speech_endpoint(req: dict):
         max_new_tokens=128000,
         input_type="text",
     )
-    audio_data = b"".join([chunk for chunk in response if isinstance(chunk, bytes)])
+    audio_data = b"".join(list(response))
     return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
 
 @router.post("/api/code")
@@ -92,7 +91,7 @@ async def code_endpoint(req: dict):
     code = req.get("code", "")
     prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
     model_name, api_endpoint = select_model(prompt)
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=prompt,
@@ -100,14 +99,14 @@ async def code_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+    )))
     return {"generated_code": response}
 
 @router.post("/api/analysis")
 async def analysis_endpoint(req: dict):
     message = req.get("text", "")
     model_name, api_endpoint = select_model(message)
-    response = "".join([chunk for chunk in request_generation(
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
         message=message,
@@ -115,24 +114,24 @@ async def analysis_endpoint(req: dict):
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-    ) if isinstance(chunk, str)])
+    )))
     return {"analysis": response}
 
 @router.post("/api/image-analysis")
-async def image_analysis_endpoint(file: UploadFile = File(...)):
-    model_name, api_endpoint = select_model("image analysis", input_type="image")
-    image_data = await file.read()
-    response = "".join([chunk for chunk in request_generation(
+async def image_analysis_endpoint(req: dict):
+    image_url = req.get("image_url", "")
+    task = req.get("task", "describe")
+    prompt = f"Perform the following task on the image at {image_url}: {task}"
+    model_name, api_endpoint = select_model(prompt)
+    response = "".join(list(request_generation(
         api_key=HF_TOKEN,
         api_base=api_endpoint,
-        message="Analyze this image",
+        message=prompt,
         system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
         model_name=model_name,
         temperature=0.7,
         max_new_tokens=128000,
-        input_type="image",
-        image_data=image_data,
-    ) if isinstance(chunk, str)])
+    )))
     return {"image_analysis": response}
 
 @router.get("/api/test-model")
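Since /api/image-analysis now takes a JSON body (image_url, task) instead of a multipart file upload, existing clients that POST files will break. A minimal client sketch against the new contract; the host, port, and example image URL are assumptions:

# Minimal client sketch; host/port and the image URL are assumptions.
import requests

resp = requests.post(
    "http://localhost:7860/api/image-analysis",
    json={"image_url": "https://example.com/photo.jpg", "task": "describe"},
)
resp.raise_for_status()
print(resp.json()["image_analysis"])

Note that the new handler only interpolates the URL into a text prompt (input_type="image" and image_data were dropped), so the model never receives the image bytes; results depend on whether the backing model can fetch URLs itself.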
requirements.txt CHANGED
@@ -5,7 +5,7 @@ openai==1.42.0
 httpx==0.27.0
 python-dotenv==1.0.1
 pydocstyle==6.3.0
-requests==2.32.3
+requests==2.32.5
 beautifulsoup4==4.12.3
 tenacity==8.5.0
 selenium==4.25.0
@@ -15,10 +15,10 @@ cachetools==5.5.0
 pydub==0.25.1
 ffmpeg-python==0.2.0
 numpy==1.26.4
-parler-tts==0.2.0
+parler-tts @ git+https://github.com/huggingface/parler-tts.git@5d0aca9753ab74ded179732f5bd797f7a8c6f8ee
 torch==2.4.1
 torchaudio==2.4.1
-transformers==4.43.3
+transformers==4.43.3
 webrtcvad==2.0.10
 Pillow==10.4.0
 urllib3==2.0.7
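The parler-tts line is a PEP 508 direct reference: pip clones the repository and builds from that exact commit, which makes the install reproducible but slower and dependent on GitHub availability. The transformers pin moving back from 4.45.1 to 4.43.3 presumably tracks what that parler-tts revision supports; that is an inference from the pairing of the two changes, not something the commit message states.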