# api/endpoints.py
import io
import os
import uuid

from fastapi import APIRouter, Depends, HTTPException, Request, status, UploadFile, File
from fastapi.responses import StreamingResponse
from openai import OpenAI
from sqlalchemy.orm import Session

from api.auth import current_active_user
from api.database import get_db
from api.models import QueryRequest
from utils.generation import request_generation, select_model
from utils.web_search import web_search

router = APIRouter()
HF_TOKEN = os.getenv("HF_TOKEN")
BACKUP_HF_TOKEN = os.getenv("BACKUP_HF_TOKEN")
API_ENDPOINT = os.getenv("API_ENDPOINT", "https://router.huggingface.co/v1")
FALLBACK_API_ENDPOINT = os.getenv("FALLBACK_API_ENDPOINT", "https://api-inference.huggingface.co")
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b:together")
SECONDARY_MODEL_NAME = os.getenv("SECONDARY_MODEL_NAME", "mistralai/Mistral-7B-Instruct-v0.2:featherless-ai")
TERTIARY_MODEL_NAME = os.getenv("TERTIARY_MODEL_NAME", "openai/gpt-oss-20b:together")
CLIP_BASE_MODEL = os.getenv("CLIP_BASE_MODEL", "Salesforce/blip-image-captioning-large")
CLIP_LARGE_MODEL = os.getenv("CLIP_LARGE_MODEL", "openai/clip-vit-large-patch14")
ASR_MODEL = os.getenv("ASR_MODEL", "openai/whisper-large-v3")
TTS_MODEL = os.getenv("TTS_MODEL", "facebook/mms-tts-ara")
# Per-session message counts for guests (temporary in-memory substitute)
session_message_counts = {}
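
# The guest-session rate limit below was repeated verbatim in every endpoint,
# so it is factored into a single helper. The helper name is an editorial
# choice; the logic (anonymous session plus a 4-message cap) is unchanged from
# the original endpoints.
def _enforce_guest_limit(request: Request, user) -> None:
    """Give anonymous visitors a session and cap them at 4 messages."""
    session_id = request.session.get("session_id")
    if not user and not session_id:
        session_id = str(uuid.uuid4())
        request.session["session_id"] = session_id
        session_message_counts[session_id] = 0
    if not user:
        session_message_counts[session_id] = session_message_counts.get(session_id, 0) + 1
        if session_message_counts[session_id] > 4:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Message limit reached. Please log in to continue."
            )
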
@router.get("/api/model-info")
def model_info():
    """Report the configured model names and API endpoints."""
    return {
        "model_name": MODEL_NAME,
        "secondary_model": SECONDARY_MODEL_NAME,
        "tertiary_model": TERTIARY_MODEL_NAME,
        "clip_base_model": CLIP_BASE_MODEL,
        "clip_large_model": CLIP_LARGE_MODEL,
        "asr_model": ASR_MODEL,
        "tts_model": TTS_MODEL,
        "api_base": API_ENDPOINT,
        "fallback_api_base": FALLBACK_API_ENDPOINT,
        "status": "online"
    }
@router.get("/api/performance")
async def performance_stats():
    """Report queue/concurrency settings; "uptime" shells out to the host command."""
    return {
        "queue_size": int(os.getenv("QUEUE_SIZE", 80)),
        "concurrency_limit": int(os.getenv("CONCURRENCY_LIMIT", 20)),
        "uptime": os.popen("uptime").read().strip()
    }
@router.post("/api/chat")
async def chat_endpoint(
    request: Request,
    req: QueryRequest,
    user: str = Depends(current_active_user, use_cache=False),
    db: Session = Depends(get_db)
):
    """Run a chat completion and return text, or WAV audio when requested."""
    _enforce_guest_limit(request, user)
    model_name, api_endpoint = select_model(req.message, input_type="text")
    stream = request_generation(
        api_key=HF_TOKEN,
        api_base=api_endpoint,
        message=req.message,
        system_prompt=req.system_prompt,
        model_name=model_name,
        chat_history=req.history,
        temperature=req.temperature,
        max_new_tokens=req.max_new_tokens,
        deep_search=req.enable_browsing,
        input_type="text",
        output_format=req.output_format
    )
    if req.output_format == "audio":
        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
    return {"response": response}
@router.post("/api/audio-transcription")
async def audio_transcription_endpoint(
    request: Request,
    file: UploadFile = File(...),
    user: str = Depends(current_active_user, use_cache=False),
    db: Session = Depends(get_db)
):
    """Transcribe an uploaded audio file with the configured ASR model."""
    _enforce_guest_limit(request, user)
    model_name, api_endpoint = select_model("transcribe audio", input_type="audio")
    audio_data = await file.read()
    stream = request_generation(
        api_key=HF_TOKEN,
        api_base=api_endpoint,
        message="Transcribe audio",
        system_prompt="Transcribe the provided audio using Whisper.",
        model_name=model_name,
        temperature=0.7,
        max_new_tokens=128000,
        input_type="audio",
        audio_data=audio_data,
        output_format="text"
    )
    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
    return {"transcription": response}
@router.post("/api/text-to-speech")
async def text_to_speech_endpoint(
    request: Request,
    req: dict,
    user: str = Depends(current_active_user, use_cache=False),
    db: Session = Depends(get_db)
):
    """Synthesize speech from a JSON body of the form {"text": "..."}."""
    _enforce_guest_limit(request, user)
    text = req.get("text", "")
    model_name, api_endpoint = select_model("text to speech", input_type="tts")
    stream = request_generation(
        api_key=HF_TOKEN,
        api_base=api_endpoint,
        message=text,
        system_prompt="Convert the provided text to speech using a text-to-speech model.",
        model_name=model_name,
        temperature=0.7,
        max_new_tokens=128000,
        input_type="tts",
        output_format="audio"
    )
    audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
    return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
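
# Illustrative invocation; the endpoint streams WAV bytes, so save the
# response body to a file (host/port assumed):
#   curl -X POST http://localhost:8000/api/text-to-speech \
#        -H "Content-Type: application/json" \
#        -d '{"text": "Hello"}' --output speech.wav
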
@router.post("/api/code")
async def code_endpoint(
    request: Request,
    req: dict,
    user: str = Depends(current_active_user, use_cache=False),
    db: Session = Depends(get_db)
):
    """Generate code for a task; body keys: framework, task, code, output_format."""
    _enforce_guest_limit(request, user)
    framework = req.get("framework")
    task = req.get("task")
    code = req.get("code", "")
    output_format = req.get("output_format", "text")
    prompt = f"Generate code for task: {task} using {framework}. Existing code: {code}"
    model_name, api_endpoint = select_model(prompt, input_type="text")
    stream = request_generation(
        api_key=HF_TOKEN,
        api_base=api_endpoint,
        message=prompt,
        system_prompt="You are a coding expert. Provide detailed, well-commented code with examples and explanations.",
        model_name=model_name,
        temperature=0.7,
        max_new_tokens=128000,
        input_type="text",
        output_format=output_format
    )
    if output_format == "audio":
        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
    return {"generated_code": response}
@router.post("/api/analysis")
async def analysis_endpoint(
    request: Request,
    req: dict,
    user: str = Depends(current_active_user, use_cache=False),
    db: Session = Depends(get_db)
):
    """Analyze the given text; body keys: text, output_format."""
    _enforce_guest_limit(request, user)
    message = req.get("text", "")
    output_format = req.get("output_format", "text")
    model_name, api_endpoint = select_model(message, input_type="text")
    stream = request_generation(
        api_key=HF_TOKEN,
        api_base=api_endpoint,
        message=message,
        system_prompt="You are an expert analyst. Provide detailed analysis with step-by-step reasoning and examples.",
        model_name=model_name,
        temperature=0.7,
        max_new_tokens=128000,
        input_type="text",
        output_format=output_format
    )
    if output_format == "audio":
        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
    return {"analysis": response}
@router.post("/api/image-analysis")
async def image_analysis_endpoint(
    request: Request,
    file: UploadFile = File(...),
    output_format: str = "text",
    user: str = Depends(current_active_user, use_cache=False),
    db: Session = Depends(get_db)
):
    """Describe or classify an uploaded image with the configured vision model."""
    _enforce_guest_limit(request, user)
    model_name, api_endpoint = select_model("analyze image", input_type="image")
    image_data = await file.read()
    stream = request_generation(
        api_key=HF_TOKEN,
        api_base=api_endpoint,
        message="Analyze this image",
        system_prompt="You are an expert in image analysis. Provide detailed descriptions or classifications based on the query.",
        model_name=model_name,
        temperature=0.7,
        max_new_tokens=128000,
        input_type="image",
        image_data=image_data,
        output_format=output_format
    )
    if output_format == "audio":
        audio_data = b"".join([chunk for chunk in stream if isinstance(chunk, bytes)])
        return StreamingResponse(io.BytesIO(audio_data), media_type="audio/wav")
    response = "".join([chunk for chunk in stream if isinstance(chunk, str)])
    return {"image_analysis": response}
@router.get("/api/test-model")
async def test_model(model: str = MODEL_NAME, endpoint: str = API_ENDPOINT):
    """Smoke-test a model/endpoint pair with a one-shot completion."""
    try:
        client = OpenAI(api_key=HF_TOKEN, base_url=endpoint, timeout=60.0)
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": "Test"}],
            max_tokens=50
        )
        return {"status": "success", "response": response.choices[0].message.content}
    except Exception as e:
        return {"status": "error", "message": str(e)}