# handseal-server / app.py
import os
import time
import json
import numpy as np
import cv2
import onnxruntime as ort
import gradio as gr
from PIL import Image
# -----------------------------
# Config
# -----------------------------
CLASS_NAMES = [
"Rat", "Ox", "Tiger", "Hare", "Dragon", "Snake",
"Horse", "Ram", "Monkey", "Bird", "Dog", "Boar"
]
# Put the model under a writable/cache folder.
# HF Spaces provides a persistent cache mount in many runtimes; at worst it re-downloads on restart.
CACHE_DIR = os.environ.get("HF_HOME", ".")
MODEL_DIR = os.path.join(CACHE_DIR, "models", "yolox_nano")
MODEL_NAME = "yolox_nano.onnx"
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
# Google Drive direct-download link for the model weights
MODEL_DOWNLOAD_URL = "https://drive.google.com/uc?id=1xeh3rrIhSqH0BAv7jLCGPZf1waQ4tlXY"
def ensure_model():
    """Download the ONNX model once if it is not already present."""
    os.makedirs(MODEL_DIR, exist_ok=True)
    # Treat anything under 1 MB as a failed or partial download.
    if os.path.exists(MODEL_PATH) and os.path.getsize(MODEL_PATH) > 1024 * 1024:
        return MODEL_PATH
    print(f"[Model] Downloading to: {MODEL_PATH}")
    import gdown
    # Remove any partial/corrupt file before re-downloading.
    if os.path.exists(MODEL_PATH):
        try:
            os.remove(MODEL_PATH)
        except OSError:
            pass
    gdown.download(MODEL_DOWNLOAD_URL, MODEL_PATH, quiet=False)
    return MODEL_PATH
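
# Optional hardening sketch (not wired into ensure_model): verify the download
# against a known SHA-256 digest before trusting it. No reference digest for
# yolox_nano.onnx is known here, so the default skips the check.
def verify_model_checksum(path, expected_sha256=None):
    import hashlib
    if expected_sha256 is None:
        return True  # no reference digest configured; skip the check
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_sha256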
# ==========================================
# YOLOX inference (ONNX Runtime, CPU)
# ==========================================
class YoloxONNX(object):
    def __init__(self, model_path, class_names, score_thr=0.6, nms_thr=0.45, input_shape=(416, 416)):
        self.input_shape = input_shape
        self.score_thr = score_thr
        self.nms_thr = nms_thr
        self.class_names = class_names
        print(f"[Model] Loading ONNX from {model_path}...")
        self.session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        self.input_name = self.session.get_inputs()[0].name

    def inference(self, image_bgr):
        """Run detection on a BGR image; returns (boxes_xyxy, scores, class_ids)."""
        input_image, ratio = self.preprocess(image_bgr, self.input_shape)
        ort_inputs = {self.input_name: input_image[None, :, :, :]}
        outputs = self.session.run(None, ort_inputs)
        predictions = outputs[0][0]  # batch=1; boxes are assumed decoded in the graph
        boxes_xyxy, scores, class_ids = self.postprocess(predictions, ratio)
        return boxes_xyxy, scores, class_ids
    def preprocess(self, image, input_size):
        # Letterbox: resize preserving aspect ratio, pad the remainder with
        # gray (114). The 3-channel branch is the one used here.
        if len(image.shape) == 3:
            padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
        else:
            padded_img = np.ones(input_size, dtype=np.uint8) * 114
        r = min(input_size[0] / image.shape[0], input_size[1] / image.shape[1])
        resized_img = cv2.resize(
            image,
            (int(image.shape[1] * r), int(image.shape[0] * r)),
            interpolation=cv2.INTER_LINEAR,
        )
        padded_img[: int(image.shape[0] * r), : int(image.shape[1] * r)] = resized_img
        image = padded_img.transpose(2, 0, 1)  # HWC -> CHW
        image = np.ascontiguousarray(image, dtype=np.float32)
        return image, r
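
    # Worked example of the letterbox math above: a 1280x720 frame at input
    # 416x416 gives r = min(416/720, 416/1280) = 0.325, so the frame resizes to
    # 416x234 and the rest of the canvas stays gray padding; postprocess()
    # divides boxes by the same r to map them back to original pixels.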
    def postprocess(self, predictions, ratio):
        boxes = predictions[:, :4]
        # Per-class scores = objectness * class probabilities.
        scores = predictions[:, 4:5] * predictions[:, 5:]
        # Convert (cx, cy, w, h) to (x1, y1, x2, y2).
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
        boxes_xyxy /= ratio  # undo the letterbox scaling back to original pixels
        dets = self.multiclass_nms(boxes_xyxy, scores, nms_thr=self.nms_thr, score_thr=self.score_thr)
        if dets is None:
            return [], [], []
        final_boxes = dets[:, :4]
        final_scores = dets[:, 4]
        final_cls_inds = dets[:, 5]
        return final_boxes, final_scores, final_cls_inds
    def multiclass_nms(self, boxes, scores, nms_thr, score_thr):
        """Class-aware NMS: suppress overlaps within each class independently."""
        final_dets = []
        num_classes = scores.shape[1]
        for cls_ind in range(num_classes):
            cls_scores = scores[:, cls_ind]
            valid_score_mask = cls_scores > score_thr
            if valid_score_mask.sum() == 0:
                continue
            valid_boxes = boxes[valid_score_mask]
            valid_scores = cls_scores[valid_score_mask]
            keep = self.nms_cpu(valid_boxes, valid_scores, nms_thr)
            for i in keep:
                final_dets.append([
                    valid_boxes[i][0], valid_boxes[i][1], valid_boxes[i][2], valid_boxes[i][3],
                    valid_scores[i], cls_ind,
                ])
        if len(final_dets) == 0:
            return None
        return np.array(final_dets)
    def nms_cpu(self, boxes, scores, nms_thr):
        """Greedy single-class NMS on CPU, highest score first."""
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # Intersection of the current top box with the remaining boxes.
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)  # IoU
            # Keep only boxes whose IoU with the current box is below threshold.
            inds = np.where(ovr <= nms_thr)[0]
            order = order[inds + 1]
        return keep
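
# Optional smoke test, guarded by a hypothetical HANDSEAL_SELFTEST env var so it
# never runs inside the Space by default: load the model and run one blank frame
# to confirm the ONNX session and output shapes line up.
if os.environ.get("HANDSEAL_SELFTEST"):
    _probe = YoloxONNX(ensure_model(), CLASS_NAMES)
    _blank = np.full((480, 640, 3), 114, dtype=np.uint8)
    print("[SelfTest] detections:", _probe.inference(_blank))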
# -----------------------------
# Detector: build once, reuse across requests
# -----------------------------
_detector = None

def get_detector(score_thr=0.6, nms_thr=0.45, input_size=416):
    """Build the detector on first use; afterwards update thresholds in place
    and rebuild only when the input size changes. (Previously the cached
    detector silently ignored slider changes after the first call.)"""
    global _detector
    shape = (int(input_size), int(input_size))
    if _detector is None or _detector.input_shape != shape:
        path = ensure_model()
        _detector = YoloxONNX(
            path,
            CLASS_NAMES,
            score_thr=float(score_thr),
            nms_thr=float(nms_thr),
            input_shape=shape,
        )
    else:
        # Honor the latest UI slider values without reloading the ONNX session.
        _detector.score_thr = float(score_thr)
        _detector.nms_thr = float(nms_thr)
    return _detector
def draw_detections(img_bgr, results):
    out = img_bgr.copy()
    for r in results:
        x1, y1, x2, y2 = map(int, r["box"])
        label = r["label"]
        score = r["score"]
        cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            out,
            f"{label} {score:.2f}",
            (x1, max(0, y1 - 8)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 255, 0),
            2,
            cv2.LINE_AA,
        )
    return out
def predict_ui(image_pil, score_thr, nms_thr, input_size):
    if image_pil is None:
        return None, [], "{}"
    # Gradio hands us a PIL image; force RGB (drops any alpha channel), then
    # convert to BGR for the OpenCV pipeline.
    img_rgb = np.array(image_pil.convert("RGB"))
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    detector = get_detector(score_thr=score_thr, nms_thr=nms_thr, input_size=input_size)
    t0 = time.time()
    boxes, scores, class_ids = detector.inference(img_bgr)
    dt_ms = (time.time() - t0) * 1000
    results = []
    for i in range(len(boxes)):
        box = boxes[i]
        score = float(scores[i])
        label_id = int(class_ids[i])
        label = CLASS_NAMES[label_id]
        results.append({
            "label": label,
            "score": score,
            "box": [float(box[0]), float(box[1]), float(box[2]), float(box[3])],
        })
    annotated_bgr = draw_detections(img_bgr, results)
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
    annotated_pil = Image.fromarray(annotated_rgb)
    table = [[r["label"], r["score"], *r["box"]] for r in results]
    meta = {"inference_ms": round(dt_ms, 2), "num_detections": len(results), "results": results}
    return annotated_pil, table, json.dumps(meta, indent=2)
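
# Shape of the raw JSON returned above (values illustrative, not real output):
# {
#   "inference_ms": 42.13,
#   "num_detections": 1,
#   "results": [{"label": "Tiger", "score": 0.87, "box": [x1, y1, x2, y2]}]
# }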
with gr.Blocks(title="Shinobi Hand Seal Detector (YOLOX Nano ONNX)") as demo:
    gr.Markdown(
        "Upload an image. The Space runs YOLOX Nano (ONNX Runtime, CPU) and returns detected hand seals."
    )
    with gr.Row():
        inp = gr.Image(type="pil", label="Input Image")
        out_img = gr.Image(type="pil", label="Annotated Output")
    with gr.Row():
        score_thr = gr.Slider(0.1, 0.95, value=0.6, step=0.05, label="Score threshold")
        nms_thr = gr.Slider(0.1, 0.95, value=0.45, step=0.05, label="NMS threshold")
        input_size = gr.Radio([320, 416, 512], value=416, label="Input size")
    btn = gr.Button("Detect")
    out_table = gr.Dataframe(
        headers=["label", "score", "x1", "y1", "x2", "y2"],
        datatype=["str", "number", "number", "number", "number", "number"],
        label="Detections",
        interactive=False,
    )
    out_json = gr.Code(label="Raw JSON", language="json")
    btn.click(
        predict_ui,
        inputs=[inp, score_thr, nms_thr, input_size],
        outputs=[out_img, out_table, out_json],
        api_name="detect",  # expose a named endpoint for API / gradio_client calls
    )

demo.queue().launch(show_error=True)
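
# Example client call against the named "detect" endpoint above, run from a
# separate process. The Space id is a placeholder; substitute your own.
# Requires `pip install gradio_client`.
#
#   from gradio_client import Client, handle_file
#   client = Client("your-username/handseal-server")  # placeholder Space id
#   annotated, table, raw_json = client.predict(
#       handle_file("seal.jpg"), 0.6, 0.45, 416, api_name="/detect"
#   )
#   print(raw_json)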