# handseal-server / app.py
import os
import time
import json
import numpy as np
import cv2
import onnxruntime as ort
import gradio as gr
from PIL import Image
# -----------------------------
# Config
# -----------------------------
CLASS_NAMES = [
"Rat", "Ox", "Tiger", "Hare", "Dragon", "Snake",
"Horse", "Ram", "Monkey", "Bird", "Dog", "Boar"
]
# Put the model under a writable/cache folder.
# HF Spaces provides a persistent cache mount in many runtimes; at worst it re-downloads on restart.
CACHE_DIR = os.environ.get("HF_HOME", ".")
MODEL_DIR = os.path.join(CACHE_DIR, "models", "yolox_nano")
MODEL_NAME = "yolox_nano.onnx"
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
# Google Drive direct-download link for the model weights
MODEL_DOWNLOAD_URL = "https://drive.google.com/uc?id=1xeh3rrIhSqH0BAv7jLCGPZf1waQ4tlXY"
def ensure_model():
    """Download the ONNX model once if it is not already present."""
    os.makedirs(MODEL_DIR, exist_ok=True)
    # Treat anything under 1 MB as a failed or partial download.
    if os.path.exists(MODEL_PATH) and os.path.getsize(MODEL_PATH) > 1024 * 1024:
        return MODEL_PATH
    print(f"[Model] Downloading to: {MODEL_PATH}")
    import gdown
    # Remove any partial/corrupt file before re-downloading.
    if os.path.exists(MODEL_PATH):
        try:
            os.remove(MODEL_PATH)
        except OSError:
            pass
    gdown.download(MODEL_DOWNLOAD_URL, MODEL_PATH, quiet=False)
    return MODEL_PATH
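
# Optional hardening sketch (not wired into ensure_model): verify the download
# against a known SHA-256 digest before trusting it. No reference digest for
# yolox_nano.onnx is known here, so the default skips the check.
def verify_model_checksum(path, expected_sha256=None):
    import hashlib
    if expected_sha256 is None:
        return True  # no reference digest configured; skip the check
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_sha256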
# ==========================================
# YOLOX inference (ONNX Runtime, CPU)
# ==========================================
class YoloxONNX(object):
    def __init__(self, model_path, class_names, score_thr=0.6, nms_thr=0.45, input_shape=(416, 416)):
        self.input_shape = input_shape
        self.score_thr = score_thr
        self.nms_thr = nms_thr
        self.class_names = class_names
        print(f"[Model] Loading ONNX from {model_path}...")
        self.session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        self.input_name = self.session.get_inputs()[0].name

    def inference(self, image_bgr):
        """Run detection on a BGR image; returns (boxes_xyxy, scores, class_ids)."""
        input_image, ratio = self.preprocess(image_bgr, self.input_shape)
        ort_inputs = {self.input_name: input_image[None, :, :, :]}
        outputs = self.session.run(None, ort_inputs)
        predictions = outputs[0][0]  # batch=1; boxes are assumed decoded in the graph
        boxes_xyxy, scores, class_ids = self.postprocess(predictions, ratio)
        return boxes_xyxy, scores, class_ids
    def preprocess(self, image, input_size):
        # Letterbox: resize preserving aspect ratio, pad the remainder with
        # gray (114). The 3-channel branch is the one used here.
        if len(image.shape) == 3:
            padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
        else:
            padded_img = np.ones(input_size, dtype=np.uint8) * 114
        r = min(input_size[0] / image.shape[0], input_size[1] / image.shape[1])
        resized_img = cv2.resize(
            image,
            (int(image.shape[1] * r), int(image.shape[0] * r)),
            interpolation=cv2.INTER_LINEAR,
        )
        padded_img[: int(image.shape[0] * r), : int(image.shape[1] * r)] = resized_img
        image = padded_img.transpose(2, 0, 1)  # HWC -> CHW
        image = np.ascontiguousarray(image, dtype=np.float32)
        return image, r
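
    # Worked example of the letterbox math above: a 1280x720 frame at input
    # 416x416 gives r = min(416/720, 416/1280) = 0.325, so the frame resizes to
    # 416x234 and the rest of the canvas stays gray padding; postprocess()
    # divides boxes by the same r to map them back to original pixels.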
    def postprocess(self, predictions, ratio):
        boxes = predictions[:, :4]
        # Per-class scores = objectness * class probabilities.
        scores = predictions[:, 4:5] * predictions[:, 5:]
        # Convert (cx, cy, w, h) to (x1, y1, x2, y2).
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
        boxes_xyxy /= ratio  # undo the letterbox scaling back to original pixels
        dets = self.multiclass_nms(boxes_xyxy, scores, nms_thr=self.nms_thr, score_thr=self.score_thr)
        if dets is None:
            return [], [], []
        final_boxes = dets[:, :4]
        final_scores = dets[:, 4]
        final_cls_inds = dets[:, 5]
        return final_boxes, final_scores, final_cls_inds
    def multiclass_nms(self, boxes, scores, nms_thr, score_thr):
        """Class-aware NMS: suppress overlaps within each class independently."""
        final_dets = []
        num_classes = scores.shape[1]
        for cls_ind in range(num_classes):
            cls_scores = scores[:, cls_ind]
            valid_score_mask = cls_scores > score_thr
            if valid_score_mask.sum() == 0:
                continue
            valid_boxes = boxes[valid_score_mask]
            valid_scores = cls_scores[valid_score_mask]
            keep = self.nms_cpu(valid_boxes, valid_scores, nms_thr)
            for i in keep:
                final_dets.append([
                    valid_boxes[i][0], valid_boxes[i][1], valid_boxes[i][2], valid_boxes[i][3],
                    valid_scores[i], cls_ind,
                ])
        if len(final_dets) == 0:
            return None
        return np.array(final_dets)
    def nms_cpu(self, boxes, scores, nms_thr):
        """Greedy single-class NMS on CPU, highest score first."""
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # Intersection of the current top box with the remaining boxes.
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)  # IoU
            # Keep only boxes whose IoU with the current box is below threshold.
            inds = np.where(ovr <= nms_thr)[0]
            order = order[inds + 1]
        return keep
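
# Optional smoke test, guarded by a hypothetical HANDSEAL_SELFTEST env var so it
# never runs inside the Space by default: load the model and run one blank frame
# to confirm the ONNX session and output shapes line up.
if os.environ.get("HANDSEAL_SELFTEST"):
    _probe = YoloxONNX(ensure_model(), CLASS_NAMES)
    _blank = np.full((480, 640, 3), 114, dtype=np.uint8)
    print("[SelfTest] detections:", _probe.inference(_blank))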
# -----------------------------
# Detector: build once, reuse across requests
# -----------------------------
_detector = None

def get_detector(score_thr=0.6, nms_thr=0.45, input_size=416):
    """Build the detector on first use; afterwards update thresholds in place
    and rebuild only when the input size changes. (Previously the cached
    detector silently ignored slider changes after the first call.)"""
    global _detector
    shape = (int(input_size), int(input_size))
    if _detector is None or _detector.input_shape != shape:
        path = ensure_model()
        _detector = YoloxONNX(
            path,
            CLASS_NAMES,
            score_thr=float(score_thr),
            nms_thr=float(nms_thr),
            input_shape=shape,
        )
    else:
        # Honor the latest UI slider values without reloading the ONNX session.
        _detector.score_thr = float(score_thr)
        _detector.nms_thr = float(nms_thr)
    return _detector
def draw_detections(img_bgr, results):
    out = img_bgr.copy()
    for r in results:
        x1, y1, x2, y2 = map(int, r["box"])
        label = r["label"]
        score = r["score"]
        cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(
            out,
            f"{label} {score:.2f}",
            (x1, max(0, y1 - 8)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 255, 0),
            2,
            cv2.LINE_AA,
        )
    return out
def predict_ui(image_pil, score_thr, nms_thr, input_size):
    if image_pil is None:
        return None, [], "{}"
    # Gradio hands us a PIL image; force RGB (drops any alpha channel), then
    # convert to BGR for the OpenCV pipeline.
    img_rgb = np.array(image_pil.convert("RGB"))
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    detector = get_detector(score_thr=score_thr, nms_thr=nms_thr, input_size=input_size)
    t0 = time.time()
    boxes, scores, class_ids = detector.inference(img_bgr)
    dt_ms = (time.time() - t0) * 1000
    results = []
    for i in range(len(boxes)):
        box = boxes[i]
        score = float(scores[i])
        label_id = int(class_ids[i])
        label = CLASS_NAMES[label_id]
        results.append({
            "label": label,
            "score": score,
            "box": [float(box[0]), float(box[1]), float(box[2]), float(box[3])],
        })
    annotated_bgr = draw_detections(img_bgr, results)
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
    annotated_pil = Image.fromarray(annotated_rgb)
    table = [[r["label"], r["score"], *r["box"]] for r in results]
    meta = {"inference_ms": round(dt_ms, 2), "num_detections": len(results), "results": results}
    return annotated_pil, table, json.dumps(meta, indent=2)
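
# Shape of the raw JSON returned above (values illustrative, not real output):
# {
#   "inference_ms": 42.13,
#   "num_detections": 1,
#   "results": [{"label": "Tiger", "score": 0.87, "box": [x1, y1, x2, y2]}]
# }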
with gr.Blocks(title="Shinobi Hand Seal Detector (YOLOX Nano ONNX)") as demo:
    gr.Markdown(
        "Upload an image. The Space runs YOLOX Nano (ONNX Runtime, CPU) and returns detected hand seals."
    )
    with gr.Row():
        inp = gr.Image(type="pil", label="Input Image")
        out_img = gr.Image(type="pil", label="Annotated Output")
    with gr.Row():
        score_thr = gr.Slider(0.1, 0.95, value=0.6, step=0.05, label="Score threshold")
        nms_thr = gr.Slider(0.1, 0.95, value=0.45, step=0.05, label="NMS threshold")
        input_size = gr.Radio([320, 416, 512], value=416, label="Input size")
    btn = gr.Button("Detect")
    out_table = gr.Dataframe(
        headers=["label", "score", "x1", "y1", "x2", "y2"],
        datatype=["str", "number", "number", "number", "number", "number"],
        label="Detections",
        interactive=False,
    )
    out_json = gr.Code(label="Raw JSON", language="json")
    btn.click(
        predict_ui,
        inputs=[inp, score_thr, nms_thr, input_size],
        outputs=[out_img, out_table, out_json],
        api_name="detect",  # expose a named endpoint for API / gradio_client calls
    )

demo.queue().launch(show_error=True)
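
# Example client call against the named "detect" endpoint above, run from a
# separate process. The Space id is a placeholder; substitute your own.
# Requires `pip install gradio_client`.
#
#   from gradio_client import Client, handle_file
#   client = Client("your-username/handseal-server")  # placeholder Space id
#   annotated, table, raw_json = client.predict(
#       handle_file("seal.jpg"), 0.6, 0.45, 416, api_name="/detect"
#   )
#   print(raw_json)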