Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import json | |
| import base64 | |
| import numpy as np | |
| import cv2 | |
| import onnxruntime as ort | |
| import gradio as gr | |
| from PIL import Image | |
# -----------------------------
# Configuration
# -----------------------------

# Label for each class index emitted by the model (index -> name).
CLASS_NAMES = [
    "Rat",
    "Ox",
    "Tiger",
    "Hare",
    "Dragon",
    "Snake",
    "Horse",
    "Ram",
    "Monkey",
    "Bird",
    "Dog",
    "Boar",
]

# The model is stored under HF_HOME when that variable is set, otherwise
# under the current working directory. If the location is not persistent the
# model is simply downloaded again after a restart.
CACHE_DIR = os.environ.get("HF_HOME", ".")
MODEL_NAME = "yolox_nano.onnx"
MODEL_DIR = os.path.join(CACHE_DIR, "models", "yolox_nano")
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)

# Google Drive download link for the ONNX weights.
MODEL_DOWNLOAD_URL = "https://drive.google.com/uc?id=1xeh3rrIhSqH0BAv7jLCGPZf1waQ4tlXY"
def ensure_model():
    """Return the local path of the ONNX model, downloading it if needed.

    A file at ``MODEL_PATH`` that is larger than 1 MiB is treated as a valid
    cached model. Anything smaller is assumed to be a truncated or failed
    download, is removed, and the model is fetched again from
    ``MODEL_DOWNLOAD_URL``.

    Returns:
        The value of ``MODEL_PATH``.

    Raises:
        RuntimeError: If the download fails or leaves a file that is missing
            or implausibly small.
    """
    min_valid_bytes = 1024 * 1024

    os.makedirs(MODEL_DIR, exist_ok=True)
    if os.path.isfile(MODEL_PATH) and os.path.getsize(MODEL_PATH) > min_valid_bytes:
        return MODEL_PATH

    print(f"[Model] Downloading to: {MODEL_PATH}")
    # Imported lazily so the dependency is only required when a download
    # actually has to happen.
    import gdown

    # Drop any stale/partial file before downloading again. Removal stays
    # best-effort, but failures are reported instead of silently ignored.
    try:
        os.remove(MODEL_PATH)
    except FileNotFoundError:
        pass
    except OSError as exc:
        print(f"[Model] Could not remove stale file {MODEL_PATH}: {exc}")

    downloaded = gdown.download(MODEL_DOWNLOAD_URL, MODEL_PATH, quiet=False)

    # Fail here with a clear message rather than letting ONNX Runtime choke
    # later on a missing or truncated file.
    if (
        downloaded is None
        or not os.path.isfile(MODEL_PATH)
        or os.path.getsize(MODEL_PATH) <= min_valid_bytes
    ):
        raise RuntimeError(
            f"Model download failed or produced an incomplete file: {MODEL_PATH}"
        )
    return MODEL_PATH
| # ========================================== | |
| # YOLOX INFERENCE CLASS (ported from your code) | |
| # ========================================== | |
class YoloxONNX:
    """YOLOX object detector running on an ONNX Runtime CPU session.

    The pipeline is: letterbox the image to ``input_shape``, run the ONNX
    session, convert centre/size boxes to corner boxes in original-image
    coordinates, then apply per-class non-maximum suppression.
    """

    def __init__(self, model_path, class_names, score_thr=0.6, nms_thr=0.45, input_shape=(416, 416)):
        """Create the inference session.

        Args:
            model_path: Path to the ``.onnx`` model file.
            class_names: Sequence of class labels (stored, not used here).
            score_thr: Minimum per-class score for a detection to be kept.
            nms_thr: IoU above which a lower-scoring box is suppressed.
            input_shape: ``(height, width)`` of the network input.
        """
        self.input_shape = input_shape
        self.score_thr = score_thr
        self.nms_thr = nms_thr
        self.class_names = class_names
        print(f"[Model] Loading ONNX from {model_path}...")
        self.session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
        self.input_name = self.session.get_inputs()[0].name

    def inference(self, image_bgr):
        """Run detection on one image.

        Args:
            image_bgr: Image array; the caller in this file passes BGR.

        Returns:
            ``(boxes_xyxy, scores, class_ids)`` as produced by
            :meth:`postprocess`.
        """
        input_image, ratio = self.preprocess(image_bgr, self.input_shape)
        # Add the leading batch dimension expected by the session.
        ort_inputs = {self.input_name: input_image[None, :, :, :]}
        outputs = self.session.run(None, ort_inputs)
        predictions = outputs[0][0]  # first output, batch element 0
        return self.postprocess(predictions, ratio)

    def preprocess(self, image, input_size):
        """Letterbox ``image`` into a ``(3, H, W)`` float32 array.

        The image is scaled with its aspect ratio preserved so that it fits
        inside ``input_size``, placed in the top-left corner, and the
        remainder is filled with the value 114. Pixel values are not
        normalised.

        Grayscale input is replicated to three channels and a fourth
        (alpha) channel is dropped, so such images no longer crash.

        Args:
            image: ``(H, W)``, ``(H, W, 1)``, ``(H, W, 3)`` or ``(H, W, 4)``
                array.
            input_size: ``(height, width)`` of the network input.

        Returns:
            ``(chw_image, ratio)`` where ``ratio`` is the scale factor that
            was applied to the original image.

        Raises:
            ValueError: If the image is empty or has an unsupported shape.
        """
        if image.ndim == 2:
            image = np.repeat(image[:, :, None], 3, axis=2)
        elif image.ndim == 3 and image.shape[2] == 1:
            image = np.repeat(image, 3, axis=2)
        elif image.ndim == 3 and image.shape[2] == 4:
            image = np.ascontiguousarray(image[:, :, :3])
        if image.ndim != 3 or image.shape[2] != 3:
            raise ValueError(f"Unsupported image shape: {image.shape}")

        src_h, src_w = image.shape[:2]
        if src_h == 0 or src_w == 0:
            raise ValueError("Cannot preprocess an empty image")

        dst_h, dst_w = input_size
        r = min(dst_h / src_h, dst_w / src_w)
        # Clamp to at least one pixel so very elongated images do not ask
        # cv2.resize for a zero-sized output.
        new_w = max(1, int(src_w * r))
        new_h = max(1, int(src_h * r))
        resized_img = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

        padded_img = np.full((dst_h, dst_w, 3), 114, dtype=np.uint8)
        padded_img[:new_h, :new_w] = resized_img

        chw = np.ascontiguousarray(padded_img.transpose(2, 0, 1), dtype=np.float32)  # HWC -> CHW
        return chw, r

    def postprocess(self, predictions, ratio):
        """Turn raw predictions into final detections.

        Each row of ``predictions`` is read as
        ``[cx, cy, w, h, objectness, class_score_0, ...]``.
        NOTE(review): this assumes the model output is already decoded to
        input-pixel coordinates -- confirm against the export settings.

        Args:
            predictions: 2-D array with one candidate per row.
            ratio: Scale factor returned by :meth:`preprocess`; boxes are
                divided by it to map back to the original image.

        Returns:
            ``(boxes, scores, class_ids)`` arrays, or three empty lists when
            nothing passes the score threshold.
        """
        boxes = predictions[:, :4]
        # Final class confidence = objectness * class probability.
        scores = predictions[:, 4:5] * predictions[:, 5:]

        half_w = boxes[:, 2] / 2.0
        half_h = boxes[:, 3] / 2.0
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - half_w
        boxes_xyxy[:, 1] = boxes[:, 1] - half_h
        boxes_xyxy[:, 2] = boxes[:, 0] + half_w
        boxes_xyxy[:, 3] = boxes[:, 1] + half_h
        boxes_xyxy /= ratio  # back to original-image coordinates

        dets = self.multiclass_nms(boxes_xyxy, scores, nms_thr=self.nms_thr, score_thr=self.score_thr)
        if dets is None:
            return [], [], []
        return dets[:, :4], dets[:, 4], dets[:, 5]

    def multiclass_nms(self, boxes, scores, nms_thr, score_thr):
        """Apply NMS independently for every class.

        Args:
            boxes: ``(N, 4)`` corner boxes.
            scores: ``(N, C)`` per-class scores.
            nms_thr: IoU threshold passed to :meth:`nms_cpu`.
            score_thr: Candidates scoring at or below this are discarded.

        Returns:
            ``(M, 6)`` float64 array with rows
            ``[x1, y1, x2, y2, score, class_index]`` ordered by class index,
            or ``None`` when no candidate survives.
        """
        per_class = []
        for cls_ind in range(scores.shape[1]):
            cls_scores = scores[:, cls_ind]
            valid_score_mask = cls_scores > score_thr
            if not valid_score_mask.any():
                continue
            valid_boxes = boxes[valid_score_mask]
            valid_scores = cls_scores[valid_score_mask]
            keep = self.nms_cpu(valid_boxes, valid_scores, nms_thr)
            # Assemble all kept rows for this class in one shot instead of
            # appending them one by one.
            per_class.append(
                np.column_stack(
                    (
                        valid_boxes[keep],
                        valid_scores[keep],
                        np.full(len(keep), cls_ind, dtype=np.float64),
                    )
                )
            )
        if not per_class:
            return None
        # float64 is what np.array() yields for rows that mix float scores
        # with Python int class indices, so the result dtype is unchanged.
        return np.concatenate(per_class, axis=0).astype(np.float64, copy=False)

    def nms_cpu(self, boxes, scores, nms_thr):
        """Greedy non-maximum suppression for a single class.

        Boxes are visited from highest to lowest score; a box is kept and
        every remaining box whose IoU with it exceeds ``nms_thr`` is
        dropped. Widths and heights are computed as ``x2 - x1 + 1``.

        Args:
            boxes: ``(N, 4)`` corner boxes.
            scores: ``(N,)`` scores.
            nms_thr: IoU threshold.

        Returns:
            List of kept row indices into ``boxes``, best score first.
        """
        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            rest = order[1:]
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[rest] - inter)
            # Indices are relative to ``rest``; survivors become the next order.
            order = rest[np.where(ovr <= nms_thr)[0]]
        return keep
# -----------------------------
# Lazily created, shared detector
# -----------------------------
_detector = None


def get_detector(score_thr=0.6, nms_thr=0.45, input_size=416):
    """Return the shared detector, configured with the requested settings.

    The ONNX session is created on the first call (which may trigger the
    model download) and reused afterwards. The thresholds and input size are
    applied on every call, so values chosen after the first request are no
    longer ignored.

    NOTE(review): a model exported with a fixed input resolution will reject
    other ``input_size`` values at inference time -- confirm which sizes the
    deployed model accepts.

    Args:
        score_thr: Minimum detection score.
        nms_thr: IoU threshold for non-maximum suppression.
        input_size: Side length of the square network input.

    Returns:
        The module-wide detector instance.
    """
    global _detector
    score_thr = float(score_thr)
    nms_thr = float(nms_thr)
    input_shape = (int(input_size), int(input_size))

    if _detector is None:
        _detector = YoloxONNX(
            ensure_model(),
            CLASS_NAMES,
            score_thr=score_thr,
            nms_thr=nms_thr,
            input_shape=input_shape,
        )
    else:
        # Reuse the loaded session but honour the caller's current settings.
        _detector.score_thr = score_thr
        _detector.nms_thr = nms_thr
        _detector.input_shape = input_shape
    return _detector
def draw_detections(img_bgr, results):
    """Return a copy of ``img_bgr`` with boxes and labels drawn on it.

    Args:
        img_bgr: Image array to annotate; it is not modified.
        results: Iterable of dicts with keys ``"box"`` (``x1, y1, x2, y2``),
            ``"label"`` and ``"score"``.

    Returns:
        The annotated copy.
    """
    out = img_bgr.copy()
    for r in results:
        x1, y1, x2, y2 = map(int, r["box"])
        text = f"{r['label']} {r['score']:.2f}"
        cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # putText anchors the text at its bottom-left corner, so a baseline
        # near y=0 renders the label outside the image. When there is no
        # room above the box, draw the label just inside its top edge.
        (_, text_h), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        text_y = y1 - 8
        if text_y - text_h < 0:
            text_y = max(y1, 0) + text_h + 8

        cv2.putText(
            out,
            text,
            (max(0, x1), text_y),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.6,
            (0, 255, 0),
            2,
            cv2.LINE_AA,
        )
    return out
def predict_ui(image_pil, score_thr, nms_thr, input_size):
    """Gradio callback: detect hand seals in an image.

    Args:
        image_pil: Input image (PIL image from the UI), or ``None`` when
            nothing was uploaded.
        score_thr: Minimum detection score.
        nms_thr: IoU threshold for non-maximum suppression.
        input_size: Side length of the square network input.

    Returns:
        ``(annotated_image, table_rows, json_text)``: the annotated PIL
        image, one ``[label, score, x1, y1, x2, y2]`` row per detection, and
        a JSON string with timing and the raw results. Returns
        ``(None, [], "{}")`` when ``image_pil`` is ``None``.
    """
    if image_pil is None:
        return None, [], "{}"

    # Normalise other PIL modes (RGBA, L, P, ...) so that the RGB -> BGR
    # conversion below always receives a 3-channel array.
    if isinstance(image_pil, Image.Image) and image_pil.mode != "RGB":
        image_pil = image_pil.convert("RGB")
    img_rgb = np.array(image_pil)
    img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)

    detector = get_detector(score_thr=score_thr, nms_thr=nms_thr, input_size=input_size)

    # perf_counter is monotonic, unlike time.time, so the measured interval
    # cannot be skewed by wall-clock adjustments.
    t0 = time.perf_counter()
    boxes, scores, class_ids = detector.inference(img_bgr)
    dt_ms = (time.perf_counter() - t0) * 1000

    results = []
    for box, score, class_id in zip(boxes, scores, class_ids):
        label_id = int(class_id)
        # Fall back to a generic name rather than raising IndexError if the
        # model reports a class index that has no entry in CLASS_NAMES.
        if 0 <= label_id < len(CLASS_NAMES):
            label = CLASS_NAMES[label_id]
        else:
            label = f"class_{label_id}"
        results.append({
            "label": label,
            "score": float(score),
            "box": [float(box[0]), float(box[1]), float(box[2]), float(box[3])],
        })

    annotated_bgr = draw_detections(img_bgr, results)
    annotated_rgb = cv2.cvtColor(annotated_bgr, cv2.COLOR_BGR2RGB)
    annotated_pil = Image.fromarray(annotated_rgb)

    table = [[r["label"], r["score"], *r["box"]] for r in results]
    meta = {"inference_ms": round(dt_ms, 2), "num_detections": len(results), "results": results}
    return annotated_pil, table, json.dumps(meta, indent=2)
# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks(title="Shinobi Hand Seal Detector (YOLOX Nano ONNX)") as demo:
    gr.Markdown(
        "Upload an image. The Space runs YOLOX Nano (ONNXRuntime CPU) and returns detected hand seals."
    )
    with gr.Row():
        inp = gr.Image(type="pil", label="Input Image")
        out_img = gr.Image(type="pil", label="Annotated Output")
    with gr.Row():
        score_thr = gr.Slider(0.1, 0.95, value=0.6, step=0.05, label="Score threshold")
        nms_thr = gr.Slider(0.1, 0.95, value=0.45, step=0.05, label="NMS threshold")
        input_size = gr.Radio([320, 416, 512], value=416, label="Input size")
    btn = gr.Button("Detect")
    out_table = gr.Dataframe(
        headers=["label", "score", "x1", "y1", "x2", "y2"],
        datatype=["str", "number", "number", "number", "number", "number"],
        label="Detections",
        interactive=False,
    )
    out_json = gr.Code(label="Raw JSON", language="json")
    btn.click(
        predict_ui,
        inputs=[inp, score_thr, nms_thr, input_size],
        outputs=[out_img, out_table, out_json],
        # Explicit endpoint name for programmatic access to this handler.
        api_name="detect",
    )

# Only start the server when this file is run as a script, so the module can
# be imported (for tests or reuse of `demo`) without launching the app.
if __name__ == "__main__":
    demo.queue().launch(show_error=True)