TeeA committed on
Commit
d6cfb5e
·
1 Parent(s): 9a19e9e
Files changed (5) hide show
  1. app.py +563 -541
  2. encode_image.py +29 -0
  3. llm_service.py +258 -0
  4. mv_utils_zs.py +483 -0
  5. string_utils.py +69 -0
app.py CHANGED
@@ -1,352 +1,116 @@
1
- # app.py
2
- import os
3
- import subprocess
4
  import asyncio
5
- import base64
6
- import io
7
  import random
8
- import string
9
- import re
10
- import zipfile
11
- import xml.etree.ElementTree as ET
12
  import tempfile
13
- from urllib.parse import urlparse
14
- from typing import Tuple, Dict, Any, Union, List
15
 
16
- import gradio as gr
17
- import trimesh
18
  import numpy as np
19
  import torch
20
- from openai import AsyncOpenAI
21
- from PIL import Image
 
 
22
  from loguru import logger
 
23
  from sklearn.metrics.pairwise import cosine_similarity
24
  from torch import Tensor
25
- import torchvision.transforms.functional as TF
26
- from torch_scatter import scatter
27
 
28
- from llama_index.embeddings.clip import ClipEmbedding
29
- from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingMode
30
 
31
- # Tải API Key từ biến môi trường (sẽ được set trong Hugging Face Secrets)
32
- OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
33
 
34
- # ==============================================================================
35
- # PHẦN 1: CÁC HÀM TIỆN ÍCH VÀ KHAI BÁO (TỪ NOTEBOOK)
36
- # ==============================================================================
 
 
 
37
 
38
# File-format constants.
GRADIO_3D_MODEL_DEFAULT_FORMAT = [".obj", ".glb", ".gltf", ".stl", ".splat", ".ply"]
USER_REQUIRE_FORMAT = [".3dxml", ".step"]
FREECAD_LOW_LEVEL_FORMAT = [".step", ".igs", ".iges"]
FREECAD_NATIVE_FORMAT = [".fcstd"]

# Deduplicated union of every supported extension, accepted in both
# lower-case and upper-case spellings.
_all_extensions = {
    ext
    for group in (
        GRADIO_3D_MODEL_DEFAULT_FORMAT,
        USER_REQUIRE_FORMAT,
        FREECAD_NATIVE_FORMAT,
        FREECAD_LOW_LEVEL_FORMAT,
    )
    for ext in group
}
VALID_FILE_TYPES = list(_all_extensions)
VALID_FILE_TYPES = VALID_FILE_TYPES + [ext.upper() for ext in VALID_FILE_TYPES]
52
-
53
# Realistic Projection Parameters (from mv_utils_zs.py)
TRANS = -1.5  # z-translation applied to every view (moves the cloud away from the camera)
params = {
    "maxpoolz": 1,       # MaxPool3d kernel size along z (see Grid2Image)
    "maxpoolxy": 7,      # MaxPool3d kernel size in x/y
    "maxpoolpadz": 0,    # MaxPool3d padding along z
    "maxpoolpadxy": 2,   # MaxPool3d padding in x/y
    "convz": 1,          # Gaussian Conv3d kernel depth
    "convxy": 3,         # Gaussian Conv3d kernel size in x/y
    "convsigmaxy": 3,    # Gaussian sigma in x/y
    "convsigmaz": 1,     # Gaussian sigma along z
    "convpadz": 0,       # Conv3d padding along z
    "convpadxy": 1,      # Conv3d padding in x/y
    "imgbias": 0.0,      # NOTE(review): not referenced by the visible code — confirm before removing
    "depth_bias": 0.2,   # shifts z before quantization (see points2grid)
    "obj_ratio": 0.8,    # x/y shrink factor so the object keeps a border margin
    "bg_clr": 0.0,       # background fill value for empty grid cells
    "resolution": 122,   # grid side length in x/y
    "depth": 8,          # number of discrete z slices
    "grid_height": 64,   # NOTE(review): not referenced by the visible code — confirm before removing
    "grid_width": 64,    # NOTE(review): not referenced by the visible code — confirm before removing
}
75
-
76
-
77
def get2DGaussianKernel(ksize, sigma=0):
    """Return a normalized 2-D Gaussian kernel of shape (ksize, ksize).

    Args:
        ksize: kernel side length.
        sigma: Gaussian standard deviation. If <= 0, it is derived from
            ksize using OpenCV's getGaussianKernel rule.

    Returns:
        torch.Tensor (float32) summing to 1.
    """
    if sigma <= 0:
        # BUGFIX: the original default sigma=0 divided by zero below and
        # produced a NaN kernel. Fall back to OpenCV's sigma-from-ksize rule.
        sigma = 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8
    center = ksize // 2
    xs = np.arange(ksize, dtype=np.float32) - center
    kernel1d = np.exp(-(xs**2) / (2 * sigma**2))
    # Outer product of the 1-D kernel with itself gives the separable 2-D kernel.
    kernel = kernel1d[..., None] @ kernel1d[None, ...]
    kernel = torch.from_numpy(kernel)
    kernel = kernel / kernel.sum()
    return kernel
85
-
86
-
87
def get3DGaussianKernel(ksize, depth, sigma=2, zsigma=2):
    """Return a normalized 3-D Gaussian kernel of shape (depth, ksize, ksize).

    Args:
        ksize: kernel side length in x/y.
        depth: kernel extent along z.
        sigma: Gaussian sigma in x/y.
        zsigma: Gaussian sigma along z.

    Returns:
        torch.Tensor summing to 1.
    """
    kernel2d = get2DGaussianKernel(ksize, sigma)  # torch tensor (ksize, ksize)
    zs = np.arange(depth, dtype=np.float32) - depth // 2
    zkernel = torch.from_numpy(np.exp(-(zs**2) / (2 * zsigma**2)))
    # BUGFIX: the original called np.repeat on a torch tensor and then
    # torch.sum on the resulting numpy array, mixing array types. Stay in
    # torch throughout; broadcasting replaces the explicit repeat.
    kernel3d = kernel2d.unsqueeze(0) * zkernel[:, None, None]
    kernel3d = kernel3d / kernel3d.sum()
    return kernel3d
94
-
95
-
96
def euler2mat(angle):
    """Convert Euler angles to a rotation matrix R = Rx @ Ry @ Rz.

    Args:
        angle: tensor of shape (3,) for a single rotation, or (B, 3) for a
            batch; components are (x, y, z) angles in radians.

    Returns:
        (3, 3) or (B, 3, 3) rotation matrix tensor.
    """
    batched = len(angle.size()) != 1
    if batched:
        bsz, _ = angle.size()
        ax, ay, az = angle[:, 0], angle[:, 1], angle[:, 2]
        stack_dim, out_shape = 1, [bsz, 3, 3]
    else:
        ax, ay, az = angle[0], angle[1], angle[2]
        stack_dim, out_shape = 0, [3, 3]

    # Constant tensors shaped like the angle components, detached so no
    # gradient flows through them.
    zeros = az.detach() * 0
    ones = zeros + 1

    def _assemble(entries):
        # Stack nine scalar (or batch) entries and reshape into 3x3 matrices.
        return torch.stack(entries, dim=stack_dim).reshape(out_shape)

    cz, sz = torch.cos(az), torch.sin(az)
    rot_z = _assemble([cz, -sz, zeros, sz, cz, zeros, zeros, zeros, ones])

    cy, sy = torch.cos(ay), torch.sin(ay)
    rot_y = _assemble([cy, zeros, sy, zeros, ones, zeros, -sy, zeros, cy])

    cx, sx = torch.cos(ax), torch.sin(ax)
    rot_x = _assemble([ones, zeros, zeros, zeros, cx, -sx, zeros, sx, cx])

    return rot_x @ rot_y @ rot_z
118
-
119
-
120
def points2grid(points, resolution=params["resolution"], depth=params["depth"]):
    """Quantize point clouds into dense grids of shape (batch, depth, resolution, resolution).

    Args:
        points: (batch, num_points, 3) tensor of xyz coordinates.
        resolution: output grid side length in x/y.
        depth: number of discrete z slices.

    Returns:
        Grid tensor where each occupied cell holds the maximum normalized z
        value scattered into it; empty cells hold params["bg_clr"].
    """
    batch, pnum, _ = points.shape
    # Normalize each cloud to roughly [-1, 1] around its bounding-box center.
    pmax, pmin = points.max(dim=1)[0], points.min(dim=1)[0]
    pcent = (pmax + pmin) / 2
    pcent = pcent[:, None, :]
    prange = (pmax - pmin).max(dim=-1)[0][:, None, None]
    points = (points - pcent) / prange * 2.0
    # Shrink x/y so the object keeps a margin inside the image.
    points[:, :, :2] = points[:, :, :2] * params["obj_ratio"]
    # Map normalized coordinates to grid indices; z gets an extra bias so
    # near-camera points are not clipped to slice 0.
    _x = (points[:, :, 0] + 1) / 2 * resolution
    _y = (points[:, :, 1] + 1) / 2 * resolution
    _z = (
        ((points[:, :, 2] + 1) / 2 + params["depth_bias"])
        / (1 + params["depth_bias"])
        * (depth - 2)
    )
    _x.ceil_(), _y.ceil_()  # in-place rounding up of pixel indices
    z_int = _z.ceil()
    # Clamp to keep a 1-cell border on every side of the grid.
    _x, _y, _z = (
        torch.clip(_x, 1, resolution - 2),
        torch.clip(_y, 1, resolution - 2),
        torch.clip(_z, 1, depth - 2),
    )
    # Flatten (z, y, x) into a single linear index per point.
    coordinates = z_int * resolution * resolution + _y * resolution + _x
    grid = (
        torch.ones([batch, depth, resolution, resolution], device=points.device).view(
            batch, -1
        )
        * params["bg_clr"]
    )
    # Scatter the (clamped) z value of every point, keeping the max per cell.
    grid = scatter(_z, coordinates.long(), dim=1, out=grid, reduce="max")
    # Swap the last two axes — presumably to match the renderer's image
    # orientation; TODO confirm against Grid2Image/consumers.
    grid = grid.reshape((batch, depth, resolution, resolution)).permute((0, 1, 3, 2))
    return grid
152
-
153
-
154
class Grid2Image(torch.nn.Module):
    """Turn a 3-D occupancy/depth grid into a 3-channel 2-D image.

    The grid is dilated with a max-pool, smoothed with a fixed 3-D Gaussian
    convolution, then flattened along the depth axis with a max.
    """

    def __init__(self):
        super().__init__()
        self.maxpool = torch.nn.MaxPool3d(
            (params["maxpoolz"], params["maxpoolxy"], params["maxpoolxy"]),
            stride=1,
            padding=(
                params["maxpoolpadz"],
                params["maxpoolpadxy"],
                params["maxpoolpadxy"],
            ),
        )
        self.conv = torch.nn.Conv3d(
            1,
            1,
            kernel_size=(params["convz"], params["convxy"], params["convxy"]),
            stride=1,
            padding=(params["convpadz"], params["convpadxy"], params["convpadxy"]),
            bias=True,
        )
        # The conv weights are overwritten with a fixed Gaussian kernel:
        # this module acts as a deterministic image filter, not a trainable
        # layer.
        kn3d = get3DGaussianKernel(
            params["convxy"],
            params["convz"],
            sigma=params["convsigmaxy"],
            zsigma=params["convsigmaz"],
        )
        self.conv.weight.data = torch.Tensor(kn3d).repeat(1, 1, 1, 1, 1)
        self.conv.bias.data.fill_(0)

    def forward(self, x):
        """Map a (batch, depth, H, W) grid to a (batch, 3, H, W) image."""
        x = self.maxpool(x.unsqueeze(1))  # add channel dim for the 3-D ops
        x = self.conv(x)
        # Collapse the depth axis, keeping the strongest response per pixel.
        img = torch.max(x, dim=2)[0]
        # Normalize each image by its own max, then invert so the background
        # becomes bright and the object dark.
        img = img / torch.max(torch.max(img, dim=-1)[0], dim=-1)[0][:, :, None, None]
        img = 1 - img
        # Replicate to 3 channels for RGB consumers downstream.
        img = img.repeat(1, 3, 1, 1)
        return img
191
-
192
-
193
class Realistic_Projection:
    """Render a point cloud into depth images from 10 fixed viewpoints.

    Each viewpoint is a hard-coded (Euler angles, translation) pair; a second
    rotation set (_views_bias) applies a small extra tilt per view.
    """

    def __init__(self):
        # Per-view [[euler angles], [translation]]; TRANS moves the cloud
        # away from the camera along z.
        _views = np.asarray([
            [[np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[3 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[5 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[7 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[0, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[np.pi, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[3 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[0, -np.pi / 2, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[0, np.pi / 2, np.pi / 2], [-0.5, -0.5, TRANS]],
        ])
        # Small secondary tilt applied after the main rotation (last two
        # views get a gentler tilt).
        _views_bias = np.asarray([
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 15, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 15, 0], [-0.5, 0, TRANS]],
        ])
        angle, angle2 = (
            torch.tensor(_views[:, 0, :]).float(),
            torch.tensor(_views_bias[:, 0, :]).float(),
        )
        # Transposed rotation matrices, precomputed once per process.
        self.rot_mat, self.rot_mat2 = (
            euler2mat(angle).transpose(1, 2),
            euler2mat(angle2).transpose(1, 2),
        )
        self.translation = torch.tensor(_views[:, 1, :]).float().unsqueeze(1)
        self.grid2image = Grid2Image()

    def get_img(self, points):
        """Project (batch, N, 3) points into one image per view.

        Each cloud is replicated once per view, transformed, rasterized with
        points2grid, and converted to an image by Grid2Image.
        """
        b, _, _ = points.shape
        v = self.translation.shape[0]  # number of views (10)
        _points = self.point_transform(
            torch.repeat_interleave(points, v, dim=0),
            self.rot_mat.repeat(b, 1, 1),
            self.rot_mat2.repeat(b, 1, 1),
            self.translation.repeat(b, 1, 1),
        )
        grid = points2grid(
            _points, resolution=params["resolution"], depth=params["depth"]
        ).squeeze()
        return self.grid2image(grid)

    @staticmethod
    def point_transform(points, rot_mat, rot_mat2, translation):
        """Apply both rotations, then subtract the translation.

        All operands are moved to points' device first.
        """
        rot_mat, rot_mat2, translation = (
            rot_mat.to(points.device),
            rot_mat2.to(points.device),
            translation.to(points.device),
        )
        points = torch.matmul(points, rot_mat)
        points = torch.matmul(points, rot_mat2)
        return points - translation
254
-
255
-
256
# OpenAI Service
class OpenAIService:
    """Thin async wrapper around the OpenAI chat API for image+text prompts."""

    def __init__(self):
        self.model_name = "gpt-4o"
        self.temperature = 0.3
        self.client = AsyncOpenAI(api_key=OPENAI_API_KEY)

    @staticmethod
    def encode_image(image: Union[str, np.ndarray]) -> str:
        """Base64-encode an image given as a file path or a NumPy array.

        Args:
            image: path to an image file, or an array PIL can interpret
                (e.g. HxWx3 uint8 RGB — TODO confirm callers' layout).

        Returns:
            Base64 string of the raw file bytes (path) or JPEG bytes (array).

        Raises:
            TypeError: if *image* is neither a path nor a NumPy array.
        """
        if isinstance(image, str):
            with open(image, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode("utf-8")
        elif isinstance(image, np.ndarray):
            # BUGFIX: the original called cv2.imencode, but cv2 is never
            # imported in this module, so this branch raised NameError at
            # runtime. Use the already-imported PIL to produce JPEG bytes.
            buffer = io.BytesIO()
            Image.fromarray(image).save(buffer, format="JPEG")
            return base64.b64encode(buffer.getvalue()).decode("utf-8")
        raise TypeError("Input must be a file path or a NumPy array.")

    async def chat_with_image(
        self, prompt: str, image: str, retry_left: int = 3
    ) -> str:
        """Send *prompt* plus one image to the chat model.

        Retries up to *retry_left* times with a 1s pause on any API error;
        returns "" when every attempt fails.
        """
        base64_image = self.encode_image(image=image)
        model_kwargs = {
            "model": self.model_name,
            "temperature": self.temperature,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
        }
        try:
            response = await self.client.chat.completions.create(**model_kwargs)
            return response.choices[0].message.content
        except Exception as e:
            if retry_left > 0:
                logger.warning(f"OpenAI API failed: {e}. Retrying.")
                await asyncio.sleep(1)
                return await self.chat_with_image(prompt, image, retry_left - 1)
            logger.error(f"OpenAI API failed: {e}. Returning empty string.")
            return ""
305
-
306
-
307
# ==============================================================================
# PART 2: 3D PROCESSING LOGIC (FROM NOTEBOOK)
# ==============================================================================

# Module-level singletons: LLM chat service, multi-view projector, and the
# two embedding models (CLIP for images, OpenAI for text).
llm_service = OpenAIService()
pc_views = Realistic_Projection()
clip_embedding_model = ClipEmbedding(embed_batch_size=1536)
text_embedding_model = OpenAIEmbedding(
    mode=OpenAIEmbeddingMode.TEXT_SEARCH_MODE,
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
    dimensions=1536,  # matches the CLIP batch size above; TODO confirm both must agree
)
321
 
322
 
323
# Convert STEP/FCStd files to OBJ.
def convert_step_to_obj_with_freecad(step_path, obj_path):
    """Convert a CAD file to OBJ by running FreeCAD headless.

    Args:
        step_path: input file; extension decides the conversion script
            (.step/.igs/.iges via Part, .fcstd via FreeCAD.open).
        obj_path: destination mesh path passed to Mesh.export.

    Raises:
        Exception: for unsupported extensions. FreeCAD failures are only
        logged, not raised.
    """
    freecad_executable = "/usr/bin/freecadcmd"
    _, ext = os.path.splitext(step_path)
    ext = ext.lower()
    script_template = ""
    if ext in FREECAD_LOW_LEVEL_FORMAT:
        # Import the exchange-format shape into a fresh document, then export.
        script_template = "import FreeCAD, Part, Mesh; doc = FreeCAD.newDocument(); shape = Part.read('{step}'); obj = doc.addObject('Part::Feature', 'MyPart'); obj.Shape = shape; doc.recompute(); Mesh.export([obj], '{obj}')"
    elif ext in FREECAD_NATIVE_FORMAT:
        # Open the native document and export every object that has a Shape.
        script_template = "import FreeCAD, Mesh; doc = FreeCAD.open('{step}'); to_export = [o for o in doc.Objects if hasattr(o, 'Shape')]; Mesh.export(to_export, '{obj}')"
    else:
        raise Exception(f"Unsupported format for conversion: {ext}")

    # NOTE(review): str.format on raw paths breaks for paths containing
    # braces or single quotes — confirm upload paths are sanitized upstream.
    python_script = script_template.format(step=step_path, obj=obj_path)
    command = [freecad_executable, "-c", python_script]
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        logger.error(
            f"FreeCAD conversion failed for {step_path}. Stderr: {stderr.decode()}"
        )
344
 
 
 
 
 
 
 
 
 
 
345
 
 
 
346
  def convert_to_obj(file: str) -> str:
347
  if file is None:
348
  return None
349
  logger.info(f"Converting {file} to .obj")
 
350
  prefix_path, ext = os.path.splitext(file)
351
  ext = ext.lower()
352
  if ext in FREECAD_LOW_LEVEL_FORMAT + FREECAD_NATIVE_FORMAT:
@@ -355,61 +119,364 @@ def convert_to_obj(file: str) -> str:
355
  convert_step_to_obj_with_freecad(file, response_path)
356
  return response_path
357
  elif ext in GRADIO_3D_MODEL_DEFAULT_FORMAT:
358
- return file
359
- raise Exception(f"Cannot convert file type {ext}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
 
361
 
362
# Render multi-view depth images.
def render_depth_images_from_obj(obj_path: str, imsize: int = 512) -> List[np.ndarray]:
    """Render one depth image per fixed viewpoint for an OBJ mesh.

    Args:
        obj_path: path to a mesh trimesh can load.
        imsize: output image side length after bilinear resizing.

    Returns:
        List of HxWxC uint8-style numpy images, one per view.
    """
    mesh = trimesh.load_mesh(obj_path)
    # Use the raw vertices as the point cloud (no face sampling).
    points: Tensor = torch.tensor(mesh.vertices).float().unsqueeze(0)
    images: Tensor = pc_views.get_img(points)
    images = torch.nn.functional.interpolate(
        images, size=(imsize, imsize), mode="bilinear", align_corners=True
    )
    return [np.array(TF.to_pil_image(img.cpu())) for img in images]
 
 
 
371
 
372
 
373
def aggregate_images(
    np_images: list[np.ndarray], n_rows: int = 2, n_cols: int = 5
) -> np.ndarray:
    """Tile up to n_rows*n_cols equally sized images into one grid image.

    Args:
        np_images: non-empty list of images, all with identical (H, W, C)
            shape and dtype; images are placed row-major.
        n_rows: number of tile rows.
        n_cols: number of tile columns.

    Returns:
        Array of shape (H*n_rows, W*n_cols, C); unfilled tiles stay zero.

    Raises:
        ValueError: if *np_images* is empty (the original raised an opaque
            IndexError here).
    """
    if not np_images:
        raise ValueError("aggregate_images requires at least one image")
    img_h, img_w, channels = np_images[0].shape
    agg_img = np.zeros(
        (img_h * n_rows, img_w * n_cols, channels), dtype=np_images[0].dtype
    )
    for i, img in enumerate(np_images):
        row, col = i // n_cols, i % n_cols
        agg_img[row * img_h : (row + 1) * img_h, col * img_w : (col + 1) * img_w] = img
    return agg_img
 
 
 
384
 
 
385
 
386
# Prompt used to generate a description from the aggregated depth-map image.
DESCRIPTION_AGGREGATED_DEPTH_MAP_PROMPT = "You are a manufacturing expert. Given these multi-view depth maps, extract all possible special features relevant to manufacturing. Provide a detailed, structured analysis covering geometry, materials, manufacturing processes, and assembly features. If a feature is not visible, state 'Not visible'."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
 
390
async def generate_description_from_aggregated_depth_map(np_image: np.ndarray) -> str:
    """Ask the LLM to describe the aggregated multi-view depth-map image."""
    return await llm_service.chat_with_image(
        prompt=DESCRIPTION_AGGREGATED_DEPTH_MAP_PROMPT, image=np_image
    )
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
 
396
async def aget_image_embedding_from_np_image(np_image: np.ndarray):
    """Compute a CLIP image embedding for an in-memory image.

    The CLIP client only accepts file paths, so the array is written to a
    temporary PNG which is always removed afterwards.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
        Image.fromarray(np_image).save(temp_file.name)
    try:
        image_embedding = await clip_embedding_model.aget_image_embedding(
            temp_file.name
        )
    finally:
        # BUGFIX: the original skipped this cleanup when the embedding call
        # raised, leaking one temp file per failure.
        os.remove(temp_file.name)
    return image_embedding
404
 
405
 
406
- # Embedding 3D Object
407
  async def embedding_3d_object(obj_path: str) -> Dict[str, Any]:
 
408
  depth_images = render_depth_images_from_obj(obj_path=obj_path)
 
409
  aggregated_image = aggregate_images(depth_images)
 
410
  description = await generate_description_from_aggregated_depth_map(
411
  np_image=aggregated_image
412
  )
 
413
  image_embedding = await aget_image_embedding_from_np_image(
414
  np_image=aggregated_image
415
  )
@@ -421,250 +488,205 @@ async def embedding_3d_object(obj_path: str) -> Dict[str, Any]:
421
  }
422
 
423
 
424
# Extract metadata from the STEP file header.
def extract_step_metadata(file_path):
    """Parse FILE_DESCRIPTION / FILE_NAME entries from a STEP file header.

    Args:
        file_path: path to a STEP file; read as text, decode errors ignored.

    Returns:
        Dict with any of "Description", "FileName", "OriginatingSystem";
        empty dict when nothing matched or the file could not be read.
    """
    metadata = {}
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            content = f.read()
        # FILE_DESCRIPTION(('...'), '...') — first group is the description list.
        desc_match = re.search(
            r"FILE_DESCRIPTION\s*\(\s*\((.*?)\),\s*'(.*?)'\);", content, re.DOTALL
        )
        if desc_match:
            metadata["Description"] = desc_match.group(1).replace("'", "")
        # FILE_NAME('name', 'timestamp', ..., 'originating_system', ...)
        name_match = re.search(
            r"FILE_NAME\s*\(\s*'(.*?)',.*?,'(.*?)'", content, re.DOTALL
        )
        if name_match:
            metadata["FileName"], metadata["OriginatingSystem"] = (
                name_match.group(1),
                name_match.group(2),
            )
    except Exception as e:
        # Best-effort: a missing/corrupt file just yields empty metadata.
        logger.error(f"Failed to read STEP file: {e}")
    return metadata
446
-
447
-
448
def dict_to_markdown(metadata: dict) -> str:
    """Render a metadata dict as one "key: value" line per entry.

    BUGFIX: the original joined entries with "\\n" (a literal backslash-n),
    so the UI showed the two characters "\\n" instead of a line break; join
    with a real newline instead.
    """
    return "\n".join(f"{key}: {value}" for key, value in metadata.items())
450
 
451
 
452
def parse_3d_file(original_filepath: str):
    """Return a human-readable metadata string for *original_filepath*.

    Only STEP files have a metadata parser today; any other type (or a
    missing selection) yields a placeholder message.
    """
    if original_filepath is None:
        return "No file selected."

    is_step_file = original_filepath.lower().endswith(".step")
    if not is_step_file:
        logger.warning(f"No metadata parser for file {original_filepath}")
        return "No metadata found."

    meta = extract_step_metadata(original_filepath)
    if meta:
        return dict_to_markdown(meta)
    return "No metadata found in STEP file."
460
-
461
-
462
# ==============================================================================
# PART 3: GRADIO APP LOGIC
# ==============================================================================


async def accumulate_and_embedding(input_files, file_list, embedding_dict):
    """Gradio upload handler: embed newly uploaded files, refresh the dropdown.

    Args:
        input_files: file object(s) from the gr.File component.
        file_list: previously processed file objects (gr.State).
        embedding_dict: obj_path -> embedding data mapping (gr.State).

    Returns:
        (all uploaded files, dropdown update selecting the last file,
        updated embedding dict).
    """
    if not isinstance(input_files, list):
        input_files = [input_files]
    # Only embed files we have not seen in a previous upload.
    new_files = [
        f.name for f in input_files if f.name not in [fi.name for fi in file_list]
    ]

    for file_path in new_files:
        logger.info(f"Processing new upload file: {file_path}")
        try:
            obj_path = convert_to_obj(file_path)
            embeddings = await embedding_3d_object(obj_path)
            if obj_path not in embedding_dict:
                embedding_dict[obj_path] = {}
            embedding_dict[obj_path].update(embeddings)
        except Exception as e:
            # Best-effort: skip the failing file and warn the user in the UI.
            logger.error(f"Failed to process {file_path}: {e}")
            gr.Warning(f"Could not process file: {os.path.basename(file_path)}")

    all_file_paths = [f.name for f in input_files]
    return (
        input_files,
        gr.update(
            choices=all_file_paths, value=all_file_paths[-1] if all_file_paths else None
        ),
        embedding_dict,
    )
494
-
495
-
496
def render_3D_object(filepath) -> Tuple[str, str]:
    """Convert *filepath* to a Gradio-renderable OBJ.

    Returns:
        (obj_path, original_filepath), or (None, None) when no file is
        selected or the conversion fails.
    """
    if not filepath:
        return None, None
    try:
        obj_path = convert_to_obj(filepath)
        return obj_path, filepath
    except Exception as e:
        logger.error(f"Failed to render {filepath}: {e}")
        gr.Warning(f"Could not render file: {os.path.basename(filepath)}")
        return None, None
506
 
 
 
507
 
508
def render_3D_metadata(
    original_filepath: str, obj_path: str, embedding_dict: dict
) -> Tuple[str, str]:
    """Fetch the metadata string and AI description for the selected file.

    Returns:
        (metadata markdown, description), with placeholder text when the
        file is missing or the description has not been generated yet.
    """
    if not original_filepath or not obj_path:
        return "No file selected.", "No description found."
    metadata = parse_3d_file(original_filepath=original_filepath)
    description = embedding_dict.get(obj_path, {}).get(
        "description", "Description not generated yet."
    )
    return metadata, description
518
 
 
519
 
520
def find_top_k_similar(query_embedding, embedding_dict, key, top_k=4):
    """Rank stored embeddings by cosine similarity to *query_embedding*.

    Args:
        query_embedding: 1-D embedding vector of the query.
        embedding_dict: path -> data mapping; only entries containing *key*
            participate in the search.
        key: which embedding field to compare ("image_embedding" / "text_embedding").
        top_k: number of results to return.

    Returns:
        Flat list: top_k file paths (padded with None) followed by their
        top_k basenames (padded with "-").
    """
    candidates = {
        path: data[key] for path, data in embedding_dict.items() if key in data
    }
    if not candidates:
        gr.Warning("No embeddings available for search.")
        return [None] * top_k + ["-"] * top_k

    paths = list(candidates.keys())
    matrix = np.array(list(candidates.values()))
    sims = cosine_similarity(query_embedding.reshape(1, -1), matrix)[0]

    # Best matches first; sort is stable so ties keep insertion order.
    ranked = sorted(zip(paths, sims), key=lambda pair: pair[1], reverse=True)[:top_k]

    top_paths = [path for path, _ in ranked]
    top_names = [os.path.basename(path) for path, _ in ranked]
    # Pad to exactly top_k slots so the fixed UI components always fill.
    padding = top_k - len(top_paths)
    return top_paths + [None] * padding + top_names + ["-"] * padding
542
 
543
 
544
def search_3D_similarity(filepath: str, embedding_dict: dict, top_k: int = 4):
    """Shape search: rank stored objects against *filepath*'s image embedding.

    Returns:
        top_k paths + top_k basenames (see find_top_k_similar), or
        placeholder values when the selected file has no embedding yet.
    """
    if (
        not filepath
        or filepath not in embedding_dict
        or "image_embedding" not in embedding_dict[filepath]
    ):
        gr.Warning("Please select a file with a generated embedding first.")
        return [None] * top_k + ["-"] * top_k

    query_embedding = np.array(embedding_dict[filepath]["image_embedding"])
    # Exclude the query file itself from the search
    search_dict = {k: v for k, v in embedding_dict.items() if k != filepath}
    return find_top_k_similar(query_embedding, search_dict, "image_embedding", top_k)
557
-
558
-
559
def query_3D_object(query: str, embedding_dict: dict, top_k: int = 4):
    """Text search: rank stored objects against a free-text query.

    Returns:
        top_k paths + top_k basenames (see find_top_k_similar), or
        placeholder values when the query/store is empty.
    """
    if not query.strip():
        gr.Warning("Query cannot be empty!")
        return [None] * top_k + ["-"] * top_k
    if len(embedding_dict) < 1:
        gr.Warning("Please upload and process at least one 3D file.")
        return [None] * top_k + ["-"] * top_k

    query_embedding = np.array(text_embedding_model.get_text_embedding(text=query))
    # Compare against the text_embedding field (not the CLIP image one).
    return find_top_k_similar(query_embedding, embedding_dict, "text_embedding", top_k)
569
 
570
 
571
- # ==============================================================================
572
- # PHẦN 4: GIAO DIỆN GRADIO
573
- # ==============================================================================
574
-
575
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
576
- gr.Markdown("# 🚀 Demo Tìm kiếm và Truy vấn CAD 3D")
577
- gr.Markdown(
578
- "Tải lên các file 3D (STEP, FCStd, OBJ, etc.), hệ thống sẽ tự động 'hiểu' và cho phép bạn tìm kiếm theo hình dạng hoặc mô tả văn bản."
 
579
  )
580
-
581
- # State variables
582
- file_state = gr.State([])
583
- embedding_store = gr.State({})
584
-
585
- with gr.Row():
586
- with gr.Column(scale=1):
587
- file_input = gr.File(
588
- file_count="multiple",
589
- label="1. Tải lên File 3D",
590
- file_types=VALID_FILE_TYPES,
591
- )
592
- file_dropdown = gr.Dropdown(
593
- label="2. Chọn File để xem và tìm kiếm", interactive=True
594
- )
595
- sim_button = gr.Button("🔍 Tìm kiếm Tương tự", variant="primary")
596
- query_input = gr.Textbox(
597
- label="Hoặc, truy vấn bằng văn bản",
598
- placeholder="ví dụ: một bộ phận có hai lỗ xuyên...",
599
- )
600
- query_button = gr.Button("💬 Tìm kiếm theo Văn bản", variant="primary")
601
-
602
- with gr.Column(scale=2):
603
- gr.Markdown("### **Trình xem và Thông tin Chi tiết**")
604
- model_render = gr.Model3D(label="Mô hình 3D", height=400, interactive=False)
605
- model_hidden_filepath = gr.Textbox(visible=False)
606
- original_hidden_filepath = gr.Textbox(visible=False)
607
- with gr.Accordion("📝 Mô tả & Metadata", open=False):
608
- description_render = gr.Textbox(label="Mô tả (tạo bởi AI)", lines=8)
609
- metadata_render = gr.Textbox(
610
- label="Metadata (trích xuất từ file)", lines=4
611
- )
612
-
613
  with gr.Row():
614
- gr.Markdown("---")
615
- gr.Markdown("### **Kết quả Tìm kiếm**")
 
 
 
 
 
 
616
 
617
  with gr.Row():
618
  with gr.Column():
619
- gr.Markdown("#### Tương tự về Hình dạng")
 
 
620
  with gr.Row():
621
- model_s_1 = gr.Model3D(label="Top 1", interactive=False)
622
- model_s_2 = gr.Model3D(label="Top 2", interactive=False)
 
 
 
 
 
 
 
623
  with gr.Row():
624
- model_s_3 = gr.Model3D(label="Top 3", interactive=False)
625
- model_s_4 = gr.Model3D(label="Top 4", interactive=False)
 
 
 
 
 
626
  with gr.Column():
627
- gr.Markdown("#### Tương tự về Văn bản")
 
 
 
 
628
  with gr.Row():
629
- model_q_1 = gr.Model3D(label="Top 1", interactive=False)
630
- model_q_2 = gr.Model3D(label="Top 2", interactive=False)
 
 
 
 
631
  with gr.Row():
632
- model_q_3 = gr.Model3D(label="Top 3", interactive=False)
633
- model_q_4 = gr.Model3D(label="Top 4", interactive=False)
 
 
 
 
 
 
 
 
634
 
635
- # Event Handlers
636
- file_input.upload(
637
  fn=accumulate_and_embedding,
638
  inputs=[file_input, file_state, embedding_store],
639
  outputs=[file_state, file_dropdown, embedding_store],
640
  )
641
-
642
- file_dropdown.change(
643
- fn=render_3D_object,
644
- inputs=file_dropdown,
645
- outputs=[model_render, original_hidden_filepath],
646
- ).then(
647
- fn=render_3D_metadata,
648
- inputs=[original_hidden_filepath, model_render, embedding_store],
649
- outputs=[metadata_render, description_render],
 
 
 
 
 
650
  )
651
-
 
 
 
 
 
 
 
 
 
 
 
 
 
652
  sim_button.click(
653
- fn=search_3D_similarity,
654
- inputs=[model_render, embedding_store],
655
- outputs=[
656
  model_s_1,
657
  model_s_2,
658
  model_s_3,
659
  model_s_4,
660
- ], # Chỉ cần cập nhật model, không cần button
 
 
 
 
661
  )
662
-
663
- query_button.click(
664
- fn=query_3D_object,
665
- inputs=[query_input, embedding_store],
666
- outputs=[model_q_1, model_q_2, model_q_3, model_q_4],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
667
  )
668
 
669
  if __name__ == "__main__":
670
- demo.launch()
 
 
 
 
1
  import asyncio
2
+ import os
3
+ import platform
4
  import random
5
+ import subprocess # used to connect to FreeCAD via terminal sub process
6
+ import sys
 
 
7
  import tempfile
8
+ from typing import Any, Dict, List, Tuple
 
9
 
10
+ import gradio as gr # demo with gradio
 
11
  import numpy as np
12
  import torch
13
+ import torchvision.transforms.functional as TF
14
+ import trimesh
15
+ from llama_index.embeddings.clip import ClipEmbedding
16
+ from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingMode
17
  from loguru import logger
18
+ from PIL import Image
19
  from sklearn.metrics.pairwise import cosine_similarity
20
  from torch import Tensor
 
 
21
 
22
+ from llm_service import LLMService
23
+ from mv_utils_zs import Realistic_Projection
24
 
25
# BUGFIX: the original called os.environ.get(...) and discarded the result,
# which had no effect; setdefault actually applies the fallback cache dir.
os.environ.setdefault("GRADIO_TEMP_DIR", "gradio_cache")  # You must set it in `.env` file also
os_name = platform.system()

# FreeCAD conversion paths below only cover Linux and macOS.
if os_name == "Linux":
    print("Running on Linux")
elif os_name == "Darwin":
    print("Running on macOS")
else:
    print(f"Running on an unsupported OS: {os_name}")
34
 
35
# The Gradio 3D Model component default accept
GRADIO_3D_MODEL_DEFAULT_FORMAT = [".obj", ".glb", ".gltf", ".stl", ".splat", ".ply"]
# Extra formats requested by users (converted before rendering).
USER_REQUIRE_FORMAT = [".3dxml", ".step"]
# Exchange formats FreeCAD can read directly.
FREECAD_LOW_LEVEL_FORMAT = [".step", ".igs", ".iges"]
# FreeCAD's native document format.
FREECAD_NATIVE_FORMAT = [".fcstd"]
# Defaults to "" when unset; OpenAI-backed features then cannot authenticate.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

####################################################################################################################
# Transform high-level to low-level
####################################################################################################################
# 3D Component of Gradio only allow some kind of format to render in the UI. We need to transform if need it.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
 
48
def convert_step_to_obj_with_freecad(step_path, obj_path):
    """Convert a CAD file to OBJ by running FreeCAD in headless mode.

    Args:
        step_path: input file; .step/.igs/.iges go through Part.read,
            .fcstd through FreeCAD.open.
        obj_path: destination mesh path passed to Mesh.export.

    Returns:
        (stdout, stderr) of the FreeCAD subprocess, decoded as text.

    Raises:
        Exception: for unsupported extensions or an unsupported OS.
    """
    # Path to the FreeCAD executable, per platform.
    if os_name == "Linux":
        freecad_executable = "/usr/bin/freecadcmd"  # freecadcmd
    elif os_name == "Darwin":
        freecad_executable = "/Applications/FreeCAD.app/Contents/MacOS/FreeCAD"
    else:
        # BUGFIX: previously this fell through with freecad_executable
        # unbound, raising UnboundLocalError much later (e.g. on Windows).
        logger.error(f"FreeCAD conversion is not supported on {os_name}")
        raise Exception(f"FreeCAD conversion is not supported on {os_name}")

    # Python script to be executed by FreeCAD
    _, ext = os.path.splitext(step_path)
    ext = ext.lower()
    if ext in FREECAD_LOW_LEVEL_FORMAT:
        python_script = """
import FreeCAD
import Part
import Mesh

doc = FreeCAD.newDocument()
shape = Part.read("{step_path}")
obj = doc.addObject("Part::Feature", "MyPart")
obj.Shape = shape
doc.recompute()

Mesh.export([obj], "{obj_path}")
""".format(step_path=step_path, obj_path=obj_path)
    elif ext in FREECAD_NATIVE_FORMAT:
        python_script = """
import FreeCAD
import Part
import Mesh

doc = FreeCAD.open("{step_path}")
to_export = [o for o in doc.Objects if hasattr(o, 'Shape')]
Mesh.export(to_export, "{obj_path}")
""".format(step_path=step_path, obj_path=obj_path)
    else:
        logger.error(f"Not support {ext} format")
        raise Exception(f"Not support {ext} format")

    # Command to run FreeCAD in headless mode with the provided Python script
    command = [freecad_executable, "-c", python_script]

    # Run the command using subprocess and capture the output and errors.
    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        # Surface conversion failures in the logs; callers still receive stderr.
        logger.error(f"FreeCAD conversion failed for {step_path}: {stderr.decode()}")
    return stdout.decode(), stderr.decode()
95
+
 
 
96
 
97
+ # input_path = "/Users/tridoan/Spartan/Datum/service-ai/poc/resources/notebooks/3d_files/Switches/TS6-THT_H-5.0.step" # ok
98
+ # input_path = "/Users/tridoan/Spartan/Datum/service-ai/poc/resources/notebooks/3d_files/engrenagens-5.snapshot.6/Engre_con_Z16_mod_1_5-Body.stl" # ok
99
+ # input_path = "/Users/tridoan/Spartan/Datum/service-ai/poc/resources/notebooks/3d_files/nema-17-stepper-motors-coaxial-60-48-39-23mm-1.snapshot.3/NEMA 17 Stepper Motor 23mm-NEMA 17 Stepper Motor 23mm.step" # ok
100
+ # input_path = "/Users/tridoan/Spartan/Datum/service-ai/poc/resources/notebooks/3d_files/engrenagens-5.snapshot.6/Engre_con_Z16_mod_1_5.FCStd" # ok
101
+ # input_path = "/Users/tridoan/Spartan/Datum/service-ai/poc/resources/notebooks/3d_files/engrenagens-5.snapshot.6/Engre_reta_Z_15_mod_1.FCStd" # ok
102
+ # input_path = "/content/TS6-THT_H-5.0.step"
103
+ # print(".".join(input_path.split(".")[:-1]) + ".obj")
104
+ # stdout, stderr = convert_step_to_obj_with_freecad(input_path, ".".join(input_path.split(".")[:-1]) + ".obj")
105
+ # stderr
106
 
107
+
108
+ # Dummy converter from STEP/3DXML to OBJ (replace with real converter)
109
  def convert_to_obj(file: str) -> str:
110
  if file is None:
111
  return None
112
  logger.info(f"Converting {file} to .obj")
113
+ response_path = file
114
  prefix_path, ext = os.path.splitext(file)
115
  ext = ext.lower()
116
  if ext in FREECAD_LOW_LEVEL_FORMAT + FREECAD_NATIVE_FORMAT:
 
119
  convert_step_to_obj_with_freecad(file, response_path)
120
  return response_path
121
  elif ext in GRADIO_3D_MODEL_DEFAULT_FORMAT:
122
+ return response_path
123
+ else:
124
+ logger.warning(f"Do nothing at convert_to_obj with file {file}")
125
+ raise Exception(f"Do nothing at convert_to_obj with file {file}")
126
+
127
+
128
+ ####################################################################################################################
129
+ # Feature Extraction
130
+ ####################################################################################################################
131
+ # We have 2 approaches to extract 3D's features:
132
+ # - By algorithm which extract something like volume, surface
133
+ # - By 3D deep learning model, which embed the 3D object into vector representing 3D's features
134
+
135
+
136
def extract_geometric_features(obj_path: str):  # deprecated
    """Extract simple geometric features (volume, surface area) from a mesh.

    Deprecated in favor of the embedding-based pipeline; kept for reference.

    Args:
        obj_path: path to a mesh trimesh can load.

    Returns:
        numpy array of shape (1, 2) holding [volume, surface_area], or None
        when the file cannot be read.
    """
    try:
        mesh = trimesh.load(obj_path)
        volume = mesh.volume  # type: ignore
        surface_area = mesh.area  # type: ignore
        # Use the module logger instead of bare print() for consistency with
        # the rest of the file.
        logger.debug(f"volume={volume} surface_area={surface_area}")
        # Add other features depending on your needs
        features = np.array([volume, surface_area]).reshape(1, -1)
        return features
    except Exception as e:
        logger.error(f"Error reading file {obj_path}: {e}")
        return None
149
+
150
+
151
+ ####################################################################################################################
152
+ # Similarity Search
153
+ ####################################################################################################################
154
+
155
+
156
def search_3D_similarity(filepath: str, embedding_dict: dict, top_k: int = 4):
    """Find the ``top_k`` most visually similar 3D objects to ``filepath``.

    Similarity is the cosine similarity between the stored multi-view
    depth-map image embeddings.

    Args:
        filepath: Key of the query object inside ``embedding_dict``.
        embedding_dict: In-memory store mapping obj path -> embedding data.
        top_k: Number of neighbours to return.

    Returns:
        A flat list of ``2 * top_k`` items: the top_k neighbour file paths
        followed by their basenames (consumed by paired Gradio outputs).
        Missing slots are padded with ``""``.

    Raises:
        gr.Error: When fewer than 5 files have been uploaded.
        ValueError: When the query object has no stored image embedding.
    """
    if len(embedding_dict) < 5:
        raise gr.Error("Require at least 5 3D files to search similarity")
    if (
        filepath not in embedding_dict
        or "image_embedding" not in embedding_dict[filepath]
    ):
        raise ValueError(f"No embedding found for {filepath}")

    query_vec = np.array(
        embedding_dict[filepath]["image_embedding"], dtype=float
    ).ravel()

    # Candidates: every other object that has an image embedding.
    valid_items = [
        (fp, data["image_embedding"])
        for fp, data in embedding_dict.items()
        if "image_embedding" in data and fp != filepath
    ]
    filepaths = [fp for fp, _ in valid_items]
    feature_matrix = np.array([feat for _, feat in valid_items], dtype=float)  # (N, D)

    # Cosine similarity via numpy (removes the sklearn dependency); guard
    # against zero-norm vectors to avoid division by zero.
    norms = np.linalg.norm(feature_matrix, axis=1) * np.linalg.norm(query_vec)
    similarities = feature_matrix @ query_vec / np.where(norms == 0.0, 1.0, norms)

    scores = sorted(zip(filepaths, similarities), key=lambda x: x[1], reverse=True)

    # Pad until exactly top_k entries exist.  The original code appended at
    # most ONE placeholder, which was not enough when several candidates
    # were missing embeddings, leaving the Gradio outputs mismatched.
    while len(scores) < top_k:
        scores.append(("", 0.0))

    return [x[0] for x in scores[:top_k]] + [
        os.path.basename(x[0]) for x in scores[:top_k]
    ]
+ ]
188
+
189
+
190
+ ####################################################################################################################
191
+ # Text-based Query
192
+ ####################################################################################################################
193
+
194
+
195
def query_3D_object(query: str, embedding_dict: dict, top_k: int = 4):
    """Rank stored 3D objects against a natural-language query.

    The query is embedded with the OpenAI text-embedding model and compared
    (cosine similarity) against the stored text embeddings of each object's
    generated description.

    Args:
        query: Free-text search query; must be non-empty.
        embedding_dict: In-memory store mapping obj path -> embedding data.
        top_k: Number of results to return.

    Returns:
        A flat list of ``2 * top_k`` items: the top_k file paths followed
        by their basenames (consumed by paired Gradio outputs).  Missing
        slots are padded with ``""``.

    Raises:
        gr.Error: On an empty query or fewer than 4 uploaded files.
    """
    if query == "":
        raise gr.Error("Query cannot be empty!")
    if len(embedding_dict) < 4:
        raise gr.Error("Require at least 4 3D files to query by features")

    query_vec = np.array(
        text_embedding_model.get_text_embedding(text=query), dtype=float
    ).ravel()

    # Candidates: every object that has a text embedding stored.
    valid_items = [
        (fp, data["text_embedding"])
        for fp, data in embedding_dict.items()
        if "text_embedding" in data
    ]
    filepaths = [fp for fp, _ in valid_items]
    feature_matrix = np.array([feat for _, feat in valid_items], dtype=float)  # (N, D)

    # Cosine similarity via numpy; guard against zero-norm vectors.
    norms = np.linalg.norm(feature_matrix, axis=1) * np.linalg.norm(query_vec)
    similarities = feature_matrix @ query_vec / np.where(norms == 0.0, 1.0, norms)

    scores = sorted(zip(filepaths, similarities), key=lambda x: x[1], reverse=True)

    # Pad until exactly top_k entries exist.  The original code appended at
    # most ONE placeholder, which left the paired Gradio outputs short when
    # several entries were missing text embeddings.
    while len(scores) < top_k:
        scores.append(("", 0.0))

    return [x[0] for x in scores[:top_k]] + [
        os.path.basename(x[0]) for x in scores[:top_k]
    ]
+ ]
226
+
227
+
228
+ ####################################################################################################################
229
+ # Metadata Extraction
230
+ ####################################################################################################################
231
+
232
+ import os
233
+ import xml.etree.ElementTree as ET
234
+ import zipfile
235
+
236
+
237
def extract_header_from_3dxml(file_path):
    """Read the ``<Header>`` element of a .3DXML archive.

    A .3DXML file is a zip archive of XML documents.  The archive is
    unpacked into a throwaway temporary directory, every ``*.xml`` /
    ``*.3dxml`` member is parsed, and the children of any ``<Header>``
    element found are collected as ``{tag: text}``.

    Note: the original implementation extracted into a fixed
    ``tmp_3dxml_extract`` folder that was never cleaned up, so stale files
    from a previously-inspected archive could leak into the result; using
    a per-call temp directory fixes both the leak and the contamination.
    """
    header_info = {}

    with tempfile.TemporaryDirectory() as extract_dir:
        # Step 1: Unzip the .3DXML file.
        with zipfile.ZipFile(file_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)

        # Step 2: Find and parse the XML containing <Header>.
        for root, dirs, files in os.walk(extract_dir):
            for file in files:
                if file.endswith((".3dxml", ".xml")):
                    xml_path = os.path.join(root, file)
                    try:
                        tree = ET.parse(xml_path)
                        root_el = tree.getroot()
                        # Namespace comes from the root tag, e.g.
                        # "{http://...}Model" -> "http://...".
                        ns = {"ns": root_el.tag.split("}")[0].strip("{")}

                        header = root_el.find("ns:Header", ns)
                        if header is not None:
                            for child in header:
                                tag = child.tag.split("}")[-1]  # drop namespace
                                value = child.text.strip() if child.text else ""
                                header_info[tag] = value
                    except Exception as e:
                        print(f"Failed to parse {file}: {e}")

    return header_info
266
+
267
+
268
+ #######################################################################################################################
269
+
270
+ import re
271
+
272
+
273
def extract_step_metadata(file_path):
    """Pull header metadata out of a STEP (ISO 10303-21) file.

    STEP files carry a plain-text HEADER section with FILE_DESCRIPTION,
    FILE_NAME and FILE_SCHEMA records; each is matched with a regular
    expression.  Records that cannot be matched are simply omitted.

    Returns:
        dict mapping metadata keys (Description, FileName, Authors,
        Schema, ...) to their string values; empty on read failure.
    """
    metadata = {}

    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as fh:
            text = fh.read()

        description = re.search(
            r"FILE_DESCRIPTION\s*\(\s*\((.*?)\),\s*\'(.*?)\'\);", text, re.DOTALL
        )
        if description:
            metadata["Description"] = description.group(1).replace("'", "")
            metadata["Description_Level"] = description.group(2)

        file_name = re.search(
            r"FILE_NAME\s*\(\s*'(.*?)',\s*'(.*?)',\s*\((.*?)\),\s*\((.*?)\),\s*'(.*?)',\s*'(.*?)',\s*'(.*?)'\s*\);",
            text,
            re.DOTALL,
        )
        if file_name:
            # Group index -> output key; the two list-valued groups need
            # their single quotes stripped.
            for key, idx in (
                ("FileName", 1),
                ("Created", 2),
                ("Authors", 3),
                ("Organizations", 4),
                ("Preprocessor", 5),
                ("OriginatingSystem", 6),
                ("Authorization", 7),
            ):
                value = file_name.group(idx)
                if key in ("Authors", "Organizations"):
                    value = value.replace("'", "")
                metadata[key] = value

        schema = re.search(r"FILE_SCHEMA\s*\(\s*\((.*?)\)\s*\);", text, re.DOTALL)
        if schema:
            metadata["Schema"] = schema.group(1).replace("'", "")

    except Exception as e:
        logger.error(f"Failed to read STEP file: {e}")

    return metadata
314
+
315
+
316
+ #######################################################################################################################
317
+
318
+
319
def dict_to_markdown(metadata: dict) -> str:
    """Render a metadata dict as newline-separated "key: value" lines."""
    lines = []
    for key, value in metadata.items():
        lines.append(f"{key}: {value}")
    return "\n".join(lines)
321
+
322
+
323
+ #######################################################################################################################
324
+
325
+
326
+ # Dummy parser - Replace with real parser
327
def parse_3d_file(original_filepath: str):
    """Extract printable metadata from a 3D CAD file.

    Supports .3dxml archives and .step files.  Extension matching is
    case-insensitive (the original only recognised the exact lower/upper
    spellings, missing mixed-case names such as "part.Step").

    Returns:
        A human-readable string; "No file" when no path is given and
        "No metadata found!" for unsupported formats.
    """
    if original_filepath is None:
        return "No file"
    ext = os.path.splitext(original_filepath)[1].lower()
    if ext == ".3dxml":
        meta = extract_header_from_3dxml(original_filepath)
        return f"Parsed metadata: {dict_to_markdown(meta)}"
    if ext == ".step":
        meta = extract_step_metadata(original_filepath)
        return f"Parsed metadata: {dict_to_markdown(meta)}"
    logger.warning(f"No metadata found in the file {original_filepath}")
    return "No metadata found!"
340
+
341
+
342
def render_3D_metadata(
    original_filepath: str, obj_path: str, embedding_dict: dict
) -> Tuple[str, str]:
    """Return (parsed file metadata, stored LLM description) for the UI panes."""
    metadata_text = parse_3d_file(original_filepath=original_filepath)
    description = embedding_dict.get(obj_path, {}).get(
        "description", "No description found!"
    )
    return metadata_text, description
348
+
349
+
350
+ #######################################################################################################################
351
+ # https://github.com/yangyangyang127/PointCLIP_V2/blob/main/zeroshot_cls/trainers/zeroshot.py#L64
352
+ #######################################################################################################################
353
+
354
+
355
+ pc_views = Realistic_Projection()
356
 
357
 
 
358
def render_depth_images_from_obj(obj_path: str, imsize: int = 512) -> List[np.ndarray]:
    """Render the mesh's vertex cloud into multi-view depth images.

    The points are projected by the PointCLIP-V2 realistic projection
    (``pc_views``) and each rendered view is upsampled to imsize x imsize.
    """
    mesh = trimesh.load_mesh(obj_path)
    cloud: Tensor = torch.tensor(mesh.vertices).float()
    if cloud.ndim == 2:
        cloud = cloud.unsqueeze(0)  # add batch axis -> (1, N, 3)
    rendered: Tensor = pc_views.get_img(cloud)
    rendered = torch.nn.functional.interpolate(
        rendered, size=(imsize, imsize), mode="bilinear", align_corners=True
    )
    return [np.array(TF.to_pil_image(view.cpu())) for view in rendered]
371
 
372
 
373
def aggregate_images(
    np_images: list[np.ndarray], n_rows: int = 2, n_cols: int = 5
) -> np.ndarray:
    """Tile the per-view images into one n_rows x n_cols mosaic.

    All images must share the first image's height/width/channels; tiles
    are placed row-major in list order.
    """
    tile_h, tile_w = np_images[0].shape[:2]
    canvas = np.zeros(
        (tile_h * n_rows, tile_w * n_cols, np_images[0].shape[2]),
        dtype=np_images[0].dtype,
    )

    for idx, tile in enumerate(np_images):
        r, c = divmod(idx, n_cols)
        canvas[r * tile_h : (r + 1) * tile_h, c * tile_w : (c + 1) * tile_w] = tile

    return canvas
391
 
392
+
393
+ llm_service = LLMService.from_partner()
394
+ # llm_service.model_name = "o3-mini"
395
+
396
+ DESCRIPTION_AGGREGATED_DEPTH_MAP_PROMPT = """You are a manufacturing expert analyzing 3D objects for production purposes. Given a set of multi-view depth maps of a single object, extract all possible special features relevant to manufacturing.
397
+
398
+ Your output must follow the structured format provided below and be as complete and specific as possible, even if some features are inferred or uncertain.
399
+ ```
400
+ 🔎 Extracted Manufacturing Features from Depth Maps
401
+
402
+ 1. Geometric Features
403
+ Dimensions: <!-- List key dimensions such as height, width, depth, thickness, or aspect ratios. Use units if possible. Mention estimated ranges if exact values are unclear. -->
404
+ Notable Shapes: <!-- Describe the overall shape and form (e.g., cylindrical body with a tapered end, flat rectangular base, spherical top). Mention symmetry or irregularities. -->
405
+ Holes: <!-- Count and describe hole types (e.g., through-holes, blind holes), location if visible, and their arrangement or pattern (e.g., circular array, linear slot). -->
406
+ Surface Features: <!-- Include textures, fillets, chamfers, ribs, grooves, steps, and engravings. Identify raised or recessed areas that are not part of the base shape. -->
407
+ Other: <!-- Any other geometric characteristics not covered above (e.g., draft angles, deformation, cutouts). -->
408
+
409
+ 2. Material-Related Inferences
410
+ Likely Material: <!-- Infer from shape, thickness, or typical use cases (e.g., plastic, aluminum, cast iron). State if uncertain or not visible. -->
411
+ Surface Texture: <!-- Describe the expected finish (e.g., rough, matte, polished) based on depth gradients or edge sharpness. -->
412
+ Durability Hints: <!-- Mention any features that suggest mechanical strength or wear resistance (e.g., thick load-bearing sections, reinforcement patterns). -->
413
+
414
+ 3. Manufacturing-Related Features
415
+ Manufacturing Process: <!-- Suggest most likely processes (e.g., injection molding, CNC milling, casting) based on geometry and typical industry practices. -->
416
+ Draft Angles: <!-- Indicate presence and estimate angles if the object appears designed for mold release. -->
417
+ Undercuts: <!-- Identify any undercut areas that may require complex tooling or multi-part molds. -->
418
+ Mold Flow Considerations: <!-- Comment on how the material might flow during molding or casting, and whether the geometry supports or hinders it. -->
419
+
420
+ 4. Functional and Assembly Features
421
+ Mounting Points: <!-- Identify places where fasteners or brackets might attach (e.g., holes, bosses, flanges). -->
422
+ Jointing Features: <!-- Describe features used to join with other parts, such as snap fits, tabs, slots, dovetails, etc. -->
423
+ Alignment Aids: <!-- Note features like pins, grooves, or guide rails that help align components during assembly. -->
424
+ Modularity: <!-- Assess whether the object is likely part of a modular system based on interface shapes or repeated features. -->
425
+
426
+ 5. Inspection and Quality Features
427
+ Critical Dimensions: <!-- Highlight any dimensions likely to be functionally critical or require tight tolerance. -->
428
+ Surface Finish Zones: <!-- Point out areas that may require fine finishing or polishing for performance or cosmetic reasons. -->
429
+ Datums: <!-- Indicate flat surfaces or edges likely to serve as reference datums during measurement or machining. -->
430
+ Tolerances: <!-- Mention if any tolerances can be inferred, e.g., tight fits, loose clearances, or any standard class assumptions. -->
431
+
432
+ ```
433
+ If any feature cannot be determined from the depth maps, state “Not visible” or “Cannot be inferred.”
434
+ Use clear technical vocabulary appropriate for manufacturing and quality control."""
435
 
436
 
437
async def generate_description_from_aggregated_depth_map(np_image: np.ndarray) -> str:
    """Ask the LLM to describe manufacturing features of the depth-map mosaic."""
    encoded = llm_service.encode_image(image=np_image)
    return await llm_service.chat_with_image(
        prompt=DESCRIPTION_AGGREGATED_DEPTH_MAP_PROMPT, image=encoded
    )
441
+
442
+
443
+ clip_embedding_model = ClipEmbedding(
444
+ embed_batch_size=1536, # this parameter does not effect to the model
445
+ )
446
+ text_embedding_model = OpenAIEmbedding(
447
+ mode=OpenAIEmbeddingMode.TEXT_SEARCH_MODE,
448
+ model="text-embedding-3-small",
449
+ api_key=OPENAI_API_KEY,
450
+ dimensions=1536,
451
+ embed_batch_size=512, # default == 100
452
+ )
453
 
454
 
455
async def aget_image_embedding_from_np_image(np_image: np.ndarray):
    """Embed a rendered image with CLIP.

    The CLIP embedder only accepts file paths, so the array is written to a
    temporary PNG first.  The temp file is always removed — including when
    the embedding call fails (the original only deleted it on success,
    leaking one PNG per failed request).
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
        temp_file_path = temp_file.name
        # Convert the array to a PIL Image and persist it for the embedder.
        Image.fromarray(np_image).save(temp_file_path)
    try:
        return await clip_embedding_model.aget_image_embedding(temp_file_path)
    finally:
        os.remove(temp_file_path)
468
 
469
 
 
470
  async def embedding_3d_object(obj_path: str) -> Dict[str, Any]:
471
+ # get 10 depth images
472
  depth_images = render_depth_images_from_obj(obj_path=obj_path)
473
+ # aggregate to single image
474
  aggregated_image = aggregate_images(depth_images)
475
+ # description
476
  description = await generate_description_from_aggregated_depth_map(
477
  np_image=aggregated_image
478
  )
479
+ # embedding aggregated_image: np.ndarray and description: str
480
  image_embedding = await aget_image_embedding_from_np_image(
481
  np_image=aggregated_image
482
  )
 
488
  }
489
 
490
 
491
# Local directory holding demo models.  The sample list ships empty so the
# app starts with no preloaded files; uncomment entries to seed the dropdown.
BASE_SAMPLE_DIR = "/Users/tridoan/Spartan/Datum/service-ai/poc/3D/gradio_cache/"
sample_files = [
    # BASE_SAMPLE_DIR + "C5 Knuckle Object.obj",
    # BASE_SAMPLE_DIR + "NEMA 17 Stepper Motor 23mm-NEMA 17 Stepper Motor 23mm.obj",
    # BASE_SAMPLE_DIR + "TS6-THT_H-5.0.obj",
    # BASE_SAMPLE_DIR + "TS6-THT_H-11.0.obj"
]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
 
499
 
500
+ #######################################################################################################################
501
+ ## Accumulating and Rendering 3D
502
+ #######################################################################################################################
 
 
 
 
 
 
 
 
 
 
503
 
504
 
505
  async def accumulate_and_embedding(input_files, file_list, embedding_dict):
506
+ # accumulate
507
  if not isinstance(input_files, list):
508
  input_files = [input_files]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
 
510
+ all_files = input_files
511
+ new_files = input_files[len(file_list) :]
512
 
513
+ # embedding
514
+ for file_path in new_files:
515
+ logger.info("Processing new upload file:", file_path)
516
+ obj_path = convert_to_obj(file_path)
517
+ embeddings = await embedding_3d_object(obj_path)
518
+ if obj_path not in embedding_dict:
519
+ embedding_dict[obj_path] = {}
520
+ embedding_dict[obj_path]["description"] = embeddings["description"]
521
+ embedding_dict[obj_path]["image_embedding"] = embeddings["image_embedding"]
522
+ embedding_dict[obj_path]["text_embedding"] = embeddings["text_embedding"]
523
 
524
+ return all_files, gr.update(choices=all_files), embedding_dict
525
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
526
 
527
def select_file(filename, file_list):
    """Return a short preview of the entry in ``file_list`` whose ``.name``
    matches ``filename``, or "File not found." when nothing matches."""
    chosen = next((f for f in file_list if f.name == filename), None)
    if chosen is None:
        return "File not found."
    with open(chosen.name, "r", encoding="utf-8", errors="ignore") as handle:
        content = handle.read()
    return f"Selected: {chosen.name}\n---\n{content[:300]}..."
534
 
535
 
536
def render_3D_object(filepath) -> Tuple[str, str]:
    """Resolve a selected file to something ``gr.Model3D`` can display.

    Returns (renderable_path, original_path): formats Gradio renders
    natively pass through unchanged; CAD formats are converted to .obj;
    anything else is passed through as-is.
    """
    ext = os.path.splitext(filepath)[1].lower()
    convertible = (
        USER_REQUIRE_FORMAT + FREECAD_LOW_LEVEL_FORMAT + FREECAD_NATIVE_FORMAT
    )
    if ext in GRADIO_3D_MODEL_DEFAULT_FORMAT:
        return filepath, filepath
    if ext in convertible:
        return convert_to_obj(filepath), filepath
    return filepath, filepath
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
 
547
 
548
+ #######################################################################################################################
549
+ ## Launching Gradio server
550
+ #######################################################################################################################
551
+ valid_file_types = list(
552
+ set(
553
+ GRADIO_3D_MODEL_DEFAULT_FORMAT
554
+ + USER_REQUIRE_FORMAT
555
+ + FREECAD_NATIVE_FORMAT
556
+ + FREECAD_LOW_LEVEL_FORMAT
557
  )
558
+ )
559
+ valid_file_types = valid_file_types + [t.upper() for t in valid_file_types]
560
+ with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  with gr.Row():
562
+ file_state = gr.State(sample_files)
563
+ ###################################### !IMPORTANT #############################################################
564
+ embedding_store = gr.State({}) ####### !IMPORTANT. This is in memory vector database ##########################
565
+ file_input = gr.File(
566
+ file_count="multiple",
567
+ label="Upload files (You can append more)",
568
+ file_types=valid_file_types,
569
+ )
570
 
571
  with gr.Row():
572
  with gr.Column():
573
+ query_input = gr.Textbox(placeholder="Which 3D CAD contains 2 holes?")
574
+ query_button = gr.Button("Query Search")
575
+
576
  with gr.Row():
577
+ with gr.Row():
578
+ model_q_1 = gr.Model3D(
579
+ label="3D Top 1", interactive=False
580
+ ) # debugging
581
+ model_q_1_btn = gr.Button(value="3D Top 1", size="sm")
582
+ with gr.Row():
583
+ model_q_2 = gr.Model3D(label="3D Top 2", interactive=False)
584
+ model_q_2_btn = gr.Button(value="3D Top 2", size="sm")
585
+
586
  with gr.Row():
587
+ with gr.Row():
588
+ model_q_3 = gr.Model3D(label="3D Top 3", interactive=False)
589
+ model_q_3_btn = gr.Button(value="3D Top 3", size="sm")
590
+ with gr.Row():
591
+ model_q_4 = gr.Model3D(label="3D Top 4", interactive=False)
592
+ model_q_4_btn = gr.Button(value="3D Top 4", size="sm")
593
+
594
  with gr.Column():
595
+ model_render = gr.Model3D(label="3D", height=500, interactive=False)
596
+ model_hidden_filepath = gr.Textbox(visible=False)
597
+ description_render = gr.Textbox(label="Description", lines=6)
598
+ metadata_render = gr.Textbox(label="Metadata", lines=6)
599
+ sim_button = gr.Button("Similarity Search")
600
  with gr.Row():
601
+ with gr.Row():
602
+ model_s_1 = gr.Model3D(label="3D Sim 1", interactive=False)
603
+ model_s_1_btn = gr.Button(value="3D Sim 1", size="sm")
604
+ with gr.Row():
605
+ model_s_2 = gr.Model3D(label="3D Sim 2", interactive=False)
606
+ model_s_2_btn = gr.Button(value="3D Sim 2", size="sm")
607
  with gr.Row():
608
+ with gr.Row():
609
+ model_s_3 = gr.Model3D(label="3D Sim 3", interactive=False)
610
+ model_s_3_btn = gr.Button(value="3D Sim 3", size="sm")
611
+ with gr.Row():
612
+ model_s_4 = gr.Model3D(label="3D Sim 4", interactive=False)
613
+ model_s_4_btn = gr.Button(value="3D Sim 4", size="sm")
614
+ with gr.Column():
615
+ file_dropdown = gr.Dropdown(
616
+ label="Select a file to process", choices=sample_files, interactive=True
617
+ )
618
 
619
+ file_input.change(
 
620
  fn=accumulate_and_embedding,
621
  inputs=[file_input, file_state, embedding_store],
622
  outputs=[file_state, file_dropdown, embedding_store],
623
  )
624
+ # query button
625
+ query_button.click(
626
+ query_3D_object,
627
+ [query_input, embedding_store],
628
+ [
629
+ model_q_1,
630
+ model_q_2,
631
+ model_q_3,
632
+ model_q_4,
633
+ model_q_1_btn,
634
+ model_q_2_btn,
635
+ model_q_3_btn,
636
+ model_q_4_btn,
637
+ ],
638
  )
639
+ # model query
640
+ model_q_1_btn.click(
641
+ render_3D_object, model_q_1, [model_render, model_hidden_filepath]
642
+ )
643
+ model_q_2_btn.click(
644
+ render_3D_object, model_q_2, [model_render, model_hidden_filepath]
645
+ )
646
+ model_q_3_btn.click(
647
+ render_3D_object, model_q_3, [model_render, model_hidden_filepath]
648
+ )
649
+ model_q_4_btn.click(
650
+ render_3D_object, model_q_4, [model_render, model_hidden_filepath]
651
+ )
652
+ # sim button
653
  sim_button.click(
654
+ search_3D_similarity,
655
+ [model_render, embedding_store],
656
+ [
657
  model_s_1,
658
  model_s_2,
659
  model_s_3,
660
  model_s_4,
661
+ model_s_1_btn,
662
+ model_s_2_btn,
663
+ model_s_3_btn,
664
+ model_s_4_btn,
665
+ ],
666
  )
667
+ # model similarity
668
+ model_s_1_btn.click(
669
+ render_3D_object, model_s_1, [model_render, model_hidden_filepath]
670
+ )
671
+ model_s_2_btn.click(
672
+ render_3D_object, model_s_2, [model_render, model_hidden_filepath]
673
+ )
674
+ model_s_3_btn.click(
675
+ render_3D_object, model_s_3, [model_render, model_hidden_filepath]
676
+ )
677
+ model_s_4_btn.click(
678
+ render_3D_object, model_s_4, [model_render, model_hidden_filepath]
679
+ )
680
+ # drop down
681
+ file_dropdown.change(
682
+ render_3D_object, file_dropdown, [model_render, model_hidden_filepath]
683
+ )
684
+ # parse metadata
685
+ model_hidden_filepath.change(
686
+ render_3D_metadata,
687
+ [model_hidden_filepath, model_render, embedding_store],
688
+ [metadata_render, description_render],
689
  )
690
 
691
  if __name__ == "__main__":
692
+ demo.launch(share=True)
encode_image.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile encode_image.py
2
+ import base64
3
+ from typing import Union
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from PIL import Image
8
+
9
+ Image.MAX_IMAGE_PIXELS = None # Removes the limit, use with caution
10
+
11
+
12
def encode_image(image: Union[str, np.ndarray]) -> str:
    """
    Encode an image as a base64 string.

    Args:
        image (Union[str, np.ndarray]): Path to an image file, or a NumPy
            array holding the image data.

    Returns:
        str: Base64-encoded image string (arrays are JPEG-compressed first).

    Raises:
        TypeError: If the input is neither a path nor a NumPy array.
    """
    if isinstance(image, np.ndarray):
        # Compress the raw array to JPEG bytes before encoding.
        _, buffer = cv2.imencode(".jpg", image)
        return base64.b64encode(buffer).decode("utf-8")
    if isinstance(image, str):
        with open(image, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
    raise TypeError("Input must be a file path (str) or a NumPy array.")
llm_service.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile llm_service.py
2
+ import asyncio
3
+ import base64
4
+ import io
5
+ import os
6
+ from enum import Enum
7
+ from typing import List, Tuple, Union, cast
8
+
9
+ import cv2
10
+ import numpy as np
11
+ from openai import AsyncOpenAI
12
+ from PIL import Image
13
+ from loguru import logger
14
+
15
+ from encode_image import encode_image
16
+ from string_utils import StringUtils
17
+
18
+ Image.MAX_IMAGE_PIXELS = None # Removes the limit, use with caution
19
+ OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
20
+
21
+
22
+ class OpenAIService:
23
+ def __init__(self):
24
+ # self.llm_settings = getattr(settings.llm, settings.llm.name)
25
+ self.model_name = "gpt-4o" # settings.llm.openai.model
26
+ self.temperature = 0.3 # settings.llm.openai.temperature
27
+ self.client = AsyncOpenAI(api_key=OPENAI_API_KEY)
28
+ # Follow the documentation: https://platform.openai.com/docs/models
29
+ self.deprecated_temperature_models = [
30
+ "o4-mini",
31
+ "o4",
32
+ "o3-mini",
33
+ "o3",
34
+ ] # settings.llm.openai.deprecated_temperature_models
35
+
36
+ @staticmethod
37
+ def encode_image(image: Union[str, np.ndarray]) -> str:
38
+ return encode_image(image=image)
39
+
40
+ def get_temperature(self, temperature: float | None) -> dict:
41
+ return (
42
+ {
43
+ "temperature": temperature
44
+ if temperature is not None
45
+ else self.temperature
46
+ }
47
+ if self.model_name not in self.deprecated_temperature_models
48
+ else {}
49
+ )
50
+
51
+ async def chat_with_text(
52
+ self,
53
+ prompt: str,
54
+ return_as_json: bool = False,
55
+ retry_left: int = 3, # settings.llm.openai.retry_left,
56
+ temperature: float | None = None,
57
+ ) -> str:
58
+ """
59
+ Sends a text-based chat prompt to the OpenAI model.
60
+
61
+ Args:
62
+ prompt (str): User input text.
63
+ return_as_json (bool): whether to generate output as a json object
64
+ retry_left (int): number of retries left
65
+ temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic.
66
+
67
+ Returns:
68
+ str: Response from the model.
69
+ """
70
+
71
+ model_kwargs = {
72
+ "model": self.model_name,
73
+ "messages": [
74
+ {"role": "system", "content": "You are a helpful assistant."},
75
+ {"role": "user", "content": prompt},
76
+ ],
77
+ **self.get_temperature(temperature=temperature),
78
+ }
79
+
80
+ if return_as_json:
81
+ model_kwargs["response_format"] = {"type": "json_object"}
82
+
83
+ try:
84
+ response = await self.client.chat.completions.create(**model_kwargs)
85
+ except Exception as e:
86
+ if retry_left > 0:
87
+ logger.warning(f"OpenAI API calling failed due to {e}. Retry!")
88
+ await asyncio.sleep(1) # quota out
89
+ return await self.chat_with_text(
90
+ prompt=prompt,
91
+ return_as_json=return_as_json,
92
+ retry_left=retry_left - 1,
93
+ temperature=temperature,
94
+ )
95
+ else:
96
+ logger.error(
97
+ f"OpenAI API calling failed due to {e}. Return empty string!"
98
+ )
99
+ return ""
100
+
101
+ return response.choices[0].message.content
102
+
103
+ async def chat_with_image(
104
+ self,
105
+ prompt: str,
106
+ image: str,
107
+ return_as_json: bool = False,
108
+ retry_left: int = 3, # settings.llm.openai.retry_left,
109
+ temperature: float | None = None,
110
+ ) -> str:
111
+ """
112
+ Sends an image along with a text prompt to the OpenAI model.
113
+
114
+ Args:
115
+ prompt (str): User input text.
116
+ image_path (str): Path to the image file.
117
+ return_as_json (bool): whether to generate output as a json object
118
+ retry_left (int): number of retries left
119
+ temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic.
120
+
121
+ Returns:
122
+ str: Response from the model.
123
+ """
124
+ if os.path.isfile(image):
125
+ base64_image = self.encode_image(image=image)
126
+ elif StringUtils.is_base64(image):
127
+ base64_image = image
128
+ else:
129
+ raise Exception(
130
+ "ServiceAiError.UNSUPPORT_INPUT_IMAGE_TYPE.as_http_exception()"
131
+ )
132
+
133
+ model_kwargs = {
134
+ "model": self.model_name,
135
+ "messages": [
136
+ {
137
+ "role": "user",
138
+ "content": [
139
+ {"type": "text", "text": prompt},
140
+ {
141
+ "type": "image_url",
142
+ "image_url": {
143
+ "url": f"data:image/jpeg;base64,{base64_image}"
144
+ },
145
+ },
146
+ ],
147
+ }
148
+ ],
149
+ **self.get_temperature(temperature=temperature),
150
+ }
151
+
152
+ if return_as_json:
153
+ model_kwargs["response_format"] = {"type": "json_object"}
154
+
155
+ try:
156
+ response = await self.client.chat.completions.create(**model_kwargs)
157
+ except Exception as e:
158
+ if retry_left > 0:
159
+ logger.warning(f"OpenAI API calling failed due to {e}. Retry!")
160
+ await asyncio.sleep(1) # quota out
161
+ return await self.chat_with_image(
162
+ prompt=prompt,
163
+ image=image,
164
+ return_as_json=return_as_json,
165
+ retry_left=retry_left - 1,
166
+ temperature=temperature,
167
+ )
168
+ else:
169
+ logger.error(
170
+ f"OpenAI API calling failed due to {e}. Return empty string!"
171
+ )
172
+ return ""
173
+ return response.choices[0].message.content
174
+
175
+ async def chat_with_multiple_images(
176
+ self,
177
+ prompt: str,
178
+ images: list[str],
179
+ return_as_json: bool = False,
180
+ retry_left: int = 3, # settings.llm.openai.retry_left,
181
+ temperature: float | None = None,
182
+ ) -> str:
183
+ """
184
+ Sends multiple images along with a text prompt to the OpenAI model.
185
+ Args:
186
+ prompt (str): User input text.
187
+ images (list[str]): List of base64 encoded images.
188
+ return_as_json (bool): whether to generate output as a json object
189
+ retry_left (int): number of retries left
190
+ temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic.
191
+ Returns:
192
+ list[str]: Responses from the model for each image.
193
+ """
194
+ if len(images) == 0:
195
+ logger.warning("OpenAI chats with multiple images mode without any images")
196
+
197
+ base64_images = []
198
+ for image in images:
199
+ if os.path.isfile(image):
200
+ base64_images.append(self.encode_image(image=image))
201
+ elif StringUtils.is_base64(image):
202
+ base64_images.append(image)
203
+ else:
204
+ raise Exception(
205
+ "ServiceAiError.UNSUPPORT_INPUT_IMAGE_TYPE.as_http_exception()"
206
+ )
207
+
208
+ model_kwargs = {
209
+ "model": self.model_name,
210
+ "messages": [
211
+ {
212
+ "role": "user",
213
+ "content": [
214
+ {"type": "text", "text": prompt},
215
+ *[
216
+ {
217
+ "type": "image_url",
218
+ "image_url": {
219
+ "url": f"data:image/jpeg;base64,{base64_image}"
220
+ },
221
+ }
222
+ for base64_image in base64_images
223
+ ],
224
+ ],
225
+ }
226
+ ],
227
+ **self.get_temperature(temperature=temperature),
228
+ }
229
+
230
+ if return_as_json:
231
+ model_kwargs["response_format"] = {"type": "json_object"}
232
+
233
+ try:
234
+ response = await self.client.chat.completions.create(**model_kwargs)
235
+ except Exception as e:
236
+ if retry_left > 0:
237
+ logger.warning(f"OpenAI API calling failed due to {e}. Retry!")
238
+ await asyncio.sleep(1) # quota out
239
+ return await self.chat_with_multiple_images(
240
+ prompt=prompt,
241
+ images=images,
242
+ return_as_json=return_as_json,
243
+ retry_left=retry_left - 1,
244
+ temperature=temperature,
245
+ )
246
+ else:
247
+ logger.error(
248
+ f"OpenAI API calling failed due to {e}. Return empty list!"
249
+ )
250
+ return ""
251
+
252
+ return response.choices[0].message.content
253
+
254
+
255
class LLMService:
    """Factory hiding the concrete LLM partner behind one entry point."""

    @classmethod
    def from_partner(cls):
        """Instantiate the currently configured partner service (OpenAI)."""
        return OpenAIService()
mv_utils_zs.py ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile mv_utils_zs.py
2
+ """
3
+ Author: yangyangyang127
4
+ Github: https://github.com/yangyangyang127
5
+ Repo: https://github.com/yangyangyang127/PointCLIP_V2
6
+ Path: https://github.com/yangyangyang127/PointCLIP_V2/blob/main/zeroshot_cls/trainers/mv_utils_zs.py#L135
7
+ """
8
+
9
+ import numpy as np
10
+ import torch
11
+ import torch.nn as nn
12
+ from torch_scatter import scatter
13
+
14
# Camera translation along Z applied to every view (see Realistic_Projection).
TRANS = -1.5

# realistic projection parameters
params = {
    "maxpoolz": 1,  # max-pool kernel size along depth (Z)
    "maxpoolxy": 7,  # max-pool kernel size in the image plane (XY)
    "maxpoolpadz": 0,  # max-pool padding along Z
    "maxpoolpadxy": 2,  # max-pool padding in XY
    "convz": 1,  # Gaussian-conv kernel size along Z
    "convxy": 3,  # Gaussian-conv kernel size in XY
    "convsigmaxy": 3,  # Gaussian sigma in XY
    "convsigmaz": 1,  # Gaussian sigma along Z
    "convpadz": 0,  # conv padding along Z
    "convpadxy": 1,  # conv padding in XY
    "imgbias": 0.0,  # not referenced elsewhere in this file
    "depth_bias": 0.2,  # offset pushing normalized depth away from the near plane
    "obj_ratio": 0.8,  # fraction of the grid the object occupies in XY
    "bg_clr": 0.0,  # background value for empty grid cells
    "resolution": 122,  # XY resolution of the 3-D grid
    "depth": 8,  # number of depth (Z) bins; default = 8
    "grid_height": 64,  # default height for points_to_2d_grid
    "grid_width": 64,  # default width for points_to_2d_grid
}
37
+
38
+
39
class Grid2Image(nn.Module):
    """Turn a 3-D occupancy grid into a 2-D, 3-channel image.

    Max-pooling densifies the sparse grid, a fixed 3-D Gaussian convolution
    smooths it, and a max over the depth axis squeezes it to a 2-D map.
    """

    def __init__(self):
        super().__init__()
        torch.backends.cudnn.benchmark = False

        pool_kernel = (params["maxpoolz"], params["maxpoolxy"], params["maxpoolxy"])
        pool_pad = (
            params["maxpoolpadz"],
            params["maxpoolpadxy"],
            params["maxpoolpadxy"],
        )
        self.maxpool = nn.MaxPool3d(pool_kernel, stride=1, padding=pool_pad)

        conv_kernel = (params["convz"], params["convxy"], params["convxy"])
        conv_pad = (params["convpadz"], params["convpadxy"], params["convpadxy"])
        self.conv = torch.nn.Conv3d(
            1,
            1,
            kernel_size=conv_kernel,
            stride=1,
            padding=conv_pad,
            bias=True,
        )

        # Freeze the convolution to a fixed 3-D Gaussian smoothing kernel.
        gauss = get3DGaussianKernel(
            params["convxy"],
            params["convz"],
            sigma=params["convsigmaxy"],
            zsigma=params["convsigmaz"],
        )
        self.conv.weight.data = torch.Tensor(gauss).repeat(1, 1, 1, 1, 1)
        self.conv.bias.data.fill_(0)

    def forward(self, x):
        # x: [B, depth, H, W] grid -> add a channel dim for the 3-D ops.
        smoothed = self.conv(self.maxpool(x.unsqueeze(1)))
        # Squeeze the depth axis with a max, then normalize per image.
        depth_map = torch.max(smoothed, dim=2)[0]
        per_image_max = torch.max(torch.max(depth_map, dim=-1)[0], dim=-1)[0]
        depth_map = depth_map / per_image_max[:, :, None, None]
        # Invert (background becomes bright) and expand to 3 identical channels.
        return (1 - depth_map).repeat(1, 3, 1, 1)
84
+
85
+
86
def euler2mat(angle):
    """Convert Euler angles (x, y, z) to a rotation matrix.

    Args:
        angle (torch.Tensor): angles in radians, shape [3] or [B, 3].

    Returns:
        torch.Tensor: rotation matrix of shape [3, 3] or [B, 3, 3],
        composed as Rx @ Ry @ Rz.

    Raises:
        ValueError: if ``angle`` is not 1-D or 2-D.

    Source:
        https://github.com/ClementPinard/SfmLearner-Pytorch/blob/master/inverse_warp.py
    """
    if angle.dim() == 1:
        x, y, z = angle[0], angle[1], angle[2]
        _dim = 0
        _view = [3, 3]
    elif angle.dim() == 2:
        b = angle.size(0)
        x, y, z = angle[:, 0], angle[:, 1], angle[:, 2]
        _dim = 1
        _view = [b, 3, 3]
    else:
        # Was `assert False`; a real exception also survives `python -O`.
        raise ValueError(f"angle must be 1-D or 2-D, got {angle.dim()}-D")

    # Zero/one tensors that broadcast with (possibly batched) angles and
    # stay on the same device/dtype as the input.
    zero = z.detach() * 0
    one = zero.detach() + 1

    cosz, sinz = torch.cos(z), torch.sin(z)
    zmat = torch.stack(
        [cosz, -sinz, zero, sinz, cosz, zero, zero, zero, one], dim=_dim
    ).reshape(_view)

    cosy, siny = torch.cos(y), torch.sin(y)
    ymat = torch.stack(
        [cosy, zero, siny, zero, one, zero, -siny, zero, cosy], dim=_dim
    ).reshape(_view)

    cosx, sinx = torch.cos(x), torch.sin(x)
    xmat = torch.stack(
        [one, zero, zero, zero, cosx, -sinx, zero, sinx, cosx], dim=_dim
    ).reshape(_view)

    return xmat @ ymat @ zmat
135
+
136
+
137
def points_to_2d_grid(
    points, grid_h=params["grid_height"], grid_w=params["grid_width"]
):
    """Convert a point cloud into a 2-D occupancy grid using X, Y only.

    Points are projected onto a plane and quantized into grid cells.

    Args:
        points (torch.Tensor): point clouds of shape [B, P, 3]
            (B: batch size, P: number of points, 3: x, y, z coordinates).
        grid_h (int): height of the output 2-D grid.
        grid_w (int): width of the output 2-D grid.

    Returns:
        torch.Tensor: occupancy grid of shape [B, grid_h, grid_w].
            Cell (y, x) holds 1.0 if at least one point falls into it,
            otherwise the background value params["bg_clr"].
    """
    batch, pnum, _ = points.shape
    device = points.device

    # --- Step 1: normalize point coordinates ---
    # Per-cloud min/max over X, Y only (better for a 2-D normalization).
    pmax_xy = points[:, :, :2].max(dim=1)[0]
    pmin_xy = points[:, :, :2].min(dim=1)[0]

    # Center of the X, Y bounding box; add a point dim to broadcast [B, 1, 2].
    pcent_xy = (pmax_xy + pmin_xy) / 2
    pcent_xy = pcent_xy[:, None, :]

    # Use the larger of the X/Y extents so the aspect ratio is preserved.
    prange_xy = (pmax_xy - pmin_xy).max(dim=-1)[0][:, None, None]  # [B, 1, 1]

    # Small epsilon avoids division by zero when all points coincide.
    epsilon = 1e-8
    # Normalize X, Y into [-1, 1] based on the X/Y extent:
    # (points[:, :, :2] - pcent_xy) -> [B, P, 2]; prange_xy -> [B, 1, 1].
    points_normalized_xy = (points[:, :, :2] - pcent_xy) / (prange_xy + epsilon) * 2.0

    # Shrink by obj_ratio so the object does not touch the grid border.
    points_normalized_xy = points_normalized_xy * params["obj_ratio"]

    # --- Step 2: map normalized coordinates to grid indices ---
    # X: [-obj_ratio, obj_ratio] -> [0, grid_w]; Y likewise -> [0, grid_h].
    # General form: (normalized_coord + scale) / (2 * scale) * grid_dim.
    _x = (
        (points_normalized_xy[:, :, 0] + params["obj_ratio"])
        / (2 * params["obj_ratio"])
        * grid_w
    )
    _y = (
        (points_normalized_xy[:, :, 1] + params["obj_ratio"])
        / (2 * params["obj_ratio"])
        * grid_h
    )

    # Floor to get integer cell indices.
    _x = torch.floor(_x).long()
    _y = torch.floor(_y).long()

    # --- Step 3: clamp indices to the valid grid range ---
    # _x into [0, grid_w - 1]; _y into [0, grid_h - 1].
    _x = torch.clip(_x, 0, grid_w - 1)
    _y = torch.clip(_y, 0, grid_h - 1)

    # --- Step 4: build the grid and mark occupied cells ---
    # Initialize the 2-D grid with the background value.
    grid = torch.full(
        (batch, grid_h, grid_w), params["bg_clr"], dtype=torch.float32, device=device
    )

    # Batch index for every point.
    batch_indices = torch.arange(batch, device=device).view(-1, 1).repeat(1, pnum)

    # Flatten the index tensors for advanced indexing.
    batch_idx_flat = batch_indices.view(-1)
    y_idx_flat = _y.view(-1)
    x_idx_flat = _x.view(-1)

    # Write 1.0 into every cell (y, x) that receives a point; multiple points
    # in the same cell still yield a single 1.0.
    grid[batch_idx_flat, y_idx_flat, x_idx_flat] = 1.0

    return grid
225
+
226
+
227
def points2grid(points, resolution=params["resolution"], depth=params["depth"]):
    """Quantize each point cloud into a 3-D depth grid.

    Args:
        points (torch.Tensor): point clouds of shape [B, P, 3].
        resolution (int): side length of the square XY grid.
        depth (int): number of discrete depth (Z) bins.

    Returns:
        torch.Tensor: grid of shape [B, depth, resolution, resolution];
        occupied cells hold the (clipped) normalized depth of the deepest
        point, empty cells hold params["bg_clr"].
    """

    batch, pnum, _ = points.shape

    # Normalize each cloud to [-1, 1] around its center, keeping aspect ratio.
    pmax, pmin = points.max(dim=1)[0], points.min(dim=1)[0]
    pcent = (pmax + pmin) / 2
    pcent = pcent[:, None, :]
    # Epsilon guards against a zero range when all points coincide
    # (consistent with points_to_occupancy_grid in this file).
    prange = (pmax - pmin).max(dim=-1)[0][:, None, None] + 1e-8
    points = (points - pcent) / prange * 2.0
    points[:, :, :2] = points[:, :, :2] * params["obj_ratio"]

    # Map normalized coordinates to (fractional) grid indices.
    depth_bias = params["depth_bias"]
    _x = (points[:, :, 0] + 1) / 2 * resolution
    _y = (points[:, :, 1] + 1) / 2 * resolution
    _z = ((points[:, :, 2] + 1) / 2 + depth_bias) / (1 + depth_bias) * (depth - 2)

    _x.ceil_()
    _y.ceil_()
    z_int = _z.ceil()

    _x = torch.clip(_x, 1, resolution - 2)
    _y = torch.clip(_y, 1, resolution - 2)
    _z = torch.clip(_z, 1, depth - 2)

    # Flattened (z, y, x) index per point; scatter keeps the max depth per cell.
    # NOTE(review): z_int is not clipped before indexing; with the normalization
    # above it stays within range, but verify if the input contract changes.
    coordinates = z_int * resolution * resolution + _y * resolution + _x
    grid = (
        torch.ones([batch, depth, resolution, resolution], device=points.device).view(
            batch, -1
        )
        * params["bg_clr"]
    )

    grid = scatter(_z, coordinates.long(), dim=1, out=grid, reduce="max")
    grid = grid.reshape((batch, depth, resolution, resolution)).permute((0, 1, 3, 2))

    return grid
291
+
292
+
293
+ # Giả sử bạn có thư viện scatter, ví dụ: from torch_scatter import scatter
294
+ # Hoặc hàm scatter tương đương
295
+ # import torch # Đảm bảo đã import torch
296
+ # from torch_scatter import scatter # Ví dụ
297
+
298
+
299
def points_to_occupancy_grid(
    points, resolution=params["resolution"], depth=params["depth"]
):
    """Quantize each point cloud into a binary 3-D occupancy grid.

    Args:
        points (torch.Tensor): point clouds of shape [B, P, 3].
        resolution (int): side length of the square XY grid.
        depth (int): number of discrete depth (Z) bins.

    Returns:
        torch.Tensor: grid of shape [B, depth, resolution, resolution];
        cells containing at least one point hold 1.0, others hold
        params["bg_clr"] (assumed 0.0 for a strictly binary grid).
    """

    batch, pnum, _ = points.shape
    device = points.device  # build new tensors on the input's device

    # --- Normalize to [-1, 1]; epsilon avoids division by zero ---
    pmax, pmin = points.max(dim=1)[0], points.min(dim=1)[0]
    pcent = (pmax + pmin) / 2
    pcent = pcent[:, None, :]
    prange = (pmax - pmin).max(dim=-1)[0][:, None, None] + 1e-8
    points_norm = (points - pcent) / prange * 2.0
    points_norm[:, :, :2] = points_norm[:, :, :2] * params["obj_ratio"]

    depth_bias = params["depth_bias"]
    _x = (points_norm[:, :, 0] + 1) / 2 * resolution
    _y = (points_norm[:, :, 1] + 1) / 2 * resolution
    _z = ((points_norm[:, :, 2] + 1) / 2 + depth_bias) / (1 + depth_bias) * (depth - 2)

    _x.ceil_()
    _y.ceil_()
    z_int = _z.ceil()

    _x = torch.clip(_x, 1, resolution - 2)
    _y = torch.clip(_y, 1, resolution - 2)
    # z_int is used as an index below, so clip it as well.
    z_int = torch.clip(z_int, 1, depth - 2)

    # Per-batch flattened (z, y, x) index of every point: [B, P].
    coordinates = z_int * resolution * resolution + _y * resolution + _x
    coordinates = coordinates.long()

    # Background value (0.0 keeps the grid strictly binary with reduce="max").
    bg_clr_value = params.get("bg_clr", 0.0)
    if bg_clr_value != 0.0:
        print(
            "Warning: bg_clr is not 0.0, occupancy grid might not be strictly binary 0/1 with reduce='max'. Consider initializing grid with 0."
        )

    grid = torch.full(
        (batch, depth * resolution * resolution),
        bg_clr_value,
        dtype=torch.float32,
        device=device,
    )

    # One 1.0 per point, shaped [B, P] to match `coordinates`.
    values_to_scatter = torch.ones(batch, pnum, dtype=torch.float32, device=device)

    # BUG FIX: scatter along dim=1 so each batch writes into its own row.
    # The previous flat dim=0 scatter used per-batch coordinates without a
    # batch offset, so every cloud was written into batch 0's grid.
    grid = scatter(
        values_to_scatter,
        coordinates,
        dim=1,
        out=grid,
        reduce="max",
    )

    # Restore the 3-D layout and match points2grid's axis order.
    grid = grid.view(batch, depth, resolution, resolution)
    grid = grid.permute((0, 1, 3, 2))

    return grid
378
+
379
+
380
class Realistic_Projection:
    """Creates multi-view images from a point cloud.

    Precomputes 10 camera views (8 around the object plus top and bottom),
    applies the view transforms, voxelizes with points2grid, and renders
    each grid to an image with Grid2Image.
    """

    def __init__(self):
        # Per-view [euler_angles, translation]; 8 side views + top + bottom.
        _views = np.asarray([
            [[1 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[3 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[5 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[7 * np.pi / 4, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[0 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[1 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[2 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[3 * np.pi / 2, 0, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[0, -np.pi / 2, np.pi / 2], [-0.5, -0.5, TRANS]],
            [[0, np.pi / 2, np.pi / 2], [-0.5, -0.5, TRANS]],
        ])

        # adding some bias to the view angle to reveal more surface
        _views_bias = np.asarray([
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 9, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 15, 0], [-0.5, 0, TRANS]],
            [[0, np.pi / 15, 0], [-0.5, 0, TRANS]],
        ])

        self.num_views = _views.shape[0]

        # Main view rotations and their bias rotations, transposed so that
        # point_transform can right-multiply row-vector points.
        angle = torch.tensor(_views[:, 0, :]).float()  # .cuda()
        self.rot_mat = euler2mat(angle).transpose(1, 2)
        angle2 = torch.tensor(_views_bias[:, 0, :]).float()  # .cuda()
        self.rot_mat2 = euler2mat(angle2).transpose(1, 2)

        # Per-view translation, shaped [V, 1, 3] to broadcast over points.
        self.translation = torch.tensor(_views[:, 1, :]).float()  # .cuda()
        self.translation = self.translation.unsqueeze(1)

        self.grid2image = Grid2Image()  # .cuda()

    def get_img(self, points):
        """Render ``points`` [B, P, 3] into images, one per (cloud, view) pair.

        Returns the Grid2Image output for the B * num_views projected grids.
        """
        b, _, _ = points.shape
        v = self.translation.shape[0]

        # Pairing scheme: repeat_interleave repeats each cloud v times in a
        # row, while .repeat tiles the v view matrices b times, so row i
        # pairs cloud i // v with view i % v.
        _points = self.point_transform(
            points=torch.repeat_interleave(points, v, dim=0),
            rot_mat=self.rot_mat.repeat(b, 1, 1),
            rot_mat2=self.rot_mat2.repeat(b, 1, 1),
            translation=self.translation.repeat(b, 1, 1),
        )

        grid = points2grid(
            points=_points, resolution=params["resolution"], depth=params["depth"]
        ).squeeze()
        img = self.grid2image(grid)
        return img

    @staticmethod
    def point_transform(points, rot_mat, rot_mat2, translation):
        """Apply the per-view rigid transform to every point.

        :param points: [batch, num_points, 3]
        :param rot_mat: [batch, 3, 3] main view rotation (pre-transposed)
        :param rot_mat2: [batch, 3, 3] bias rotation (pre-transposed)
        :param translation: [batch, 1, 3]
        :return: transformed points, [batch, num_points, 3]
        """
        rot_mat = rot_mat.to(points.device)
        rot_mat2 = rot_mat2.to(points.device)
        translation = translation.to(points.device)
        points = torch.matmul(points, rot_mat)
        points = torch.matmul(points, rot_mat2)
        points = points - translation
        return points
456
+
457
+
458
def get2DGaussianKernel(ksize, sigma=0):
    """Build a normalized 2-D Gaussian kernel of shape (ksize, ksize).

    Note: ``sigma`` must be positive; sigma == 0 divides by zero.
    Returns a torch.Tensor that sums to 1.
    """
    offsets = np.arange(ksize, dtype=np.float32) - ksize // 2
    profile = np.exp(-(offsets**2) / (2 * sigma**2))
    kernel2d = torch.from_numpy(np.outer(profile, profile))
    return kernel2d / kernel2d.sum()
466
+
467
+
468
+ # Without numpy
469
+ # def get2DGaussianKernel(ksize, sigma):
470
+ # xs = torch.linspace(-(ksize // 2), ksize // 2, steps=ksize)
471
+ # kernel1d = torch.exp(-(xs ** 2) / (2 * sigma ** 2))
472
+ # kernel2d = torch.outer(kernel1d, kernel1d)
473
+ # kernel2d /= kernel2d.sum()
474
+ # return kernel2d
475
+
476
+
477
+ def get3DGaussianKernel(ksize, depth, sigma=2, zsigma=2):
478
+ kernel2d = get2DGaussianKernel(ksize, sigma)
479
+ zs = np.arange(depth, dtype=np.float32) - depth // 2
480
+ zkernel = np.exp(-(zs**2) / (2 * zsigma**2))
481
+ kernel3d = np.repeat(kernel2d[None, :, :], depth, axis=0) * zkernel[:, None, None]
482
+ kernel3d = kernel3d / torch.sum(kernel3d)
483
+ return kernel3d
string_utils.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%writefile string_utils.py
2
+ import base64
3
+ import random
4
+ import re
5
+ import string
6
+ from urllib.parse import urlparse
7
+
8
+
9
class StringUtils:
    """Small collection of stateless string helpers."""

    @staticmethod
    def generate_random_string(length: int = 32) -> str:
        """Return a random alphanumeric string of the given length.

        Uses ``random`` (not ``secrets``); do not use the result for
        security-sensitive tokens.
        """
        characters = string.ascii_letters + string.digits
        random_string = "".join(random.choice(characters) for _ in range(length))
        return random_string

    @staticmethod
    def clean_string(input_string: str) -> str:
        """Normalize a string: ASCII-only, tidy punctuation spacing, lowercase."""
        # Remove non-ASCII characters
        cleaned_string = re.sub(r"[^\x00-\x7F]+", " ", input_string)

        # Consolidate spaces and ensure correct spacing around punctuation
        cleaned_string = re.sub(r"\s*([.,;!?%:])\s*", r"\1 ", cleaned_string)

        # Adjust spacing for the dollar sign
        cleaned_string = re.sub(r"\$\s+", "$", cleaned_string)

        # Ensure correct spacing inside parentheses around numbers
        cleaned_string = re.sub(r"\(\s*(\d+)\s*\)", r"( \1 )", cleaned_string)

        # Remove extra spaces around punctuation (this might be redundant but
        # ensures no trailing space before punctuation)
        cleaned_string = re.sub(r"\s+([.,;!?%:])", r"\1", cleaned_string)

        # Remove leading and trailing whitespace, reduce multiple spaces to a
        # single space, and convert to lower case
        cleaned_string = re.sub(r"\s+", " ", cleaned_string).strip().lower()

        return cleaned_string

    @staticmethod
    def get_file_name_without_extension(file_name: str) -> str:
        """Strip the last extension from ``file_name``.

        BUG FIX: a name with no dot previously returned "" (the name was
        lost); it is now returned unchanged. Dotfiles like ".bashrc" still
        return "" as before.
        """
        base, sep, _ext = file_name.rpartition(".")
        return base if sep else file_name

    @staticmethod
    def is_valid_url(url: str):
        """Return True when ``url`` parses with both a scheme and a netloc."""
        try:
            result = urlparse(url)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    @staticmethod
    def is_base64(string: str) -> bool:
        """
        Validates if the input string is a Base64-encoded string.

        Args:
            string (str): The string to validate.

        Returns:
            bool: True if the string is Base64, False otherwise.
        """
        try:
            # Check if the string can be decoded
            base64_bytes = base64.b64decode(string, validate=True)
            # Check if decoded bytes can be re-encoded to the original string
            return base64.b64encode(base64_bytes).decode("utf-8") == string
        except Exception:
            return False