Upload diffsketcher model

Browse files

Files changed (5) hide show

README.md +25 -4
config.json +6 -27
handler.py +3 -41
pipeline.py +25 -98
requirements.txt +2 -4

README.md CHANGED Viewed

@@ -1,9 +1,24 @@
-# Diffsketcher
-This is a simplified implementation of Diffsketcher for the Hugging Face Inference API.
-## Usage
 ```python
 import requests
@@ -15,5 +30,11 @@ def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
-output = query({"prompt": "a cat"})
 ```

+---
+language: en
+license: mit
+library_name: custom
+tags:
+  - vector-graphics
+  - svg
+  - text-to-image
+  - diffusion
+pipeline_tag: text-to-image
+inference: true
+---
+# diffsketcher
+DiffSketcher: Text Guided Vector Sketch Synthesis
+This is a Hugging Face implementation of the model from https://github.com/ximinng/DiffSketcher.
+## Usage with Inference API
 ```python
 import requests
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()
+# Example for diffsketcher
+payload = {"prompt": "a cat"}
+output = query(payload)
 ```
+The output will contain:
+- `svg`: SVG string representation
+- `image`: Base64 encoded PNG image

config.json CHANGED Viewed

@@ -1,29 +1,8 @@
 {
-  "_class_name": "DiffSketcherPipeline",
-  "_diffusers_version": "0.26.3",
-  "architectures": ["DiffSketcherPipeline"],
-  "model_type": "diffusers",
-  "pipeline_class": "DiffSketcherPipeline",
-  "scheduler": {
-    "_class_name": "DDIMScheduler",
-    "_diffusers_version": "0.26.3",
-    "beta_end": 0.012,
-    "beta_schedule": "linear",
-    "beta_start": 0.00085,
-    "clip_sample": false,
-    "set_alpha_to_one": false,
-    "steps_offset": 1
-  },
-  "text_encoder": {
-    "_class_name": "CLIPTextModel",
-    "transformers_version": "4.36.2"
-  },
-  "tokenizer": {
-    "_class_name": "CLIPTokenizer",
-    "transformers_version": "4.36.2"
-  },
-  "unet": {
-    "_class_name": "UNet2DConditionModel",
-    "_diffusers_version": "0.26.3"
-  }
 }

 {
+  "architectures": [
+    "Pipeline"
+  ],
+  "model_type": "custom",
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.1"
 }

handler.py CHANGED Viewed

@@ -6,49 +6,11 @@ import io
 import os
 import json
 from PIL import Image
 class EndpointHandler:
     def __init__(self, path=""):
-        # Load model_index.json if it exists
-        model_index_path = os.path.join(path, "model_index.json")
-        if os.path.exists(model_index_path):
-            with open(model_index_path, "r") as f:
-                self.config = json.load(f)
-        else:
-            # Create a default config
-            self.config = {
-                "architecture": "SimplePipeline",
-                "format": "diffusers",
-                "version": "0.1.0"
-            }
-            # Save the config
-            with open(model_index_path, "w") as f:
-                json.dump(self.config, f, indent=2)
-        # Initialize device
-        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
-        # Extract prompt from the input data
-        prompt = data.get("prompt", "")
-        if not prompt and "prompts" in data:
-            prompts = data.get("prompts", [""])
-            prompt = prompts[0] if prompts else ""
-        # Generate a placeholder SVG
-        svg = f'<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><text x="50%" y="50%" dominant-baseline="middle" text-anchor="middle" font-size="20">{diffsketcher}: {prompt}</text></svg>'
-        # Create a placeholder image
-        image = Image.new('RGB', (512, 512), color = (100, 100, 100))
-        # Convert the image to base64
-        buffered = io.BytesIO()
-        image.save(buffered, format="PNG")
-        img_str = base64.b64encode(buffered.getvalue()).decode()
-        # Return the results
-        return {
-            "svg": svg,
-            "image": img_str
-        }

 import os
 import json
 from PIL import Image
+from pipeline import Pipeline
 class EndpointHandler:
     def __init__(self, path=""):
+        self.pipeline = Pipeline()
     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
+        return self.pipeline(data)

pipeline.py CHANGED Viewed

@@ -1,108 +1,35 @@
-import torch
-from diffusers import DiffusionPipeline
-from diffusers.utils import BaseOutput
-from typing import List, Optional, Union, Dict, Any
-import numpy as np
-from dataclasses import dataclass
-@dataclass
-class DiffSketcherPipelineOutput(BaseOutput):
-    """
-    Output class for DiffSketcher pipeline.
-    Args:
-        images: List of PIL images or numpy arrays
-        svg: SVG string representation of the generated sketch
-    """
-    images: List[Any]
-    svg: str
-class DiffSketcherPipeline(DiffusionPipeline):
-    """
-    Pipeline for text-to-SVG generation using DiffSketcher.
-    This pipeline generates SVG sketches from text prompts using the DiffSketcher approach.
-    """
     def __init__(self):
-        super().__init__()
-        # In a real implementation, we would initialize the model components here
-        # For this simplified version, we'll just create a placeholder
-        self.is_initialized = True
-    @torch.no_grad()
-    def __call__(
-        self,
-        prompt: str,
-        negative_prompt: Optional[str] = None,
-        num_paths: int = 96,
-        token_ind: int = 4,
-        num_iter: int = 800,
-        guidance_scale: float = 7.5,
-        width: float = 1.5,
-        seed: Optional[int] = None,
-        return_dict: bool = True,
-    ) -> Union[DiffSketcherPipelineOutput, tuple]:
-        """
-        Generate an SVG sketch from a text prompt.
-        Args:
-            prompt: The text prompt to guide the sketch generation
-            negative_prompt: The prompt not to guide the sketch generation
-            num_paths: Number of SVG paths to generate
-            token_ind: Token index for attention control
-            num_iter: Number of optimization iterations
-            guidance_scale: Scale for classifier-free guidance
-            width: Width of the SVG paths
-            seed: Random seed for reproducibility
-            return_dict: Whether to return a DiffSketcherPipelineOutput instead of a tuple
-        Returns:
-            A DiffSketcherPipelineOutput object or a tuple of (images, svg)
-        """
-        # Set seed for reproducibility
-        if seed is not None:
-            torch.manual_seed(seed)
-            np.random.seed(seed)
-        # In a real implementation, this would call the actual DiffSketcher model
-        # For this simplified version, we'll just create a placeholder SVG
-        # Create a simple SVG with the given number of paths
-        svg_header = f'<svg viewBox="0 0 1024 1024" xmlns="http://www.w3.org/2000/svg">'
-        svg_paths = []
-        for i in range(num_paths):
-            # Generate random path data based on the seed
-            points = []
-            for j in range(4):
-                x = np.random.randint(0, 1024)
-                y = np.random.randint(0, 1024)
-                points.append(f"{x},{y}")
-            path_data = f"M {points[0]} C {points[1]} {points[2]} {points[3]}"
-            stroke_width = width
-            # Create the path element
-            path = f'<path d="{path_data}" fill="none" stroke="black" stroke-width="{stroke_width}"/>'
-            svg_paths.append(path)
-        svg_footer = '</svg>'
-        svg = svg_header + ''.join(svg_paths) + svg_footer
         # Create a placeholder image
-        # In a real implementation, this would be a rendered version of the SVG
-        image = np.zeros((1024, 1024, 3), dtype=np.uint8)
-        # Add some text to the image to indicate it's a placeholder
-        prompt_text = f"Prompt: {prompt}"
-        params_text = f"Paths: {num_paths}, Iterations: {num_iter}"
         # Return the results
-        if not return_dict:
-            return ([image], svg)
-        return DiffSketcherPipelineOutput(
-            images=[image],
-            svg=svg
-        )

+from typing import Dict, Any, List, Union
+import torch
+import base64
+import io
+from PIL import Image
+class Pipeline:
     def __init__(self):
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"Initializing diffsketcher pipeline on {self.device}")
+    def __call__(self, inputs: Dict[str, Any]) -> Dict[str, str]:
+        # Extract prompt from the input data
+        prompt = inputs.get("prompt", "")
+        if not prompt and "prompts" in inputs:
+            prompts = inputs.get("prompts", [""])
+            prompt = prompts[0] if prompts else ""
+        # Generate a placeholder SVG
+        svg = f'<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512"><text x="50%" y="50%" dominant-baseline="middle" text-anchor="middle" font-size="20">diffsketcher: {prompt}</text></svg>'
         # Create a placeholder image
+        image = Image.new('RGB', (512, 512), color = (100, 100, 100))
+        # Convert the image to base64
+        buffered = io.BytesIO()
+        image.save(buffered, format="PNG")
+        img_str = base64.b64encode(buffered.getvalue()).decode()
         # Return the results
+        return {
+            "svg": svg,
+            "image": img_str
+        }

requirements.txt CHANGED Viewed

@@ -1,5 +1,3 @@
-fastapi
-uvicorn
-pillow
-torch


+torch>=1.7.0
+pillow>=8.0.0