Update fibo_vlm_prompt_to_json.py
fibo_vlm_prompt_to_json.py (CHANGED, +43 -5)
@@ -4,6 +4,7 @@ import textwrap
 from typing import Any, Dict, Iterable, List, Optional
 
 import torch
+import ujson
 from boltons.iterutils import remap
 from PIL import Image
 from transformers import AutoModelForCausalLM, AutoProcessor, Qwen3VLForConditionalGeneration
@@ -11,6 +12,13 @@ from transformers import AutoModelForCausalLM, AutoProcessor, Qwen3VLForConditionalGeneration
 from diffusers.modular_pipelines import ComponentSpec, InputParam, ModularPipelineBlocks, OutputParam, PipelineState
 
 
+def clean_json(caption):
+    caption["pickascore"] = 1.0
+    caption["aesthetic_score"] = 10.0
+    caption = prepare_clean_caption(caption)
+    return caption
+
+
 def parse_aesthetic_score(record: dict) -> str:
     ae = record["aesthetic_score"]
     if ae < 5.5:
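
The new clean_json helper pins both quality scores to their maximum values before delegating to prepare_clean_caption. A minimal sketch of how it might be exercised on a made-up record (the field names below are illustrative, and it assumes prepare_clean_caption accepts a plain dict, as its signature suggests):

    # Sketch only: a made-up record run through the new helper.
    record = {
        "short_description": "a red bicycle leaning against a brick wall",
        "objects": [{"name": "bicycle", "color": "red"}],
        "lighting": "",  # empty values are filtered out downstream
    }
    caption_json = clean_json(record)  # forces pickascore=1.0 and aesthetic_score=10.0
    print(caption_json)                # compact JSON string built by prepare_clean_caption
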
@@ -57,7 +65,24 @@ def prepare_clean_caption(record: dict) -> str:
         if "aesthetic_score" in record:
             scores["aesthetic_score"] = parse_aesthetic_score(record)
 
-
+        # Create structured caption dict of original values
+        fields = [
+            "short_description",
+            "objects",
+            "background_setting",
+            "lighting",
+            "aesthetics",
+            "photographic_characteristics",
+            "style_medium",
+            "text_render",
+            "context",
+            "artistic_style",
+        ]
+
+        original_caption_dict = {f: record[f] for f in fields if f in record}
+
+        # filter empty values recursively (i.e. None, "", {}, [], float("nan"))
+        clean_caption_dict = remap(original_caption_dict, visit=keep)
 
         # Set aesthetics scores
         if "aesthetics" not in clean_caption_dict:
@@ -67,7 +92,7 @@ def prepare_clean_caption(record: dict) -> str:
         clean_caption_dict["aesthetics"].update(scores)
 
         # Dumps clean structured caption as minimal json string (i.e. no newline/whitespace seps)
-        clean_caption_str =
+        clean_caption_str = ujson.dumps(clean_caption_dict, escape_forward_slashes=False)
         return clean_caption_str
     except Exception as ex:
         print("Error: ", ex)
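
For context, remap(original_caption_dict, visit=keep) plus the ujson dump above do the actual cleanup; the keep visitor itself is defined elsewhere in the file and is not part of this diff. A self-contained sketch of the filtering behavior, with a hypothetical keep implementation standing in for the real one:

    import math
    import ujson
    from boltons.iterutils import remap

    # Hypothetical stand-in for the module's `keep` visitor (not shown in this diff):
    # drop None, empty strings/dicts/lists and NaN; keep everything else.
    def keep(path, key, value):
        if value is None or value == "" or value == {} or value == []:
            return False
        if isinstance(value, float) and math.isnan(value):
            return False
        return True

    raw = {"lighting": "", "objects": [{"name": "cat", "notes": None}], "context": float("nan")}
    clean = remap(raw, visit=keep)                            # -> {'objects': [{'name': 'cat'}]}
    print(ujson.dumps(clean, escape_forward_slashes=False))   # -> {"objects":[{"name":"cat"}]}
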
@@ -221,6 +246,7 @@ def generate_json_prompt(
     prompt: Optional[str] = None,
     structured_prompt: Optional[str] = None,
 ):
+    refine_image = None
     if image is None and structured_prompt is None:
         # only got prompt
         task = "generate"
@@ -233,6 +259,7 @@ def generate_json_prompt(
         # got image and prompt
         task = "refine"
         editing_instructions = prompt
+        refine_image = image
     elif image is not None and structured_prompt is None and prompt is None:
         # only got image
         task = "inspire"
@@ -244,6 +271,7 @@ def generate_json_prompt(
         task,
         image=image,
         prompt=prompt,
+        refine_image=refine_image,
         structured_prompt=structured_prompt,
         editing_instructions=editing_instructions,
     )
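
Taken together, the refine_image plumbing in these three hunks distinguishes the call shapes below. An illustrative sketch (the real generate_json_prompt signature may require arguments not visible in these hunks):

    from PIL import Image

    img = Image.new("RGB", (512, 512))                             # placeholder image
    generate_json_prompt(prompt="a foggy harbor at dawn")          # task "generate"
    generate_json_prompt(image=img, prompt="make the sky stormy")  # task "refine"; img forwarded as refine_image
    generate_json_prompt(image=img)                                # task "inspire"
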
@@ -277,12 +305,22 @@ def build_messages(
     if refine_image is None:
         base_prompt = (structured_prompt or "").strip()
         edits = (editing_instructions or "").strip()
-        formatted = textwrap.dedent(
+        formatted = textwrap.dedent(
+            f"""<refine>
+            Input:
+            {base_prompt}
+            Editing instructions:
+            {edits}"""
+        ).strip()
         user_content.append({"type": "text", "text": formatted})
     else:
         user_content.append({"type": "image", "image": refine_image})
         edits = (editing_instructions or "").strip()
-        formatted = textwrap.dedent(
+        formatted = textwrap.dedent(
+            f"""<refine>
+            Editing instructions:
+            {edits}"""
+        ).strip()
         user_content.append({"type": "text", "text": formatted})
 
     messages: List[Dict[str, Any]] = []
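
Both branches append chat-style content entries ({"type": "image" | "text", ...}). For orientation, here is a rough sketch of how such content lists are typically fed to a Qwen-style processor downstream; the processor and model names are assumptions, not taken from this diff:

    # Assumed downstream usage (standard transformers chat-template pattern, not from this diff):
    messages = [{"role": "user", "content": user_content}]
    inputs = processor.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    )
    output_ids = model.generate(**inputs.to(model.device), max_new_tokens=512)
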
@@ -293,7 +331,7 @@
 class BriaFiboVLMPromptToJson(ModularPipelineBlocks):
     model_name = "BriaFibo"
 
-    def __init__(self, model_id
+    def __init__(self, model_id="briaai/vlm-processor-new"):
         super().__init__()
         self.engine = TransformersEngine(model_id)
         self.engine.model.to("cuda")
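
With the new default model_id, the block can now be constructed without arguments. A usage sketch (the override id below is hypothetical):

    block = BriaFiboVLMPromptToJson()                                 # default model_id="briaai/vlm-processor-new"
    block = BriaFiboVLMPromptToJson(model_id="org/other-checkpoint")  # hypothetical override

Note that __init__ still calls self.engine.model.to("cuda") unconditionally, so constructing the block on a CPU-only host will raise; a device parameter could make this configurable.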
|