| """Z-Image-Turbo v2.3 - Multilingual Support""" | |
| import os | |
| import logging | |
| import torch | |
| import spaces | |
| import gradio as gr | |
| import requests | |
| import io | |
| import base64 | |
| import tempfile | |
| from typing import Tuple, Optional, Dict | |
| from PIL import Image | |
| from diffusers import DiffusionPipeline, ZImageImg2ImgPipeline | |
| from openai import OpenAI | |
| # Configure logging (replaces debug print statements) | |
| logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s') | |
| logger = logging.getLogger(__name__) | |
| # ============================================================================= | |
| # MULTILINGUAL SUPPORT | |
| # ============================================================================= | |
| LANGUAGES = ["English", "Español", "Português (BR)", "العربية", "हिंदी"] | |
TRANSLATIONS: Dict[str, Dict[str, str]] = {
    "English": {
        # Header
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "AI Image Generation & Transformation powered by DeepSeek Reasoning",
        "like_msg": "If you liked it, please ❤️ like it. Thank you!",
        # Tabs
        "tab_generate": "Generate",
        "tab_assistant": "AI Assistant",
        "tab_transform": "Transform",
        # Generate tab
        "prompt": "Prompt",
        "prompt_placeholder": "Describe your image in detail...",
        "polish_checkbox": "Prompt+ by deepseek-reasoner",
        "style": "Style",
        "aspect_ratio": "Aspect Ratio",
        "advanced_settings": "Advanced Settings",
        "steps": "Steps",
        "seed": "Seed",
        "random_seed": "Random Seed",
        "generate_btn": "Generate",
        "generated_image": "Generated Image",
        "enhanced_prompt": "Enhanced Prompt",
        "seed_used": "Seed Used",
        "share": "Share",
        # AI Assistant tab
        "ai_description": "**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.",
        "upload_image": "Upload Image",
        "analyze_btn": "Analyze Image",
        "image_description": "Image Description",
        "changes_request": "What changes do you want?",
        "changes_placeholder": "e.g., 'watercolor style' or 'dramatic sunset lighting'",
        "target_style": "Target Style",
        "generate_prompt_btn": "Generate Prompt",
        "generated_prompt": "Generated Prompt",
        "send_to_transform": "Send to Transform Tab",
        "how_to_use": "How to Use",
        "how_to_use_content": """1. **Upload** an image and click "Analyze Image"
2. **Describe** the changes you want
3. **Generate** an optimized prompt
4. **Send** to Transform tab to apply changes""",
        # Transform tab
        "transform_description": "**Transform your image** - Upload and describe the transformation. Lower strength = subtle, higher = dramatic.",
        "transformation_prompt": "Transformation Prompt",
        "transform_placeholder": "e.g., 'oil painting style, vibrant colors'",
        "strength": "Strength",
        "transform_btn": "Transform",
        "transformed_image": "Transformed Image",
        "example_prompts": "Example Prompts",
        # Footer
        "models": "Models",
        "by": "by",
    },
    "Español": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "Generación y Transformación de Imágenes con IA impulsado por DeepSeek Reasoning",
        "like_msg": "Si te gustó, por favor dale me gusta. ¡Gracias!",
        "tab_generate": "Generar",
        "tab_assistant": "Asistente IA",
        "tab_transform": "Transformar",
        "prompt": "Prompt",
        "prompt_placeholder": "Describe tu imagen en detalle...",
        "polish_checkbox": "Prompt+ por deepseek-reasoner",
        "style": "Estilo",
        "aspect_ratio": "Relación de Aspecto",
        "advanced_settings": "Configuración Avanzada",
        "steps": "Pasos",
        "seed": "Semilla",
        "random_seed": "Semilla Aleatoria",
        "generate_btn": "Generar",
        "generated_image": "Imagen Generada",
        "enhanced_prompt": "Prompt Mejorado",
        "seed_used": "Semilla Usada",
        "share": "Compartir",
        "ai_description": "**Generador de Prompts con IA** - Sube una imagen, analízala con GLM-4.6V, y genera prompts optimizados.",
        "upload_image": "Subir Imagen",
        "analyze_btn": "Analizar Imagen",
        "image_description": "Descripción de la Imagen",
        "changes_request": "¿Qué cambios quieres?",
        "changes_placeholder": "ej., 'estilo acuarela' o 'iluminación de atardecer dramático'",
        "target_style": "Estilo Objetivo",
        "generate_prompt_btn": "Generar Prompt",
        "generated_prompt": "Prompt Generado",
        "send_to_transform": "Enviar a Transformar",
        "how_to_use": "Cómo Usar",
        "how_to_use_content": """1. **Sube** una imagen y haz clic en "Analizar Imagen"
2. **Describe** los cambios que quieres
3. **Genera** un prompt optimizado
4. **Envía** a la pestaña Transformar para aplicar cambios""",
        "transform_description": "**Transforma tu imagen** - Sube y describe la transformación. Menor fuerza = sutil, mayor = dramático.",
        "transformation_prompt": "Prompt de Transformación",
        "transform_placeholder": "ej., 'estilo pintura al óleo, colores vibrantes'",
        "strength": "Fuerza",
        "transform_btn": "Transformar",
        "transformed_image": "Imagen Transformada",
        "example_prompts": "Prompts de Ejemplo",
        "models": "Modelos",
        "by": "por",
    },
    "Português (BR)": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "Geração e Transformação de Imagens com IA alimentado por DeepSeek Reasoning",
        "like_msg": "Se você gostou, por favor curta. Obrigado!",
        "tab_generate": "Gerar",
        "tab_assistant": "Assistente IA",
        "tab_transform": "Transformar",
        "prompt": "Prompt",
        "prompt_placeholder": "Descreva sua imagem em detalhes...",
        "polish_checkbox": "Prompt+ por deepseek-reasoner",
        "style": "Estilo",
        "aspect_ratio": "Proporção",
        "advanced_settings": "Configurações Avançadas",
        "steps": "Passos",
        "seed": "Semente",
        "random_seed": "Semente Aleatória",
        "generate_btn": "Gerar",
        "generated_image": "Imagem Gerada",
        "enhanced_prompt": "Prompt Aprimorado",
        "seed_used": "Semente Usada",
        "share": "Compartilhar",
        "ai_description": "**Gerador de Prompts com IA** - Envie uma imagem, analise com GLM-4.6V, e gere prompts otimizados.",
        "upload_image": "Enviar Imagem",
        "analyze_btn": "Analisar Imagem",
        "image_description": "Descrição da Imagem",
        "changes_request": "Quais mudanças você quer?",
        "changes_placeholder": "ex., 'estilo aquarela' ou 'iluminação dramática de pôr do sol'",
        "target_style": "Estilo Alvo",
        "generate_prompt_btn": "Gerar Prompt",
        "generated_prompt": "Prompt Gerado",
        "send_to_transform": "Enviar para Transformar",
        "how_to_use": "Como Usar",
        "how_to_use_content": """1. **Envie** uma imagem e clique em "Analisar Imagem"
2. **Descreva** as mudanças que você quer
3. **Gere** um prompt otimizado
4. **Envie** para a aba Transformar para aplicar mudanças""",
        "transform_description": "**Transforme sua imagem** - Envie e descreva a transformação. Menor força = sutil, maior = dramático.",
        "transformation_prompt": "Prompt de Transformação",
        "transform_placeholder": "ex., 'estilo pintura a óleo, cores vibrantes'",
        "strength": "Força",
        "transform_btn": "Transformar",
        "transformed_image": "Imagem Transformada",
        "example_prompts": "Prompts de Exemplo",
        "models": "Modelos",
        "by": "por",
    },
    "العربية": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "توليد وتحويل الصور بالذكاء الاصطناعي مدعوم من DeepSeek Reasoning",
        "like_msg": "إذا أعجبك، يرجى الإعجاب. شكراً لك!",
        "tab_generate": "توليد",
        "tab_assistant": "مساعد الذكاء الاصطناعي",
        "tab_transform": "تحويل",
        "prompt": "الوصف",
        "prompt_placeholder": "صف صورتك بالتفصيل...",
        "polish_checkbox": "تحسين+ بواسطة deepseek-reasoner",
        "style": "النمط",
        "aspect_ratio": "نسبة العرض",
        "advanced_settings": "إعدادات متقدمة",
        "steps": "الخطوات",
        "seed": "البذرة",
        "random_seed": "بذرة عشوائية",
        "generate_btn": "توليد",
        "generated_image": "الصورة المولدة",
        "enhanced_prompt": "الوصف المحسن",
        "seed_used": "البذرة المستخدمة",
        "share": "مشاركة",
        "ai_description": "**مولد الأوصاف بالذكاء الاصطناعي** - ارفع صورة، حللها باستخدام GLM-4.6V، ثم أنشئ أوصافاً محسنة.",
        "upload_image": "رفع صورة",
        "analyze_btn": "تحليل الصورة",
        "image_description": "وصف الصورة",
        "changes_request": "ما التغييرات التي تريدها؟",
        "changes_placeholder": "مثال: 'نمط ألوان مائية' أو 'إضاءة غروب درامية'",
        "target_style": "النمط المستهدف",
        "generate_prompt_btn": "توليد الوصف",
        "generated_prompt": "الوصف المولد",
        "send_to_transform": "إرسال إلى التحويل",
        "how_to_use": "كيفية الاستخدام",
        "how_to_use_content": """1. **ارفع** صورة وانقر على "تحليل الصورة"
2. **صف** التغييرات التي تريدها
3. **أنشئ** وصفاً محسناً
4. **أرسل** إلى تبويب التحويل لتطبيق التغييرات""",
        "transform_description": "**حوّل صورتك** - ارفع وصف التحويل. قوة أقل = تغيير طفيف، قوة أكبر = تغيير جذري.",
        "transformation_prompt": "وصف التحويل",
        "transform_placeholder": "مثال: 'نمط لوحة زيتية، ألوان نابضة'",
        "strength": "القوة",
        "transform_btn": "تحويل",
        "transformed_image": "الصورة المحولة",
        "example_prompts": "أمثلة الأوصاف",
        "models": "النماذج",
        "by": "بواسطة",
    },
    "हिंदी": {
        "title": "Z Image Turbo + GLM-4.6V",
        "subtitle": "DeepSeek Reasoning द्वारा संचालित AI छवि निर्माण और रूपांतरण",
        "like_msg": "अगर आपको पसंद आया, तो कृपया लाइक करें। धन्यवाद!",
        "tab_generate": "बनाएं",
        "tab_assistant": "AI सहायक",
        "tab_transform": "रूपांतरित करें",
        "prompt": "प्रॉम्प्ट",
        "prompt_placeholder": "अपनी छवि का विस्तार से वर्णन करें...",
        "polish_checkbox": "Prompt+ by deepseek-reasoner",
        "style": "शैली",
        "aspect_ratio": "पक्षानुपात",
        "advanced_settings": "उन्नत सेटिंग्स",
        "steps": "चरण",
        "seed": "बीज",
        "random_seed": "यादृच्छिक बीज",
        "generate_btn": "बनाएं",
        "generated_image": "बनाई गई छवि",
        "enhanced_prompt": "उन्नत प्रॉम्प्ट",
        "seed_used": "प्रयुक्त बीज",
        "share": "साझा करें",
        "ai_description": "**AI-संचालित प्रॉम्प्ट जनरेटर** - एक छवि अपलोड करें, GLM-4.6V से विश्लेषण करें, फिर अनुकूलित प्रॉम्प्ट बनाएं।",
        "upload_image": "छवि अपलोड करें",
        "analyze_btn": "छवि विश्लेषण करें",
        "image_description": "छवि विवरण",
        "changes_request": "आप क्या बदलाव चाहते हैं?",
        "changes_placeholder": "उदा., 'वॉटरकलर शैली' या 'नाटकीय सूर्यास्त प्रकाश'",
        "target_style": "लक्ष्य शैली",
        "generate_prompt_btn": "प्रॉम्प्ट बनाएं",
        "generated_prompt": "बनाया गया प्रॉम्प्ट",
        "send_to_transform": "रूपांतरण टैब पर भेजें",
        "how_to_use": "कैसे उपयोग करें",
        "how_to_use_content": """1. **अपलोड** करें एक छवि और "छवि विश्लेषण करें" पर क्लिक करें
2. **वर्णन** करें जो बदलाव आप चाहते हैं
3. **बनाएं** एक अनुकूलित प्रॉम्प्ट
4. **भेजें** रूपांतरण टैब पर बदलाव लागू करने के लिए""",
        "transform_description": "**अपनी छवि रूपांतरित करें** - अपलोड करें और रूपांतरण का वर्णन करें। कम शक्ति = सूक्ष्म, अधिक = नाटकीय।",
        "transformation_prompt": "रूपांतरण प्रॉम्प्ट",
        "transform_placeholder": "उदा., 'तेल चित्रकला शैली, जीवंत रंग'",
        "strength": "शक्ति",
        "transform_btn": "रूपांतरित करें",
        "transformed_image": "रूपांतरित छवि",
        "example_prompts": "उदाहरण प्रॉम्प्ट",
        "models": "मॉडल",
        "by": "द्वारा",
    },
}


def get_text(lang: str, key: str) -> str:
    """Get translated text for a key."""
    return TRANSLATIONS.get(lang, TRANSLATIONS["English"]).get(key, key)
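# Example: get_text("Español", "generate_btn") -> "Generar"; unknown languages fall
# back to English, and unknown keys fall back to the key string itself.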


def change_language(lang_name: str):
    """Update all component labels when language changes."""
    t = TRANSLATIONS.get(lang_name, TRANSLATIONS["English"])
    return [
        # Generate tab
        gr.update(label=t["prompt"], placeholder=t["prompt_placeholder"]),
        gr.update(label=t["polish_checkbox"]),
        gr.update(label=t["style"]),
        gr.update(label=t["aspect_ratio"]),
        gr.update(label=t["steps"]),
        gr.update(label=t["seed"]),
        gr.update(label=t["random_seed"]),
        gr.update(value=t["generate_btn"]),
        gr.update(label=t["generated_image"]),
        gr.update(label=t["enhanced_prompt"]),
        gr.update(label=t["seed_used"]),
        gr.update(value=t["share"]),
        # AI Assistant tab
        gr.update(value=t["ai_description"]),
        gr.update(label=t["upload_image"]),
        gr.update(value=t["analyze_btn"]),
        gr.update(label=t["image_description"]),
        gr.update(label=t["changes_request"], placeholder=t["changes_placeholder"]),
        gr.update(label=t["target_style"]),
        gr.update(value=t["generate_prompt_btn"]),
        gr.update(label=t["generated_prompt"]),
        gr.update(value=t["send_to_transform"]),
        gr.update(value=t["how_to_use_content"]),
        # Transform tab
        gr.update(value=t["transform_description"]),
        gr.update(label=t["upload_image"]),
        gr.update(label=t["transformation_prompt"], placeholder=t["transform_placeholder"]),
        gr.update(label=t["polish_checkbox"]),
        gr.update(label=t["style"]),
        gr.update(label=t["strength"]),
        gr.update(label=t["steps"]),
        gr.update(label=t["seed"]),
        gr.update(label=t["random_seed"]),
        gr.update(value=t["transform_btn"]),
        gr.update(label=t["transformed_image"]),
        gr.update(label=t["enhanced_prompt"]),
        gr.update(label=t["seed_used"]),
        gr.update(value=t["share"]),
    ]
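# Note: the number and order of gr.update objects returned here must match the
# `outputs` list wired to the language selector event in the Blocks UI.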

# =============================================================================
# Constants (replaces magic numbers)
MIN_IMAGE_DIM = 512
MAX_IMAGE_DIM = 2048
IMAGE_ALIGNMENT = 16
API_TIMEOUT = 90.0
API_MAX_RETRIES = 2
MAX_DESCRIPTION_LENGTH = 1200  # For GLM prompt generation

# Enable optimized backends (SDPA uses FlashAttention when available)
torch.backends.cuda.enable_flash_sdp(True)
torch.backends.cuda.enable_mem_efficient_sdp(True)
torch.backends.cudnn.benchmark = True

# Enable TF32 for better performance on Ampere+ GPUs
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Singleton clients with timeout and retry
_deepseek_client: Optional[OpenAI] = None
_glm_client: Optional[OpenAI] = None


def get_deepseek_client() -> Optional[OpenAI]:
    """Get DeepSeek API client (singleton with timeout)."""
    global _deepseek_client
    if _deepseek_client is None:
        api_key = os.environ.get("DEEPSEEK_API_KEY")
        if not api_key:
            logger.warning("DEEPSEEK_API_KEY not configured")
            return None
        _deepseek_client = OpenAI(
            base_url="https://api.deepseek.com",
            api_key=api_key,
            timeout=API_TIMEOUT,
            max_retries=API_MAX_RETRIES,
        )
    return _deepseek_client


def polish_prompt(original_prompt: str, mode: str = "generate") -> str:
    """Expand short prompts into detailed, high-quality prompts using deepseek-reasoner."""
    logger.info(f"polish_prompt called: mode={mode}, prompt_len={len(original_prompt) if original_prompt else 0}")
    if not original_prompt or not original_prompt.strip():
        logger.info("polish_prompt: empty input, using default")
        if mode == "transform":
            return "high quality, enhanced details, professional finish"
        return "Ultra HD, 4K, cinematic composition, highly detailed"
    client = get_deepseek_client()
    if not client:
        logger.warning("polish_prompt: DeepSeek client not available, returning original")
        return original_prompt
    if mode == "transform":
        system_prompt = """ROLE: Expert prompt engineer for AI image-to-image transformation.
TASK: Rewrite the user's input into a precise, technical prompt describing the target visual result.
STRICT RULES:
- MAXIMUM 600 TOKENS (strict limit). You MUST write the new prompt in at most 600 tokens.
- Focus on: artistic style, color palette, lighting, texture, rendering technique, mood
- Describe HOW the image should look, not what to change
- No action words like "transform", "convert", "change"
- Present tense, as if describing the final image
You MUST respect the maximum of 600 TOKENS in your response.
OUTPUT FORMAT: Only the final prompt text. No thinking, no explanation, no preamble, no word count."""
    else:
        system_prompt = """ROLE: Expert prompt engineer for AI image generation.
TASK: Expand the user's input into a detailed, expressive prompt for stunning image generation.
STRICT RULES:
- MAXIMUM 600 TOKENS (strict limit). You MUST write the new prompt in at most 600 tokens.
- Be descriptive about: subject, lighting, atmosphere, style, composition, details
- Use vivid, specific language
- Include artistic style references when appropriate
You MUST respect the maximum of 600 TOKENS in your response.
OUTPUT FORMAT: Only the final prompt text. No thinking, no explanation, no preamble, no word count."""
    try:
        response = client.chat.completions.create(
            model="deepseek-reasoner",
            max_tokens=600,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": original_prompt}
            ],
        )
        msg = response.choices[0].message
        content = msg.content if msg.content else ""
        logger.info(f"polish_prompt API response: content_len={len(content)}, has_reasoning={hasattr(msg, 'reasoning_content') and bool(msg.reasoning_content)}")
        # If content is empty, try to extract the final answer from reasoning_content
        if not content and hasattr(msg, 'reasoning_content') and msg.reasoning_content:
            text = msg.reasoning_content.strip()
            paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
            if paragraphs:
                content = paragraphs[-1]
                logger.info(f"polish_prompt: extracted from reasoning_content, len={len(content)}")
        if content:
            content = content.strip().replace("\n", " ")
            if "<think>" in content:
                content = content.split("</think>")[-1].strip()
            if content.startswith('"') and content.endswith('"'):
                content = content[1:-1]
            max_words = 600  # word count used as a proxy for the 600-token limit in all modes
            words = content.split()
            if len(words) > max_words:
                content = " ".join(words[:max_words])
            logger.info(f"polish_prompt SUCCESS: enhanced from {len(original_prompt)} to {len(content)} chars")
            return content
        logger.warning("polish_prompt: no content extracted, returning original prompt")
        return original_prompt
    except Exception as e:
        logger.error(f"polish_prompt FAILED: {type(e).__name__}: {str(e)}")
        return original_prompt
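# Usage sketch (needs DEEPSEEK_API_KEY; otherwise the original prompt is returned as-is):
#   polish_prompt("a cat on a roof")                  -> expanded text-to-image prompt
#   polish_prompt("oil paint look", mode="transform") -> style-focused img2img prompt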

# GLM-4V Vision AI functions (runs on CPU - API calls)
def get_glm_client() -> Optional[OpenAI]:
    """Get GLM API client (singleton with timeout)."""
    global _glm_client
    if _glm_client is None:
        api_key = os.environ.get("GLM_API_KEY")
        if not api_key:
            return None
        _glm_client = OpenAI(
            base_url="https://api.z.ai/api/paas/v4",
            api_key=api_key,
            timeout=API_TIMEOUT,
            max_retries=API_MAX_RETRIES,
        )
    return _glm_client


def encode_image_base64(image: Optional[Image.Image]) -> Optional[str]:
    """Convert PIL image to base64 with proper memory cleanup."""
    if image is None:
        return None
    buf = io.BytesIO()
    try:
        image.save(buf, format='JPEG', quality=90)  # JPEG is faster for API calls
        buf.seek(0)
        return base64.b64encode(buf.getvalue()).decode('utf-8')
    finally:
        buf.close()
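# Example: encode_image_base64(Image.new("RGB", (64, 64))) yields a base64 string
# ready to embed in a "data:image/jpeg;base64,..." URL for the vision API call.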


def clean_glm_response(text: str) -> str:
    """Remove GLM special tokens and clean up text."""
    if not text:
        return ""
    text = text.replace('<|begin_of_box|>', '').replace('<|end_of_box|>', '')
    text = text.strip()
    return text


def is_thinking_text(text: str) -> bool:
    """Check if text looks like GLM thinking/reasoning rather than actual content."""
    if not text:
        return True
    text_lower = text.lower().strip()
    # Reject if starts with planning/markdown headers
    planning_starts = (
        '**plan', '## plan', '# plan', 'plan:',
        '**step', '## step', '# step',
        '**analysis', '**approach', '**strategy',
        'here is my', 'here\'s my',
    )
    if any(text_lower.startswith(pat) for pat in planning_starts):
        return True
    # Reject if starts with clear meta-language
    thinking_starts = (
        'let me ', 'i need to', 'i should ', 'i will ', "i'll ",
        'got it', 'okay, ', 'okay ', 'alright, ', 'alright ',
        'the user ', 'the request ', 'based on ', 'following the ',
        'now i ', 'my prompt ', 'for this task', 'considering ',
        'understood', 'i understand', 'sure, ', 'sure ',
        '1. ', '1) ',  # Numbered lists = planning
    )
    if any(text_lower.startswith(pat) for pat in thinking_starts):
        return True
    # Check for planning phrases ANYWHERE in text (these are NEVER in good prompts)
    planning_phrases = (
        'i need to describe', 'i should ', 'i\'ll describe', 'i\'ll keep',
        'i will describe', 'i will keep', 'this includes',
        'the key change', 'key part of the scene', 'is a defining feature',
        'is crucial', 'is important', 'should remain', 'should be',
        '**main subject:**', '**weapon:**', '**setting:**', '**mood:**',
        '**colors', '**lighting', '**plan:**',
    )
    if any(phrase in text_lower for phrase in planning_phrases):
        return True
    return False
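# Examples: is_thinking_text("Let me analyze the image first") -> True (meta-language);
# is_thinking_text("A woman in a red dress under neon rain") -> False (usable prompt).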


def analyze_image_with_glm(image: Optional[Image.Image]) -> str:
    """Analyze image using GLM-4V and return description.
    FIXED: Removed double filtering, lowered thresholds, added debug logging.
    """
    if image is None:
        return "Please upload an image first."
    client = get_glm_client()
    if not client:
        return "GLM API key not configured. Please add GLM_API_KEY to space secrets."
    try:
        base64_image = encode_image_base64(image)
        response = client.chat.completions.create(
            model="glm-4.6v-flash",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                        },
                        {
                            "type": "text",
                            "text": """Write a DETAILED image description. LENGTH: 400-500 TOKENS. This is important - DO NOT stop early, write a FULL detailed description until you reach 500 tokens.
START DIRECTLY with the main subject. NO meta-language, NO preamble.
WRONG starts: "This image shows...", "I can see...", "The image depicts...", "Here is..."
CORRECT starts: "A woman in red dress...", "Golden sunset over mountains...", "Vintage car parked..."
DESCRIBE IN DETAIL (use ALL 400-500 tokens):
- Main subject: appearance, clothing, pose, expression, features
- Setting: environment, location, architecture, objects nearby
- Colors: specific hues, color palette, dominant colors
- Lighting: source, quality, shadows, highlights, time of day
- Textures: materials (silk, metal, wood, fabric, skin)
- Atmosphere: mood, emotion, feeling, energy
- Background: secondary elements, depth, perspective
- Small details: accessories, decorations, patterns
OUTPUT FORMAT: One continuous paragraph, 400-500 tokens. No bullet points, no sections. Keep writing until you reach 500 tokens.
Write the complete detailed description now:"""
                        }
                    ]
                }
            ],
            max_tokens=1000,
        )
        msg = response.choices[0].message
        raw_content = msg.content if msg.content else ""
        # Debug logging
        logger.debug(f"GLM Analyze: raw content length={len(raw_content)}")
        if raw_content:
            logger.debug(f"GLM Analyze preview: {raw_content[:200]}...")
        # For image descriptions, use the FULL content (don't split by paragraphs)
        # Only apply minimal cleaning
        result = clean_glm_response(raw_content)
        # Remove common bad starts but keep the rest
        bad_starts = ('here is', 'here\'s', 'the image shows', 'this image', 'i can see')
        result_lower = result.lower()
        for bad in bad_starts:
            if result_lower.startswith(bad):
                # Find the first period, comma, or colon and start after it
                for i, c in enumerate(result):
                    if c in '.,:' and i < 50:
                        result = result[i+1:].strip()
                        break
                break
        # Strip quotes
        result = result.strip('"\'""')
        # If content is too short, try reasoning_content
        if len(result) < 100:
            if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
                reasoning = clean_glm_response(msg.reasoning_content)
                # Take the longest paragraph from reasoning as fallback
                paragraphs = [p.strip() for p in reasoning.split('\n\n') if len(p.strip()) > 50]
                if paragraphs:
                    longest = max(paragraphs, key=len)
                    if len(longest) > len(result):
                        result = longest.strip('"\'""')
                        logger.debug(f"GLM Analyze: using reasoning content ({len(result)} chars)")
        if result and len(result) >= 50:
            logger.info(f"GLM Analyze: success ({len(result)} chars)")
            return result
        error_details = f"content_len={len(raw_content)}"
        logger.warning(f"GLM Analyze: result too short ({error_details})")
        return f"Description too short ({error_details}). Please try again."
    except Exception as e:
        logger.error(f"GLM Analyze exception: {type(e).__name__}: {str(e)}")
        return f"Error analyzing image: {str(e)}"


def generate_prompt_with_glm(image_description: str, user_request: str, style: str) -> str:
    """Generate transformation prompt using GLM based on image description and user request.
    FIXED: Removed double filtering, lowered thresholds, added debug logging.
    """
    if not image_description or image_description.startswith("Please") or image_description.startswith("Error") or image_description.startswith("GLM API") or image_description.startswith("Could not"):
        return "Please analyze the image first."
    if not user_request or not user_request.strip():
        return "Please describe what changes you want."
    client = get_glm_client()
    if not client:
        return "GLM API key not configured. Please add GLM_API_KEY to space secrets."
    style_hint = f" Style: {style}." if style and style != "None" else ""
    desc = image_description[:MAX_DESCRIPTION_LENGTH] if len(image_description) > MAX_DESCRIPTION_LENGTH else image_description
    try:
        response = client.chat.completions.create(
            model="glm-4.6v-flash",
            messages=[
                {
                    "role": "user",
                    "content": f"""TASK: Write an image prompt describing the FINAL transformed scene.
ORIGINAL: {desc}
CHANGE: {user_request}{style_hint}
CRITICAL OUTPUT RULES:
- Output ONLY the final prompt text (80-120 words)
- Start directly with the main subject (e.g., "A cyberpunk samurai...")
- NO planning, NO thinking, NO explanations, NO numbered lists
- NO phrases like "I will", "I should", "The key change is"
- ONE paragraph describing the final image as if it already exists
OUTPUT THE PROMPT NOW (nothing else):"""
                }
            ],
            max_tokens=1000,
        )
        msg = response.choices[0].message
        raw_content = msg.content if msg.content else ""
        # Debug logging
        logger.debug(f"GLM Prompt: raw content length={len(raw_content)}")
        if raw_content:
            logger.debug(f"GLM Prompt preview: {raw_content[:200]}...")
        # Use FULL content (don't split by paragraphs)
        result = clean_glm_response(raw_content)
        # Remove thinking starts but keep the rest
        result_lower = result.lower()
        bad_starts = ('here is', 'here\'s', 'sure,', 'sure ', 'okay,', 'okay ')
        for bad in bad_starts:
            if result_lower.startswith(bad):
                for i, c in enumerate(result):
                    if c in '.,:' and i < 30:
                        result = result[i+1:].strip()
                        break
                break
        # Check if it's thinking text
        if is_thinking_text(result):
            # Try reasoning_content
            if hasattr(msg, 'reasoning_content') and msg.reasoning_content:
                reasoning = clean_glm_response(msg.reasoning_content)
                paragraphs = [p.strip() for p in reasoning.split('\n\n') if len(p.strip()) > 50 and not is_thinking_text(p)]
                if paragraphs:
                    result = max(paragraphs, key=len)
                    logger.debug(f"GLM Prompt: using reasoning ({len(result)} chars)")
        result = result.strip('"\'""')
        if result and len(result) >= 50:
            logger.info(f"GLM Prompt: success ({len(result)} chars)")
            return result
        error_details = f"content_len={len(raw_content)}"
        logger.warning(f"GLM Prompt: failed ({error_details})")
        return f"Could not generate prompt ({error_details}). Please try again."
    except Exception as e:
        logger.error(f"GLM Prompt exception: {type(e).__name__}: {str(e)}")
        return f"Error: {str(e)}"
| logger.info("Loading Z-Image-Turbo pipeline...") | |
| pipe_t2i = DiffusionPipeline.from_pretrained( | |
| "Tongyi-MAI/Z-Image-Turbo", | |
| torch_dtype=torch.bfloat16, # Set dtype at load time for efficiency | |
| ) | |
| pipe_t2i.to("cuda") | |
| # Enable FlashAttention-3 via kernels library (H100/H200 Hopper GPUs) | |
| try: | |
| pipe_t2i.transformer.set_attention_backend("_flash_3_hub") | |
| logger.info("FlashAttention-3 enabled via kernels library") | |
| except Exception as e: | |
| logger.warning(f"FA3 not available, using default SDPA attention: {e}") | |
| # Enable AoTI for VAE decoder (transformer has incompatible dynamic device access) | |
| try: | |
| pipe_t2i.vae.decode = torch.compile( | |
| pipe_t2i.vae.decode, | |
| mode="reduce-overhead", | |
| ) | |
| logger.info("torch.compile (AoTI) enabled for VAE decoder") | |
| except Exception as e: | |
| logger.warning(f"VAE torch.compile failed: {e}") | |
| # Note: ZImagePipeline custom pipeline doesn't support VAE slicing/tiling optimization | |
| pipe_i2i = ZImageImg2ImgPipeline( | |
| transformer=pipe_t2i.transformer, | |
| vae=pipe_t2i.vae, | |
| text_encoder=pipe_t2i.text_encoder, | |
| tokenizer=pipe_t2i.tokenizer, | |
| scheduler=pipe_t2i.scheduler, | |
| ) | |
| logger.info("Pipelines ready! (TF32 + FA3 + VAE AoTI)") | |

STYLES = ["None", "Photorealistic", "Cinematic", "Anime", "Digital Art",
          "Oil Painting", "Watercolor", "3D Render", "Fantasy", "Sci-Fi"]

STYLE_SUFFIXES = {
    "None": "",
    "Photorealistic": ", photorealistic, ultra detailed, 8k, professional photography",
    "Cinematic": ", cinematic lighting, movie scene, dramatic atmosphere, film grain",
    "Anime": ", anime style, vibrant colors, cel shaded, studio ghibli inspired",
    "Digital Art": ", digital art, artstation trending, concept art, highly detailed",
    "Oil Painting": ", oil painting style, classical art, brush strokes visible",
    "Watercolor": ", watercolor painting, soft edges, artistic, delicate colors",
    "3D Render": ", 3D render, octane render, unreal engine 5, ray tracing",
    "Fantasy": ", fantasy art, magical, ethereal glow, mystical atmosphere",
    "Sci-Fi": ", science fiction, futuristic, advanced technology, neon accents",
}
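# Example of suffix composition: "a castle" + STYLE_SUFFIXES["Fantasy"]
#   -> "a castle, fantasy art, magical, ethereal glow, mystical atmosphere"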

RATIOS = [
    "1:1 Square (1024x1024)", "16:9 Landscape (1344x768)", "9:16 Portrait (768x1344)",
    "4:3 Standard (1152x896)", "3:4 Vertical (896x1152)", "21:9 Cinematic (1536x640)",
    "3:2 Photo (1216x832)", "2:3 Photo Portrait (832x1216)", "1:1 XL (1536x1536)",
    "16:9 XL (1920x1088)", "9:16 XL (1088x1920)", "4:3 XL (1536x1152)",
    "3:4 XL (1152x1536)", "1:1 MAX (2048x2048)", "16:9 MAX (2048x1152)",
    "9:16 MAX (1152x2048)", "4:3 MAX (2048x1536)", "3:4 MAX (1536x2048)",
]

RATIO_DIMS = {
    "1:1 Square (1024x1024)": (1024, 1024), "16:9 Landscape (1344x768)": (1344, 768),
    "9:16 Portrait (768x1344)": (768, 1344), "4:3 Standard (1152x896)": (1152, 896),
    "3:4 Vertical (896x1152)": (896, 1152), "21:9 Cinematic (1536x640)": (1536, 640),
    "3:2 Photo (1216x832)": (1216, 832), "2:3 Photo Portrait (832x1216)": (832, 1216),
    "1:1 XL (1536x1536)": (1536, 1536), "16:9 XL (1920x1088)": (1920, 1088),
    "9:16 XL (1088x1920)": (1088, 1920), "4:3 XL (1536x1152)": (1536, 1152),
    "3:4 XL (1152x1536)": (1152, 1536), "1:1 MAX (2048x2048)": (2048, 2048),
    "16:9 MAX (2048x1152)": (2048, 1152), "9:16 MAX (1152x2048)": (1152, 2048),
    "4:3 MAX (2048x1536)": (2048, 1536), "3:4 MAX (1536x2048)": (1536, 2048),
}
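# All preset dimensions are multiples of IMAGE_ALIGNMENT (16), the same alignment
# the img2img path enforces when resizing uploaded images.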

EXAMPLES_GENERATE = [
    ["Majestic phoenix rising from volcanic flames at midnight, ember particles swirling against a star-filled sky, wings of liquid gold and crimson fire", "Fantasy", "1:1 Square (1024x1024)", 9, 42, True],
    ["Underwater steampunk city with brass submarines and coral-covered clockwork towers, schools of glowing fish swimming through glass tunnels", "Digital Art", "9:16 Portrait (768x1344)", 9, 42, True],
    ["Street food vendor in a bustling night market, steam rising from sizzling woks, colorful paper lanterns illuminating weathered hands preparing dumplings", "Photorealistic", "4:3 Standard (1152x896)", 9, 42, True],
    ["Android geisha performing tea ceremony in a neon-lit zen garden, holographic cherry blossoms falling around chrome kimono", "Sci-Fi", "3:4 Vertical (896x1152)", 9, 42, True],
    ["Venetian masquerade ball at twilight, masked dancers in elaborate baroque costumes twirling beneath frescoed ceilings, candlelight reflecting off gilded mirrors and velvet drapes", "Oil Painting", "4:3 XL (1536x1152)", 9, 42, True],
    ["Colossal ancient tree growing through the ruins of a forgotten temple, roots wrapped around crumbling stone pillars, golden light filtering through the dense canopy as fireflies dance in the mist", "Cinematic", "16:9 XL (1920x1088)", 9, 42, True],
    ["Crystal ice palace floating above frozen tundra, aurora borealis casting ethereal green and purple ribbons across the polar sky, snow wolves howling on distant glaciers below", "Fantasy", "16:9 MAX (2048x1152)", 9, 42, True],
    ["Alchemist laboratory in a medieval tower, bubbling potions in glass vessels connected by copper tubes, scattered grimoires and astronomical instruments, moonlight streaming through a rose window casting prismatic shadows", "Digital Art", "1:1 MAX (2048x2048)", 9, 42, True],
]

EXAMPLES_TRANSFORM = [
    ["Transform into ultra realistic photograph with sharp details and natural lighting", "Photorealistic", 0.7, 9, 42, True],
    ["Dramatic movie scene with cinematic lighting and film grain texture", "Cinematic", 0.65, 9, 42, True],
    ["Japanese anime style with vibrant colors and cel shading", "Anime", 0.75, 9, 42, True],
    ["Digital concept art style, trending on artstation", "Digital Art", 0.6, 9, 42, True],
    ["Classical oil painting with visible brush strokes and rich colors", "Oil Painting", 0.7, 9, 42, True],
    ["Soft watercolor painting with delicate washes and gentle edges", "Watercolor", 0.65, 9, 42, True],
    ["High quality 3D render with ray tracing and realistic materials", "3D Render", 0.7, 9, 42, True],
    ["Magical fantasy art with ethereal glow and mystical atmosphere", "Fantasy", 0.65, 9, 42, True],
    ["Futuristic sci-fi style with neon accents and advanced technology", "Sci-Fi", 0.7, 9, 42, True],
    ["Enhanced version with improved details and quality", "None", 0.4, 9, 42, True],
]


def upload_to_hf_cdn(image: Optional[Image.Image]) -> str:
    """Upload image to HuggingFace CDN with proper memory cleanup."""
    if image is None:
        return "No image to share"
    buf = io.BytesIO()
    try:
        image.save(buf, format='PNG')
        buf.seek(0)
        response = requests.post(
            "https://huggingface.co/uploads",
            headers={"Content-Type": "image/png"},
            data=buf.getvalue(),
            timeout=30,
        )
        if response.status_code == 200:
            return response.text.strip()
        return f"Upload failed: {response.status_code}"
    except requests.Timeout:
        return "Upload timed out. Please try again."
    except Exception as e:
        logger.error(f"upload_to_hf_cdn failed: {type(e).__name__}: {str(e)}")
        return "Upload error. Please try again."
    finally:
        buf.close()


def do_polish_prompt(prompt: str, style: str, do_polish: bool, mode: str = "generate") -> Tuple[str, str]:
    """Polish prompt before generation (runs on CPU, before GPU allocation)."""
    if not prompt or not prompt.strip():
        return "", ""
    base_prompt = prompt.strip()
    if do_polish:
        polished = polish_prompt(base_prompt, mode=mode)
    else:
        polished = base_prompt
    final_prompt = polished + STYLE_SUFFIXES.get(style, "")
    return final_prompt, polished
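# Example (polish disabled, so no API call is made):
#   do_polish_prompt("a cat", "Anime", False)
#   -> ("a cat, anime style, vibrant colors, cel shaded, studio ghibli inspired", "a cat")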


def do_polish_transform_prompt(prompt: str, style: str, do_polish: bool) -> Tuple[str, str]:
    """Polish prompt for transformation (style-focused)."""
    if not do_polish:
        base = prompt.strip() if prompt else "high quality image"
        final = base + STYLE_SUFFIXES.get(style, "")
        return final, ""
    return do_polish_prompt(prompt, style, True, mode="transform")


# =============================================================================
# UNIFIED WRAPPER FUNCTIONS (Fix for race condition with gr.State)
# These combine polish + generate/transform into single atomic operations
# =============================================================================
def generate_with_polish(prompt: str, style: str, do_polish: bool, ratio: str, steps: int, seed: int, randomize: bool):
    """Unified generate with progress feedback using a generator.
    Yields intermediate status updates so the user knows what's happening.
    """
    logger.info(f"generate_with_polish: do_polish={do_polish}, style={style}, prompt_len={len(prompt) if prompt else 0}")
    # Always yield initial status
    if do_polish:
        yield None, "✨ Enhancing prompt with DeepSeek Reasoner...", seed
    else:
        yield None, "🎨 Preparing generation...", seed
    full_prompt, polished_display = do_polish_prompt(prompt, style, do_polish, mode="generate")
    # Show whether enhancement was applied
    if do_polish and polished_display and polished_display != prompt:
        logger.info("generate_with_polish: Prompt+ applied successfully")
    elif do_polish:
        logger.warning("generate_with_polish: Prompt+ was enabled but the prompt is unchanged")
    if not full_prompt.strip():
        yield None, "❌ Empty prompt - please enter a description", seed
        return
    # Show status before GPU generation with the prompt that will be used
    status_prompt = polished_display if polished_display else full_prompt
    yield None, f"🎨 Generating image...\n\n{status_prompt}", seed
    # GPU generation
    image, used_seed = generate(full_prompt, polished_display, ratio, steps, seed, randomize)
    # Final result
    final_display = polished_display if polished_display else full_prompt
    yield image, final_display, used_seed


def transform_with_polish(input_image: Optional[Image.Image], prompt: str, style: str, do_polish: bool, strength: float, steps: int, seed: int, randomize: bool):
    """Unified transform with progress feedback using a generator.
    Yields intermediate status updates so the user knows what's happening.
    """
    logger.info(f"transform_with_polish: do_polish={do_polish}, style={style}, prompt_len={len(prompt) if prompt else 0}")
    if input_image is None:
        yield None, "❌ Please upload an image first", 0
        return
    # Always yield initial status
    if do_polish:
        yield None, "✨ Enhancing prompt with DeepSeek Reasoner...", 0
    else:
        yield None, "🎨 Preparing transformation...", 0
    full_prompt, polished_display = do_polish_transform_prompt(prompt, style, do_polish)
    # Show whether enhancement was applied
    if do_polish and polished_display and polished_display != prompt:
        logger.info("transform_with_polish: Prompt+ applied successfully")
    elif do_polish:
        logger.warning("transform_with_polish: Prompt+ was enabled but the prompt is unchanged")
    # Show status before GPU transform with the prompt that will be used
    status_prompt = polished_display if polished_display else full_prompt
    yield None, f"🎨 Transforming image...\n\n{status_prompt}", 0
    # GPU transform
    image, used_seed = transform(input_image, full_prompt, polished_display, strength, steps, seed, randomize)
    # Final result
    final_display = polished_display if polished_display else full_prompt
    yield image, final_display, used_seed
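# Because these wrappers are generators, Gradio streams each yielded
# (image, status_text, seed) tuple to the outputs, so the user sees status updates
# before the blocking GPU call returns the final image.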


@spaces.GPU  # ZeroGPU: CUDA work must run inside a spaces.GPU-decorated function
def generate(full_prompt: str, polished_display: str, ratio: str, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
    """Generate image from text prompt."""
    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    if not full_prompt.strip():
        return None, seed
    try:
        w, h = RATIO_DIMS.get(ratio, (1024, 1024))
        generator = torch.Generator("cuda").manual_seed(seed)
        image = pipe_t2i(
            prompt=full_prompt,
            height=h,
            width=w,
            num_inference_steps=int(steps),
            guidance_scale=0.0,
            generator=generator,
        ).images[0]
        # Force PNG format for MCP server output
        png_path = os.path.join(tempfile.gettempdir(), f"z_gen_{seed}.png")
        image.save(png_path, format="PNG")
        return Image.open(png_path), seed
    except Exception as e:
        logger.error(f"Generation failed: {type(e).__name__}: {str(e)}")
        return None, seed


@spaces.GPU  # ZeroGPU: CUDA work must run inside a spaces.GPU-decorated function
def transform(input_image: Optional[Image.Image], full_prompt: str, polished_display: str, strength: float, steps: int, seed: int, randomize: bool, progress=gr.Progress(track_tqdm=True)) -> Tuple[Optional[Image.Image], int]:
    """Transform image using prompt guidance."""
    if input_image is None:
        return None, 0
    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    if not full_prompt.strip():
        full_prompt = "high quality image, enhanced details"
    try:
        input_image = input_image.convert("RGB")
        w, h = input_image.size
        w = (w // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        h = (h // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        w = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, w))
        h = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, h))
        input_image = input_image.resize((w, h), Image.Resampling.BILINEAR)
        strength = float(strength)
        effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps)
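        # Diffusers img2img schedules num_inference_steps but only executes roughly
        # strength * num_inference_steps of them, so dividing by strength keeps the
        # executed count near the requested value (steps=9, strength=0.6 -> 15 scheduled, ~9 run).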
        generator = torch.Generator("cuda").manual_seed(seed)
        image = pipe_i2i(
            prompt=full_prompt,
            image=input_image,
            strength=strength,
            num_inference_steps=effective_steps,
            guidance_scale=0.0,
            generator=generator,
        ).images[0]
        # Force PNG format for MCP server output
        png_path = os.path.join(tempfile.gettempdir(), f"z_trans_{seed}.png")
        image.save(png_path, format="PNG")
        return Image.open(png_path), seed
    except Exception as e:
        logger.error(f"Transform failed: {type(e).__name__}: {str(e)}")
        return None, seed


# =============================================================================
# MCP-FRIENDLY WRAPPER FUNCTIONS
# These functions expose all parameters directly for MCP server compatibility
# =============================================================================
@spaces.GPU  # ZeroGPU: CUDA work must run inside a spaces.GPU-decorated function
def mcp_generate(prompt: str, style: str = "None", ratio: str = "1:1 Square (1024x1024)",
                 steps: int = 9, seed: int = 42, randomize: bool = True) -> Tuple[Optional[Image.Image], int]:
    """MCP-friendly image generation. Takes the prompt directly and applies the style suffix (no LLM polish)."""
    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    if not prompt or not prompt.strip():
        return None, seed
    # Apply style suffix
    full_prompt = prompt.strip() + STYLE_SUFFIXES.get(style, "")
    try:
        w, h = RATIO_DIMS.get(ratio, (1024, 1024))
        generator = torch.Generator("cuda").manual_seed(seed)
        image = pipe_t2i(
            prompt=full_prompt,
            height=h,
            width=w,
            num_inference_steps=int(steps),
            guidance_scale=0.0,
            generator=generator,
        ).images[0]
        # Force PNG format for MCP server output
        png_path = os.path.join(tempfile.gettempdir(), f"z_mcp_gen_{seed}.png")
        image.save(png_path, format="PNG")
        return Image.open(png_path), seed
    except Exception as e:
        logger.error(f"MCP Generate failed: {type(e).__name__}: {str(e)}")
        return None, seed


@spaces.GPU  # ZeroGPU: CUDA work must run inside a spaces.GPU-decorated function
def mcp_transform(image: Optional[Image.Image], prompt: str, style: str = "None",
                  strength: float = 0.6, steps: int = 9, seed: int = 42,
                  randomize: bool = True) -> Tuple[Optional[Image.Image], int]:
    """MCP-friendly image transformation. Takes all parameters directly."""
    if image is None:
        return None, 0
    if randomize:
        seed = torch.randint(0, 2**32 - 1, (1,)).item()
    seed = int(seed)
    # Apply style suffix
    full_prompt = (prompt.strip() if prompt else "high quality image") + STYLE_SUFFIXES.get(style, "")
    try:
        image = image.convert("RGB")
        w, h = image.size
        w = (w // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        h = (h // IMAGE_ALIGNMENT) * IMAGE_ALIGNMENT
        w = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, w))
        h = max(MIN_IMAGE_DIM, min(MAX_IMAGE_DIM, h))
        image = image.resize((w, h), Image.Resampling.BILINEAR)
        strength = float(strength)
        effective_steps = max(4, int(steps / strength)) if strength > 0 else int(steps)
        generator = torch.Generator("cuda").manual_seed(seed)
        result = pipe_i2i(
            prompt=full_prompt,
            image=image,
            strength=strength,
            num_inference_steps=effective_steps,
            guidance_scale=0.0,
            generator=generator,
        ).images[0]
        # Force PNG format for MCP server output
        png_path = os.path.join(tempfile.gettempdir(), f"z_mcp_trans_{seed}.png")
        result.save(png_path, format="PNG")
        return Image.open(png_path), seed
    except Exception as e:
        logger.error(f"MCP Transform failed: {type(e).__name__}: {str(e)}")
        return None, seed
| css = r""" | |
| /* Google Fonts for multilingual support */ | |
| @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Noto+Sans+Arabic:wght@400;500;600;700&family=Noto+Sans+Devanagari:wght@400;500;600;700&display=swap'); | |
| :root { | |
| --bg-primary: #0c0c0e; | |
| --bg-secondary: #141416; | |
| --bg-tertiary: #1c1c20; | |
| --surface: #232328; | |
| --surface-hover: #2a2a30; | |
| --accent-primary: #818cf8; | |
| --accent-secondary: #a78bfa; | |
| --accent-hover: #6366f1; | |
| --accent-gradient: linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%); | |
| --accent-glow: rgba(99, 102, 241, 0.4); | |
| --text-primary: #f4f4f5; | |
| --text-secondary: #a1a1aa; | |
| --text-muted: #71717a; | |
| --border-subtle: rgba(255, 255, 255, 0.08); | |
| --border-default: rgba(255, 255, 255, 0.12); | |
| --success: #10b981; | |
| --warning: #f59e0b; | |
| --error: #ef4444; | |
| --shadow-sm: 0 1px 2px rgba(0,0,0,0.3); | |
| --shadow-md: 0 4px 6px -1px rgba(0,0,0,0.4); | |
| --shadow-lg: 0 10px 15px -3px rgba(0,0,0,0.5); | |
| --shadow-glow: 0 0 20px var(--accent-glow); | |
| --radius-sm: 8px; | |
| --radius-md: 12px; | |
| --radius-lg: 16px; | |
| --transition: 0.2s ease; | |
| /* Font stacks */ | |
| --font-latin: 'Inter', -apple-system, BlinkMacSystemFont, system-ui, sans-serif; | |
| --font-arabic: 'Noto Sans Arabic', 'Tahoma', sans-serif; | |
| --font-hindi: 'Noto Sans Devanagari', 'Mangal', sans-serif; | |
| } | |
| /* Arabic font */ | |
| .lang-ar, .lang-ar * { font-family: var(--font-arabic) !important; } | |
| /* Hindi font */ | |
| .lang-hi, .lang-hi * { font-family: var(--font-hindi) !important; } | |
| /* RTL Support for Arabic */ | |
| [dir="rtl"], .rtl { direction: rtl; text-align: right; } | |
| [dir="rtl"] .tab-nav { flex-direction: row-reverse; } | |
| [dir="rtl"] .gr-row, [dir="rtl"] [class*="row"] { flex-direction: row-reverse; } | |
| [dir="rtl"] input, [dir="rtl"] textarea { text-align: right; direction: rtl; } | |
| [dir="rtl"] input[type="number"] { direction: ltr; text-align: left; } | |
| [dir="rtl"] label, [dir="rtl"] .gr-label { text-align: right; } | |
| [dir="rtl"] .gr-checkbox { flex-direction: row-reverse; } | |
| [dir="rtl"] .gr-slider { direction: ltr; } | |
| [dir="rtl"] .gr-markdown ul, [dir="rtl"] .gr-markdown ol { padding-left: 0; padding-right: 1.5em; } | |
| /* Language selector in header */ | |
| .lang-selector-row { display: flex; justify-content: flex-end; margin-bottom: 8px; } | |
| [dir="rtl"] .lang-selector-row { justify-content: flex-start; } | |
| .gradio-container { | |
| background: var(--bg-primary) !important; | |
| min-height: 100vh; | |
| color: var(--text-primary); | |
| } | |
| .tabs { background: transparent !important; padding: 8px 0; } | |
| .tab-nav { | |
| background: var(--bg-secondary) !important; | |
| border: 1px solid var(--border-subtle) !important; | |
| border-radius: var(--radius-lg); | |
| padding: 6px; | |
| gap: 6px; | |
| margin-bottom: 20px; | |
| display: flex; | |
| justify-content: center; | |
| flex-wrap: wrap; | |
| } | |
| .tab-nav > button { | |
| background: transparent !important; | |
| color: var(--text-secondary) !important; | |
| border: none !important; | |
| border-radius: var(--radius-md); | |
| padding: 12px 24px; | |
| font-weight: 500; | |
| font-size: 0.95rem; | |
| cursor: pointer; | |
| transition: all var(--transition); | |
| } | |
| .tab-nav > button:hover { | |
| background: var(--bg-tertiary) !important; | |
| color: var(--text-primary) !important; | |
| } | |
| .tab-nav > button.selected, | |
| .tab-nav > button[aria-selected="true"], | |
| [role="tab"][aria-selected="true"] { | |
| background: var(--accent-gradient) !important; | |
| color: white !important; | |
| font-weight: 600; | |
| box-shadow: var(--shadow-glow); | |
| } | |
| button.primary, .primary { | |
| background: var(--accent-gradient) !important; | |
| border: none !important; | |
| border-radius: var(--radius-md); | |
| font-weight: 600; | |
| padding: 12px 24px; | |
| color: white !important; | |
| cursor: pointer; | |
| transition: all var(--transition); | |
| box-shadow: var(--shadow-md); | |
| } | |
| button.primary:hover, .primary:hover { | |
| box-shadow: var(--shadow-glow), var(--shadow-lg); | |
| filter: brightness(1.1); | |
| } | |
| button.secondary, .secondary { | |
| background: var(--surface) !important; | |
| color: var(--text-primary) !important; | |
| border: 1px solid var(--border-default) !important; | |
| border-radius: var(--radius-sm); | |
| cursor: pointer; | |
| transition: all var(--transition); | |
| } | |
| button.secondary:hover, .secondary:hover { | |
| background: var(--surface-hover) !important; | |
| border-color: var(--accent-primary) !important; | |
| } | |
| .block { | |
| background: var(--bg-secondary) !important; | |
| border: 1px solid var(--border-subtle) !important; | |
| border-radius: var(--radius-lg) !important; | |
| box-shadow: var(--shadow-sm); | |
| padding: 20px; | |
| margin: 8px 0; | |
| transition: all var(--transition); | |
| } | |
| .tabitem { background: transparent !important; padding: 16px 0; } | |
| input, textarea, .gr-input, .gr-textbox textarea { | |
| background: var(--bg-tertiary) !important; | |
| border: 1px solid var(--border-default) !important; | |
| border-radius: var(--radius-sm) !important; | |
| color: var(--text-primary) !important; | |
| transition: all var(--transition); | |
| } | |
| input:focus, textarea:focus { | |
| border-color: var(--accent-primary) !important; | |
| box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important; | |
| outline: none !important; | |
| } | |
| .gr-dropdown, select { | |
| background: var(--bg-tertiary) !important; | |
| border: 1px solid var(--border-default) !important; | |
| border-radius: var(--radius-sm) !important; | |
| color: var(--text-primary) !important; | |
| } | |
| .gr-slider input[type="range"] { accent-color: var(--accent-primary); } | |
| .gr-checkbox input[type="checkbox"] { accent-color: var(--accent-primary); } | |
| label, .gr-label { color: var(--text-secondary) !important; font-weight: 500; } | |
| .gr-image, .image-container { | |
| background: var(--bg-tertiary) !important; | |
| border: 2px dashed var(--border-default) !important; | |
| border-radius: var(--radius-lg) !important; | |
| transition: all var(--transition); | |
| } | |
| .gr-image:hover { border-color: var(--accent-primary) !important; } | |
| .gr-image img { border-radius: var(--radius-md); } | |
| /* Examples table - Dark theme (stable selectors only) */ | |
| .examples, .gr-examples, [class*="example"], [class*="Example"], | |
| div[class*="example"], div[class*="sample"], .sample-table, | |
| [data-testid="examples"], [data-testid*="example"] { | |
| background: var(--bg-secondary) !important; | |
| border-radius: var(--radius-lg) !important; | |
| } | |
| /* Table itself */ | |
| .examples table, .gr-examples table, [class*="example"] table, | |
| [data-testid="examples"] table { | |
| background: var(--bg-secondary) !important; | |
| border-collapse: collapse !important; | |
| width: 100% !important; | |
| } | |
| /* All rows */ | |
| .examples tr, .gr-examples tr, [class*="example"] tr, | |
| [data-testid="examples"] tr { | |
| background: var(--bg-secondary) !important; | |
| border-bottom: 1px solid var(--border-default) !important; | |
| } | |
| /* Row hover */ | |
| .examples tr:hover, .gr-examples tr:hover, [class*="example"] tr:hover, | |
| [data-testid="examples"] tr:hover { | |
| background: var(--surface) !important; | |
| } | |
| /* Table cells */ | |
| .examples td, .gr-examples td, [class*="example"] td, | |
| [data-testid="examples"] td { | |
| color: var(--text-secondary) !important; | |
| background: transparent !important; | |
| } | |
| /* First column (prompts) - emphasized */ | |
| .examples td:first-child, [class*="example"] td:first-child, | |
| [data-testid="examples"] td:first-child { | |
| color: var(--text-primary) !important; | |
| font-weight: 500 !important; | |
| } | |
| /* Headers */ | |
| .examples th, .gr-examples th, [class*="example"] th, | |
| [data-testid="examples"] th { | |
| background: var(--surface) !important; | |
| color: var(--text-primary) !important; | |
| font-weight: 600 !important; | |
| border-bottom: 1px solid var(--border-default) !important; | |
| } | |
| /* Wrapper divs */ | |
| .examples > div, [class*="example"] > div { | |
| background: var(--bg-secondary) !important; | |
| } | |
| h1, h2, h3, h4 { color: var(--text-primary) !important; } | |
| h1 { font-size: clamp(1.5rem, 4vw, 2.2rem); font-weight: 700; } | |
| .markdown-text, .gr-markdown { color: var(--text-secondary) !important; } | |
| .gr-markdown a { color: var(--accent-primary) !important; } | |
| .gr-group { | |
| background: var(--surface) !important; | |
| border: 1px solid var(--border-subtle) !important; | |
| border-radius: var(--radius-lg) !important; | |
| padding: 16px !important; | |
| } | |
| .gr-accordion { | |
| background: var(--bg-secondary) !important; | |
| border: 1px solid var(--border-subtle) !important; | |
| border-radius: var(--radius-md) !important; | |
| } | |
| .footer-no-box { background: transparent !important; border: none !important; box-shadow: none !important; padding: 0; } | |
| .gradio-container > footer { | |
| background: var(--bg-secondary) !important; | |
| border-top: 1px solid var(--border-subtle) !important; | |
| padding: 12px 20px; | |
| } | |
| .gradio-container > footer span, .gradio-container > footer p { color: var(--text-muted) !important; } | |
| .gradio-container > footer a { color: var(--accent-primary) !important; } | |
| .progress-bar { background: var(--bg-tertiary) !important; border-radius: 4px; } | |
| .progress-bar > div { background: var(--accent-gradient) !important; border-radius: 4px; } | |
| @media (prefers-reduced-motion: reduce) { | |
| *, *::before, *::after { animation-duration: 0.01ms !important; transition-duration: 0.01ms !important; } | |
| } | |
| @media (max-width: 768px) { | |
| .tab-nav { padding: 4px; gap: 4px; } | |
| .tab-nav > button { padding: 10px 16px; font-size: 0.85rem; } | |
| .block { padding: 12px; margin: 6px 0; } | |
| button.primary { padding: 10px 16px; width: 100%; } | |
| h1 { font-size: 1.4rem !important; } | |
| } | |
| /* Accessibility - keyboard focus indicators */ | |
| button:focus-visible, input:focus-visible, textarea:focus-visible, | |
| select:focus-visible, [role="button"]:focus-visible, | |
| .gr-image:focus-visible, [role="tab"]:focus-visible { | |
| outline: 2px solid var(--accent-primary) !important; | |
| outline-offset: 2px !important; | |
| } | |
| ::-webkit-scrollbar { width: 8px; height: 8px; } | |
| ::-webkit-scrollbar-track { background: var(--bg-secondary); } | |
| ::-webkit-scrollbar-thumb { background: var(--bg-tertiary); border-radius: 4px; } | |
| ::-webkit-scrollbar-thumb:hover { background: var(--surface); } | |
| /* Tab navigation text */ | |
| .tab-nav button, .tab-nav > button, button[role="tab"], .tabs button { color: var(--text-primary) !important; } | |
| /* Labels and spans */ | |
| label, .gr-label, .label-wrap, .label-wrap span, .gr-box label, .gr-form label, .gr-group label { color: var(--text-secondary) !important; } | |
| .gr-block span, .gr-box span, .gr-form span, .gr-group span, .block span { color: var(--text-secondary) !important; } | |
| /* Table overrides */ | |
| table thead, table thead tr, table thead th, [class*="examples"] thead th { background: var(--surface) !important; color: var(--text-primary) !important; } | |
| table tbody td, [class*="examples"] td { color: var(--text-secondary) !important; } | |
| /* Accordion and markdown */ | |
| .gr-accordion summary, .gr-accordion button, details summary, summary span { color: var(--text-primary) !important; } | |
| .gr-markdown, .gr-markdown p, .gr-markdown li, .markdown-text, .prose { color: var(--text-secondary) !important; } | |
| /* Input placeholders and buttons */ | |
| input::placeholder, textarea::placeholder { color: var(--text-muted) !important; } | |
| button.secondary, .secondary { color: var(--text-primary) !important; } | |
| /* Dropdown menus - dark theme */ | |
| .gr-dropdown ul, .gr-dropdown li, [data-testid="dropdown"] ul, | |
| .svelte-select-list, .dropdown-menu, select option, | |
| [role="listbox"], [role="listbox"] [role="option"] { | |
| background: var(--bg-tertiary) !important; | |
| color: var(--text-primary) !important; | |
| } | |
| /* Dropdown hover/selected states */ | |
| .gr-dropdown li:hover, select option:hover, | |
| [role="option"]:hover, [role="option"][aria-selected="true"] { | |
| background: var(--surface) !important; | |
| } | |
| /* Portal dropdowns (rendered outside .gradio-container) */ | |
| [data-testid="dropdown-list"], | |
| [role="listbox"]:not(.gradio-container [role="listbox"]) { | |
| background-color: var(--bg-tertiary) !important; | |
| color: var(--text-primary) !important; | |
| border: 1px solid var(--border-default) !important; | |
| border-radius: var(--radius-sm) !important; | |
| } | |
| /* Slider and checkbox labels */ | |
| .gr-slider span, .gr-slider output, .range-wrap span, | |
| input[type="range"] + span { color: var(--text-primary) !important; } | |
| .gr-checkbox label, .gr-checkbox span, | |
| input[type="checkbox"] + span { color: var(--text-secondary) !important; } | |
| /* Image upload text */ | |
| .gr-image span, .gr-image p, .upload-text, | |
| [data-testid="image"] span { color: var(--text-secondary) !important; } | |
| .gr-image svg, .upload-icon { fill: var(--text-muted) !important; } | |
| /* Error/warning states */ | |
| .gr-error, [class*="error"] { | |
| background: rgba(239,68,68,0.15) !important; | |
| color: var(--error) !important; | |
| border-color: var(--error) !important; | |
| } | |
| .gr-info, [class*="info-msg"] { | |
| background: rgba(129,140,248,0.15) !important; | |
| color: var(--accent-primary) !important; | |
| } | |
| /* Copy buttons and icons */ | |
| .gr-textbox button, button svg, .copy-button { | |
| color: var(--text-secondary) !important; | |
| fill: var(--text-secondary) !important; | |
| } | |
| .gr-textbox button:hover { color: var(--text-primary) !important; } | |
| /* Tooltips */ | |
| [role="tooltip"], .gr-tooltip, .tooltip { | |
| background: var(--surface) !important; | |
| color: var(--text-primary) !important; | |
| border: 1px solid var(--border-default) !important; | |
| } | |
| /* Progress/loading text */ | |
| .progress-text, .loading-text, [class*="loading"] span, | |
| [class*="progress"] span { color: var(--text-secondary) !important; } | |
| /* Number input spinners */ | |
| input[type="number"]::-webkit-inner-spin-button, | |
| input[type="number"]::-webkit-outer-spin-button { filter: invert(0.8); } | |
| """ | |
| # Create custom dark theme | |
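| # The .set() overrides below target Gradio Base-theme variables; each *_dark | |
| # twin mirrors its light value so the UI stays dark regardless of the | |
| # visitor's system color scheme. | |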
| dark_theme = gr.themes.Base( | |
| primary_hue=gr.themes.colors.indigo, | |
| secondary_hue=gr.themes.colors.purple, | |
| neutral_hue=gr.themes.colors.zinc, | |
| ).set( | |
| # Backgrounds | |
| body_background_fill="#0c0c0e", | |
| body_background_fill_dark="#0c0c0e", | |
| background_fill_primary="#141416", | |
| background_fill_primary_dark="#141416", | |
| background_fill_secondary="#1c1c20", | |
| background_fill_secondary_dark="#1c1c20", | |
| # Borders | |
| border_color_primary="rgba(255,255,255,0.12)", | |
| border_color_primary_dark="rgba(255,255,255,0.12)", | |
| # Text | |
| body_text_color="#e5e5e5", | |
| body_text_color_dark="#e5e5e5", | |
| body_text_color_subdued="#a1a1aa", | |
| body_text_color_subdued_dark="#a1a1aa", | |
| # Blocks | |
| block_background_fill="#141416", | |
| block_background_fill_dark="#141416", | |
| block_border_color="rgba(255,255,255,0.08)", | |
| block_border_color_dark="rgba(255,255,255,0.08)", | |
| block_label_background_fill="#1c1c20", | |
| block_label_background_fill_dark="#1c1c20", | |
| block_label_text_color="#a1a1aa", | |
| block_label_text_color_dark="#a1a1aa", | |
| # Inputs | |
| input_background_fill="#1c1c20", | |
| input_background_fill_dark="#1c1c20", | |
| input_border_color="rgba(255,255,255,0.12)", | |
| input_border_color_dark="rgba(255,255,255,0.12)", | |
| # Buttons | |
| button_primary_background_fill="linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)", | |
| button_primary_background_fill_dark="linear-gradient(135deg, #6366f1 0%, #8b5cf6 100%)", | |
| button_primary_text_color="white", | |
| button_primary_text_color_dark="white", | |
| button_secondary_background_fill="#232328", | |
| button_secondary_background_fill_dark="#232328", | |
| button_secondary_text_color="#e5e5e5", | |
| button_secondary_text_color_dark="#e5e5e5", | |
| # Table/Examples - CRITICAL for fixing white background | |
| table_even_background_fill="#1a1a1e", | |
| table_even_background_fill_dark="#1a1a1e", | |
| table_odd_background_fill="#1a1a1e", | |
| table_odd_background_fill_dark="#1a1a1e", | |
| table_row_focus="#252528", | |
| table_row_focus_dark="#252528", | |
| ) | |
| with gr.Blocks(title="Z Image Turbo", css=css, theme=dark_theme) as demo: | |
| # Language selector at top | |
| with gr.Row(elem_classes="lang-selector-row"): | |
| lang_selector = gr.Dropdown( | |
| choices=LANGUAGES, | |
| value="English", | |
| label="🌐 Language", | |
| scale=0, | |
| min_width=160, | |
| interactive=True | |
| ) | |
| gr.HTML(""" | |
| <div style="text-align: center; padding: 8px 16px 16px 16px;"> | |
| <h1 style="background: linear-gradient(135deg, #818cf8 0%, #a78bfa 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; font-size: clamp(1.5rem, 4vw, 2.2rem); margin-bottom: 8px; font-weight: 700;"> | |
| Z-Image Turbo + GLM-4.6V / DeepSeek-3.2 Thinking | |
| </h1> | |
| <p style="color: #a1a1aa; font-size: 1rem; margin: 0;"> | |
| Image Gen & Edit with GLM-4.6V + DeepSeek-3.2 | |
| </p> | |
| <p style="color: #ef4444; font-size: 0.95rem; margin-top: 12px; font-weight: 500;"> | |
| If you liked it, please ❤️ like it. Thank you! | |
| </p> | |
| </div> | |
| <script> | |
| // RTL toggle based on language | |
| document.addEventListener('DOMContentLoaded', function() { | |
| const observer = new MutationObserver(function(mutations) { | |
| const dropdown = document.querySelector('.lang-selector-row select, .lang-selector-row input'); | |
| // Guard: the observer fires on every DOM change, so bind the listener once | |
| if (dropdown && !dropdown.dataset.rtlBound) { | |
| dropdown.dataset.rtlBound = '1'; | |
| const checkLang = () => { | |
| const val = dropdown.value || ''; | |
| const html = document.documentElement; | |
| const body = document.body; | |
| if (val.includes('العربية')) { | |
| html.setAttribute('dir', 'rtl'); | |
| body.classList.add('rtl', 'lang-ar'); | |
| body.classList.remove('lang-hi'); | |
| } else if (val.includes('हिंदी')) { | |
| html.removeAttribute('dir'); | |
| body.classList.remove('rtl', 'lang-ar'); | |
| body.classList.add('lang-hi'); | |
| } else { | |
| html.removeAttribute('dir'); | |
| body.classList.remove('rtl', 'lang-ar', 'lang-hi'); | |
| } | |
| }; | |
| dropdown.addEventListener('change', checkLang); | |
| checkLang(); | |
| } | |
| }); | |
| observer.observe(document.body, { childList: true, subtree: true }); | |
| }); | |
| </script> | |
| """) | |
| with gr.Tabs(): | |
| # TAB 1: Generate Image | |
| with gr.Tab("Generate"): | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| gen_prompt = gr.Textbox(label="Prompt", placeholder="Describe your image in detail...", lines=4) | |
| gen_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False) | |
| with gr.Row(): | |
| gen_style = gr.Dropdown(choices=STYLES, value="None", label="Style") | |
| gen_ratio = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)", label="Aspect Ratio") | |
| with gr.Accordion("Advanced Settings", open=False): | |
| gen_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps") | |
| with gr.Row(): | |
| gen_seed = gr.Number(label="Seed", value=42, precision=0) | |
| gen_randomize = gr.Checkbox(label="Random Seed", value=True) | |
| gen_btn = gr.Button("Generate", variant="primary", size="lg") | |
| with gr.Column(scale=3): | |
| gen_output = gr.Image(label="Generated Image", type="pil", interactive=False, height=512) | |
| gen_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4) | |
| gen_seed_out = gr.Number(label="Seed Used", interactive=False) | |
| with gr.Row(): | |
| gen_share_btn = gr.Button("Share", variant="secondary") | |
| gen_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False) | |
| gr.Examples(examples=EXAMPLES_GENERATE, inputs=[gen_prompt, gen_style, gen_ratio, gen_steps, gen_seed, gen_randomize]) | |
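| # Each row in EXAMPLES_GENERATE must supply one value per component listed | |
| # in `inputs`, in the same order. | |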
| gen_btn.click( | |
| fn=generate_with_polish, | |
| inputs=[gen_prompt, gen_style, gen_polish, gen_ratio, gen_steps, gen_seed, gen_randomize], | |
| outputs=[gen_output, gen_polished_prompt, gen_seed_out] | |
| ) | |
| gen_prompt.submit( | |
| fn=generate_with_polish, | |
| inputs=[gen_prompt, gen_style, gen_polish, gen_ratio, gen_steps, gen_seed, gen_randomize], | |
| outputs=[gen_output, gen_polished_prompt, gen_seed_out] | |
| ) | |
| gen_share_btn.click(fn=upload_to_hf_cdn, inputs=[gen_output], outputs=[gen_share_link]) | |
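| # The click/submit pairs above (and the analogous pair in the Transform tab) | |
| # could each be wired once; a sketch using gr.on, available in Gradio 4+: | |
| # | |
| #   gr.on( | |
| #       triggers=[gen_btn.click, gen_prompt.submit], | |
| #       fn=generate_with_polish, | |
| #       inputs=[gen_prompt, gen_style, gen_polish, gen_ratio, gen_steps, gen_seed, gen_randomize], | |
| #       outputs=[gen_output, gen_polished_prompt, gen_seed_out], | |
| #   ) | |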
| # TAB 2: AI Vision Assistant | |
| with gr.Tab("AI Assistant"): | |
| ai_desc_md = gr.Markdown("**AI-Powered Prompt Generator** - Upload an image, analyze it with GLM-4.6V, then generate optimized prompts.") | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| ai_image = gr.Image(label="Upload Image", type="pil", height=300) | |
| ai_analyze_btn = gr.Button("Analyze Image", variant="primary") | |
| ai_description = gr.Textbox(label="Image Description", lines=5, interactive=False) | |
| with gr.Column(scale=1): | |
| ai_request = gr.Textbox(label="What changes do you want?", placeholder="e.g., 'watercolor style' or 'dramatic sunset lighting'", lines=2) | |
| ai_style = gr.Dropdown(choices=STYLES, value="None", label="Target Style") | |
| ai_generate_btn = gr.Button("Generate Prompt", variant="primary") | |
| ai_generated_prompt = gr.Textbox(label="Generated Prompt", lines=6, interactive=False) | |
| ai_send_btn = gr.Button("Send to Transform Tab", variant="primary") | |
| with gr.Accordion("How to Use", open=False): | |
| ai_howto_md = gr.Markdown(""" | |
| 1. **Upload** an image and click "Analyze Image" | |
| 2. **Describe** the changes you want | |
| 3. **Generate** an optimized prompt | |
| 4. **Send** to Transform tab to apply changes | |
| """) | |
| ai_analyze_btn.click( | |
| fn=analyze_image_with_glm, | |
| inputs=[ai_image], | |
| outputs=[ai_description] | |
| ) | |
| ai_generate_btn.click( | |
| fn=generate_prompt_with_glm, | |
| inputs=[ai_description, ai_request, ai_style], | |
| outputs=[ai_generated_prompt] | |
| ) | |
| # TAB 3: Transform Image | |
| with gr.Tab("Transform"): | |
| trans_desc_md = gr.Markdown("**Transform your image** - Upload and describe the transformation. Lower strength = subtle, higher = dramatic.") | |
| with gr.Row(): | |
| with gr.Column(scale=2): | |
| trans_input = gr.Image(label="Upload Image", type="pil", height=300) | |
| trans_prompt = gr.Textbox(label="Transformation Prompt", placeholder="e.g., 'oil painting style, vibrant colors'", lines=3) | |
| trans_polish = gr.Checkbox(label="Prompt+ by deepseek-reasoner", value=False) | |
| with gr.Row(): | |
| trans_style = gr.Dropdown(choices=STYLES, value="None", label="Style") | |
| trans_strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, step=0.05, label="Strength") | |
| with gr.Accordion("Advanced Settings", open=False): | |
| trans_steps = gr.Slider(minimum=4, maximum=16, value=9, step=1, label="Steps") | |
| with gr.Row(): | |
| trans_seed = gr.Number(label="Seed", value=42, precision=0) | |
| trans_randomize = gr.Checkbox(label="Random Seed", value=True) | |
| trans_btn = gr.Button("Transform", variant="primary", size="lg") | |
| with gr.Column(scale=3): | |
| trans_output = gr.Image(label="Transformed Image", type="pil", interactive=False, height=512) | |
| trans_polished_prompt = gr.Textbox(label="Enhanced Prompt", interactive=False, visible=True, lines=4) | |
| trans_seed_out = gr.Number(label="Seed Used", interactive=False) | |
| with gr.Row(): | |
| trans_share_btn = gr.Button("Share", variant="secondary") | |
| trans_share_link = gr.Textbox(label="", interactive=False, show_copy_button=True, show_label=False) | |
| with gr.Accordion("Example Prompts", open=False): | |
| gr.Examples(examples=EXAMPLES_TRANSFORM, inputs=[trans_prompt, trans_style, trans_strength, trans_steps, trans_seed, trans_randomize]) | |
| trans_btn.click( | |
| fn=transform_with_polish, | |
| inputs=[trans_input, trans_prompt, trans_style, trans_polish, trans_strength, trans_steps, trans_seed, trans_randomize], | |
| outputs=[trans_output, trans_polished_prompt, trans_seed_out] | |
| ) | |
| trans_prompt.submit( | |
| fn=transform_with_polish, | |
| inputs=[trans_input, trans_prompt, trans_style, trans_polish, trans_strength, trans_steps, trans_seed, trans_randomize], | |
| outputs=[trans_output, trans_polished_prompt, trans_seed_out] | |
| ) | |
| trans_share_btn.click(fn=upload_to_hf_cdn, inputs=[trans_output], outputs=[trans_share_link]) | |
| # Cross-tab handler | |
| ai_send_btn.click( | |
| fn=lambda prompt, img: (prompt, img), | |
| inputs=[ai_generated_prompt, ai_image], | |
| outputs=[trans_prompt, trans_input] | |
| ) | |
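| # Contract: change_language must return one gr.update(...) per component | |
| # listed in `outputs` below - 36 updates total (12 + 10 + 14), in order. | |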
| # Language selector - update all UI labels when language changes | |
| lang_selector.change( | |
| fn=change_language, | |
| inputs=[lang_selector], | |
| outputs=[ | |
| # Generate tab (12 components) | |
| gen_prompt, gen_polish, gen_style, gen_ratio, gen_steps, gen_seed, | |
| gen_randomize, gen_btn, gen_output, gen_polished_prompt, gen_seed_out, gen_share_btn, | |
| # AI Assistant tab (10 components) | |
| ai_desc_md, ai_image, ai_analyze_btn, ai_description, ai_request, ai_style, | |
| ai_generate_btn, ai_generated_prompt, ai_send_btn, ai_howto_md, | |
| # Transform tab (14 components) | |
| trans_desc_md, trans_input, trans_prompt, trans_polish, trans_style, trans_strength, | |
| trans_steps, trans_seed, trans_randomize, trans_btn, trans_output, trans_polished_prompt, | |
| trans_seed_out, trans_share_btn, | |
| ] | |
| ) | |
| gr.HTML( | |
| """ | |
| <div style="text-align: center; width: 100%; font-size: 0.9rem; padding: 1rem; margin-top: 1.5rem; background: #141416; border: 1px solid rgba(255,255,255,0.08); border-radius: 12px; color: #71717a;"> | |
| <div style="margin-bottom: 8px;"> | |
| <strong style="color: #a1a1aa;">Image Generation:</strong> | |
| <a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" target="_blank" style="color: #818cf8; font-weight: 500;">Z-Image-Turbo</a> | |
| <span style="color: #52525b;">(Tongyi-MAI)</span> | |
| </div> | |
| <div style="margin-bottom: 8px;"> | |
| <strong style="color: #a1a1aa;">Vision AI:</strong> | |
| <a href="https://huggingface.co/zai-org/GLM-4.6V" target="_blank" style="color: #818cf8; font-weight: 500;">GLM-4.6V</a> | |
| <span style="color: #52525b;">(Z.AI / Zhipu)</span> | | |
| <strong style="color: #a1a1aa;">Prompt+:</strong> | |
| <a href="https://deepseek.com" target="_blank" style="color: #818cf8; font-weight: 500;">DeepSeek Reasoner</a> | |
| </div> | |
| <div> | |
| <strong style="color: #a1a1aa;">Built by</strong> | |
| <a href="https://huggingface.co/lulavc" target="_blank" style="color: #a78bfa; font-weight: 600;">@lulavc</a> | | |
| <a href="https://huggingface.co/spaces/lulavc/Z-Image-Turbo" target="_blank" style="color: #6366f1; font-weight: 500;">MCP Server Enabled</a> | |
| </div> | |
| </div> | |
| """, | |
| elem_classes="footer-no-box" | |
| ) | |
| # MCP API Endpoints - Hidden components for direct API access | |
| with gr.Row(visible=False): | |
| mcp_prompt_in = gr.Textbox() | |
| mcp_style_in = gr.Dropdown(choices=STYLES, value="None") | |
| mcp_ratio_in = gr.Dropdown(choices=RATIOS, value="1:1 Square (1024x1024)") | |
| mcp_steps_in = gr.Slider(minimum=4, maximum=16, value=9) | |
| mcp_seed_in = gr.Number(value=42) | |
| mcp_random_in = gr.Checkbox(value=True) | |
| mcp_image_out = gr.Image(type="pil", format="png") | |
| mcp_seed_out = gr.Number() | |
| mcp_gen_btn = gr.Button() | |
| mcp_gen_btn.click( | |
| fn=mcp_generate, | |
| inputs=[mcp_prompt_in, mcp_style_in, mcp_ratio_in, mcp_steps_in, mcp_seed_in, mcp_random_in], | |
| outputs=[mcp_image_out, mcp_seed_out], | |
| api_name="mcp_generate" | |
| ) | |
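| # mcp_server=True exposes the named endpoints as MCP tools; in recent | |
| # Gradio 5.x releases this requires the gradio[mcp] extra. A hypothetical | |
| # client-side call for /mcp_generate (argument order assumed from the | |
| # wiring above): | |
| # | |
| #   from gradio_client import Client | |
| #   client = Client("lulavc/Z-Image-Turbo") | |
| #   image_path, seed = client.predict( | |
| #       "a red fox in fresh snow", "None", "1:1 Square (1024x1024)", 9, 42, True, | |
| #       api_name="/mcp_generate", | |
| #   ) | |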
| demo.launch(mcp_server=True) | |