Spaces:

Gapeleon
/

Llama-3.1-Nemotron-Nano-VL-8B-V1-Demo

Runtime error

App Files Files Community

Gapeleon committed on Jun 4

Commit

9933c70

verified ·

1 Parent(s): 07c4f24

Create app.py

Browse files

Files changed (1) hide show

app.py +332 -0

app.py ADDED Viewed

	@@ -0,0 +1,332 @@

+import torch
+import gradio as gr
+from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
+from PIL import Image
+import gc
+import os
+import spaces
+# Model configuration
+MODEL_PATH = "nvidia/Llama-Nemotron-Nano-VL-8B-V1"
+# Load model globally
+print("Loading model...")
+model = AutoModel.from_pretrained(
+    MODEL_PATH,
+    torch_dtype=torch.bfloat16,
+    low_cpu_mem_usage=True,
+    trust_remote_code=True,
+).eval()
+tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+image_processor = AutoImageProcessor.from_pretrained(
+    MODEL_PATH,
+    trust_remote_code=True
+)
+print("Model loaded successfully!")
+def move_to_device(obj, device):
+    """Recursively move tensors to device"""
+    if torch.is_tensor(obj):
+        return obj.to(device)
+    elif isinstance(obj, dict):
+        return {k: move_to_device(v, device) for k, v in obj.items()}
+    elif isinstance(obj, list):
+        return [move_to_device(v, device) for v in obj]
+    elif isinstance(obj, tuple):
+        return tuple(move_to_device(v, device) for v in obj)
+    elif hasattr(obj, 'to'):
+        return obj.to(device)
+    else:
+        return obj
+@spaces.GPU(duration=60)
+def chat_text_only(message):
+    try:
+        device = "cuda"
+        # Move entire model to GPU
+        model.to(device)
+        generation_config = dict(
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.7,
+            eos_token_id=tokenizer.eos_token_id
+        )
+        # Tokenize on CPU then move to GPU
+        inputs = tokenizer(message, return_tensors="pt")
+        inputs = move_to_device(inputs, device)
+        # Generate
+        with torch.no_grad():
+            response, _ = model.chat(
+                tokenizer,
+                None,
+                message,
+                generation_config,
+                history=None,
+                return_history=True
+            )
+        # Move model back to CPU
+        model.to("cpu")
+        torch.cuda.empty_cache()
+        gc.collect()
+        return response
+    except Exception as e:
+        # Ensure model is back on CPU even if error occurs
+        model.to("cpu")
+        torch.cuda.empty_cache()
+        gc.collect()
+        return f"Error: {str(e)}"
+@spaces.GPU(duration=60)
+def chat_with_image(image, message):
+    if image is None:
+        return "Please upload an image."
+    try:
+        device = "cuda"
+        # Move entire model to GPU
+        model.to(device)
+        generation_config = dict(
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.7,
+            eos_token_id=tokenizer.eos_token_id
+        )
+        # Process image
+        image_features = image_processor(image)
+        # Move all image features to GPU
+        image_features = move_to_device(image_features, device)
+        # Add image token to message if not present
+        if "<image>" not in message:
+            message = f"<image>\n{message}"
+        # Generate
+        with torch.no_grad():
+            response = model.chat(
+                tokenizer=tokenizer,
+                question=message,
+                generation_config=generation_config,
+                **image_features
+            )
+        # Move model back to CPU
+        model.to("cpu")
+        torch.cuda.empty_cache()
+        gc.collect()
+        return response
+    except Exception as e:
+        # Ensure model is back on CPU even if error occurs
+        model.to("cpu")
+        torch.cuda.empty_cache()
+        gc.collect()
+        return f"Error: {str(e)}"
+@spaces.GPU(duration=60)
+def chat_with_two_images(image1, image2, message):
+    if image1 is None or image2 is None:
+        return "Please upload both images."
+    try:
+        device = "cuda"
+        # Move entire model to GPU
+        model.to(device)
+        generation_config = dict(
+            max_new_tokens=512,
+            do_sample=True,
+            temperature=0.7,
+            eos_token_id=tokenizer.eos_token_id
+        )
+        # Process both images
+        image_features = image_processor([image1, image2])
+        # Move all image features to GPU
+        image_features = move_to_device(image_features, device)
+        # Format message for two images
+        if "<image-1>" not in message and "<image-2>" not in message:
+            message = f"<image-1>: <image>\n<image-2>: <image>\n{message}"
+        # Generate
+        with torch.no_grad():
+            response = model.chat(
+                tokenizer=tokenizer,
+                question=message,
+                generation_config=generation_config,
+                **image_features
+            )
+        # Move model back to CPU
+        model.to("cpu")
+        torch.cuda.empty_cache()
+        gc.collect()
+        return response
+    except Exception as e:
+        # Ensure model is back on CPU even if error occurs
+        model.to("cpu")
+        torch.cuda.empty_cache()
+        gc.collect()
+        return f"Error: {str(e)}"
+# Create Gradio interface
+def create_interface():
+    with gr.Blocks(title="Llama Nemotron Nano VL 8B", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🦙 Llama Nemotron Nano VL 8B Vision-Language Model")
+        gr.Markdown("Chat with a powerful vision-language model that can understand both text and images!")
+        with gr.Tabs():
+            # Text-only chat tab
+            with gr.TabItem("💬 Text Chat"):
+                gr.Markdown("### Chat with the model using text only")
+                with gr.Row():
+                    with gr.Column():
+                        text_input = gr.Textbox(
+                            label="Your message",
+                            placeholder="Ask me anything...",
+                            lines=3
+                        )
+                        text_submit = gr.Button("Send", variant="primary")
+                    with gr.Column():
+                        text_output = gr.Textbox(
+                            label="Model Response",
+                            lines=10,
+                            max_lines=20
+                        )
+                text_submit.click(
+                    chat_text_only,
+                    inputs=[text_input],
+                    outputs=[text_output]
+                )
+                # Example questions
+                gr.Examples(
+                    examples=[
+                        ["What is artificial intelligence?"],
+                        ["Explain quantum computing in simple terms."],
+                        ["What happened in 1969?"],
+                        ["Write a short story about a robot."]
+                    ],
+                    inputs=[text_input]
+                )
+            # Single image chat tab
+            with gr.TabItem("🖼️ Image + Text Chat"):
+                gr.Markdown("### Upload an image and ask questions about it")
+                with gr.Row():
+                    with gr.Column():
+                        image_input = gr.Image(
+                            label="Upload Image",
+                            type="pil"
+                        )
+                        image_text_input = gr.Textbox(
+                            label="Your question about the image",
+                            placeholder="What do you see in this image?",
+                            lines=3
+                        )
+                        image_submit = gr.Button("Analyze", variant="primary")
+                    with gr.Column():
+                        image_output = gr.Textbox(
+                            label="Model Response",
+                            lines=10,
+                            max_lines=20
+                        )
+                image_submit.click(
+                    chat_with_image,
+                    inputs=[image_input, image_text_input],
+                    outputs=[image_output]
+                )
+                # Example prompts
+                gr.Examples(
+                    examples=[
+                        ["Describe what you see in this image."],
+                        ["What objects are in this image?"],
+                        ["Extract any text from this image."],
+                        ["What is the main subject of this image?"]
+                    ],
+                    inputs=[image_text_input]
+                )
+            # Two images comparison tab
+            with gr.TabItem("🖼️🖼️ Compare Two Images"):
+                gr.Markdown("### Upload two images and ask the model to compare them")
+                with gr.Row():
+                    with gr.Column():
+                        image1_input = gr.Image(
+                            label="First Image",
+                            type="pil"
+                        )
+                        image2_input = gr.Image(
+                            label="Second Image",
+                            type="pil"
+                        )
+                        two_images_text_input = gr.Textbox(
+                            label="Your question about both images",
+                            placeholder="Compare these two images...",
+                            lines=3
+                        )
+                        two_images_submit = gr.Button("Compare", variant="primary")
+                    with gr.Column():
+                        two_images_output = gr.Textbox(
+                            label="Model Response",
+                            lines=10,
+                            max_lines=20
+                        )
+                two_images_submit.click(
+                    chat_with_two_images,
+                    inputs=[image1_input, image2_input, two_images_text_input],
+                    outputs=[two_images_output]
+                )
+                # Example prompts
+                gr.Examples(
+                    examples=[
+                        ["What are the main differences between these two images?"],
+                        ["Describe both images briefly."],
+                        ["Which image is more colorful?"],
+                        ["Compare the subjects in these images."]
+                    ],
+                    inputs=[two_images_text_input]
+                )
+        # Footer
+        gr.Markdown("---")
+        gr.Markdown("⚡ Powered by NVIDIA Llama Nemotron Nano VL 8B")
+    return demo
+# Create and launch the interface
+if __name__ == "__main__":
+    demo = create_interface()
+    demo.queue()  # Enable queuing for Zero GPU
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860
+    )