import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
from PIL import Image
import gc
import os
import spaces

# Model configuration
MODEL_PATH = "nvidia/Llama-Nemotron-Nano-VL-8B-V1"

# Load model globally
print("Loading model...")
model = AutoModel.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
image_processor = AutoImageProcessor.from_pretrained(
    MODEL_PATH,
    trust_remote_code=True
)
print("Model loaded successfully!")


def move_to_device(obj, device):
    """Recursively move tensors to device"""
    if torch.is_tensor(obj):
        return obj.to(device)
    elif isinstance(obj, dict):
        return {k: move_to_device(v, device) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [move_to_device(v, device) for v in obj]
    elif isinstance(obj, tuple):
        return tuple(move_to_device(v, device) for v in obj)
    elif hasattr(obj, 'to'):
        return obj.to(device)
    else:
        return obj
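
# Illustrative sketch (not part of the original Space): move_to_device also handles nested
# containers such as the dict/BatchFeature returned by the image processor, e.g.
#     features = {"pixel_values": torch.zeros(1, 3, 448, 448)}   # dummy tensor/shape, for illustration only
#     features = move_to_device(features, "cuda")                # every tensor inside is now on the GPU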


@spaces.GPU
def chat_text_only(message):
    try:
        device = "cuda"
        # Move entire model to GPU
        model.to(device)
        generation_config = dict(
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )
        # Generate (model.chat tokenizes the prompt itself; pixel_values=None for text-only chat)
        with torch.no_grad():
            response, _ = model.chat(
                tokenizer,
                None,
                message,
                generation_config,
                history=None,
                return_history=True
            )
        # Move model back to CPU
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()
        return response
    except Exception as e:
        # Ensure model is back on CPU even if an error occurs
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()
        return f"Error: {str(e)}"


@spaces.GPU
def chat_with_image(image, message):
    if image is None:
        return "Please upload an image."
    try:
        device = "cuda"
        # Move entire model to GPU
        model.to(device)
        generation_config = dict(
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )
        # Process image
        image_features = image_processor(image)
        # Move all image features to GPU
        image_features = move_to_device(image_features, device)
        # Add image token to message if not present
        if "<image>" not in message:
            message = f"<image>\n{message}"
        # Generate
        with torch.no_grad():
            response = model.chat(
                tokenizer=tokenizer,
                question=message,
                generation_config=generation_config,
                **image_features
            )
        # Move model back to CPU
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()
        return response
    except Exception as e:
        # Ensure model is back on CPU even if an error occurs
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()
        return f"Error: {str(e)}"


@spaces.GPU
def chat_with_two_images(image1, image2, message):
    if image1 is None or image2 is None:
        return "Please upload both images."
    try:
        device = "cuda"
        # Move entire model to GPU
        model.to(device)
        generation_config = dict(
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )
        # Process both images
        image_features = image_processor([image1, image2])
        # Move all image features to GPU
        image_features = move_to_device(image_features, device)
        # Format message for two images
        if "<image-1>" not in message and "<image-2>" not in message:
            message = f"<image-1>: <image>\n<image-2>: <image>\n{message}"
        # Generate
        with torch.no_grad():
            response = model.chat(
                tokenizer=tokenizer,
                question=message,
                generation_config=generation_config,
                **image_features
            )
        # Move model back to CPU
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()
        return response
    except Exception as e:
        # Ensure model is back on CPU even if an error occurs
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()
        return f"Error: {str(e)}"


# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Llama Nemotron Nano VL 8B", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🦙 Llama Nemotron Nano VL 8B Vision-Language Model")
        gr.Markdown("Chat with a powerful vision-language model that can understand both text and images!")

        with gr.Tabs():
            # Text-only chat tab
            with gr.TabItem("💬 Text Chat"):
                gr.Markdown("### Chat with the model using text only")
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Your message",
                            placeholder="Ask me anything...",
                            lines=3
                        )
                        text_submit = gr.Button("Send", variant="primary")
                    with gr.Column():
                        text_output = gr.Textbox(
                            label="Model Response",
                            lines=10,
                            max_lines=20
                        )
                text_submit.click(
                    chat_text_only,
                    inputs=[text_input],
                    outputs=[text_output]
                )
                # Example questions
                gr.Examples(
                    examples=[
                        ["What is artificial intelligence?"],
                        ["Explain quantum computing in simple terms."],
                        ["What happened in 1969?"],
                        ["Write a short story about a robot."]
                    ],
                    inputs=[text_input]
                )

            # Single image chat tab
            with gr.TabItem("🖼️ Image + Text Chat"):
                gr.Markdown("### Upload an image and ask questions about it")
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            label="Upload Image",
                            type="pil"
                        )
                        image_text_input = gr.Textbox(
                            label="Your question about the image",
                            placeholder="What do you see in this image?",
                            lines=3
                        )
                        image_submit = gr.Button("Analyze", variant="primary")
                    with gr.Column():
                        image_output = gr.Textbox(
                            label="Model Response",
                            lines=10,
                            max_lines=20
                        )
                image_submit.click(
                    chat_with_image,
                    inputs=[image_input, image_text_input],
                    outputs=[image_output]
                )
                # Example prompts
                gr.Examples(
                    examples=[
                        ["Describe what you see in this image."],
                        ["What objects are in this image?"],
                        ["Extract any text from this image."],
                        ["What is the main subject of this image?"]
                    ],
                    inputs=[image_text_input]
                )

            # Two images comparison tab
            with gr.TabItem("🖼️🖼️ Compare Two Images"):
                gr.Markdown("### Upload two images and ask the model to compare them")
                with gr.Row():
                    with gr.Column():
                        image1_input = gr.Image(
                            label="First Image",
                            type="pil"
                        )
                        image2_input = gr.Image(
                            label="Second Image",
                            type="pil"
                        )
                        two_images_text_input = gr.Textbox(
                            label="Your question about both images",
                            placeholder="Compare these two images...",
                            lines=3
                        )
                        two_images_submit = gr.Button("Compare", variant="primary")
                    with gr.Column():
                        two_images_output = gr.Textbox(
                            label="Model Response",
                            lines=10,
                            max_lines=20
                        )
                two_images_submit.click(
                    chat_with_two_images,
                    inputs=[image1_input, image2_input, two_images_text_input],
                    outputs=[two_images_output]
                )
                # Example prompts
                gr.Examples(
                    examples=[
                        ["What are the main differences between these two images?"],
                        ["Describe both images briefly."],
                        ["Which image is more colorful?"],
                        ["Compare the subjects in these images."]
                    ],
                    inputs=[two_images_text_input]
                )

        # Footer
        gr.Markdown("---")
        gr.Markdown("⚡ Powered by NVIDIA Llama Nemotron Nano VL 8B")

    return demo


# Create and launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.queue()  # Enable queuing for Zero GPU
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False
    )
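
A minimal requirements.txt sketch for running this app as a Space, covering only the packages imported above plus accelerate (needed for low_cpu_mem_usage); exact version pins and any extra dependencies pulled in by the model's trust_remote_code modules (for example timm or einops) are assumptions, not taken from the Space:

torch
transformers
accelerate
gradio
spaces
Pillow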