# TerraSense-Base

A Multimodal Large Language Model for Remote Sensing.
## Documentation

For usage instructions, examples, and detailed documentation, please visit the project's GitHub Repository.
## Quick Start

```python
from transformers import AutoModelForVision2Seq, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

# Load the model and its processor
model = AutoModelForVision2Seq.from_pretrained(
    "TerraSense-CASM/TerraSense-Base",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
processor = AutoProcessor.from_pretrained(
    "TerraSense-CASM/TerraSense-Base", trust_remote_code=True
)

# Build a chat-style message with one image and one text instruction
messages = [{"role": "user", "content": [
    {"type": "image", "image": "path/to/image.jpg"},
    {"type": "text", "text": "Describe this remote sensing image."},
]}]

# Apply the chat template, extract the image inputs, and prepare model tensors
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
image_inputs, _ = process_vision_info(messages)
inputs = processor(text=[text], images=image_inputs, padding=True, return_tensors="pt").to("cuda")

# Generate and decode the response
output = model.generate(**inputs, max_new_tokens=512)
print(processor.batch_decode(output, skip_special_tokens=True)[0])
```
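Note that decoding the raw `generate` output returns the prompt text followed by the model's answer. If you only want the newly generated portion, you can trim the prompt tokens before decoding; the sketch below is a minimal follow-on that assumes the `inputs`, `output`, and `processor` variables from the snippet above.

```python
# Minimal sketch: drop the prompt tokens so only the generated answer is decoded.
# Assumes `inputs`, `output`, and `processor` from the Quick Start snippet above.
trimmed = [
    out_ids[len(in_ids):]
    for in_ids, out_ids in zip(inputs.input_ids, output)
]
answer = processor.batch_decode(trimmed, skip_special_tokens=True)[0]
print(answer)
```

The `image` field does not have to be a local path; per the `qwen_vl_utils` documentation, `process_vision_info` also accepts http(s) URLs and base64-encoded images.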
## License