chhatramani/nepali-supremecourt-judgement-text-corpus-v1
Viewer • Updated • 70.9k • 4
Configuration Parsing Warning:Invalid JSON for config file config.json
This model was pretrained from scratch on a dataset of Nepal Supreme Court judgments.
- Model: built from scratch
# Hyperparameters of the from-scratch model, collected in a single dict.
Model_CONFIG = {
    # Follows the tokenizer's vocabulary size (original note: 256000 — TODO confirm).
    "vocab_size": tokenizer.vocab_size,
    # Reduced because of T4 GPU memory constraints.
    "context_length": 2048,
    "emb_dim": 640,
    "n_heads": 4,
    "n_layers": 18,
    "hidden_dim": 2048,
    "head_dim": 256,
    "qk_norm": True,
    "n_kv_groups": 1,
    "rope_local_base": 10_000.0,
    "rope_base": 1_000_000.0,
    "sliding_window": 512,
    # 18 entries: five sliding-attention layers followed by one full-attention
    # layer, with that six-layer pattern repeated three times.
    "layer_types": (["sliding_attention"] * 5 + ["full_attention"]) * 3,
    "dtype": torch.bfloat16,
    "query_pre_attn_scalar": 256,
}
from transformers import AutoTokenizer
import torch
# Load the tokenizer identified by "google/gemma-3-270m-it".
tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-270m-it")
# Load the model. The architecture is not included in this snippet: implement
# it yourself or reuse the provided code (see the original implementation).
# Generate text: tokenize a Nepali prompt, asking for PyTorch tensors
# via return_tensors="pt".
prompt = "सर्वोच्च अदालतको निर्णय अनुसार"
inputs = tokenizer(prompt, return_tensors="pt")
# ... generation code (not shown here) ...