hazyresearch
/

Weaver_Distilled_All_Datasets_gte-Qwen2-1.5B-instruct

@@ -1,119 +1,53 @@
 ```
-import torch
-import torch.nn as nn
-from transformers import AutoTokenizer, AutoModel
-from huggingface_hub import hf_hub_download
-# Define the MLPHead class (same as in training)
-class MLPHead(nn.Module):
-    def __init__(self, input_dim: int, hidden_dims: list, dropout_rate: float = 0.1):
-        super().__init__()
-        layers = []
-        prev_dim = input_dim
-        for hidden_dim in hidden_dims:
-            layers.extend([
-                nn.Linear(prev_dim, hidden_dim),
-                nn.GELU(),
-                nn.Dropout(dropout_rate)
-            ])
-            prev_dim = hidden_dim
-        layers.append(nn.Linear(prev_dim, 1))
-        self.mlp = nn.Sequential(*layers)
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.mlp(x)
-# Define the CustomCrossEncoder class
-class CustomCrossEncoder(nn.Module):
-    def __init__(self, model_name="Alibaba-NLP/gte-Qwen2-1.5B-instruct"):
-        super().__init__()
-        # Load base model and tokenizer
-        self.base_model = AutoModel.from_pretrained(
-            model_name,
-            trust_remote_code=True,
-            torch_dtype=torch.bfloat16,
-            device_map="auto"
-        )
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-        if self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-        # Initialize MLP head with same architecture as training
-        self.embedding_dim = 1536  # For Qwen2-1.5B
-        self.mlp_head = MLPHead(
-            input_dim=self.embedding_dim,
-            hidden_dims=[1536, 768, 384],  # Same as training
-            dropout_rate=0.1
-        ).to(torch.bfloat16)
-    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
-        outputs = self.base_model(
-            input_ids=input_ids,
-            attention_mask=attention_mask,
-            output_hidden_states=True
-        )
-        last_hidden_state = outputs.hidden_states[-1]
-        cls_embedding = last_hidden_state[:, 0, :]
-        output = self.mlp_head(cls_embedding)
-        return output.squeeze(-1)
-# Load the model
-def load_cross_encoder(repo_id="hazyresearch/Weaver_Distilled_General_gte-Qwen2-1.5B-instruct"):
-    # Initialize model
-    model = CustomCrossEncoder()
-    # Download checkpoint from HF hub
-    checkpoint_path = hf_hub_download(
-        repo_id=repo_id,
-        filename="pytorch_model.pt",
-        use_auth_token=True  # Will use your HF token
-    )
-    # Load state dict
-    state_dict = torch.load(checkpoint_path, map_location="cuda")
-    model.load_state_dict(state_dict)
-    model.eval()
-    return model
-# Example usage
-def score_text_pair(model, text1, text2, max_length=8192):
-    """Score a pair of texts using the cross-encoder."""
-    # Tokenize
-    encoded = model.tokenizer(
-        text=text1,
-        text_pair=text2,
-        truncation=True,
-        max_length=max_length,
-        padding="max_length",
-        return_tensors="pt"
-    )
-    # Move to device
-    input_ids = encoded["input_ids"].cuda()
-    attention_mask = encoded["attention_mask"].cuda()
-    # Get score
-    with torch.no_grad():
-        score = model(input_ids, attention_mask)
-    return score.item()
-# Usage example
-if __name__ == "__main__":
-    # Load model
-    model = load_cross_encoder()
-    # Example: Score an instruction-response pair
-    instruction = "What is the capital of France?"
-    response = "The capital of France is Paris."
-    score = score_text_pair(model, instruction, response)
-    print(f"Score: {score:.4f}")
-```

+# Weaver Distilled - All Datasets (gte-Qwen2-1.5B-instruct)
+This is a distilled cross-encoder model based on gte-Qwen2-1.5B-instruct, trained to predict the correctness of answers across multiple domains. This general-purpose verifier was trained on a combined dataset built from the outputs of 35 different verifiers and reward models, aggregated using Weaver.
+## Model Details
+- **Base Model**: [Alibaba-NLP/gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct)
+- **Architecture**: Cross-encoder with an MLP head (hidden layers of width 1536 → 768 → 384, followed by a single-output layer)
+- **Max Sequence Length**: 4096
+- **Training Data**: Combined dataset built from 35 different LM judges and reward models, aggregated with Weaver
+- **Training Objective**: Binary classification (correct/incorrect answer prediction)
+## Usage
+```python
+import torch
+from custom_crossencoder import CustomCrossEncoder, TrainingConfig
+# Initialize model
+config = TrainingConfig(
+    model_name="Alibaba-NLP/gte-Qwen2-1.5B-instruct",
+    max_length=4096,
+    mlp_hidden_dims=[1536, 768, 384]
+)
+model = CustomCrossEncoder(config)
+# Load checkpoint
+model.load_state_dict(torch.load("path_to_checkpoint.pt"))
+model.eval()
+# Get prediction
+instruction = "Your instruction here"
+answer = "Your answer here"
+encoded = model.tokenizer(
+    text=instruction,
+    text_pair=answer,
+    truncation=True,
+    max_length=4096,
+    padding="max_length",
+    return_tensors="pt"
+)
+with torch.no_grad():
+    prediction = model(encoded["input_ids"], encoded["attention_mask"])
 ```
+## License
+[Your chosen license]
+## Citation
+If you use this model in your research, please cite:
+TODO