Spaces:
Running
on
Zero
Running
on
Zero
| import numpy as np | |
| from .utils import process_attn, calc_attn_score | |
class AttentionDetector():
    """Threshold-based detector driven by a model's attention maps.

    The detector runs the wrapped model on a fixed instruction plus the
    supplied data prompt, reduces the returned attention maps to a single
    "focus score", and flags the prompt when that score is at or below
    ``threshold``.
    """

    def __init__(self, model, pos_examples=None, neg_examples=None,
                 use_token="first", instruction="Say xxxxxx", threshold=0.5):
        """Store the model and the detection settings.

        Args:
            model: Object exposing ``important_heads`` and an
                ``inference(instruction, data, max_output_tokens=...)``
                method.
            pos_examples: Accepted for interface compatibility; not used
                by this class.
            neg_examples: Accepted for interface compatibility; not used
                by this class.
            use_token: ``"first"`` scores only the first attention map;
                any other value scores every attention map.
            instruction: Instruction text passed to ``model.inference``.
            threshold: Focus scores at or below this value are flagged.
        """
        self.name = "attention"
        self.attn_func = "normalize_sum"
        self.model = model
        self.important_heads = model.important_heads
        self.instruction = instruction
        self.use_token = use_token
        self.threshold = threshold

    def attn2score(self, attention_maps, input_range):
        """Reduce attention maps to a single summed score.

        Each map is passed through ``process_attn`` and then
        ``calc_attn_score`` (restricted to ``self.important_heads``); the
        per-map scores are added together.

        Args:
            attention_maps: Sequence of attention maps, presumably one per
                generated token -- TODO confirm against ``model.inference``.
            input_range: Range descriptor forwarded unchanged to
                ``process_attn``.

        Returns:
            The sum of the per-map scores, or ``0`` when there are no maps.
        """
        if self.use_token == "first":
            # Slice instead of indexing so that an empty sequence produces
            # a score of 0 rather than raising IndexError.
            attention_maps = attention_maps[:1]

        # sum() of an empty iterable is already 0, so no explicit empty
        # check is needed.
        return sum(
            calc_attn_score(
                process_attn(attention_map, input_range, self.attn_func),
                self.important_heads,
            )
            for attention_map in attention_maps
        )

    def detect(self, data_prompt):
        """Score ``data_prompt`` and compare the score with the threshold.

        Args:
            data_prompt: Data text passed to ``model.inference`` together
                with ``self.instruction``.

        Returns:
            A ``(flagged, info)`` tuple where ``flagged`` is ``True`` when
            the focus score is at or below ``self.threshold`` and ``info``
            is ``{"focus_score": <score>}``.
        """
        # Only a single output token is requested; the third and fifth
        # items of the inference result are the attention maps and the
        # input range.
        _, _, attention_maps, _, input_range, _ = self.model.inference(
            self.instruction, data_prompt, max_output_tokens=1)
        focus_score = self.attn2score(attention_maps, input_range)
        return bool(focus_score <= self.threshold), {"focus_score": focus_score}