Update README.md
README.md CHANGED
@@ -1,9 +1,16 @@
 ---
 tags:
 - sentence-transformers
+- code
+- code-retrieval
+- retrieval-augmented-generation
+- rag
+- python
+- java
+- go
+- php
 - sentence-similarity
 - feature-extraction
-- dense
 - generated_from_trainer
 - loss:MultipleNegativesRankingLoss
 widget:
@@ -822,7 +829,7 @@ Perfomance per task:
 |-------|---------------|----------------------------|----------------|----------------|--------------------------|------------------------|------------------|-------|------------------|-------------------|
 | english_code_retriever | 8.04 | 74.23 | 44.01 | 57.79 | 42.71 | 60.68 | 35.16 | 25.56 | 56.53 | 42.79 |
 
-more information you cand find [here](https://huggingface.co/spaces/mteb/leaderboard)
+More information can be found [on the MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard).
 
 
 ## Model Details
@@ -832,7 +839,7 @@ more information you cand find [here](https://huggingface.co/spaces/mteb/leaderb
 - **Maximum Sequence Length:** 8192 tokens
 - **Output Dimensionality:** 768
 - **Similarity Function:** Cosine Similarity
-
+- **Pooling:** Mean pooling
 
 ## Usage
 
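As a quick check of the details listed in this hunk, the published checkpoint can be loaded and inspected with `sentence-transformers`. A minimal sketch, assuming only that the package is installed and using the model id from the Usage section:

```python
# Sketch: confirm the settings listed under Model Details.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("fyaronskiy/english_code_retriever")
print(model.max_seq_length)                      # expected: 8192
print(model.get_sentence_embedding_dimension())  # expected: 768
```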
@@ -904,23 +911,23 @@ query_embeddings = model.encode(queries, prompt_name='search_document', convert_
 for i, query in enumerate(queries):
     scores = util.cos_sim(query_embeddings[i], doc_embeddings)[0]
     best_idx = torch.argmax(scores).item()
-    print(f"\n
+    print(f"\n Query {i+1}: {query}")
     print(f"Top-1 match (score={scores[best_idx]:.4f}):\n{corpus[best_idx]}")
 
-'''
+''' Query 1: Write a Python function that calculates the factorial of a number recursively.
 Top-1 match (score=0.5983):
 def factorial(n):
     if n == 0:
         return 1
     return n * factorial(n-1)
 
-
+Query 2: How to check if a given string reads the same backward and forward?
 Top-1 match (score=0.4925):
 def is_palindrome(s: str) -> bool:
     s = s.lower().replace(" ", "")
     return s == s[::-1]
 
-
+Query 3: Combine two sorted lists into a single sorted list.
 Top-1 match (score=0.6524):
 def merge_sorted_lists(a, b):
     result = []
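The loop in this hunk keeps only the single best match per query. If several candidates are wanted, the same embeddings can be passed to `util.semantic_search`. A small sketch reusing `queries`, `corpus`, `query_embeddings`, and `doc_embeddings` from the snippet above; `top_k=3` is an arbitrary choice, not something the card prescribes:

```python
# Sketch: top-k retrieval with sentence-transformers' util.semantic_search,
# reusing the query/document embeddings computed in the snippet above.
from sentence_transformers import util

hits = util.semantic_search(query_embeddings, doc_embeddings, top_k=3)
for query, query_hits in zip(queries, hits):
    print(f"\nQuery: {query}")
    for hit in query_hits:
        print(f"  score={hit['score']:.4f}  {corpus[hit['corpus_id']]}")
```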
@@ -940,13 +947,11 @@ def merge_sorted_lists(a, b):
 
 Using with Transformers
 ```python
-# using with transformers
 import torch
 from transformers import AutoTokenizer, AutoModel
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load model and tokenizer from Hugging Face
 model_name = "fyaronskiy/english_code_retriever"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name).to(device)
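The tokenization and pooling steps of the Transformers example fall outside the hunk shown here. For orientation only, a mean-pooling helper consistent with the "Mean pooling" bullet added under Model Details might look like the sketch below; the helper name and the sample text are illustrative, not taken from the card, and it reuses `tokenizer`, `model`, and `device` from the snippet above.

```python
# Illustrative mean pooling over non-padding tokens (assumption: mirrors the
# "Mean pooling" detail above; not copied from the card's elided lines).
import torch

def mean_pool(last_hidden_state, attention_mask):
    mask = attention_mask.unsqueeze(-1).to(last_hidden_state.dtype)
    return (last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

texts = ["def add(a, b): return a + b"]  # hypothetical input
batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt").to(device)
with torch.no_grad():
    output = model(**batch)
embeddings = mean_pool(output.last_hidden_state, batch["attention_mask"])
print(embeddings.shape)  # expected: torch.Size([1, 768])
```

Whether this matches the model's training-time pooling exactly should be verified against the card's full snippet or the checkpoint's pooling config.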
@@ -994,18 +999,18 @@ query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
 for i, query in enumerate(queries):
     scores = torch.matmul(query_embeddings[i], doc_embeddings.T)
     best_idx = torch.argmax(scores).item()
-    print(f"\n
+    print(f"\n Query {i+1}: {query}")
     print(f"Top-1 match (score={scores[best_idx]:.4f}):\n{corpus[best_idx]}")
 
-'''
+''' Query 1: function of addition of two numbers
 Top-1 match (score=0.6047):
 def add(a, b): return a + b
 
-
+Query 2: finding the maximum element in an array
 Top-1 match (score=0.7772):
 def find_max(arr): return max(arr)
 
-
+Query 3: sorting a list in ascending order
 Top-1 match (score=0.7389):
 def sort_list(lst): return sorted(lst)
 '''
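Because `query_embeddings` and `doc_embeddings` are L2-normalized just before this hunk, the dot products computed here are exactly the cosine similarities used in the sentence-transformers example. A quick sanity check, reusing the two tensors from the snippet:

```python
# Sanity check: dot product of L2-normalized vectors equals cosine similarity.
import torch

dot = query_embeddings @ doc_embeddings.T
cos = torch.nn.functional.cosine_similarity(
    query_embeddings.unsqueeze(1), doc_embeddings.unsqueeze(0), dim=-1
)
print(torch.allclose(dot, cos, atol=1e-6))  # expected: True
```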