Update README.md
README.md CHANGED
@@ -1,9 +1,16 @@
 ---
 tags:
 - sentence-transformers
+- code
+- code-retrieval
+- retrieval-augmented-generation
+- rag
+- python
+- java
+- go
+- php
 - sentence-similarity
 - feature-extraction
-- dense
 - generated_from_trainer
 - loss:MultipleNegativesRankingLoss
 widget:
@@ -822,7 +829,7 @@ Perfomance per task:
 |-------|---------------|----------------------------|----------------|----------------|--------------------------|------------------------|------------------|-------|------------------|-------------------|
 | english_code_retriever | 8.04 | 74.23 | 44.01 | 57.79 | 42.71 | 60.68 | 35.16 | 25.56 | 56.53 | 42.79 |
 
-more information you cand find [here](https://huggingface.co/spaces/mteb/leaderboard)
+More information can be found [on the MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard).
 
 
 ## Model Details
@@ -832,7 +839,7 @@ more information you cand find [here](https://huggingface.co/spaces/mteb/leaderb
 - **Maximum Sequence Length:** 8192 tokens
 - **Output Dimensionality:** 768
 - **Similarity Function:** Cosine Similarity
-
+- **Pooling:** Mean pooling
 
 ## Usage
 
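As a quick check of the details listed in this hunk, the published checkpoint can be loaded and inspected with `sentence-transformers`. A minimal sketch, assuming only that the package is installed and using the model id from the Usage section:

```python
# Sketch: confirm the settings listed under Model Details.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("fyaronskiy/english_code_retriever")
print(model.max_seq_length)                      # expected: 8192
print(model.get_sentence_embedding_dimension())  # expected: 768
```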
@@ -904,23 +911,23 @@ query_embeddings = model.encode(queries, prompt_name='search_document', convert_
 for i, query in enumerate(queries):
     scores = util.cos_sim(query_embeddings[i], doc_embeddings)[0]
     best_idx = torch.argmax(scores).item()
-    print(f"\n
+    print(f"\n Query {i+1}: {query}")
     print(f"Top-1 match (score={scores[best_idx]:.4f}):\n{corpus[best_idx]}")
 
-'''
+''' Query 1: Write a Python function that calculates the factorial of a number recursively.
 Top-1 match (score=0.5983):
 def factorial(n):
     if n == 0:
         return 1
     return n * factorial(n-1)
 
-
+Query 2: How to check if a given string reads the same backward and forward?
 Top-1 match (score=0.4925):
 def is_palindrome(s: str) -> bool:
     s = s.lower().replace(" ", "")
     return s == s[::-1]
 
-
+Query 3: Combine two sorted lists into a single sorted list.
 Top-1 match (score=0.6524):
 def merge_sorted_lists(a, b):
     result = []
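The loop in this hunk keeps only the single best match per query. If several candidates are wanted, the same embeddings can be passed to `util.semantic_search`. A small sketch reusing `queries`, `corpus`, `query_embeddings`, and `doc_embeddings` from the snippet above; `top_k=3` is an arbitrary choice, not something the card prescribes:

```python
# Sketch: top-k retrieval with sentence-transformers' util.semantic_search,
# reusing the query/document embeddings computed in the snippet above.
from sentence_transformers import util

hits = util.semantic_search(query_embeddings, doc_embeddings, top_k=3)
for query, query_hits in zip(queries, hits):
    print(f"\nQuery: {query}")
    for hit in query_hits:
        print(f"  score={hit['score']:.4f}  {corpus[hit['corpus_id']]}")
```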
@@ -940,13 +947,11 @@ def merge_sorted_lists(a, b):
 
 Using with Transformers
 ```python
-# using with transformers
 import torch
 from transformers import AutoTokenizer, AutoModel
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-# Load model and tokenizer from Hugging Face
 model_name = "fyaronskiy/english_code_retriever"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name).to(device)
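The tokenization and pooling steps of the Transformers example fall outside the hunk shown here. For orientation only, a mean-pooling helper consistent with the "Mean pooling" bullet added under Model Details might look like the sketch below; the helper name and the sample text are illustrative, not taken from the card, and it reuses `tokenizer`, `model`, and `device` from the snippet above.

```python
# Illustrative mean pooling over non-padding tokens (assumption: mirrors the
# "Mean pooling" detail above; not copied from the card's elided lines).
import torch

def mean_pool(last_hidden_state, attention_mask):
    mask = attention_mask.unsqueeze(-1).to(last_hidden_state.dtype)
    return (last_hidden_state * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

texts = ["def add(a, b): return a + b"]  # hypothetical input
batch = tokenizer(texts, padding=True, truncation=True, return_tensors="pt").to(device)
with torch.no_grad():
    output = model(**batch)
embeddings = mean_pool(output.last_hidden_state, batch["attention_mask"])
print(embeddings.shape)  # expected: torch.Size([1, 768])
```

Whether this matches the model's training-time pooling exactly should be verified against the card's full snippet or the checkpoint's pooling config.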
@@ -994,18 +999,18 @@ query_embeddings = torch.nn.functional.normalize(query_embeddings, p=2, dim=1)
 for i, query in enumerate(queries):
     scores = torch.matmul(query_embeddings[i], doc_embeddings.T)
     best_idx = torch.argmax(scores).item()
-    print(f"\n
+    print(f"\n Query {i+1}: {query}")
     print(f"Top-1 match (score={scores[best_idx]:.4f}):\n{corpus[best_idx]}")
 
-'''
+''' Query 1: function of addition of two numbers
 Top-1 match (score=0.6047):
 def add(a, b): return a + b
 
-
+Query 2: finding the maximum element in an array
 Top-1 match (score=0.7772):
 def find_max(arr): return max(arr)
 
-
+Query 3: sorting a list in ascending order
 Top-1 match (score=0.7389):
 def sort_list(lst): return sorted(lst)
 '''
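Because `query_embeddings` and `doc_embeddings` are L2-normalized just before this hunk, the dot products computed here are exactly the cosine similarities used in the sentence-transformers example. A quick sanity check, reusing the two tensors from the snippet:

```python
# Sanity check: dot product of L2-normalized vectors equals cosine similarity.
import torch

dot = query_embeddings @ doc_embeddings.T
cos = torch.nn.functional.cosine_similarity(
    query_embeddings.unsqueeze(1), doc_embeddings.unsqueeze(0), dim=-1
)
print(torch.allclose(dot, cos, atol=1e-6))  # expected: True
```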