Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
model_name = "berkeley-nest/Starling-LM-7B-alpha"
|
| 2 |
|
| 3 |
-
title = "👋🏻Welcome to Tonic's 💫🌠Starling 7B"
|
| 4 |
-
description = "You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
|
| 5 |
examples = [
|
| 6 |
[
|
| 7 |
"The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
|
|
@@ -26,6 +26,8 @@ import accelerate
|
|
| 26 |
import bitsandbytes
|
| 27 |
|
| 28 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
|
| 29 |
temperature=0.4
|
| 30 |
max_new_tokens=240
|
| 31 |
top_p=0.92
|
|
@@ -45,7 +47,7 @@ class StarlingBot:
|
|
| 45 |
def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
|
| 46 |
try:
|
| 47 |
conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
|
| 48 |
-
input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=
|
| 49 |
input_ids = input_ids.to(device)
|
| 50 |
response = model.generate(
|
| 51 |
input_ids=input_ids,
|
|
|
|
| 1 |
model_name = "berkeley-nest/Starling-LM-7B-alpha"
|
| 2 |
|
| 3 |
+
title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
|
| 4 |
+
description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
|
| 5 |
examples = [
|
| 6 |
[
|
| 7 |
"The following dialogue is a conversation between Emmanuel Macron and Elon Musk:", # user_message
|
|
|
|
| 26 |
import bitsandbytes
|
| 27 |
|
| 28 |
# device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 29 |
+
# Plain assignment: a bare `"key": value,` pair is only valid inside a dict
# literal and raises SyntaxError when written as a module-level statement.
bos_token_id = 1  # presumably the model's beginning-of-sequence token id; confirm against its config
|
| 30 |
+
# Plain assignment: a bare `"key": value,` pair is only valid inside a dict
# literal and raises SyntaxError when written as a module-level statement.
eos_token_id = 32000  # presumably the model's end-of-sequence token id; confirm against its config
|
| 31 |
temperature=0.4
|
| 32 |
max_new_tokens=240
|
| 33 |
top_p=0.92
|
|
|
|
| 47 |
def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
|
| 48 |
try:
|
| 49 |
conversation = f" <s> [INST] {self.system_prompt} [INST] {assistant_message if assistant_message else ''} </s> [/INST] {user_message} </s> "
|
| 50 |
+
input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
|
| 51 |
input_ids = input_ids.to(device)
|
| 52 |
response = model.generate(
|
| 53 |
input_ids=input_ids,
|