Starling

Paused

Tonic committed on Nov 28, 2023

Commit

0cea479

1 Parent(s): b23582b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 model_name = "berkeley-nest/Starling-LM-7B-alpha"
-title = "👋🏻Welcome to Tonic's 💫🌠Starling 7B"
-description = "You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."
 examples = [
     [
         "The following dialogue is a conversation between Emmanuel Macron and Elon Musk:",  # user_message
@@ -26,6 +26,8 @@ import accelerate
 import bitsandbytes
 # device = "cuda" if torch.cuda.is_available() else "cpu"
 temperature=0.4
 max_new_tokens=240
 top_p=0.92
@@ -45,7 +47,7 @@ class StarlingBot:
     def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
         try:
             conversation = f" <s> [INST] {self.system_prompt} [INST]  {assistant_message if assistant_message else ''} </s> [/INST]  {user_message}  </s> "
-            input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)
             input_ids = input_ids.to(device)
             response = model.generate(
                 input_ids=input_ids,

 model_name = "berkeley-nest/Starling-LM-7B-alpha"
+title = """👋🏻Welcome to Tonic's 💫🌠Starling 7B"""
+description = """You can use [💫🌠Starling 7B](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha) or duplicate it for local use or on Hugging Face! [Join me on Discord to build together](https://discord.gg/VqTxc76K3u)."""
 examples = [
     [
         "The following dialogue is a conversation between Emmanuel Macron and Elon Musk:",  # user_message
 import bitsandbytes
 # device = "cuda" if torch.cuda.is_available() else "cpu"
+"bos_token_id": 1,
+"eos_token_id": 32000,
 temperature=0.4
 max_new_tokens=240
 top_p=0.92
     def predict(self, user_message, assistant_message, system_prompt, do_sample, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
         try:
             conversation = f" <s> [INST] {self.system_prompt} [INST]  {assistant_message if assistant_message else ''} </s> [/INST]  {user_message}  </s> "
+            input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=True)
             input_ids = input_ids.to(device)
             response = model.generate(
                 input_ids=input_ids,