fix queue (#6)
Commit: 9431ee6c6359396371b5551bbcb2e678cfa4b060
Co-authored-by: Radamés Ajna <radames@users.noreply.huggingface.co>
app.py
CHANGED
```diff
@@ -4,11 +4,12 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, Stopping
 import time
 import numpy as np
 from torch.nn import functional as F
 import os
-auth_key = os.environ["HF_ACCESS_TOKEN"]
+# auth_key = os.environ["HF_ACCESS_TOKEN"]
 print(f"Starting to load the model to memory")
 m = AutoModelForCausalLM.from_pretrained(
-    "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16, use_auth_token=auth_key).cuda()
+    "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
+tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
 generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
 print(f"Sucessfully loaded the model to the memory")
 
```
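The first hunk drops the `HF_ACCESS_TOKEN` requirement (the checkpoint is public) and loads the tokenizer explicitly. Consolidated, the load path after this change looks roughly like this — a sketch, assuming a CUDA GPU with enough memory for the 7B weights in fp16:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_id = "stabilityai/stablelm-tuned-alpha-7b"

# fp16 halves memory versus fp32; .cuda() puts the weights on GPU 0,
# matching device=0 in the pipeline call below.
m = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).cuda()
tok = AutoTokenizer.from_pretrained(model_id)
generator = pipeline("text-generation", model=m, tokenizer=tok, device=0)
```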
```diff
@@ -30,8 +31,10 @@ class StopOnTokens(StoppingCriteria):
 
 def contrastive_generate(text, bad_text):
     with torch.no_grad():
-        tokens = tok(text, return_tensors="pt")[
-            'input_ids'].cuda()
+        tokens = tok(text, return_tensors="pt")[
+            'input_ids'].cuda()[:, :4096-1024]
+        bad_tokens = tok(bad_text, return_tensors="pt")[
+            'input_ids'].cuda()[:, :4096-1024]
         history = None
         bad_history = None
         curr_output = list()
```
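The new `[:, :4096-1024]` slice appears to budget the model's 4096-token context window: up to 1024 tokens are reserved for generation, so the prompt is capped at the remaining 3072. A sketch of that arithmetic (the constant names are mine, not the Space's):

```python
CONTEXT_LEN = 4096      # assumed context window of stablelm-tuned-alpha-7b
MAX_NEW_TOKENS = 1024   # matches max_new_tokens in generate() below

def cap_prompt(input_ids):
    # Keep at most CONTEXT_LEN - MAX_NEW_TOKENS prompt tokens so that the
    # prompt plus the generated continuation still fits in the window.
    return input_ids[:, :CONTEXT_LEN - MAX_NEW_TOKENS]
```

Note the slice keeps the *start* of the prompt and drops the end; chat UIs often keep the most recent tokens instead (`input_ids[:, -(CONTEXT_LEN - MAX_NEW_TOKENS):]`), which would preserve the latest turns.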
```diff
@@ -39,7 +42,8 @@ def contrastive_generate(text, bad_text):
         out = m(tokens, past_key_values=history, use_cache=True)
         logits = out.logits
         history = out.past_key_values
-        bad_out = m(bad_tokens, past_key_values=bad_history, use_cache=True)
+        bad_out = m(bad_tokens, past_key_values=bad_history,
+                    use_cache=True)
         bad_logits = bad_out.logits
         bad_history = bad_out.past_key_values
         probs = F.softmax(logits.float(), dim=-1)[0][-1].cpu()
```
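Both model calls thread `past_key_values` back in with `use_cache=True`, so each step reuses the attention keys/values computed so far instead of re-encoding the whole prefix. A minimal sketch of that incremental-decoding pattern (a hypothetical greedy loop, with `m` and `tok` as loaded above):

```python
import torch

@torch.no_grad()
def greedy_decode(prompt, steps=16):
    ids = tok(prompt, return_tensors="pt")["input_ids"].cuda()
    history = None
    out_tokens = []
    for _ in range(steps):
        out = m(ids, past_key_values=history, use_cache=True)
        history = out.past_key_values         # cache grows by the new tokens
        next_id = out.logits[0, -1].argmax()  # distribution at the last position
        out_tokens.append(next_id.item())
        ids = next_id.view(1, 1)              # only the new token is fed next step
    return tok.decode(out_tokens)
```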
```diff
@@ -60,39 +64,48 @@ def contrastive_generate(text, bad_text):
                 tokens.device)
         return tok.decode(curr_output)
 
+
 def generate(text, bad_text=None):
     stop = StopOnTokens()
-    result = generator(text, max_new_tokens=1024, num_return_sequences=1, num_beams=1, do_sample=True, temperature=1.0, top_p=0.95, top_k=1000, stopping_criteria=StoppingCriteriaList([stop]))
+    result = generator(text, max_new_tokens=1024, num_return_sequences=1, num_beams=1, do_sample=True,
+                       temperature=1.0, top_p=0.95, top_k=1000, stopping_criteria=StoppingCriteriaList([stop]))
     return result[0]["generated_text"].replace(text, "")
 
 
 def user(user_message, history):
-    …
+    history = history + [[user_message, ""]]
+    return "", history, history
 
 
 def bot(history, curr_system_message):
-    messages = curr_system_message + "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]]) for item in history])
+    messages = curr_system_message + \
+        "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
+                 for item in history])
     output = generate(messages)
     history[-1][1] = output
     time.sleep(1)
-    return history
-    …
-    …
+    return history, history
 
 
 with gr.Blocks() as demo:
-    …
+    history = gr.State([])
    gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
     gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
-    chatbot = gr.Chatbot(
-        …
-        …
-        …
-        …
+    chatbot = gr.Chatbot().style(height=500)
+    with gr.Row():
+        with gr.Column(scale=0.70):
+            msg = gr.Textbox(label="", placeholder="Chat Message Box")
+        with gr.Column(scale=0.30, min_width=0):
+            with gr.Row():
+                submit = gr.Button("Submit")
+                clear = gr.Button("Clear")
+    system_msg = gr.Textbox(
+        start_message, label="System Message", interactive=False, visible=False)
 
-    msg.submit(user, [msg, …
-        bot, [chatbot, system_msg], chatbot
-    )
-    …
+    msg.submit(fn=user, inputs=[msg, history], outputs=[msg, chatbot, history], queue=False).then(
+        fn=bot, inputs=[chatbot, system_msg], outputs=[chatbot, history], queue=True)
+    submit.click(fn=user, inputs=[msg, history], outputs=[msg, chatbot, history], queue=False).then(
+        fn=bot, inputs=[chatbot, system_msg], outputs=[chatbot, history], queue=True)
+    clear.click(lambda: [None, []], None, [chatbot, history], queue=False)
 demo.queue(concurrency_count=5)
-demo.launch()
+demo.launch()
```

Removed lines shown as `…` were elided in the rendered diff and could not be recovered.
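The last hunk is the actual queue fix the commit title refers to. Each user action is now a two-step chain: `user` appends the turn to the transcript with `queue=False`, so the message renders immediately, and only the slow model call in `bot` goes through the shared queue with `queue=True`. A stripped-down sketch of the pattern (Gradio 3.x, with a stand-in for the real generator):

```python
import gradio as gr

def user(user_message, history):
    history = history + [[user_message, ""]]
    return "", history, history               # clear textbox, update chat + state

def bot(history, system_msg):
    history[-1][1] = "echo: " + history[-1][0]  # stand-in for generate(messages)
    return history, history

with gr.Blocks() as demo:
    history = gr.State([])                    # per-session transcript
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    system_msg = gr.Textbox(visible=False)
    # Fast UI update outside the queue, slow generation inside it.
    msg.submit(user, [msg, history], [msg, chatbot, history], queue=False).then(
        bot, [chatbot, system_msg], [chatbot, history], queue=True)

demo.queue(concurrency_count=5)
demo.launch()
```

Wiring the same chain to both `msg.submit` and `submit.click`, as the diff does, keeps pressing Enter and clicking the button behaviorally identical.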
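The hunk headers reference a `StopOnTokens` criterion whose body lies outside this diff. For orientation, the StableLM-Tuned examples implement it along these lines (a sketch; the exact stop IDs in the Space's copy are not visible in this change):

```python
import torch
from transformers import StoppingCriteria

class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor,
                 scores: torch.FloatTensor, **kwargs) -> bool:
        # Assumed IDs: StableLM-Tuned's <|SYSTEM|>/<|USER|>/<|ASSISTANT|>
        # special tokens plus end-of-text and padding.
        stop_ids = [50278, 50279, 50277, 1, 0]
        return any(input_ids[0][-1] == stop_id for stop_id in stop_ids)
```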