Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer, util | |
| from flask import Flask, render_template, request, jsonify | |
| from nltk.corpus import stopwords | |
| import os | |
# English stop words removed from every query by text_preprocessing().
# The NLTK corpus loader raises LookupError when the "stopwords" corpus has
# not been installed; fetch it once and retry instead of failing at import.
try:
    stop = stopwords.words('english')
except LookupError:
    import nltk

    nltk.download('stopwords')
    stop = stopwords.words('english')
def text_preprocessing(text):
    """Lower-case ``text`` and drop the tokens listed in the module-level ``stop``.

    The string is split on single space characters only, so punctuation stays
    attached to its word and consecutive spaces produce empty tokens that are
    kept. The surviving tokens are re-joined with single spaces.

    Args:
        text: The raw query string.

    Returns:
        The lower-cased string without stop-word tokens.
    """
    lowered = text.lower()
    kept_tokens = (token for token in lowered.split(' ') if token not in stop)
    return ' '.join(kept_tokens)
def concat_content(title, value):
    """Format one labelled field as ``"<title>: <value>"``.

    Args:
        title: Label placed before the colon.
        value: Any object; it is rendered with its default string formatting.

    Returns:
        The combined string.
    """
    labelled = f"{title}: {value}"
    return labelled
def df_to_text(df):
    """Render every row of ``df`` as labelled lines joined with ``<br>``.

    For each row, one ``"<label>: <value>"`` line is produced per displayed
    column, followed by a dashed separator line. Values are inserted as-is
    (no HTML escaping is applied).

    Args:
        df: DataFrame containing the columns "ProductID", "ProductName",
            "Gender", "Price (INR)" and "PrimaryColor".

    Returns:
        A single string with all lines joined by ``<br>``; an empty string
        when ``df`` has no rows.
    """
    separator = '-------------------------------'
    # (display label, DataFrame column) pairs, in output order.
    fields = (
        ("Product ID", "ProductID"),
        ("Product Name", "ProductName"),
        ("Gender", "Gender"),
        ("Price (INR)", "Price (INR)"),
        ("Primary Color", "PrimaryColor"),
    )
    lines = []
    for row in range(len(df)):
        lines.extend(
            concat_content(label, df[column].iloc[row])
            for label, column in fields
        )
        lines.append(separator)
    return '<br>'.join(lines)
# Product catalogue. The index is reset so that positional lookups with
# df.iloc in get_chat_response() start from a clean 0..n-1 range.
df = pd.read_csv("data/dataset.csv")
df = df.reset_index(drop=True)

# Precomputed embeddings, read without a header row. get_chat_response() uses
# the row positions of `docs` to index `df`, so the two files are presumably
# row-aligned -- confirm against the script that generated embedding.csv.
embedding_df = pd.read_csv("data/embedding.csv", header=None)
docs = embedding_df.values

# Hugging Face access token; None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")

# Sentence encoder used to embed incoming queries.
model = SentenceTransformer(
    "bert-base-nli-mean-tokens",
    cache_folder="/code/",
    use_auth_token=HF_TOKEN,
)

app = Flask(__name__)
def index():
    """Render the ``chat.html`` template and return the resulting page.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm how it is registered with ``app``.
    """
    page = render_template("chat.html")
    return page
def chat():
    """Answer one chat message with the best-matching products.

    Reads a JSON object from the request body and uses its ``"msg"`` field as
    the search query.

    Returns:
        A JSON response ``{"response": True, "message": <html text>}`` on
        success, or ``{"response": False, "message": "Error: ..."}`` when the
        body is missing/invalid or the search fails.

    NOTE(review): no route decorator is visible on this function in this
    view of the file -- confirm how it is registered with ``app``.
    """
    # silent=True: a missing or malformed JSON body yields None instead of
    # aborting the request before the JSON error payload can be produced.
    data = request.get_json(silent=True)
    msg = data.get("msg") if isinstance(data, dict) else None
    if not isinstance(msg, str):
        return jsonify({
            "message": 'Error: request body must be JSON with a string "msg" field',
            "response": False,
        })

    try:
        output_df = get_chat_response(msg)
        output_text = df_to_text(output_df)
    except Exception as e:
        # Request boundary: log the full traceback and report the failure to
        # the client in the same payload shape as a successful reply.
        app.logger.exception("chat request failed")
        error_message = f'Error: {str(e)}'
        return jsonify({"message": error_message, "response": False})

    return jsonify({"response": True, "message": output_text})
def get_chat_response(text, top_n=3):
    """Return the catalogue rows most similar to the query ``text``.

    The query is preprocessed, encoded with the module-level ``model`` and
    compared by cosine similarity against the precomputed embeddings in
    ``docs``. The positions of the best-scoring embeddings are used to select
    rows of ``df``.

    Args:
        text: The user's query string.
        top_n: Number of rows to return; expected to be a positive integer.
            Defaults to 3. Fewer rows are returned when ``docs`` has fewer
            than ``top_n`` rows.

    Returns:
        A DataFrame with the selected rows of ``df``, best match first.
    """
    # Cast to float64 -- presumably to match the dtype of `docs` as parsed
    # from the CSV file; confirm before changing.
    query_vector = model.encode(text_preprocessing(text)).astype(float)
    # One row of scores for the single query, one column per row of `docs`.
    scores = util.pytorch_cos_sim(query_vector, docs)
    ranked = scores.argsort(descending=True, dim=1)[0]
    # Hand pandas plain Python ints rather than a torch tensor.
    best_positions = ranked[:top_n].tolist()
    return df.iloc[best_positions]