Spaces:

HopeLiang
/

hugging-face-project

Runtime error

Hope-Liang

update

f53d6a1 almost 3 years ago

4.66 kB

	import streamlit as st
	import pandas as pd
	from sodapy import Socrata
	import hopsworks
	import joblib
	import xgboost as xgb

	def unencode_weekday(fri, mon, sat, sun, thu, tue, wed):
	    """Turn one-hot weekday flags back into the weekday name.

	    The flags are checked in parameter order (Friday, Monday, Saturday,
	    Sunday, Thursday, Tuesday, Wednesday) and the first one equal to
	    ``1.0`` decides the result.

	    Args:
	        fri, mon, sat, sun, thu, tue, wed: One-hot indicator for the
	            corresponding weekday; a value equal to ``1.0`` means "set".

	    Returns:
	        The weekday name, or ``"Invalid Weekday"`` when no flag equals
	        ``1.0``.
	    """
	    # Ordered (flag, label) pairs: order matters when several flags are set.
	    flag_labels = (
	        (fri, "Friday"),
	        (mon, "Monday"),
	        (sat, "Saturday"),
	        (sun, "Sunday"),
	        (thu, "Thursday"),
	        (tue, "Tuesday"),
	        (wed, "Wednesday"),
	    )
	    for flag, label in flag_labels:
	        if flag == 1.0:
	            return label
	    return "Invalid Weekday"

	def unencode_report_type_code(ii, iss, vi, vs):
	    """Turn one-hot report-type flags back into the report type code.

	    The flags are checked in parameter order (II, IS, VI, VS) and the
	    first one equal to ``1.0`` decides the result.

	    Args:
	        ii, iss, vi, vs: One-hot indicator for the codes ``"II"``,
	            ``"IS"``, ``"VI"`` and ``"VS"`` respectively; a value equal
	            to ``1.0`` means "set".

	    Returns:
	        The report type code, or ``"Invalid Report Type Code"`` when no
	        flag equals ``1.0``.
	    """
	    # Ordered (flag, code) pairs: order matters when several flags are set.
	    flag_codes = (
	        (ii, "II"),
	        (iss, "IS"),
	        (vi, "VI"),
	        (vs, "VS"),
	    )
	    for flag, code in flag_codes:
	        if flag == 1.0:
	            return code
	    return "Invalid Report Type Code"

	def unencode_police_district(bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten):
	    """Turn one-hot police-district flags back into the district name.

	    The flags are checked in parameter order and the first one equal to
	    ``1.0`` decides the result.

	    Args:
	        bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten: One-hot
	            indicator for Bayview, Central, Ingleside, Mission, Northern,
	            OutOfSF, Park, Richmond, Southern, Taraval and Tenderloin
	            respectively; a value equal to ``1.0`` means "set".

	    Returns:
	        The district name, or ``"Invalid Police District"`` when no flag
	        equals ``1.0``.
	    """
	    # Ordered (flag, district) pairs: order matters when several flags are set.
	    flag_districts = (
	        (bay, "Bayview"),
	        (cen, "Central"),
	        (ing, "Ingleside"),
	        (mis, "Mission"),
	        (nor, "Northern"),
	        (out, "OutOfSF"),
	        (par, "Park"),
	        (ric, "Richmond"),
	        (sou, "Southern"),
	        (tar, "Taraval"),
	        (ten, "Tenderloin"),
	    )
	    for flag, district in flag_districts:
	        if flag == 1.0:
	            return district
	    return "Invalid Police District"


	import os

	# ---- Page setup -------------------------------------------------------
	st.set_page_config(layout="wide")
	st.title('Latest SF Incident Category Prediction')

	# ---- Fetch raw incident reports from the SF open-data portal ----------
	# Credentials are read from the environment instead of being committed to
	# the repository. Any value that is not set is passed as None.
	# NOTE(review): the token/password previously hard-coded here are public
	# in the commit history and should be rotated.
	client = Socrata(
	    "data.sfgov.org",
	    os.environ.get("SOCRATA_APP_TOKEN"),
	    username=os.environ.get("SOCRATA_USERNAME"),
	    password=os.environ.get("SOCRATA_PASSWORD"),
	)
	# NOTE(review): 800000 rows are requested although only the 100 most
	# recent are displayed; presumably the full set is needed so that
	# preprocessing yields every one-hot column -- confirm before lowering.
	results = client.get("wg3w-h783", limit=800000)
	results_df = pd.DataFrame.from_records(results)

	# ---- Preprocess and keep the 100 most recent incidents ----------------
	from preprocessor_pipeline import preprocessing_incident
	results_df_preprocessed = preprocessing_incident(results_df)
	results_df_preprocessed["incident_datetime"] = pd.to_datetime(
	    results_df_preprocessed["incident_datetime"]
	)
	results_df_preprocessed.sort_values(
	    by='incident_datetime', ascending=False, inplace=True
	)
	# Explicit copy: columns are added to and dropped from this frame below,
	# which must not operate on a slice of the full preprocessed frame.
	results_df_preprocessed = results_df_preprocessed[:100].copy()

	# ---- Load the trained model from the Hopsworks model registry ---------
	project = hopsworks.login()
	fs = project.get_feature_store()  # not used further in this script
	mr = project.get_model_registry()
	# NOTE(review): "incident_modal" may be a typo for "incident_model";
	# kept as-is because it has to match the name used at registration time.
	model = mr.get_model("incident_modal", version=1)
	model_dir = model.download()
	# NOTE(review): joblib.load unpickles the file; only load artifacts from
	# a registry you trust.
	model = joblib.load(os.path.join(model_dir, "incident_model.pkl"))

	# ---- Predict on the feature columns only ------------------------------
	batch_data = results_df_preprocessed.drop(
	    columns=['incident_datetime', 'incident_category']
	)
	y_pred = model.predict(batch_data)

	# ---- Collapse the one-hot columns back into readable labels -----------
	# Column order must match the parameter order of the unencode_* helpers.
	weekday_columns = [
	    "incident_day_of_week_" + day
	    for day in ("Friday", "Monday", "Saturday", "Sunday",
	                "Thursday", "Tuesday", "Wednesday")
	]
	report_type_columns = [
	    "report_type_code_" + code for code in ("II", "IS", "VI", "VS")
	]
	police_district_columns = [
	    "police_district_" + district
	    for district in ("Bayview", "Central", "Ingleside", "Mission",
	                     "Northern", "OutOfSF", "Park", "Richmond",
	                     "Southern", "Taraval", "Tenderloin")
	]
	decoding_plan = (
	    ("incident_day_of_week", weekday_columns, unencode_weekday),
	    ("report_type_code", report_type_columns, unencode_report_type_code),
	    ("police_district", police_district_columns, unencode_police_district),
	)
	for label_column, onehot_columns, decoder in decoding_plan:
	    # decoder is bound as a default argument so the lambda does not
	    # depend on the loop variable after this iteration.
	    results_df_preprocessed[label_column] = results_df_preprocessed[
	        onehot_columns
	    ].apply(lambda row, decoder=decoder: decoder(*row), axis=1)
	    results_df_preprocessed.drop(columns=onehot_columns, inplace=True)

	# After the reset the index is 0..n-1, so the predictions (one per row,
	# in row order) can be attached positionally.
	results_df_preprocessed.reset_index(inplace=True)
	results_df_preprocessed["incident_category_pred"] = y_pred

	# ---- Display -----------------------------------------------------------
	df = results_df_preprocessed[[
	    'incident_datetime',
	    'latitude',
	    'longitude',
	    'incident_day_of_week',
	    'report_type_code',
	    'police_district',
	    'incident_category',
	    'incident_category_pred',
	]]

	st.write(df)
	st.button("Re-run")