Hope-Liang
update
f53d6a1
import streamlit as st
import pandas as pd
from sodapy import Socrata
import hopsworks
import joblib
import xgboost as xgb
def unencode_weekday(fri, mon, sat, sun, thu, tue, wed):
    """Turn seven one-hot weekday flags back into the weekday's name.

    The flags are checked in parameter order (Friday, Monday, Saturday,
    Sunday, Thursday, Tuesday, Wednesday) and the first one equal to
    ``1.0`` decides the result.

    Returns:
        The matching weekday name, or ``"Invalid Weekday"`` when no flag
        equals ``1.0``.
    """
    flag_to_name = (
        (fri, "Friday"),
        (mon, "Monday"),
        (sat, "Saturday"),
        (sun, "Sunday"),
        (thu, "Thursday"),
        (tue, "Tuesday"),
        (wed, "Wednesday"),
    )
    for flag, day_name in flag_to_name:
        if flag == 1.0:
            return day_name
    return "Invalid Weekday"
def unencode_report_type_code(ii, iss, vi, vs):
    """Turn four one-hot report-type flags back into the report type code.

    The flags are checked in parameter order (II, IS, VI, VS) and the
    first one equal to ``1.0`` decides the result.

    Returns:
        ``"II"``, ``"IS"``, ``"VI"`` or ``"VS"``, or
        ``"Invalid Report Type Code"`` when no flag equals ``1.0``.
    """
    candidates = ((ii, "II"), (iss, "IS"), (vi, "VI"), (vs, "VS"))
    # next() over a generator keeps the original short-circuit, first-match order.
    return next(
        (code for flag, code in candidates if flag == 1.0),
        "Invalid Report Type Code",
    )
def unencode_police_district(bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten):
    """Turn eleven one-hot police-district flags back into the district name.

    The flags are checked in parameter order and the first one equal to
    ``1.0`` decides the result.

    Returns:
        The matching district name, or ``"Invalid Police District"`` when
        no flag equals ``1.0``.
    """
    district_names = (
        "Bayview",
        "Central",
        "Ingleside",
        "Mission",
        "Northern",
        "OutOfSF",
        "Park",
        "Richmond",
        "Southern",
        "Taraval",
        "Tenderloin",
    )
    flags = (bay, cen, ing, mis, nor, out, par, ric, sou, tar, ten)
    for flag, district in zip(flags, district_names):
        if flag == 1.0:
            return district
    return "Invalid Police District"
# Streamlit page: download SF incident records, keep the 100 most recent,
# predict their category with the stored model, and show the actual and
# predicted categories side by side.
#
# Only `#` comments are used at the top level of this script: a bare string
# literal here would be rendered on the page by Streamlit "magic".


def _collapse_one_hot(frame, target, suffixes, decoder):
    """Replace the one-hot columns ``<target>_<suffix>`` with one label column.

    Args:
        frame: DataFrame to modify in place.
        target: Name of the label column to create; it is also the prefix
            shared by the one-hot columns.
        suffixes: One-hot column suffixes, listed in the positional order
            that ``decoder`` expects its arguments.
        decoder: Callable that receives one row's flags positionally and
            returns the label for that row.
    """
    encoded_columns = [f"{target}_{suffix}" for suffix in suffixes]
    frame[target] = frame[encoded_columns].apply(
        lambda row: decoder(*row), axis=1
    )
    frame.drop(columns=encoded_columns, inplace=True)


st.set_page_config(layout="wide")
st.title('Latest SF Incident Category Prediction')

# NOTE(review): the app token, username and password are hard-coded below.
# They should be rotated and loaded from a secret store or environment
# variables; they are left untouched here so the deployed app keeps working.
# NOTE(review): the username literal looks redacted/garbled -- confirm it
# against the original file.
client = Socrata("data.sfgov.org", "gZmg4iarmENBTk1Vzsb94bnse", username="[email protected]", password="Xw990504")
results = client.get("wg3w-h783", limit=800000)
results_df = pd.DataFrame.from_records(results)

# Imported at this point, as in the original script, so any import-time
# side effects of the project module keep their position relative to the
# Streamlit calls above.
from preprocessor_pipeline import preprocessing_incident

results_df_preprocessed = preprocessing_incident(results_df)
results_df_preprocessed["incident_datetime"] = pd.to_datetime(
    results_df_preprocessed["incident_datetime"]
)
results_df_preprocessed.sort_values(
    by='incident_datetime', ascending=False, inplace=True
)
# Explicit copy: columns are added to this frame below, and writing into a
# slice of the sorted frame can trigger pandas chained-assignment warnings.
results_df_preprocessed = results_df_preprocessed[:100].copy()

project = hopsworks.login()
fs = project.get_feature_store()  # not used further in the visible script
mr = project.get_model_registry()
# NOTE(review): registry name "incident_modal" differs from the pickle name
# "incident_model.pkl" -- confirm the registry name is intentional.
registry_entry = mr.get_model("incident_modal", version=1)
model_dir = registry_entry.download()
model = joblib.load(model_dir + "/incident_model.pkl")

# The timestamp and the true label are dropped before predicting.
batch_data = results_df_preprocessed.drop(
    columns=['incident_datetime', 'incident_category']
)
y_pred = model.predict(batch_data)

# Collapse the one-hot groups back into readable labels for display. The
# suffix order must match the positional parameters of each decoder.
_collapse_one_hot(
    results_df_preprocessed,
    "incident_day_of_week",
    ("Friday", "Monday", "Saturday", "Sunday", "Thursday", "Tuesday", "Wednesday"),
    unencode_weekday,
)
_collapse_one_hot(
    results_df_preprocessed,
    "report_type_code",
    ("II", "IS", "VI", "VS"),
    unencode_report_type_code,
)
_collapse_one_hot(
    results_df_preprocessed,
    "police_district",
    (
        "Bayview", "Central", "Ingleside", "Mission", "Northern", "OutOfSF",
        "Park", "Richmond", "Southern", "Taraval", "Tenderloin",
    ),
    unencode_police_district,
)

results_df_preprocessed.reset_index(inplace=True)
# batch_data was derived from this frame without reordering, so y_pred is
# already in row order and can be attached positionally.
results_df_preprocessed["incident_category_pred"] = y_pred

df = results_df_preprocessed[['incident_datetime', 'latitude', 'longitude', 'incident_day_of_week', 'report_type_code', 'police_district', 'incident_category', 'incident_category_pred']]
st.write(df)
# The return value is ignored: interacting with any widget makes Streamlit
# rerun the script, which is all this button is for.
st.button("Re-run")