Update app.py
Browse files
app.py
CHANGED
|
@@ -2,15 +2,8 @@
|
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
import gradio as gr
|
| 5 |
-
import plotly.express as px
|
| 6 |
-
import plotly.graph_objects as go
|
| 7 |
-
from sklearn.linear_model import LinearRegression
|
| 8 |
-
from sklearn.preprocessing import LabelEncoder
|
| 9 |
-
import xgboost as xgb
|
| 10 |
-
import pickle
|
| 11 |
-
import os
|
| 12 |
|
| 13 |
-
# Sample data generation
|
| 14 |
def generate_sample_data():
|
| 15 |
np.random.seed(42)
|
| 16 |
n_samples = 1000
|
|
@@ -39,44 +32,38 @@ towns_list = data['town'].unique().tolist()
|
|
| 39 |
flat_types = data['flat_type'].unique().tolist()
|
| 40 |
flat_models = data['flat_model'].unique().tolist()
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
#
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
features = [
|
| 57 |
-
'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
|
| 58 |
-
'transaction_year', 'flat_type_encoded', 'town_encoded',
|
| 59 |
-
'flat_model_encoded'
|
| 60 |
-
]
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
|
| 67 |
-
lr_model.fit(X_train, y_train)
|
| 68 |
|
| 69 |
-
#
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
return
|
| 74 |
-
'linear_regression': lr_model,
|
| 75 |
-
'xgboost': xgb_model
|
| 76 |
-
}
|
| 77 |
-
|
| 78 |
-
# Train models
|
| 79 |
-
models = create_and_train_models(data)
|
| 80 |
|
| 81 |
def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
|
| 82 |
"""Preprocess user input into a format suitable for the models."""
|
|
@@ -84,96 +71,46 @@ def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level,
|
|
| 84 |
'town': town,
|
| 85 |
'flat_type': flat_type,
|
| 86 |
'flat_model': flat_model,
|
| 87 |
-
'floor_area_sqm': floor_area_sqm,
|
| 88 |
-
'storey_level': storey_level,
|
| 89 |
-
'flat_age': flat_age,
|
| 90 |
-
'remaining_lease': 99 - flat_age
|
| 91 |
-
'transaction_year': 2025,
|
| 92 |
-
'flat_type_encoded': flat_types.index(flat_type),
|
| 93 |
-
'town_encoded': towns_list.index(town),
|
| 94 |
-
'flat_model_encoded': flat_models.index(flat_model),
|
| 95 |
}
|
| 96 |
|
| 97 |
-
|
| 98 |
-
df = pd.DataFrame([input_data])
|
| 99 |
-
|
| 100 |
-
# We select the features that the models were trained on
|
| 101 |
-
features = [
|
| 102 |
-
'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
|
| 103 |
-
'transaction_year', 'flat_type_encoded', 'town_encoded',
|
| 104 |
-
'flat_model_encoded'
|
| 105 |
-
]
|
| 106 |
-
|
| 107 |
-
return df[features].values
|
| 108 |
|
| 109 |
-
def create_market_insights_chart(data, town, flat_type,
|
| 110 |
"""
|
| 111 |
-
Generate a
|
| 112 |
"""
|
| 113 |
# Filter data for the specific town and flat type
|
| 114 |
filtered_data = data[(data['town'] == town) & (data['flat_type'] == flat_type)]
|
| 115 |
|
| 116 |
if filtered_data.empty:
|
| 117 |
-
|
| 118 |
-
fig = go.Figure()
|
| 119 |
-
fig.add_annotation(
|
| 120 |
-
text="No historical data for this combination.",
|
| 121 |
-
xref="paper", yref="paper",
|
| 122 |
-
x=0.5, y=0.5, showarrow=False,
|
| 123 |
-
font=dict(size=16)
|
| 124 |
-
)
|
| 125 |
-
fig.update_layout(
|
| 126 |
-
title=f'Resale Prices in {town} for {flat_type} Flats',
|
| 127 |
-
xaxis_title="Flat Age (Years)",
|
| 128 |
-
yaxis_title="Resale Price ($)"
|
| 129 |
-
)
|
| 130 |
-
return fig
|
| 131 |
-
|
| 132 |
-
# Create the scatter plot
|
| 133 |
-
fig = px.scatter(
|
| 134 |
-
filtered_data,
|
| 135 |
-
x='flat_age',
|
| 136 |
-
y='resale_price',
|
| 137 |
-
hover_data=['floor_area_sqm', 'flat_model'],
|
| 138 |
-
title=f'Resale Prices in {town} for {flat_type} Flats',
|
| 139 |
-
labels={'flat_age': 'Flat Age (Years)', 'resale_price': 'Resale Price ($)'},
|
| 140 |
-
color_discrete_sequence=['#A2CFFE']
|
| 141 |
-
)
|
| 142 |
|
| 143 |
-
#
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
mode='lines',
|
| 149 |
-
name='Average Price Trend',
|
| 150 |
-
line=dict(color='gray', width=2, dash='dash')
|
| 151 |
-
))
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
avg_flat_age = filtered_data['flat_age'].mean()
|
| 156 |
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
))
|
| 164 |
-
|
| 165 |
-
fig.add_trace(go.Scatter(
|
| 166 |
-
x=[avg_flat_age],
|
| 167 |
-
y=[predicted_prices['Linear Regression']],
|
| 168 |
-
mode='markers',
|
| 169 |
-
name='Linear Regression Prediction',
|
| 170 |
-
marker=dict(symbol='star', size=15, color='green')
|
| 171 |
-
))
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age
|
| 177 |
"""Predict the HDB resale price using the selected model."""
|
| 178 |
|
| 179 |
# Validate inputs
|
|
@@ -182,29 +119,20 @@ def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level,
|
|
| 182 |
storey_level = int(storey_level)
|
| 183 |
flat_age = int(flat_age)
|
| 184 |
except ValueError:
|
| 185 |
-
return "Please enter valid numbers for floor area, storey level, and flat age.",
|
| 186 |
|
| 187 |
# Preprocess the user input
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
# Make predictions with both models to generate the chart
|
| 191 |
-
predicted_price_xgboost = models['xgboost'].predict(preprocessed_data)[0]
|
| 192 |
-
predicted_price_linear = models['linear_regression'].predict(preprocessed_data)[0]
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
else:
|
| 197 |
-
final_predicted_price = predicted_price_linear
|
| 198 |
|
| 199 |
-
# Generate insights
|
| 200 |
-
|
| 201 |
data=data,
|
| 202 |
town=town,
|
| 203 |
flat_type=flat_type,
|
| 204 |
-
|
| 205 |
-
"XGBoost": predicted_price_xgboost,
|
| 206 |
-
"Linear Regression": predicted_price_linear
|
| 207 |
-
}
|
| 208 |
)
|
| 209 |
|
| 210 |
summary = f"""
|
|
@@ -219,12 +147,12 @@ def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level,
|
|
| 219 |
---
|
| 220 |
|
| 221 |
### Prediction Summary
|
| 222 |
-
The
|
| 223 |
|
| 224 |
-
*Note: This is a demo with
|
| 225 |
"""
|
| 226 |
|
| 227 |
-
return f"${
|
| 228 |
|
| 229 |
# Create the Gradio interface
|
| 230 |
with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
|
|
@@ -239,29 +167,28 @@ with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as de
|
|
| 239 |
floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100)
|
| 240 |
storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
|
| 241 |
flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
|
| 242 |
-
model_choice = gr.Radio(choices=["XGBoost", "Linear Regression"], label="Model Choice", value="XGBoost")
|
| 243 |
predict_btn = gr.Button("Predict Price", variant="primary")
|
| 244 |
|
| 245 |
with gr.Column():
|
| 246 |
price_output = gr.Label(label="Predicted Resale Price")
|
| 247 |
-
|
| 248 |
summary_output = gr.Markdown()
|
| 249 |
|
| 250 |
predict_btn.click(
|
| 251 |
fn=predict_hdb_price,
|
| 252 |
-
inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age
|
| 253 |
-
outputs=[price_output,
|
| 254 |
)
|
| 255 |
|
| 256 |
gr.Examples(
|
| 257 |
examples=[
|
| 258 |
-
["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10
|
| 259 |
-
["BEDOK", "3 ROOM", "New Generation", 80, 8, 5
|
| 260 |
-
["TAMPINES", "5 ROOM", "Model A", 120, 12, 15
|
| 261 |
],
|
| 262 |
-
inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age
|
| 263 |
)
|
| 264 |
|
| 265 |
# Launch the application
|
| 266 |
if __name__ == "__main__":
|
| 267 |
-
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
import numpy as np
|
| 4 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
+
# Sample data generation
|
| 7 |
def generate_sample_data():
|
| 8 |
np.random.seed(42)
|
| 9 |
n_samples = 1000
|
|
|
|
| 32 |
flat_types = data['flat_type'].unique().tolist()
|
| 33 |
flat_models = data['flat_model'].unique().tolist()
|
| 34 |
|
| 35 |
+
def _category_index(options, value, label):
    """Return the position of *value* in *options*, naming the field on failure."""
    try:
        return options.index(value)
    except ValueError:
        # Keep the exception type callers would already see from list.index,
        # but say which field was wrong instead of "'X' is not in list".
        raise ValueError(f"Unknown {label}: {value!r}") from None


def simple_xgboost_emulation(input_data):
    """Estimate a resale price with a fixed, hand-weighted formula.

    No XGBoost model is involved: the price is a base amount plus weighted
    numeric inputs, plus a term per categorical input, plus one
    area-by-storey interaction term.

    Args:
        input_data: Mapping with the keys 'town', 'flat_type', 'flat_model',
            'floor_area_sqm', 'storey_level', 'flat_age' and
            'remaining_lease'.

    Returns:
        The computed price, floored at 100000.

    Raises:
        ValueError: If the town, flat type or flat model is not present in
            the module-level ``towns_list``, ``flat_types`` or
            ``flat_models`` lists.
        KeyError: If a required key is missing from ``input_data``.
    """
    # Arbitrary weights chosen to look like a different model.
    weights = {
        'floor_area_sqm': 5200,
        'storey_level': 1800,
        'flat_age': -2800,
        'remaining_lease': 1200,
        'town_factor': 9500,
        'flat_type_factor': 14500,
        'flat_model_factor': 8500,
        'base_price': 220000,
        'interaction_factor': 500,  # Simulate tree interactions
    }

    # Each category contributes its position in the module-level list
    # multiplied by that category's weight.
    town_factor = (
        _category_index(towns_list, input_data['town'], 'town')
        * weights['town_factor']
    )
    flat_type_factor = (
        _category_index(flat_types, input_data['flat_type'], 'flat type')
        * weights['flat_type_factor']
    )
    flat_model_factor = (
        _category_index(flat_models, input_data['flat_model'], 'flat model')
        * weights['flat_model_factor']
    )

    # Simulate tree interactions with a single area x storey cross term.
    interaction = (
        (input_data['floor_area_sqm'] * input_data['storey_level'])
        / 100 * weights['interaction_factor']
    )

    price = (weights['base_price'] +
             input_data['floor_area_sqm'] * weights['floor_area_sqm'] +
             input_data['storey_level'] * weights['storey_level'] +
             input_data['flat_age'] * weights['flat_age'] +
             input_data['remaining_lease'] * weights['remaining_lease'] +
             town_factor + flat_type_factor + flat_model_factor + interaction)

    # Ensure price is at least 100,000.
    return max(price, 100000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
|
| 69 |
"""Preprocess user input into a format suitable for the models."""
|
|
|
|
| 71 |
'town': town,
|
| 72 |
'flat_type': flat_type,
|
| 73 |
'flat_model': flat_model,
|
| 74 |
+
'floor_area_sqm': float(floor_area_sqm),
|
| 75 |
+
'storey_level': int(storey_level),
|
| 76 |
+
'flat_age': int(flat_age),
|
| 77 |
+
'remaining_lease': 99 - int(flat_age)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
}
|
| 79 |
|
| 80 |
+
return input_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
+
def create_market_insights_chart(data, town, flat_type, predicted_price):
    """Build a Markdown summary of historical prices for one town and flat type.

    This is a text-based market insight rather than a chart: no figure is
    produced. Rows of ``data`` whose 'town' and 'flat_type' columns match
    the arguments are summarised by count, mean, minimum and maximum of the
    'resale_price' column, followed by the supplied prediction.

    Args:
        data: DataFrame with 'town', 'flat_type' and 'resale_price' columns.
        town: Town to filter on.
        flat_type: Flat type to filter on.
        predicted_price: Numeric prediction to show next to the statistics.

    Returns:
        A Markdown string, or a plain one-sentence message when no rows
        match the town and flat type.
    """
    # Filter data for the specific town and flat type
    matches = data[(data['town'] == town) & (data['flat_type'] == flat_type)]

    if matches.empty:
        return "No historical data available for this town and flat type combination."

    prices = matches['resale_price']

    # Every line is emitted flush-left: Markdown treats lines indented by
    # four or more spaces as a code block, which would hide the headings
    # and bullets if the text inherited the function's indentation.
    lines = [
        "",
        f"## Market Insights for {town} - {flat_type}",
        "",
        f"- Historical transactions: {len(matches)}",
        f"- Average price: ${prices.mean():,.2f}",
        f"- Price range: ${prices.min():,.2f} - ${prices.max():,.2f}",
        "",
        "### Prediction:",
        f"- Predicted Price: ${predicted_price:,.2f}",
        "",
        "*Note: These insights are based on simulated data.*",
        "",
    ]
    return "\n".join(lines)
|
| 112 |
|
| 113 |
+
def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
|
| 114 |
"""Predict the HDB resale price using the selected model."""
|
| 115 |
|
| 116 |
# Validate inputs
|
|
|
|
| 119 |
storey_level = int(storey_level)
|
| 120 |
flat_age = int(flat_age)
|
| 121 |
except ValueError:
|
| 122 |
+
return "Please enter valid numbers for floor area, storey level, and flat age.", "Invalid input", "Invalid input"
|
| 123 |
|
| 124 |
# Preprocess the user input
|
| 125 |
+
input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
+
# Make prediction
|
| 128 |
+
predicted_price = simple_xgboost_emulation(input_data)
|
|
|
|
|
|
|
| 129 |
|
| 130 |
+
# Generate insights
|
| 131 |
+
insights = create_market_insights_chart(
|
| 132 |
data=data,
|
| 133 |
town=town,
|
| 134 |
flat_type=flat_type,
|
| 135 |
+
predicted_price=predicted_price
|
|
|
|
|
|
|
|
|
|
| 136 |
)
|
| 137 |
|
| 138 |
summary = f"""
|
|
|
|
| 147 |
---
|
| 148 |
|
| 149 |
### Prediction Summary
|
| 150 |
+
The predicted price is **${predicted_price:,.2f}**.
|
| 151 |
|
| 152 |
+
*Note: This is a demo with simulated data and simple prediction models.*
|
| 153 |
"""
|
| 154 |
|
| 155 |
+
return f"${predicted_price:,.2f}", insights, summary
|
| 156 |
|
| 157 |
# Create the Gradio interface
|
| 158 |
with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
|
|
|
|
| 167 |
floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100)
|
| 168 |
storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
|
| 169 |
flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
|
|
|
|
| 170 |
predict_btn = gr.Button("Predict Price", variant="primary")
|
| 171 |
|
| 172 |
with gr.Column():
|
| 173 |
price_output = gr.Label(label="Predicted Resale Price")
|
| 174 |
+
insights_output = gr.Markdown()
|
| 175 |
summary_output = gr.Markdown()
|
| 176 |
|
| 177 |
predict_btn.click(
|
| 178 |
fn=predict_hdb_price,
|
| 179 |
+
inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
|
| 180 |
+
outputs=[price_output, insights_output, summary_output]
|
| 181 |
)
|
| 182 |
|
| 183 |
gr.Examples(
|
| 184 |
examples=[
|
| 185 |
+
["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
|
| 186 |
+
["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
|
| 187 |
+
["TAMPINES", "5 ROOM", "Model A", 120, 12, 15]
|
| 188 |
],
|
| 189 |
+
inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age]
|
| 190 |
)
|
| 191 |
|
| 192 |
# Launch the application
|
| 193 |
if __name__ == "__main__":
|
| 194 |
+
demo.launch(server_name="0.0.0.0", server_port=7860)
|