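"""Gradio app that estimates HDB resale prices.

Uses a saved XGBoost model when it can be loaded and a simple weighted
formula otherwise; market insights are computed from simulated data.
"""
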
import math

import gradio as gr
import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder


# --- Model loading ---
# Load the serialized model once at import time. `model` stays None on any
# failure, which the rest of the module treats as "use the fallback formula".
# NOTE: joblib.load unpickles the file, so it must only point at a trusted artifact.
model = None
try:
    loaded_model = joblib.load('best_model_xgboost.joblib')
    print("XGBoost model loaded successfully!")
    model = loaded_model
except FileNotFoundError:
    print("Warning: best_model_xgboost.joblib not found. Using fallback model.")
except Exception as load_error:
    print(f"Error loading model: {load_error}")


# --- Simulated market data ---
def generate_sample_data(n_samples: int = 1000, seed: int = 42) -> pd.DataFrame:
    """Build a synthetic table of HDB resale transactions.

    Every column is drawn independently, so the table carries no real
    relationship between features and price; it only provides plausible
    category values and price statistics for the UI.

    Args:
        n_samples: Number of rows to generate.
        seed: Seed for the private random generator.

    Returns:
        DataFrame with columns ``town``, ``flat_type``, ``flat_model``,
        ``floor_area_sqm`` (uniform in [60, 150)), ``storey_level``
        (integers 1-24), ``flat_age`` (integers 0-49) and ``resale_price``
        (uniform in [200000, 800000)).
    """
    # A private RandomState yields the same stream as seeding the global
    # generator, without resetting random state for the rest of the process.
    rng = np.random.RandomState(seed)

    towns = [
        'ANG MO KIO', 'BEDOK', 'BISHAN', 'BUKIT BATOK', 'BUKIT MERAH',
        'BUKIT PANJANG', 'BUKIT TIMAH', 'CENTRAL AREA', 'CHOA CHU KANG',
        'CLEMENTI', 'GEYLANG', 'HOUGANG', 'JURONG EAST', 'JURONG WEST',
        'KALLANG/WHAMPOA', 'MARINE PARADE', 'PASIR RIS', 'PUNGGOL',
        'QUEENSTOWN', 'SEMBAWANG', 'SENGKANG', 'SERANGOON', 'TAMPINES',
        'TOA PAYOH', 'WOODLANDS', 'YISHUN',
    ]
    flat_types = ['2 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE', 'MULTI-GENERATION']
    flat_models = [
        '2 ROOM', '3Gen', 'Adjoined flat', 'Apartment', 'DBSS', 'Improved',
        'Improved-Maisonette', 'Maisonette', 'Model A', 'Model A-Maisonette',
        'Model-A2', 'MULTI-GENERATION', 'New Generation', 'Premium Apartment',
        'Premium Apartment Loft', 'Simplified', 'Standard', 'Type S1', 'Type S2',
    ]

    # The order of the draws below determines the generated values; keep it
    # stable so a given seed always reproduces the same table.
    return pd.DataFrame({
        'town': rng.choice(towns, n_samples),
        'flat_type': rng.choice(flat_types, n_samples),
        'flat_model': rng.choice(flat_models, n_samples),
        'floor_area_sqm': rng.uniform(60, 150, n_samples),
        'storey_level': rng.randint(1, 25, n_samples),  # upper bound is exclusive
        'flat_age': rng.randint(0, 50, n_samples),  # upper bound is exclusive
        'resale_price': rng.uniform(200000, 800000, n_samples),
    })


# --- Module-level data, category vocabularies and label encoders ---
# Simulated transaction history used for the market-insight statistics.
data = generate_sample_data()

# Sorted category vocabularies: they populate the dropdowns, provide the
# positional index used by the fallback formula, and define the encoder classes.
towns_list = sorted(set(data['town']))
flat_types = sorted(set(data['flat_type']))
flat_models = sorted(set(data['flat_model']))

# NOTE(review): these encoders are fitted on the simulated vocabulary, not on
# whatever was used to train the saved model - confirm the integer codes match.
town_encoder = LabelEncoder().fit(towns_list)
flat_type_encoder = LabelEncoder().fit(flat_types)
flat_model_encoder = LabelEncoder().fit(flat_models)


# --- Fallback price estimator ---
def simple_xgboost_emulation(input_data):
    """Estimate a price with a fixed weighted formula when no model is available.

    Args:
        input_data: Mapping with keys ``town``, ``flat_type``, ``flat_model``,
            ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
            ``remaining_lease``.

    Returns:
        The weighted sum of the inputs, floored at 100000.

    Raises:
        ValueError: If a category is not present in the module-level
            ``towns_list`` / ``flat_types`` / ``flat_models`` lists.
    """
    base_price = 220000
    per_sqm = 5200
    per_storey = 1800
    per_year_of_age = -2800
    per_year_of_lease = 1200
    per_town_rank = 9500
    per_flat_type_rank = 14500
    per_flat_model_rank = 8500
    interaction_weight = 500

    area = input_data['floor_area_sqm']
    storey = input_data['storey_level']

    # Categories contribute by their position in the sorted vocabulary lists.
    town_term = towns_list.index(input_data['town']) * per_town_rank
    type_term = flat_types.index(input_data['flat_type']) * per_flat_type_rank
    model_term = flat_models.index(input_data['flat_model']) * per_flat_model_rank

    # Area x storey cross term, scaled down by 100 before weighting.
    cross_term = (area * storey) / 100 * interaction_weight

    # Terms are accumulated in a fixed order so floating-point results stay stable.
    estimate = base_price + area * per_sqm
    estimate = estimate + storey * per_storey
    estimate = estimate + input_data['flat_age'] * per_year_of_age
    estimate = estimate + input_data['remaining_lease'] * per_year_of_lease
    estimate = estimate + town_term
    estimate = estimate + type_term
    estimate = estimate + model_term
    estimate = estimate + cross_term

    return max(estimate, 100000)


# --- Input preprocessing ---
def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level,
                     flat_age, lease_term_years=99):
    """Normalize raw form values into the feature dictionary used for prediction.

    Args:
        town: Town name, passed through unchanged.
        flat_type: Flat type label, passed through unchanged.
        flat_model: Flat model label, passed through unchanged.
        floor_area_sqm: Floor area; converted with ``float``.
        storey_level: Storey; converted with ``int`` (fractions are truncated).
        flat_age: Age in years; converted with ``int`` (fractions are truncated).
        lease_term_years: Full lease length the remaining lease is derived from.

    Returns:
        Dict with the three category values plus ``floor_area_sqm``,
        ``storey_level``, ``flat_age`` and ``remaining_lease``.

    Raises:
        ValueError, TypeError: If a numeric field cannot be converted.
    """
    age_years = int(flat_age)

    return {
        'town': town,
        'flat_type': flat_type,
        'flat_model': flat_model,
        'floor_area_sqm': float(floor_area_sqm),
        'storey_level': int(storey_level),
        'flat_age': age_years,
        # A flat older than its lease term has no lease left, never a negative one.
        'remaining_lease': max(lease_term_years - age_years, 0),
    }


# --- Feature preparation ---
def prepare_features_for_model(input_data):
    """Turn one preprocessed input dict into the single-row frame passed to the model.

    The three categorical values are replaced by the integer codes of the
    module-level label encoders; the raw strings are not included.

    Args:
        input_data: Mapping with keys ``town``, ``flat_type``, ``flat_model``,
            ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
            ``remaining_lease``.

    Returns:
        One-row DataFrame with columns, in order: ``floor_area_sqm``,
        ``storey_level``, ``flat_age``, ``remaining_lease``,
        ``town_encoded``, ``flat_type_encoded``, ``flat_model_encoded``.
    """
    town_code = town_encoder.transform([input_data['town']])[0]
    flat_type_code = flat_type_encoder.transform([input_data['flat_type']])[0]
    flat_model_code = flat_model_encoder.transform([input_data['flat_model']])[0]

    # Dict insertion order fixes the column order of the resulting frame.
    row = {
        'floor_area_sqm': input_data['floor_area_sqm'],
        'storey_level': input_data['storey_level'],
        'flat_age': input_data['flat_age'],
        'remaining_lease': input_data['remaining_lease'],
        'town_encoded': town_code,
        'flat_type_encoded': flat_type_code,
        'flat_model_encoded': flat_model_code,
    }
    return pd.DataFrame([row])


# --- Model prediction ---
def predict_with_xgboost(input_data):
    """Predict a price with the loaded model, falling back to the weighted formula.

    The fallback is used when no model was loaded, and also when feature
    preparation or the model call raises (the error is printed, not re-raised).

    Args:
        input_data: Preprocessed feature dict as produced by ``preprocess_input``.

    Returns:
        The predicted price as a built-in number, floored at 100000.
    """
    if model is None:
        return simple_xgboost_emulation(input_data)

    try:
        features = prepare_features_for_model(input_data)
        # Cast the NumPy scalar to a built-in float so both the model path and
        # the fallback path hand plain Python numbers to the caller.
        prediction = float(model.predict(features)[0])
    except Exception as e:
        # Deliberate best effort: any failure degrades to the fallback estimate.
        print(f"Prediction error: {e}")
        return simple_xgboost_emulation(input_data)

    return max(prediction, 100000)


# --- Market insights ---
def create_market_insights_chart(data, town, flat_type, predicted_price):
    """Generate a Markdown summary comparing a prediction with past prices.

    Despite the name, no chart is drawn; the result is text.

    Args:
        data: DataFrame with ``town``, ``flat_type`` and ``resale_price`` columns.
        town: Town to filter on.
        flat_type: Flat type to filter on.
        predicted_price: Price to compare against the filtered average.

    Returns:
        Markdown text with the transaction count, average and min/max price
        and the percentage difference of the prediction from the average, or
        a plain message when no rows match the town / flat type pair.
    """
    filtered_data = data[(data['town'] == town) & (data['flat_type'] == flat_type)]

    if filtered_data.empty:
        return "No historical data available for this town and flat type combination."

    prices = filtered_data['resale_price']
    avg_price = prices.mean()
    min_price = prices.min()
    max_price = prices.max()
    count = len(filtered_data)

    price_difference = predicted_price - avg_price
    # Guard the division; a non-positive average reports a 0% difference.
    percentage_diff = (price_difference / avg_price) * 100 if avg_price > 0 else 0

    # Explicit None check: the truthiness of an arbitrary model object is not
    # a reliable signal for "a model was loaded".
    model_source = 'XGBoost model' if model is not None else 'fallback model'

    # Lines stay flush-left so Markdown does not treat them as an indented code block.
    lines = [
        "",
        f"## Market Insights for {town} - {flat_type}",
        "",
        f"- Historical transactions: {count}",
        f"- Average price: ${avg_price:,.2f}",
        f"- Price range: ${min_price:,.2f} - ${max_price:,.2f}",
        "",
        "### Prediction Analysis:",
        f"- Predicted Price: ${predicted_price:,.2f}",
        f"- Difference from average: {percentage_diff:+.1f}%",
        "",
        f"*Note: Market insights are based on simulated data. Prediction uses {model_source}.*",
        "",
    ]
    return "\n".join(lines)


# --- Prediction entry point for the UI ---
def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Validate the form values, predict a resale price and build the UI texts.

    Args:
        town: Selected town.
        flat_type: Selected flat type.
        flat_model: Selected flat model.
        floor_area_sqm: Floor area in square metres; must be a finite number > 0.
        storey_level: Storey; must convert to an integer > 0.
        flat_age: Age in years; must convert to an integer >= 0.

    Returns:
        A 3-tuple ``(price_text, insights_markdown, summary_markdown)``.
        On invalid input the first element is an error message and the other
        two are ``"Invalid input"``.
    """
    try:
        floor_area_sqm = float(floor_area_sqm)
        storey_level = int(storey_level)
        flat_age = int(flat_age)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers an empty field arriving as None; OverflowError
        # covers int() applied to an infinite value.
        return ("Please enter valid numbers for floor area, storey level, and flat age.",
                "Invalid input", "Invalid input")

    # NaN compares False against 0, so finiteness has to be checked explicitly.
    if (not math.isfinite(floor_area_sqm) or floor_area_sqm <= 0
            or storey_level <= 0 or flat_age < 0):
        return "Invalid input: Please enter positive values.", "Invalid input", "Invalid input"

    input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)

    predicted_price = predict_with_xgboost(input_data)

    insights = create_market_insights_chart(
        data=data,
        town=town,
        flat_type=flat_type,
        predicted_price=predicted_price,
    )

    # Explicit None check, matching how predict_with_xgboost picks its path.
    model_source = "XGBoost model" if model is not None else "fallback model"

    # NOTE(review): the heading emoji arrived mis-encoded in the source; the
    # house glyph is the most plausible original - confirm.
    summary_lines = [
        "",
        "### Property Details 🏡",
        f"- **Town:** {town}",
        f"- **Flat Type:** {flat_type}",
        f"- **Flat Model:** {flat_model}",
        f"- **Floor Area:** {floor_area_sqm} sqm",
        f"- **Storey Level:** {storey_level}",
        f"- **Flat Age:** {flat_age} years",
        "",
        "---",
        "",
        "### Prediction Summary",
        f"The predicted price is **${predicted_price:,.2f}**.",
        "",
        f"*Prediction made using {model_source}. Market insights based on simulated data.*",
        "",
    ]
    summary = "\n".join(summary_lines)

    return f"${predicted_price:,.2f}", insights, summary


# --- Gradio interface ---
# Build the interface: inputs and the predict button in the left column,
# the three outputs in the right column, example rows underneath.
with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
    # NOTE(review): the title emoji arrived mis-encoded in the source; only the
    # presence of a variation selector is recoverable, so the glyph is a best guess.
    gr.Markdown("# 🏘️ HDB Resale Price Predictor")
    gr.Markdown("Estimate the resale price of HDB flats in Singapore based on property features.")

    # Status banner; explicit None check mirrors predict_with_xgboost.
    if model is not None:
        gr.Markdown("✅ **XGBoost model loaded successfully!**")
    else:
        gr.Markdown("⚠️ **Using fallback model - XGBoost model not found**")

    with gr.Row():
        with gr.Column():
            town = gr.Dropdown(choices=towns_list, label="Town", value="ANG MO KIO")
            flat_type = gr.Dropdown(choices=flat_types, label="Flat Type", value="4 ROOM")
            flat_model = gr.Dropdown(choices=flat_models, label="Flat Model", value="Improved")
            floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100, minimum=1, maximum=500)
            storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
            flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
            predict_btn = gr.Button("Predict Price", variant="primary")

        with gr.Column():
            price_output = gr.Label(label="Predicted Resale Price")
            insights_output = gr.Markdown()
            summary_output = gr.Markdown()

    # Input order must match the positional parameters of predict_hdb_price;
    # output order must match the 3-tuple it returns.
    predict_btn.click(
        fn=predict_hdb_price,
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
        outputs=[price_output, insights_output, summary_output],
    )

    # Each example row fills the six inputs in the same order as above.
    gr.Examples(
        examples=[
            ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
            ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
            ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15],
        ],
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
    )


# --- Script entry point ---
if __name__ == "__main__":
    # Serve on all network interfaces at port 7860 when run as a script.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)