File size: 7,274 Bytes
70fb406
afddd20
 
 
70fb406
e6649ef
afddd20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70fb406
afddd20
 
70fb406
afddd20
 
 
 
 
e6649ef
 
 
 
 
 
 
 
 
 
 
 
 
 
afddd20
e6649ef
 
 
 
afddd20
e6649ef
 
afddd20
e6649ef
 
 
 
 
 
 
afddd20
e6649ef
ae3892f
 
 
 
 
 
 
e6649ef
 
 
 
ae3892f
 
e6649ef
ae3892f
e6649ef
afddd20
e6649ef
afddd20
 
 
 
 
e6649ef
afddd20
e6649ef
 
 
 
 
afddd20
e6649ef
 
afddd20
e6649ef
 
 
 
 
 
afddd20
e6649ef
 
 
 
afddd20
e6649ef
ae3892f
 
afddd20
 
 
 
 
 
e6649ef
afddd20
ae3892f
e6649ef
ae3892f
e6649ef
 
ae3892f
e6649ef
 
ae3892f
 
 
e6649ef
ae3892f
 
 
 
 
 
afddd20
ae3892f
 
 
 
 
 
 
e6649ef
afddd20
e6649ef
ae3892f
 
e6649ef
ae3892f
afddd20
 
 
 
ae3892f
afddd20
 
 
 
 
 
 
 
 
 
 
 
e6649ef
afddd20
ae3892f
afddd20
 
e6649ef
 
afddd20
ae3892f
afddd20
 
e6649ef
 
 
afddd20
e6649ef
afddd20
ae3892f
afddd20
 
e6649ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import gradio as gr

# Sample data generation
def generate_sample_data(n_samples=1000, seed=42):
    """Build a synthetic table of HDB resale transactions.

    All columns are drawn independently at random, so the resulting prices
    carry no real relationship to the other features; the table exists only
    to give the demo something to summarise.

    Args:
        n_samples: Number of rows to generate. Defaults to 1000.
        seed: Value passed to ``np.random.seed`` before drawing. Defaults to
            42 so the default call always yields the same table. Passing
            ``None`` lets NumPy pick a fresh seed.

    Returns:
        A ``pandas.DataFrame`` with ``n_samples`` rows and the columns
        ``town``, ``flat_type``, ``flat_model``, ``floor_area_sqm``,
        ``storey_level``, ``flat_age`` and ``resale_price``.

    Note:
        This reseeds NumPy's *global* legacy random state as a side effect.
    """
    np.random.seed(seed)

    towns = ['ANG MO KIO', 'BEDOK', 'CLEMENTI', 'QUEENSTOWN', 'TAMPINES']
    flat_types = ['2 ROOM', '3 ROOM', '4 ROOM', '5 ROOM', 'EXECUTIVE']
    flat_models = ['Improved', 'New Generation', 'Model A', 'Standard', 'Premium']

    # The columns are drawn in this exact order; reordering them would change
    # every value produced for a given seed.
    data = {
        'town': np.random.choice(towns, n_samples),
        'flat_type': np.random.choice(flat_types, n_samples),
        'flat_model': np.random.choice(flat_models, n_samples),
        'floor_area_sqm': np.random.uniform(60, 150, n_samples),
        # randint excludes the upper bound: storeys 1..24, ages 0..49.
        'storey_level': np.random.randint(1, 25, n_samples),
        'flat_age': np.random.randint(0, 50, n_samples),
        'resale_price': np.random.uniform(200000, 800000, n_samples),
    }

    return pd.DataFrame(data)

# Build the demo dataset once at import time
data = generate_sample_data()

# Category lists used both as dropdown choices and as ordinal encoders.
# Each list is in order of first appearance in the generated data, so a
# category's index (and therefore its price contribution) follows that order.
towns_list, flat_types, flat_models = (
    data[column].unique().tolist()
    for column in ('town', 'flat_type', 'flat_model')
)

def simple_xgboost_emulation(input_data):
    """Estimate a resale price with a fixed, hand-weighted formula.

    No XGBoost model is involved: the price is a base amount plus weighted
    numeric features, plus ordinal contributions from the categorical
    features, plus one area-by-storey cross term.

    Args:
        input_data: Mapping with the keys ``town``, ``flat_type``,
            ``flat_model``, ``floor_area_sqm``, ``storey_level``,
            ``flat_age`` and ``remaining_lease``.

    Returns:
        The estimated price, never lower than 100,000.

    Raises:
        ValueError: If a categorical value is not present in the
            corresponding module-level list.
    """
    BASE_PRICE = 220000
    PER_SQM = 5200
    PER_STOREY = 1800
    PER_YEAR_OF_AGE = -2800
    PER_LEASE_YEAR = 1200
    PER_TOWN_RANK = 9500
    PER_TYPE_RANK = 14500
    PER_MODEL_RANK = 8500
    CROSS_WEIGHT = 500  # stands in for tree-style feature interactions
    PRICE_FLOOR = 100000

    # Categorical features contribute their position in the module-level
    # lists multiplied by a per-rank weight.
    town_term = towns_list.index(input_data['town']) * PER_TOWN_RANK
    type_term = flat_types.index(input_data['flat_type']) * PER_TYPE_RANK
    model_term = flat_models.index(input_data['flat_model']) * PER_MODEL_RANK

    area = input_data['floor_area_sqm']
    storey = input_data['storey_level']

    # Area x storey interaction, scaled down by 100 before weighting.
    cross_term = (area * storey) / 100 * CROSS_WEIGHT

    # Terms are added strictly left to right so float rounding is stable.
    estimate = (BASE_PRICE +
                area * PER_SQM +
                storey * PER_STOREY +
                input_data['flat_age'] * PER_YEAR_OF_AGE +
                input_data['remaining_lease'] * PER_LEASE_YEAR +
                town_term + type_term + model_term + cross_term)

    return max(estimate, PRICE_FLOOR)

def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Normalise raw form values into the feature mapping the model reads.

    The categorical values are passed through untouched. Floor area is
    coerced to ``float``; storey level and flat age are coerced to ``int``.
    ``remaining_lease`` is derived as 99 minus the flat age.

    Returns:
        A dict with the keys ``town``, ``flat_type``, ``flat_model``,
        ``floor_area_sqm``, ``storey_level``, ``flat_age`` and
        ``remaining_lease``.
    """
    area = float(floor_area_sqm)
    storey = int(storey_level)
    age = int(flat_age)

    return dict(
        town=town,
        flat_type=flat_type,
        flat_model=flat_model,
        floor_area_sqm=area,
        storey_level=storey,
        flat_age=age,
        remaining_lease=99 - age,
    )

def create_market_insights_chart(data, town, flat_type, predicted_price):
    """Return a Markdown summary of past prices for one town / flat type.

    Despite the name, this produces text rather than a chart (Plotly is not
    available to the app).

    Args:
        data: DataFrame with at least ``town``, ``flat_type`` and
            ``resale_price`` columns.
        town: Town to filter on.
        flat_type: Flat type to filter on.
        predicted_price: Price to show in the prediction section.

    Returns:
        A Markdown string, or a fixed "no data" sentence when no row matches
        both filters.
    """
    same_town = data['town'] == town
    same_type = data['flat_type'] == flat_type
    matches = data[same_town & same_type]

    if matches.empty:
        return "No historical data available for this town and flat type combination."

    prices = matches['resale_price']
    mean_price = prices.mean()
    lowest = prices.min()
    highest = prices.max()
    n_rows = len(matches)

    # Every line, including the visually blank ones and the last one, carries
    # a four-space indent; the text opens with a newline.
    indent = "    "
    report_lines = [
        "",
        f"{indent}## Market Insights for {town} - {flat_type}",
        indent,
        f"{indent}- Historical transactions: {n_rows}",
        f"{indent}- Average price: ${mean_price:,.2f}",
        f"{indent}- Price range: ${lowest:,.2f} - ${highest:,.2f}",
        indent,
        f"{indent}### Prediction:",
        f"{indent}- Predicted Price: ${predicted_price:,.2f}",
        indent,
        f"{indent}*Note: These insights are based on simulated data.*",
        indent,
    ]
    return "\n".join(report_lines)

def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
    """Predict an HDB resale price and build the texts shown in the UI.

    Args:
        town: Town name; must be one of the module-level ``towns_list``.
        flat_type: Flat type; must be one of the module-level ``flat_types``.
        flat_model: Flat model; must be one of the module-level
            ``flat_models``.
        floor_area_sqm: Floor area in square metres (anything ``float()``
            accepts).
        storey_level: Storey number (anything ``int()`` accepts).
        flat_age: Age of the flat in years (anything ``int()`` accepts).

    Returns:
        A 3-tuple ``(price_text, insights_markdown, summary_markdown)``.
        When a numeric input cannot be converted, the tuple instead holds an
        error message followed by two ``"Invalid input"`` placeholders.
    """

    # Validate inputs
    try:
        floor_area_sqm = float(floor_area_sqm)
        storey_level = int(storey_level)
        flat_age = int(flat_age)
    except (TypeError, ValueError, OverflowError):
        # TypeError covers None, which is what an emptied number field
        # delivers; OverflowError covers int() of an infinite float. Both
        # must take the same friendly path as an unparsable string.
        return "Please enter valid numbers for floor area, storey level, and flat age.", "Invalid input", "Invalid input"
    
    # Preprocess the user input
    input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)

    # Make prediction
    predicted_price = simple_xgboost_emulation(input_data)

    # Generate insights from the module-level sample dataset
    insights = create_market_insights_chart(
        data=data, 
        town=town, 
        flat_type=flat_type, 
        predicted_price=predicted_price
    )
    
    summary = f"""
    ### Property Details 🏡
    - **Town:** {town}
    - **Flat Type:** {flat_type}
    - **Flat Model:** {flat_model}
    - **Floor Area:** {floor_area_sqm} sqm
    - **Storey Level:** {storey_level}
    - **Flat Age:** {flat_age} years

    ---

    ### Prediction Summary
    The predicted price is **${predicted_price:,.2f}**.
    
    *Note: This is a demo with simulated data and simple prediction models.*
    """
    
    return f"${predicted_price:,.2f}", insights, summary

# Create the Gradio interface
# `demo` is the top-level Blocks app; it is launched by the __main__ guard at
# the bottom of the file.
with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏘️ HDB Resale Price Predictor")
    gr.Markdown("Estimate the resale price of HDB flats in Singapore based on property features.")
    
    with gr.Row():
        # First column: the six property inputs plus the trigger button.
        with gr.Column():
            # Dropdown choices come from the categories found in the sample data.
            town = gr.Dropdown(choices=towns_list, label="Town", value="ANG MO KIO")
            flat_type = gr.Dropdown(choices=flat_types, label="Flat Type", value="4 ROOM")
            flat_model = gr.Dropdown(choices=flat_models, label="Flat Model", value="Improved")
            floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100)
            storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
            # Age is capped at 99, matching the 99-minus-age lease computed
            # in preprocess_input.
            flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
            predict_btn = gr.Button("Predict Price", variant="primary")
        
        # Second column: the three outputs returned by predict_hdb_price.
        with gr.Column():
            price_output = gr.Label(label="Predicted Resale Price")
            insights_output = gr.Markdown()
            summary_output = gr.Markdown()
    
    # The input order must match predict_hdb_price's parameter order, and the
    # output order must match the order of its returned 3-tuple.
    predict_btn.click(
        fn=predict_hdb_price,
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
        outputs=[price_output, insights_output, summary_output]
    )
    
    # Example rows; each row lists values in the same order as `inputs`.
    gr.Examples(
        examples=[
            ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
            ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
            ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15]
        ],
        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age]
    )

# Launch the application
# Only start the server when this file is run as a script, not on import.
if __name__ == "__main__":
    # "0.0.0.0" listens on all network interfaces, on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)