Lesterchia174 committed on
Commit
e6649ef
·
verified ·
1 Parent(s): afddd20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -146
app.py CHANGED
@@ -2,15 +2,8 @@
2
  import pandas as pd
3
  import numpy as np
4
  import gradio as gr
5
- import plotly.express as px
6
- import plotly.graph_objects as go
7
- from sklearn.linear_model import LinearRegression
8
- from sklearn.preprocessing import LabelEncoder
9
- import xgboost as xgb
10
- import pickle
11
- import os
12
 
13
- # Sample data generation (in a real app, you would load your actual dataset)
14
  def generate_sample_data():
15
  np.random.seed(42)
16
  n_samples = 1000
@@ -39,44 +32,38 @@ towns_list = data['town'].unique().tolist()
39
  flat_types = data['flat_type'].unique().tolist()
40
  flat_models = data['flat_model'].unique().tolist()
41
 
42
- # Create and train models (in a real app, you would load pre-trained models)
43
- def create_and_train_models(data):
44
- # Prepare features
45
- le_town = LabelEncoder()
46
- le_flat_type = LabelEncoder()
47
- le_flat_model = LabelEncoder()
48
-
49
- X = data.copy()
50
- X['town_encoded'] = le_town.fit_transform(data['town'])
51
- X['flat_type_encoded'] = le_flat_type.fit_transform(data['flat_type'])
52
- X['flat_model_encoded'] = le_flat_model.fit_transform(data['flat_model'])
53
- X['remaining_lease'] = 99 - data['flat_age']
54
- X['transaction_year'] = 2023 # Example year
55
-
56
- features = [
57
- 'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
58
- 'transaction_year', 'flat_type_encoded', 'town_encoded',
59
- 'flat_model_encoded'
60
- ]
61
 
62
- X_train = X[features]
63
- y_train = data['resale_price']
 
 
64
 
65
- # Train Linear Regression
66
- lr_model = LinearRegression()
67
- lr_model.fit(X_train, y_train)
68
 
69
- # Train XGBoost
70
- xgb_model = xgb.XGBRegressor(random_state=42)
71
- xgb_model.fit(X_train, y_train)
 
 
 
 
72
 
73
- return {
74
- 'linear_regression': lr_model,
75
- 'xgboost': xgb_model
76
- }
77
-
78
- # Train models
79
- models = create_and_train_models(data)
80
 
81
  def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
82
  """Preprocess user input into a format suitable for the models."""
@@ -84,96 +71,46 @@ def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level,
84
  'town': town,
85
  'flat_type': flat_type,
86
  'flat_model': flat_model,
87
- 'floor_area_sqm': floor_area_sqm,
88
- 'storey_level': storey_level,
89
- 'flat_age': flat_age,
90
- 'remaining_lease': 99 - flat_age,
91
- 'transaction_year': 2025,
92
- 'flat_type_encoded': flat_types.index(flat_type),
93
- 'town_encoded': towns_list.index(town),
94
- 'flat_model_encoded': flat_models.index(flat_model),
95
  }
96
 
97
- # The models are expecting a 2D array, so we create a DataFrame with one row
98
- df = pd.DataFrame([input_data])
99
-
100
- # We select the features that the models were trained on
101
- features = [
102
- 'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
103
- 'transaction_year', 'flat_type_encoded', 'town_encoded',
104
- 'flat_model_encoded'
105
- ]
106
-
107
- return df[features].values
108
 
109
- def create_market_insights_chart(data, town, flat_type, predicted_prices):
110
  """
111
- Generate a Plotly chart with market insights and the predicted price.
112
  """
113
  # Filter data for the specific town and flat type
114
  filtered_data = data[(data['town'] == town) & (data['flat_type'] == flat_type)]
115
 
116
  if filtered_data.empty:
117
- # Create an empty figure with a message
118
- fig = go.Figure()
119
- fig.add_annotation(
120
- text="No historical data for this combination.",
121
- xref="paper", yref="paper",
122
- x=0.5, y=0.5, showarrow=False,
123
- font=dict(size=16)
124
- )
125
- fig.update_layout(
126
- title=f'Resale Prices in {town} for {flat_type} Flats',
127
- xaxis_title="Flat Age (Years)",
128
- yaxis_title="Resale Price ($)"
129
- )
130
- return fig
131
-
132
- # Create the scatter plot
133
- fig = px.scatter(
134
- filtered_data,
135
- x='flat_age',
136
- y='resale_price',
137
- hover_data=['floor_area_sqm', 'flat_model'],
138
- title=f'Resale Prices in {town} for {flat_type} Flats',
139
- labels={'flat_age': 'Flat Age (Years)', 'resale_price': 'Resale Price ($)'},
140
- color_discrete_sequence=['#A2CFFE']
141
- )
142
 
143
- # Add a line showing the average trend
144
- avg_trend = filtered_data.groupby('flat_age')['resale_price'].mean().reset_index()
145
- fig.add_trace(go.Scatter(
146
- x=avg_trend['flat_age'],
147
- y=avg_trend['resale_price'],
148
- mode='lines',
149
- name='Average Price Trend',
150
- line=dict(color='gray', width=2, dash='dash')
151
- ))
152
 
153
- # Add predicted price points for both models
154
- # Use the average flat age from the filtered data for positioning
155
- avg_flat_age = filtered_data['flat_age'].mean()
156
 
157
- fig.add_trace(go.Scatter(
158
- x=[avg_flat_age],
159
- y=[predicted_prices['XGBoost']],
160
- mode='markers',
161
- name='XGBoost Prediction',
162
- marker=dict(symbol='diamond', size=15, color='red')
163
- ))
164
-
165
- fig.add_trace(go.Scatter(
166
- x=[avg_flat_age],
167
- y=[predicted_prices['Linear Regression']],
168
- mode='markers',
169
- name='Linear Regression Prediction',
170
- marker=dict(symbol='star', size=15, color='green')
171
- ))
172
 
173
- fig.update_layout(showlegend=True)
174
- return fig
 
 
175
 
176
- def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice):
177
  """Predict the HDB resale price using the selected model."""
178
 
179
  # Validate inputs
@@ -182,29 +119,20 @@ def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level,
182
  storey_level = int(storey_level)
183
  flat_age = int(flat_age)
184
  except ValueError:
185
- return "Please enter valid numbers for floor area, storey level, and flat age.", None, None
186
 
187
  # Preprocess the user input
188
- preprocessed_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)
189
-
190
- # Make predictions with both models to generate the chart
191
- predicted_price_xgboost = models['xgboost'].predict(preprocessed_data)[0]
192
- predicted_price_linear = models['linear_regression'].predict(preprocessed_data)[0]
193
 
194
- if model_choice == 'XGBoost':
195
- final_predicted_price = predicted_price_xgboost
196
- else:
197
- final_predicted_price = predicted_price_linear
198
 
199
- # Generate insights and chart
200
- chart = create_market_insights_chart(
201
  data=data,
202
  town=town,
203
  flat_type=flat_type,
204
- predicted_prices={
205
- "XGBoost": predicted_price_xgboost,
206
- "Linear Regression": predicted_price_linear
207
- }
208
  )
209
 
210
  summary = f"""
@@ -219,12 +147,12 @@ def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level,
219
  ---
220
 
221
  ### Prediction Summary
222
- The **{model_choice}** model predicts a price of **${final_predicted_price:,.2f}**.
223
 
224
- *Note: This is a demo with sample data. For accurate predictions, use real historical data.*
225
  """
226
 
227
- return f"${final_predicted_price:,.2f}", chart, summary
228
 
229
  # Create the Gradio interface
230
  with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
@@ -239,29 +167,28 @@ with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as de
239
  floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100)
240
  storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
241
  flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
242
- model_choice = gr.Radio(choices=["XGBoost", "Linear Regression"], label="Model Choice", value="XGBoost")
243
  predict_btn = gr.Button("Predict Price", variant="primary")
244
 
245
  with gr.Column():
246
  price_output = gr.Label(label="Predicted Resale Price")
247
- chart_output = gr.Plot(label="Market Insights")
248
  summary_output = gr.Markdown()
249
 
250
  predict_btn.click(
251
  fn=predict_hdb_price,
252
- inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice],
253
- outputs=[price_output, chart_output, summary_output]
254
  )
255
 
256
  gr.Examples(
257
  examples=[
258
- ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10, "XGBoost"],
259
- ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5, "Linear Regression"],
260
- ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15, "XGBoost"]
261
  ],
262
- inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice]
263
  )
264
 
265
  # Launch the application
266
  if __name__ == "__main__":
267
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
2
  import pandas as pd
3
  import numpy as np
4
  import gradio as gr
 
 
 
 
 
 
 
5
 
6
+ # Sample data generation
7
  def generate_sample_data():
8
  np.random.seed(42)
9
  n_samples = 1000
 
32
  flat_types = data['flat_type'].unique().tolist()
33
  flat_models = data['flat_model'].unique().tolist()
34
 
35
def simple_xgboost_emulation(input_data):
    """Return a heuristic price estimate that stands in for an XGBoost model.

    No trained model is involved. The estimate is a fixed-weight formula over
    the numeric inputs, the positional index of each categorical value in the
    module-level ``towns_list`` / ``flat_types`` / ``flat_models`` lists, and
    a single floor-area-by-storey product term.

    Args:
        input_data: Mapping with the keys ``town``, ``flat_type``,
            ``flat_model``, ``floor_area_sqm``, ``storey_level``,
            ``flat_age`` and ``remaining_lease``.

    Returns:
        The estimated price, never lower than 100000.

    Raises:
        ValueError: If a categorical value is absent from its lookup list
            (raised by ``list.index``).
        KeyError: If ``input_data`` lacks one of the keys listed above.
    """
    base_price = 220000
    price_floor = 100000
    interaction_weight = 500  # weight of the area x storey cross term

    # (lookup key, list that defines the category index, weight per index step)
    categorical_weights = (
        ('town', towns_list, 9500),
        ('flat_type', flat_types, 14500),
        ('flat_model', flat_models, 8500),
    )
    # (lookup key, weight per unit), listed in the order the terms are added
    numeric_weights = (
        ('floor_area_sqm', 5200),
        ('storey_level', 1800),
        ('flat_age', -2800),
        ('remaining_lease', 1200),
    )

    category_terms = [
        choices.index(input_data[key]) * weight
        for key, choices, weight in categorical_weights
    ]

    # Cross term that mimics a feature interaction a tree model could learn.
    cross_term = (
        (input_data['floor_area_sqm'] * input_data['storey_level'])
        / 100 * interaction_weight
    )

    # Accumulate strictly left to right so float rounding matches a single
    # chained "+" expression: base, numeric terms, category terms, cross term.
    estimate = base_price
    for key, weight in numeric_weights:
        estimate += input_data[key] * weight
    for term in category_terms:
        estimate += term
    estimate += cross_term

    # Clamp to the floor; on a tie (or NaN) the computed estimate is returned.
    return price_floor if price_floor > estimate else estimate
 
 
 
 
 
 
67
 
68
  def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
69
  """Preprocess user input into a format suitable for the models."""
 
71
  'town': town,
72
  'flat_type': flat_type,
73
  'flat_model': flat_model,
74
+ 'floor_area_sqm': float(floor_area_sqm),
75
+ 'storey_level': int(storey_level),
76
+ 'flat_age': int(flat_age),
77
+ 'remaining_lease': 99 - int(flat_age)
 
 
 
 
78
  }
79
 
80
+ return input_data
 
 
 
 
 
 
 
 
 
 
81
 
82
def create_market_insights_chart(data, town, flat_type, predicted_price):
    """Summarise historical prices for a town and flat type as Markdown text.

    Despite the name (kept so existing callers keep working), no chart is
    produced: the function returns a Markdown string.

    The text is assembled from explicit lines rather than a triple-quoted
    f-string, so the result never carries source-file indentation or
    leading/trailing blank lines.

    Args:
        data: DataFrame with ``town``, ``flat_type`` and ``resale_price``
            columns.
        town: Town to filter ``data`` on.
        flat_type: Flat type to filter ``data`` on.
        predicted_price: Numeric predicted price shown next to the statistics.

    Returns:
        A Markdown string with the transaction count, average price, price
        range and the predicted price, or a one-line notice when no rows
        match the town/flat type combination.
    """
    # Keep only the rows for the requested town and flat type.
    matches = data[(data['town'] == town) & (data['flat_type'] == flat_type)]

    if matches.empty:
        return "No historical data available for this town and flat type combination."

    prices = matches['resale_price']

    lines = [
        f"## Market Insights for {town} - {flat_type}",
        "",
        f"- Historical transactions: {len(matches)}",
        f"- Average price: ${prices.mean():,.2f}",
        f"- Price range: ${prices.min():,.2f} - ${prices.max():,.2f}",
        "",
        "### Prediction:",
        f"- Predicted Price: ${predicted_price:,.2f}",
        "",
        "*Note: These insights are based on simulated data.*",
    ]
    return "\n".join(lines)
112
 
113
+ def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
114
  """Predict the HDB resale price using the selected model."""
115
 
116
  # Validate inputs
 
119
  storey_level = int(storey_level)
120
  flat_age = int(flat_age)
121
  except ValueError:
122
+ return "Please enter valid numbers for floor area, storey level, and flat age.", "Invalid input", "Invalid input"
123
 
124
  # Preprocess the user input
125
+ input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)
 
 
 
 
126
 
127
+ # Make prediction
128
+ predicted_price = simple_xgboost_emulation(input_data)
 
 
129
 
130
+ # Generate insights
131
+ insights = create_market_insights_chart(
132
  data=data,
133
  town=town,
134
  flat_type=flat_type,
135
+ predicted_price=predicted_price
 
 
 
136
  )
137
 
138
  summary = f"""
 
147
  ---
148
 
149
  ### Prediction Summary
150
+ The predicted price is **${predicted_price:,.2f}**.
151
 
152
+ *Note: This is a demo with simulated data and simple prediction models.*
153
  """
154
 
155
+ return f"${predicted_price:,.2f}", insights, summary
156
 
157
  # Create the Gradio interface
158
  with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
 
167
  floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100)
168
  storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
169
  flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
 
170
  predict_btn = gr.Button("Predict Price", variant="primary")
171
 
172
  with gr.Column():
173
  price_output = gr.Label(label="Predicted Resale Price")
174
+ insights_output = gr.Markdown()
175
  summary_output = gr.Markdown()
176
 
177
  predict_btn.click(
178
  fn=predict_hdb_price,
179
+ inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
180
+ outputs=[price_output, insights_output, summary_output]
181
  )
182
 
183
  gr.Examples(
184
  examples=[
185
+ ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
186
+ ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
187
+ ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15]
188
  ],
189
+ inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age]
190
  )
191
 
192
  # Launch the application
193
  if __name__ == "__main__":
194
+ demo.launch(server_name="0.0.0.0", server_port=7860)