Spaces:

Lesterchia174
/

HDB_Price_Predictor_R1

Sleeping

App Files Files Community

Lesterchia174 committed on Aug 26

Commit

e6649ef

verified ·

1 Parent(s): afddd20

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -146

app.py CHANGED Viewed

@@ -2,15 +2,8 @@
 import pandas as pd
 import numpy as np
 import gradio as gr
-import plotly.express as px
-import plotly.graph_objects as go
-from sklearn.linear_model import LinearRegression
-from sklearn.preprocessing import LabelEncoder
-import xgboost as xgb
-import pickle
-import os
-# Sample data generation (in a real app, you would load your actual dataset)
 def generate_sample_data():
     np.random.seed(42)
     n_samples = 1000
@@ -39,44 +32,38 @@ towns_list = data['town'].unique().tolist()
 flat_types = data['flat_type'].unique().tolist()
 flat_models = data['flat_model'].unique().tolist()
-# Create and train models (in a real app, you would load pre-trained models)
-def create_and_train_models(data):
-    # Prepare features
-    le_town = LabelEncoder()
-    le_flat_type = LabelEncoder()
-    le_flat_model = LabelEncoder()
-    X = data.copy()
-    X['town_encoded'] = le_town.fit_transform(data['town'])
-    X['flat_type_encoded'] = le_flat_type.fit_transform(data['flat_type'])
-    X['flat_model_encoded'] = le_flat_model.fit_transform(data['flat_model'])
-    X['remaining_lease'] = 99 - data['flat_age']
-    X['transaction_year'] = 2023  # Example year
-    features = [
-        'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
-        'transaction_year', 'flat_type_encoded', 'town_encoded',
-        'flat_model_encoded'
-    ]
-    X_train = X[features]
-    y_train = data['resale_price']
-    # Train Linear Regression
-    lr_model = LinearRegression()
-    lr_model.fit(X_train, y_train)
-    # Train XGBoost
-    xgb_model = xgb.XGBRegressor(random_state=42)
-    xgb_model.fit(X_train, y_train)
-    return {
-        'linear_regression': lr_model,
-        'xgboost': xgb_model
-    }
-# Train models
-models = create_and_train_models(data)
 def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
     """Preprocess user input into a format suitable for the models."""
@@ -84,96 +71,46 @@ def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level,
         'town': town,
         'flat_type': flat_type,
         'flat_model': flat_model,
-        'floor_area_sqm': floor_area_sqm,
-        'storey_level': storey_level,
-        'flat_age': flat_age,
-        'remaining_lease': 99 - flat_age,
-        'transaction_year': 2025,
-        'flat_type_encoded': flat_types.index(flat_type),
-        'town_encoded': towns_list.index(town),
-        'flat_model_encoded': flat_models.index(flat_model),
     }
-    # The models are expecting a 2D array, so we create a DataFrame with one row
-    df = pd.DataFrame([input_data])
-    # We select the features that the models were trained on
-    features = [
-        'floor_area_sqm', 'storey_level', 'flat_age', 'remaining_lease',
-        'transaction_year', 'flat_type_encoded', 'town_encoded',
-        'flat_model_encoded'
-    ]
-    return df[features].values
-def create_market_insights_chart(data, town, flat_type, predicted_prices):
     """
-    Generate a Plotly chart with market insights and the predicted price.
     """
     # Filter data for the specific town and flat type
     filtered_data = data[(data['town'] == town) & (data['flat_type'] == flat_type)]
     if filtered_data.empty:
-        # Create an empty figure with a message
-        fig = go.Figure()
-        fig.add_annotation(
-            text="No historical data for this combination.",
-            xref="paper", yref="paper",
-            x=0.5, y=0.5, showarrow=False,
-            font=dict(size=16)
-        )
-        fig.update_layout(
-            title=f'Resale Prices in {town} for {flat_type} Flats',
-            xaxis_title="Flat Age (Years)",
-            yaxis_title="Resale Price ($)"
-        )
-        return fig
-    # Create the scatter plot
-    fig = px.scatter(
-        filtered_data,
-        x='flat_age',
-        y='resale_price',
-        hover_data=['floor_area_sqm', 'flat_model'],
-        title=f'Resale Prices in {town} for {flat_type} Flats',
-        labels={'flat_age': 'Flat Age (Years)', 'resale_price': 'Resale Price ($)'},
-        color_discrete_sequence=['#A2CFFE']
-    )
-    # Add a line showing the average trend
-    avg_trend = filtered_data.groupby('flat_age')['resale_price'].mean().reset_index()
-    fig.add_trace(go.Scatter(
-        x=avg_trend['flat_age'],
-        y=avg_trend['resale_price'],
-        mode='lines',
-        name='Average Price Trend',
-        line=dict(color='gray', width=2, dash='dash')
-    ))
-    # Add predicted price points for both models
-    # Use the average flat age from the filtered data for positioning
-    avg_flat_age = filtered_data['flat_age'].mean()
-    fig.add_trace(go.Scatter(
-        x=[avg_flat_age],
-        y=[predicted_prices['XGBoost']],
-        mode='markers',
-        name='XGBoost Prediction',
-        marker=dict(symbol='diamond', size=15, color='red')
-    ))
-    fig.add_trace(go.Scatter(
-        x=[avg_flat_age],
-        y=[predicted_prices['Linear Regression']],
-        mode='markers',
-        name='Linear Regression Prediction',
-        marker=dict(symbol='star', size=15, color='green')
-    ))
-    fig.update_layout(showlegend=True)
-    return fig
-def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice):
     """Predict the HDB resale price using the selected model."""
     # Validate inputs
@@ -182,29 +119,20 @@ def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level,
         storey_level = int(storey_level)
         flat_age = int(flat_age)
     except ValueError:
-        return "Please enter valid numbers for floor area, storey level, and flat age.", None, None
     # Preprocess the user input
-    preprocessed_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)
-    # Make predictions with both models to generate the chart
-    predicted_price_xgboost = models['xgboost'].predict(preprocessed_data)[0]
-    predicted_price_linear = models['linear_regression'].predict(preprocessed_data)[0]
-    if model_choice == 'XGBoost':
-        final_predicted_price = predicted_price_xgboost
-    else:
-        final_predicted_price = predicted_price_linear
-    # Generate insights and chart
-    chart = create_market_insights_chart(
         data=data,
         town=town,
         flat_type=flat_type,
-        predicted_prices={
-            "XGBoost": predicted_price_xgboost,
-            "Linear Regression": predicted_price_linear
-        }
     )
     summary = f"""
@@ -219,12 +147,12 @@ def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level,
     ---
     ### Prediction Summary
-    The **{model_choice}** model predicts a price of **${final_predicted_price:,.2f}**.
-    *Note: This is a demo with sample data. For accurate predictions, use real historical data.*
     """
-    return f"${final_predicted_price:,.2f}", chart, summary
 # Create the Gradio interface
 with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
@@ -239,29 +167,28 @@ with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as de
             floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100)
             storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
             flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
-            model_choice = gr.Radio(choices=["XGBoost", "Linear Regression"], label="Model Choice", value="XGBoost")
             predict_btn = gr.Button("Predict Price", variant="primary")
         with gr.Column():
             price_output = gr.Label(label="Predicted Resale Price")
-            chart_output = gr.Plot(label="Market Insights")
             summary_output = gr.Markdown()
     predict_btn.click(
         fn=predict_hdb_price,
-        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice],
-        outputs=[price_output, chart_output, summary_output]
     )
     gr.Examples(
         examples=[
-            ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10, "XGBoost"],
-            ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5, "Linear Regression"],
-            ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15, "XGBoost"]
         ],
-        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age, model_choice]
     )
 # Launch the application
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import pandas as pd
 import numpy as np
 import gradio as gr
+# Sample data generation
 def generate_sample_data():
     np.random.seed(42)
     n_samples = 1000
 flat_types = data['flat_type'].unique().tolist()
 flat_models = data['flat_model'].unique().tolist()
+def simple_xgboost_emulation(input_data):
+    """Estimate a resale price from fixed, hand-picked weights.
+
+    No XGBoost model is involved: the result is a weighted sum of the
+    numeric inputs, position-based factors for the categorical inputs and
+    one floor-area x storey-level interaction term, floored at 100,000.
+
+    Args:
+        input_data: Mapping with the keys 'town', 'flat_type', 'flat_model',
+            'floor_area_sqm', 'storey_level', 'flat_age' and
+            'remaining_lease'.
+
+    Returns:
+        The computed price, or 100000 when the formula yields less.
+
+    Raises:
+        ValueError: If the town, flat type or flat model is not present in
+            the module-level towns_list / flat_types / flat_models lists
+            (raised by list.index).
+    """
+    # Arbitrary constants chosen for the demo; they are not fitted to any data
+    weights = {
+        'floor_area_sqm': 5200,
+        'storey_level': 1800,
+        'flat_age': -2800,
+        'remaining_lease': 1200,
+        'town_factor': 9500,
+        'flat_type_factor': 14500,
+        'flat_model_factor': 8500,
+        'base_price': 220000,
+        'interaction_factor': 500  # Weight of the area x storey term below
+    }
+    # Each categorical value contributes its position in the module-level list times its weight
+    town_factor = towns_list.index(input_data['town']) * weights['town_factor']
+    flat_type_factor = flat_types.index(input_data['flat_type']) * weights['flat_type_factor']
+    flat_model_factor = flat_models.index(input_data['flat_model']) * weights['flat_model_factor']
+    # Single cross term: floor area times storey level, scaled down by 100
+    interaction = (input_data['floor_area_sqm'] * input_data['storey_level']) / 100 * weights['interaction_factor']
+    # Sum the base price, the weighted numeric inputs, the categorical factors and the cross term
+    price = (weights['base_price'] +
+             input_data['floor_area_sqm'] * weights['floor_area_sqm'] +
+             input_data['storey_level'] * weights['storey_level'] +
+             input_data['flat_age'] * weights['flat_age'] +
+             input_data['remaining_lease'] * weights['remaining_lease'] +
+             town_factor + flat_type_factor + flat_model_factor + interaction)
+    return max(price, 100000)  # Clamp so the result is never below 100,000
 def preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
     """Preprocess user input into a format suitable for the models."""
         'town': town,
         'flat_type': flat_type,
         'flat_model': flat_model,
+        'floor_area_sqm': float(floor_area_sqm),
+        'storey_level': int(storey_level),
+        'flat_age': int(flat_age),
+        'remaining_lease': 99 - int(flat_age)
     }
+    return input_data
+def create_market_insights_chart(data, town, flat_type, predicted_price):
     """
+    Build a Markdown text summary of prices for one town and flat type.
+
+    Despite the name, no chart is produced; the function returns a string.
+
+    Args:
+        data: Table filtered by boolean mask on its 'town' and 'flat_type'
+            columns; its 'resale_price' column supplies the statistics.
+        town: Town value that rows must equal.
+        flat_type: Flat type value that rows must equal.
+        predicted_price: Number shown in the prediction line, formatted
+            with thousands separators and two decimals.
+
+    Returns:
+        A fixed message when no rows match; otherwise Markdown text with
+        the row count, mean, minimum and maximum resale price and the
+        predicted price.
     """
     # Filter data for the specific town and flat type
     filtered_data = data[(data['town'] == town) & (data['flat_type'] == flat_type)]
     if filtered_data.empty:
+        return "No historical data available for this town and flat type combination."
+    # Summary statistics over the matching rows only
+    avg_price = filtered_data['resale_price'].mean()
+    min_price = filtered_data['resale_price'].min()
+    max_price = filtered_data['resale_price'].max()
+    count = len(filtered_data)
+    insight_text = f"""
+    ## Market Insights for {town} - {flat_type}
+    - Historical transactions: {count}
+    - Average price: ${avg_price:,.2f}
+    - Price range: ${min_price:,.2f} - ${max_price:,.2f}
+    ### Prediction:
+    - Predicted Price: ${predicted_price:,.2f}
+    *Note: These insights are based on simulated data.*
+    """
+    return insight_text
+def predict_hdb_price(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age):
     """Predict the HDB resale price using the selected model."""
     # Validate inputs
         storey_level = int(storey_level)
         flat_age = int(flat_age)
     except ValueError:
+        return "Please enter valid numbers for floor area, storey level, and flat age.", "Invalid input", "Invalid input"
     # Preprocess the user input
+    input_data = preprocess_input(town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age)
+    # Make prediction
+    predicted_price = simple_xgboost_emulation(input_data)
+    # Generate insights
+    insights = create_market_insights_chart(
         data=data,
         town=town,
         flat_type=flat_type,
+        predicted_price=predicted_price
     )
     summary = f"""
     ---
     ### Prediction Summary
+    The predicted price is **${predicted_price:,.2f}**.
+    *Note: This is a demo with simulated data and simple prediction models.*
     """
+    return f"${predicted_price:,.2f}", insights, summary
 # Create the Gradio interface
 with gr.Blocks(title="HDB Resale Price Predictor", theme=gr.themes.Soft()) as demo:
             floor_area_sqm = gr.Number(label="Floor Area (sqm)", value=100)
             storey_level = gr.Slider(minimum=1, maximum=50, step=1, label="Storey Level", value=5)
             flat_age = gr.Slider(minimum=0, maximum=99, step=1, label="Flat Age (years)", value=10)
             predict_btn = gr.Button("Predict Price", variant="primary")
         with gr.Column():
             price_output = gr.Label(label="Predicted Resale Price")
+            insights_output = gr.Markdown()
             summary_output = gr.Markdown()
     predict_btn.click(
         fn=predict_hdb_price,
+        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age],
+        outputs=[price_output, insights_output, summary_output]
     )
     gr.Examples(
         examples=[
+            ["ANG MO KIO", "4 ROOM", "Improved", 100, 5, 10],
+            ["BEDOK", "3 ROOM", "New Generation", 80, 8, 5],
+            ["TAMPINES", "5 ROOM", "Model A", 120, 12, 15]
         ],
+        inputs=[town, flat_type, flat_model, floor_area_sqm, storey_level, flat_age]
     )
 # Launch the application
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)