# tt / app.py
# Mohammed Foud
# Add application file
# 0aee734
import gradio as gr
from transformers import pipeline
from textblob import TextBlob
from collections import defaultdict
import pandas as pd
from tabulate import tabulate
# Initialize the Hugging Face summarization pipeline once, at import time,
# using the facebook/bart-large-cnn checkpoint.
# NOTE(review): nothing else in the visible file references `summarizer` —
# confirm it is still needed before loading the model at startup.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def generate_category_summaries(df):
    """Build the summary tables for every category in a review dataframe.

    Categories are the distinct values of ``df['cluster_name']``. A category
    is left out when it has fewer than 10 rows, or when fewer than 3 products
    remain after ``get_product_stats`` has filtered them.

    Returns:
        dict mapping each kept category to the list of table dicts produced
        by ``format_tables``.
    """
    result = {}
    for name in df['cluster_name'].unique():
        rows = df[df['cluster_name'] == name]
        if len(rows) >= 10:
            stats = get_product_stats(rows)
            if len(stats) >= 3:
                best, worst = get_top_and_worst_products(stats)
                details = analyze_top_products(best)
                # One list of table dicts per category.
                result[name] = format_tables(name, details, worst)
    return result
def format_tables(category, product_details, worst_product):
    """Format the sections of one category summary as table descriptions.

    Args:
        category: Category name; upper-cased into the first section title.
        product_details: List of dicts with the keys ``name``, ``rating``,
            ``review_count``, ``pros`` and ``cons`` (as built by
            ``analyze_top_products``). May be empty.
        worst_product: DataFrame holding at most one row with the columns
            ``avg_rating`` and ``reviews``, indexed by product name. May be
            empty, in which case the "PRODUCT TO AVOID" section is omitted.

    Returns:
        List of dicts, each with the keys ``section`` (title), ``headers``
        (column names) and ``data`` (list of rows).
    """
    tables = []

    # The two product-based sections need at least one product; without this
    # guard ``product_details[0]`` raises IndexError on an empty list.
    if product_details:
        # Top Products Table
        top_table = [
            [
                product['name'],
                f"★{product['rating']:.1f}",
                product['review_count'],
                "\n".join(product['pros']),
                "\n".join(product['cons']),
            ]
            for product in product_details
        ]
        tables.append({
            'section': f"TOP PRODUCTS IN {category.upper()}",
            'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
            'data': top_table
        })

        # Key Differences Table: pros shared by every product are not
        # distinguishing, so only the remaining ones are listed per product.
        common_pros = set(product_details[0]['pros'])
        for product in product_details[1:]:
            common_pros.intersection_update(product['pros'])
        diff_table = []
        for product in product_details:
            unique_pros = [p for p in product['pros'] if p not in common_pros]
            if unique_pros:
                diff_table.append([product['name'], ", ".join(unique_pros)])
        if diff_table:
            tables.append({
                'section': "KEY DIFFERENCES",
                'headers': ["Product", "Unique Features"],
                'data': diff_table
            })

    # Worst Product Table
    if not worst_product.empty:
        worst = worst_product.iloc[0]
        _, cons = analyze_sentiment(worst['reviews'])
        tables.append({
            'section': "PRODUCT TO AVOID",
            'headers': ["Product", "Rating", "Reasons to Avoid"],
            'data': [[
                worst_product.index[0],
                f"★{worst['avg_rating']:.1f}",
                ", ".join(cons[:3]) if cons else "Consistently poor ratings"
            ]]
        })
    return tables
def get_product_stats(category_df, min_reviews=5):
    """Calculate per-product statistics from a category's review rows.

    Args:
        category_df: DataFrame with the columns ``name`` (product name),
            ``rating`` and ``text`` (review text).
        min_reviews: Minimum number of ratings a product needs to be kept.
            Defaults to 5.

    Returns:
        DataFrame indexed by product name with the columns ``avg_rating``
        (mean rating), ``review_count`` (number of non-null ratings) and
        ``reviews`` (list of the product's review texts), restricted to
        products with at least ``min_reviews`` ratings.
    """
    # Named aggregation yields the final flat column names directly.
    stats = category_df.groupby('name').agg(
        avg_rating=('rating', 'mean'),
        review_count=('rating', 'count'),
        reviews=('text', list),
    )
    return stats[stats['review_count'] >= min_reviews]
def get_top_and_worst_products(product_stats):
    """Identify the best and worst performing products.

    Args:
        product_stats: DataFrame indexed by product name with an
            ``avg_rating`` column.

    Returns:
        Tuple ``(top_products, worst_product)``: the (up to) three rows with
        the highest ``avg_rating``, and the single lowest-rated row among the
        products that are not in the top set. ``worst_product`` is empty
        when every product is already a top product.
    """
    top_products = product_stats.nlargest(3, 'avg_rating')
    # Pick the worst product only from those outside the top set; otherwise a
    # category with three or fewer products would list the same product as
    # both a top pick and the one to avoid.
    remaining = product_stats.drop(index=top_products.index)
    worst_product = remaining.nsmallest(1, 'avg_rating')
    return top_products, worst_product
def analyze_top_products(top_products):
    """Describe each top product together with its leading pros and cons.

    ``top_products`` is iterated row by row; each row must provide
    ``reviews``, ``avg_rating`` and ``review_count``, and the row label is
    used as the product name. At most three pros and three cons are kept per
    product, with a placeholder entry when a list would otherwise be empty.

    Returns a list of dicts with the keys ``name``, ``rating``,
    ``review_count``, ``pros`` and ``cons``.
    """
    def describe(name, stats_row):
        positives, negatives = analyze_sentiment(stats_row['reviews'])
        leading_pros = positives[:3]
        leading_cons = negatives[:3]
        return {
            'name': name,
            'rating': stats_row['avg_rating'],
            'review_count': stats_row['review_count'],
            'pros': leading_pros if leading_pros else ["no significant positive feedback"],
            'cons': leading_cons if leading_cons else ["no major complaints"],
        }

    return [describe(name, stats_row) for name, stats_row in top_products.iterrows()]
def analyze_sentiment(reviews):
    """Extract frequently mentioned positive and negative terms from reviews.

    Each review is split into sentences with TextBlob. For a sentence whose
    polarity is above 0.3 (positive) or below -0.3 (negative), the nouns and
    adjectives of that sentence are counted towards the pros or cons
    respectively. Sentences in between are ignored.

    Args:
        reviews: Iterable of review texts. Entries that are not non-blank
            strings (e.g. None or NaN for missing text) are skipped.

    Returns:
        Tuple ``(pros, cons)``: two lists of words ordered by descending
        mention count (ties keep first-seen order).
    """
    wanted_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')  # nouns and adjectives
    pros = defaultdict(int)
    cons = defaultdict(int)
    for review in reviews:
        # TextBlob only accepts strings; skip missing or blank entries
        # instead of failing on the whole batch.
        if not isinstance(review, str) or not review.strip():
            continue
        for sentence in TextBlob(review).sentences:
            polarity = sentence.sentiment.polarity
            if polarity > 0.3:  # Positive
                bucket = pros
            elif polarity < -0.3:  # Negative
                bucket = cons
            else:
                continue
            # Use the tags of this sentence only. Tagging the whole review
            # here would credit words from unrelated sentences to this
            # sentence's sentiment.
            for word, tag in sentence.tags:
                if tag in wanted_tags:
                    bucket[word] += 1
    # Sort by count, highest first; the sort is stable so ties keep
    # insertion order.
    pros_sorted = sorted(pros, key=pros.get, reverse=True)
    cons_sorted = sorted(cons, key=cons.get, reverse=True)
    return pros_sorted, cons_sorted
def format_for_gradio(summaries):
    """Convert summary tables to an HTML string for Gradio display.

    Args:
        summaries: dict mapping a category name to a list of table dicts,
            each with the keys ``section``, ``headers`` and ``data``.

    Returns:
        One HTML fragment per category (an ``<h2>`` heading followed by an
        ``<h3>`` heading and a styled table per section), joined with
        ``<hr>``. An empty ``summaries`` yields an empty string.
    """
    # Function-scope import keeps this block self-contained.
    import html

    table_open = '<table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">'
    header_box = 'background-color: #f2f2f2; padding: 8px; border: 1px solid #ddd;'
    cell_box = 'padding: 8px; border: 1px solid #ddd;'

    outputs = []
    for category, tables in summaries.items():
        # Category and section names come from the data, so escape them
        # before placing them in markup (upper-case first so entities such
        # as ``&amp;`` are not mangled).
        heading = html.escape(str(category).upper())
        category_html = f"<h2 style='color: #4a6baf;'>{heading}</h2>"
        for table in tables:
            section = html.escape(str(table['section']))
            table_html = f"<h3 style='color: #3a5a8a;'>{section}</h3>"
            table_html += tabulate(
                table['data'],
                headers=table['headers'],
                tablefmt="html",
                stralign="left",
                numalign="center"
            )
            table_html = table_html.replace('<table>', table_open)
            # tabulate's html format appears to put its own
            # ``style="text-align: ...;"`` on non-left-aligned cells (here the
            # centred numeric column) — verify against the installed version.
            # Those cells do not match the bare ``<th>``/``<td>`` tags, so
            # extend their style first, then style the bare tags.
            table_html = table_html.replace('<th style="', f'<th style="{header_box} ')
            table_html = table_html.replace('<td style="', f'<td style="{cell_box} ')
            table_html = table_html.replace('<th>', '<th style="background-color: #f2f2f2; padding: 8px; text-align: left; border: 1px solid #ddd;">')
            table_html = table_html.replace('<td>', '<td style="padding: 8px; border: 1px solid #ddd;">')
            category_html += table_html
        outputs.append(category_html)
    return "<hr>".join(outputs)
def analyze_reviews(df):
    """Run the whole pipeline on ``df`` and return the result as HTML.

    The dataframe is summarized per category with
    ``generate_category_summaries`` and the resulting tables are rendered
    with ``format_for_gradio``.
    """
    return format_for_gradio(generate_category_summaries(df))
# Create Gradio interface
# NOTE(review): `df` is read below (for the dropdown choices and in both click
# handlers) but is not defined or loaded anywhere in the visible file —
# confirm where the review dataframe is supposed to come from.
with gr.Blocks(title="Amazon Product Review Analyzer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Amazon Product Review Analyzer")
    gr.Markdown("Analyzing top products and reviews across categories")
    with gr.Row():
        # Left column: pick a single category and analyze it.
        with gr.Column():
            gr.Markdown("### Product Categories Found")
            category_dropdown = gr.Dropdown(
                choices=df['cluster_name'].unique().tolist(),
                label="Select a Category",
                interactive=True
            )
            analyze_btn = gr.Button("Analyze Selected Category", variant="primary")
        # Right column: analyze every category at once.
        with gr.Column():
            gr.Markdown("### All Categories Summary")
            all_categories_btn = gr.Button("Analyze All Categories", variant="secondary")
    # Both buttons render their result into this HTML component.
    output_html = gr.HTML(label="Analysis Results")
    # Button actions
    # Enable the analyze button only while a category is selected.
    category_dropdown.change(
        fn=lambda x: gr.update(interactive=bool(x)),
        inputs=category_dropdown,
        outputs=analyze_btn
    )
    # Analyze only the rows belonging to the selected category.
    analyze_btn.click(
        fn=lambda cat: analyze_reviews(df[df['cluster_name'] == cat]),
        inputs=category_dropdown,
        outputs=output_html
    )
    # Analyze the full dataframe.
    all_categories_btn.click(
        fn=lambda: analyze_reviews(df),
        outputs=output_html
    )
# Launch the interface
demo.launch()