Spaces:

mfoud444
/

tt

Paused

tt / app.py

Mohammed Foud

Add application file

0aee734 9 months ago

7.28 kB

	import gradio as gr
	from transformers import pipeline
	from textblob import TextBlob
	from collections import defaultdict
	import pandas as pd
	from tabulate import tabulate

	# Initialize the Hugging Face summarization pipeline (facebook/bart-large-cnn) at import time.
	# NOTE(review): `summarizer` is not referenced anywhere else in the visible source —
	# confirm it is still needed before keeping this module-level model load.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

	def generate_category_summaries(df):
	    """Build the summary tables for every category that has enough data.

	    A category is a distinct value of ``df['cluster_name']``. It is left out
	    of the result when it has fewer than 10 rows, or when fewer than 3
	    products remain after ``get_product_stats``.

	    Returns:
	        dict mapping each kept category to the list of table dicts built by
	        ``format_tables``.
	    """
	    result = {}

	    for name in df['cluster_name'].unique():
	        rows = df[df['cluster_name'] == name]

	        if len(rows) >= 10:
	            stats = get_product_stats(rows)

	            if len(stats) >= 3:
	                best, worst = get_top_and_worst_products(stats)
	                details = analyze_top_products(best)
	                # One list of table dicts per category.
	                result[name] = format_tables(name, details, worst)

	    return result

	def format_tables(category, product_details, worst_product):
	    """Assemble the report sections for one category as table dicts.

	    Args:
	        category: Category name; upper-cased for the first section title.
	        product_details: List of dicts with the keys 'name', 'rating',
	            'review_count', 'pros' and 'cons' (the shape built by
	            ``analyze_top_products``). May be empty.
	        worst_product: DataFrame indexed by product name with 'avg_rating'
	            and 'reviews' columns; only its first row is used. May be empty.

	    Returns:
	        List of dicts, each with 'section' (title), 'headers' (column names)
	        and 'data' (list of rows). The top-products section is always
	        present; the other two are added only when they have content.
	    """
	    tables = []

	    # Top Products Table
	    top_table = [
	        [
	            product['name'],
	            f"★{product['rating']:.1f}",
	            product['review_count'],
	            "\n".join(product['pros']),
	            "\n".join(product['cons']),
	        ]
	        for product in product_details
	    ]
	    tables.append({
	        'section': f"TOP PRODUCTS IN {category.upper()}",
	        'headers': ["Product", "Rating", "Reviews", "Pros", "Cons"],
	        'data': top_table,
	    })

	    # Key Differences Table
	    # Pros shared by every product. Guard the empty case explicitly: indexing
	    # product_details[0] would raise IndexError when no products are given.
	    if product_details:
	        common_pros = set.intersection(
	            *(set(product['pros']) for product in product_details)
	        )
	    else:
	        common_pros = set()

	    diff_table = []
	    for product in product_details:
	        unique_pros = [p for p in product['pros'] if p not in common_pros]
	        if unique_pros:
	            diff_table.append([product['name'], ", ".join(unique_pros)])

	    if diff_table:
	        tables.append({
	            'section': "KEY DIFFERENCES",
	            'headers': ["Product", "Unique Features"],
	            'data': diff_table,
	        })

	    # Worst Product Table
	    if not worst_product.empty:
	        worst = worst_product.iloc[0]
	        # Only the negative keywords matter for the "avoid" section.
	        _, cons = analyze_sentiment(worst['reviews'])
	        tables.append({
	            'section': "PRODUCT TO AVOID",
	            'headers': ["Product", "Rating", "Reasons to Avoid"],
	            'data': [[
	                worst_product.index[0],
	                f"★{worst['avg_rating']:.1f}",
	                ", ".join(cons[:3]) if cons else "Consistently poor ratings",
	            ]],
	        })

	    return tables

	def get_product_stats(category_df):
	    """Aggregate per-product rating statistics and review texts.

	    Rows are grouped by the 'name' column. The result is indexed by product
	    name and has three columns: 'avg_rating' (mean of 'rating'),
	    'review_count' (count of 'rating') and 'reviews' (list of 'text'
	    values). Products with fewer than 5 counted ratings are dropped.
	    """
	    per_product = category_df.groupby('name').agg(
	        avg_rating=('rating', 'mean'),
	        review_count=('rating', 'count'),
	        reviews=('text', list),
	    )
	    has_enough_reviews = per_product['review_count'] >= 5
	    return per_product[has_enough_reviews]

	def get_top_and_worst_products(product_stats):
	    """Pick the best-rated products and the worst-rated remaining product.

	    Args:
	        product_stats: DataFrame with one row per product and an
	            'avg_rating' column.

	    Returns:
	        Tuple ``(top_products, worst_product)``. ``top_products`` holds up to
	        three rows with the highest 'avg_rating'. ``worst_product`` holds the
	        single lowest-rated row among the products that are not in
	        ``top_products``; it is an empty DataFrame when no such product exists.
	    """
	    top_products = product_stats.nlargest(3, 'avg_rating')

	    # Look for the worst product only among the rest. Taking it from the full
	    # table would, with three or fewer products, report the same product both
	    # as a top pick and as the one to avoid.
	    remaining = product_stats.drop(index=top_products.index)
	    worst_product = remaining.nsmallest(1, 'avg_rating')

	    return top_products, worst_product

	def analyze_top_products(top_products):
	    """Describe each top product with its rating, review count and keywords.

	    Iterates over the rows of ``top_products`` (index = product name) and
	    returns one dict per row with the keys 'name', 'rating', 'review_count',
	    'pros' and 'cons'. 'pros' and 'cons' hold at most three keywords from
	    ``analyze_sentiment``; a placeholder sentence is used when there are none.
	    """
	    def describe(name, stats):
	        positives, negatives = analyze_sentiment(stats['reviews'])
	        return {
	            'name': name,
	            'rating': stats['avg_rating'],
	            'review_count': stats['review_count'],
	            # An empty slice is falsy, so the placeholder takes over.
	            'pros': positives[:3] or ["no significant positive feedback"],
	            'cons': negatives[:3] or ["no major complaints"],
	        }

	    return [describe(name, stats) for name, stats in top_products.iterrows()]

	def analyze_sentiment(reviews):
	    """Extract keywords from strongly positive and strongly negative sentences.

	    Each review is split into sentences with TextBlob. For a sentence whose
	    polarity is above 0.3 (positive) or below -0.3 (negative), the nouns and
	    adjectives of that sentence (POS tags NN, NNS, JJ, JJR, JJS) are counted
	    as pros or cons respectively. Other sentences are ignored.

	    Args:
	        reviews: Iterable of review texts. Entries that are not ``str``
	            (e.g. ``None`` or NaN for missing text) are skipped.

	    Returns:
	        Tuple ``(pros, cons)``: two lists of words ordered by descending
	        count; words with equal counts keep first-seen order.
	    """
	    pros = defaultdict(int)
	    cons = defaultdict(int)
	    keyword_tags = ('NN', 'NNS', 'JJ', 'JJR', 'JJS')

	    for review in reviews:
	        # Missing review text cannot be parsed as text; skip it rather than
	        # letting one bad row abort the whole analysis.
	        if not isinstance(review, str):
	            continue

	        for sentence in TextBlob(review).sentences:
	            polarity = sentence.sentiment.polarity
	            if polarity > 0.3:  # Positive
	                bucket = pros
	            elif polarity < -0.3:  # Negative
	                bucket = cons
	            else:
	                continue

	            # Tag this sentence only. Using the whole review's tags here
	            # would credit every word of the review with the polarity of
	            # each of its sentences.
	            for word, tag in sentence.tags:
	                if tag in keyword_tags:
	                    bucket[word] += 1

	    # Most frequent first; sorted() is stable, so ties keep insertion order.
	    pros_sorted = sorted(pros, key=pros.get, reverse=True)
	    cons_sorted = sorted(cons, key=cons.get, reverse=True)

	    return pros_sorted, cons_sorted

	def format_for_gradio(summaries):
	    """Render the category summaries as one HTML string.

	    Args:
	        summaries: dict mapping a category name to a list of table dicts,
	            each with the keys 'section', 'headers' and 'data'.

	    Returns:
	        HTML with an ``<h2>`` per category followed by an ``<h3>`` heading and
	        a styled table per section; categories are separated by ``<hr>``.
	        An empty ``summaries`` yields an empty string.
	    """
	    # Function-scope import so the block does not depend on the file header.
	    from html import escape

	    outputs = []
	    for category, tables in summaries.items():
	        # Category names come from the data, so escape them before putting
	        # them into markup (e.g. "&" or "<" in a name).
	        parts = [f"<h2 style='color: #4a6baf;'>{escape(category.upper())}</h2>"]

	        for table in tables:
	            heading = f"<h3 style='color: #3a5a8a;'>{escape(table['section'])}</h3>"
	            body = tabulate(
	                table['data'],
	                headers=table['headers'],
	                tablefmt="html",
	                stralign="left",
	                numalign="center"
	            )
	            # Attach inline styles to the bare tags tabulate emits.
	            body = body.replace('<table>', '<table style="width:100%; border-collapse: collapse; margin-bottom: 20px;">')
	            body = body.replace('<th>', '<th style="background-color: #f2f2f2; padding: 8px; text-align: left; border: 1px solid #ddd;">')
	            body = body.replace('<td>', '<td style="padding: 8px; border: 1px solid #ddd;">')
	            parts.append(heading + body)

	        outputs.append("".join(parts))

	    return "<hr>".join(outputs)

	def analyze_reviews(df):
	    """Summarise ``df`` per category and return the result rendered as HTML."""
	    return format_for_gradio(generate_category_summaries(df))

	# Build the Gradio interface.
	# NOTE(review): `df` is read below but is not defined or loaded anywhere in the
	# visible source — confirm where the review DataFrame is supposed to come from.
	# NOTE(review): the original indentation was lost; the nesting below is the
	# presumed layout (two columns in one row, results underneath) — confirm.
	with gr.Blocks(title="Amazon Product Review Analyzer", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# Amazon Product Review Analyzer")
	    gr.Markdown("Analyzing top products and reviews across categories")

	    with gr.Row():
	        # Left column: pick one category and analyze it.
	        with gr.Column():
	            gr.Markdown("### Product Categories Found")
	            category_dropdown = gr.Dropdown(
	                choices=df['cluster_name'].unique().tolist(),
	                label="Select a Category",
	                interactive=True,
	            )
	            analyze_btn = gr.Button("Analyze Selected Category", variant="primary")

	        # Right column: analyze everything at once.
	        with gr.Column():
	            gr.Markdown("### All Categories Summary")
	            all_categories_btn = gr.Button("Analyze All Categories", variant="secondary")

	    output_html = gr.HTML(label="Analysis Results")

	    # Button actions
	    # The analyze button is interactive only while the dropdown has a value.
	    category_dropdown.change(
	        fn=lambda selected: gr.update(interactive=bool(selected)),
	        inputs=category_dropdown,
	        outputs=analyze_btn,
	    )

	    # Analyze only the rows of the chosen category.
	    analyze_btn.click(
	        fn=lambda category: analyze_reviews(df[df['cluster_name'] == category]),
	        inputs=category_dropdown,
	        outputs=output_html,
	    )

	    all_categories_btn.click(
	        fn=lambda: analyze_reviews(df),
	        outputs=output_html,
	    )

	# Launch the interface
	demo.launch()