# Source: Hugging Face Space by Emeritus-21 — commit 2b8d6a4 ("Update app.py")
# app.py - Hugging Face Space App (PDF Summarizer & QnA)
import os
import gc
import tempfile
import gradio as gr
import torch
import numpy as np
import faiss
from typing import Tuple, Dict, Any, Optional
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Avoid tokenizer parallelism warnings
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# ------------------ CONFIG ------------------ #
# Generation model (7B instruct) and retrieval embedding model.
LLM_MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
EMBED_MODEL_NAME = "BAAI/bge-large-en-v1.5"
# Prefer GPU when available; both models are loaded onto this device.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_PROMPT_LENGTH = 28000 # Max characters to feed into the LLM
# ------------------ PROMPT TEMPLATES ------------------ #
# QnA template: {context} is the retrieved chunks, {question} the user query.
QA_PROMPT_TEMPLATE = (
"System: You are a helpful assistant. Answer the user's question based *only* on the provided context. "
"If the answer is not found in the context, state that clearly.\n\n"
"Context:\n---\n{context}\n---\n\nQuestion: {question}\n\nAnswer:"
)
# Summary templates keyed by the UI's "Summary Type" radio choices;
# each takes a single {text} placeholder with the full document text.
SUMMARY_PROMPTS = {
"Quick": (
"You are an expert academic summarizer. Provide a single, concise paragraph that summarizes the absolute key takeaway of the following document. "
"Be brief and direct.\n\nDocument:\n---\n{text}\n---\n\nQuick Summary:"
),
"Standard": (
"You are an expert academic summarizer. Provide a detailed, well-structured summary of the following document. "
"Cover the key points, methodology, findings, and conclusions.\n\n"
"Document:\n---\n{text}\n---\n\nStandard Summary:"
),
"Detailed": (
"You are an expert academic summarizer. Provide a highly detailed and comprehensive summary of the following document. "
"Go into depth on the methodology, specific results, limitations, and any mention of future work. Use multiple paragraphs for structure.\n\n"
"Document:\n---\n{text}\n---\n\nDetailed Summary:"
)
}
# ------------------ MEMORY & MODEL MANAGEMENT ------------------ #
class ModelManager:
    """Caches the LLM pipeline and the embedding model as class-level singletons.

    Only one of the two models is meant to be resident at a time: loading
    either one first evicts whatever is cached so GPU memory is freed.
    """

    # Cached instances; None means "not currently loaded".
    _llm_pipe = None
    _embed_model = None

    @classmethod
    def _clear_gpu_memory(cls):
        """Frees up GPU memory by dropping cached models and clearing the cache."""
        # Dropping the class-level references is what actually releases the
        # models; the previous code's `del` on a local loop alias freed nothing.
        cls._llm_pipe = None
        cls._embed_model = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print("[Memory] GPU Memory Cleared.")

    @classmethod
    def get_llm_pipeline(cls):
        """Loads and returns the LLM pipeline, ensuring no other models are loaded.

        Returns:
            The cached text-generation pipeline, or None if loading failed.
        """
        if cls._llm_pipe is None:
            cls._clear_gpu_memory()  # evict the embedding model first
            print("[LLM] Loading model...")
            try:
                tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
                model = AutoModelForCausalLM.from_pretrained(
                    LLM_MODEL_NAME,
                    device_map=DEVICE,
                    torch_dtype=torch.bfloat16
                )
                cls._llm_pipe = pipeline(
                    "text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=1024,
                    temperature=0.2,
                    top_p=0.95,
                )
                print("[LLM] Model loaded successfully.")
            except Exception as e:
                print(f"[LLM] Failed to load model: {e}")
                return None
        return cls._llm_pipe

    @classmethod
    def get_embedding_model(cls):
        """Loads and returns the embedding model, ensuring the LLM is not loaded.

        Returns:
            The cached HuggingFaceEmbeddings instance, or None if loading failed.
        """
        # import locally to avoid import-time cost if not needed
        from langchain_huggingface import HuggingFaceEmbeddings
        if cls._embed_model is None:
            cls._clear_gpu_memory()  # evict the LLM first
            print("[Embed] Loading embedding model...")
            try:
                cls._embed_model = HuggingFaceEmbeddings(
                    model_name=EMBED_MODEL_NAME,
                    model_kwargs={"device": DEVICE},
                    encode_kwargs={"normalize_embeddings": True}
                )
                print("[Embed] Embedding model loaded successfully.")
            except Exception as e:
                print(f"[Embed] Failed to load model: {e}")
                return None
        return cls._embed_model
# ------------------ CORE LOGIC FUNCTIONS ------------------ #
@spaces.GPU
def invoke_llm(prompt_str: str) -> str:
    """Run the cached LLM pipeline on a prompt and return the generated text."""
    # Enforce the character budget before handing the prompt to the model.
    if len(prompt_str) > MAX_PROMPT_LENGTH:
        prompt_str = prompt_str[:MAX_PROMPT_LENGTH]
        print(f"[invoke_llm] Prompt truncated to {MAX_PROMPT_LENGTH} characters.")
    try:
        pipe = ModelManager.get_llm_pipeline()
        if not pipe:
            return "Error: LLM could not be loaded."
        with torch.no_grad():
            outputs = pipe(prompt_str)
        is_valid = isinstance(outputs, list) and outputs and "generated_text" in outputs[0]
        if not is_valid:
            return "No valid response was generated."
        # Strip the echoed prompt if the model repeated it verbatim.
        generated = outputs[0]["generated_text"]
        return generated.replace(prompt_str, "").strip()
    except Exception as e:
        print(f"[invoke_llm] Error: {e}")
        return f"LLM invocation failed: {e}"
@spaces.GPU
def process_pdf_and_index(pdf_path: str) -> Tuple[str, Optional[Dict[str, Any]]]:
    """Load a PDF, embed its text chunks, and persist a FAISS index to disk.

    Returns a (status message, state bundle) pair; the bundle carries the
    index file path and the raw chunk texts, or None on failure.
    """
    from langchain_community.document_loaders import PyMuPDFLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    if not pdf_path:
        return "No file path provided.", None
    try:
        print("[Process] Loading and splitting PDF...")
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
        pages = PyMuPDFLoader(pdf_path).load()
        # Keep only chunks that contain non-whitespace text.
        texts = [
            chunk.page_content
            for chunk in splitter.split_documents(pages)
            if chunk.page_content.strip()
        ]
        if not texts:
            return "No text could be extracted from the PDF.", None
        print(f"[Process] Extracted {len(texts)} text chunks.")
        embed_model = ModelManager.get_embedding_model()
        if not embed_model:
            return "Could not load embedding model.", None
        print(f"[Process] Creating embeddings...")
        vectors = np.array(embed_model.embed_documents(texts), dtype=np.float32)
        print("[Process] Building and saving FAISS index...")
        index = faiss.IndexFlatL2(vectors.shape[1])
        index.add(vectors)
        # Reserve a unique on-disk filename, then let faiss write into it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".faiss") as f:
            index_path = f.name
        faiss.write_index(index, index_path)
        bundle = {"index_path": index_path, "texts": texts}
        return f"Successfully processed and indexed {len(texts)} chunks.", bundle
    except Exception as e:
        print(f"[process_pdf] Exception: {e}")
        return f"Error processing PDF: {e}", None
@spaces.GPU
def retrieve_and_answer(question: str, state_bundle: Dict[str, Any]) -> Tuple[str, str]:
    """Answer a question from the indexed document; returns (answer, sources preview)."""
    if not (state_bundle and "index_path" in state_bundle):
        return "Please upload and process a PDF first.", ""
    try:
        embed_model = ModelManager.get_embedding_model()
        if not embed_model:
            return "Error loading embedding model.", ""
        texts = state_bundle.get("texts", [])
        index = faiss.read_index(state_bundle["index_path"])
        # Embed the query and pull the 5 nearest chunks from the index.
        query_vec = np.array([embed_model.embed_query(question)], dtype=np.float32)
        _, indices = index.search(query_vec, k=5)
        sources = []
        for idx in indices[0]:
            # Guard against out-of-range (or FAISS "no result" -1) indices.
            if 0 <= idx < len(texts):
                sources.append(texts[idx])
        if not sources:
            return "Could not find relevant information.", ""
        context = "\n\n---\n\n".join(sources)
        sources_preview = "\n\n---\n\n".join(s[:500] + "..." for s in sources)
        answer = invoke_llm(QA_PROMPT_TEMPLATE.format(context=context, question=question))
        return answer, sources_preview
    except Exception as e:
        print(f"[retrieve_and_answer] Error: {e}")
        return f"An error occurred: {e}", ""
@spaces.GPU
def summarize_document(state_bundle: Dict[str, Any], summary_type: str) -> Tuple[str, Optional[str]]:
    """Generates a summary of the document and saves it to a temporary file.

    Args:
        state_bundle: State produced by process_pdf_and_index; must hold "texts".
        summary_type: "Quick", "Standard", or "Detailed". Unknown values fall
            back to the "Standard" template.

    Returns:
        (summary text, path to a .txt file with the summary), or an error
        message and None when no document has been processed.
    """
    if not (state_bundle and "texts" in state_bundle):
        return "Please upload and process a PDF first.", None
    texts = state_bundle.get("texts", [])
    if not texts:
        return "No text available to summarize.", None
    full_text = "\n\n".join(texts)
    prompt_template = SUMMARY_PROMPTS.get(summary_type, SUMMARY_PROMPTS["Standard"])
    prompt = prompt_template.format(text=full_text)
    print(f"[Summarize] Generating '{summary_type}' summary...")
    final_summary = invoke_llm(prompt)
    # Save the summary for the UI download; the context manager guarantees
    # the handle is closed even if the write raises (the old code leaked it).
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as temp_file:
        temp_file.write(final_summary)
    return final_summary, temp_file.name
# ------------------ GRADIO UI ------------------ #
with gr.Blocks(title="PDF Summarizer & Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 PDF Summarizer & Q&A Assistant")
    gr.Markdown("Upload a PDF to generate a summary or ask questions about its content.")
    # Per-session state: {"index_path": str, "texts": list[str]} after processing.
    state = gr.State()
    with gr.Row():
        pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")
        process_btn = gr.Button("Process PDF", variant="primary")
    status_output = gr.Textbox(label="Status", interactive=False)
    with gr.Tabs():
        with gr.TabItem("Summarization"):
            gr.Markdown("### Generate a Summary")
            gr.Markdown("Select the level of detail you want in the summary.")
            summary_type_radio = gr.Radio(
                choices=["Quick", "Standard", "Detailed"],
                value="Standard",
                label="Summary Type"
            )
            summary_btn = gr.Button("Generate Summary", variant="secondary")
            out_summary = gr.Textbox(label="Document Summary", lines=20, max_lines=25)
            download_btn = gr.DownloadButton("Download Summary", visible=False)
        with gr.TabItem("Question & Answer"):
            gr.Markdown("### Ask a Question")
            gr.Markdown("Ask a specific question about the document's content.")
            q_text = gr.Textbox(label="Your Question", placeholder="e.g., What was the main conclusion of the study?")
            q_btn = gr.Button("Get Answer", variant="secondary")
            q_out = gr.Textbox(label="Answer", lines=8)
            q_sources = gr.Textbox(label="Retrieved Sources", lines=8, max_lines=10)

    # Event Handlers - wrappers return exactly the outputs wired to the UI
    def handle_process(pdf_file):
        """Wrapper to handle PDF processing and clear old outputs."""
        if pdf_file is None:
            return "Please upload a file first.", None, "", "", "", "", gr.DownloadButton(visible=False)
        # gr.File(type="filepath") passes a plain str path to the handler,
        # so forward it directly (the old `pdf_file.name` raised
        # AttributeError on a str).
        status_msg, bundle = process_pdf_and_index(pdf_file)
        # return: status, state, out_summary, q_text, q_out, q_sources, download_btn
        return status_msg, bundle, "", "", "", "", gr.DownloadButton(visible=False)

    def handle_summary(bundle, summary_type):
        """Run summarization and reveal the download button once a file exists."""
        summary, file_path = summarize_document(bundle, summary_type)
        if file_path:
            # The button was created hidden; show it now that there is a file.
            return summary, gr.DownloadButton(value=file_path, visible=True)
        return summary, gr.DownloadButton(visible=False)

    process_btn.click(
        fn=handle_process,
        inputs=[pdf_in],
        outputs=[status_output, state, out_summary, q_text, q_out, q_sources, download_btn]
    )
    q_btn.click(
        fn=retrieve_and_answer,
        inputs=[q_text, state],
        outputs=[q_out, q_sources]
    )
    summary_btn.click(
        fn=handle_summary,
        inputs=[state, summary_type_radio],
        outputs=[out_summary, download_btn]
    )
if __name__ == "__main__":
    demo.launch(share=False, show_error=True)