# Source: Hugging Face Space by Emeritus-21 — commit 2b8d6a4 ("Update app.py")
# app.py - Hugging Face Space App (PDF Summarizer & QnA)
import os
import gc
import tempfile
import gradio as gr
import torch
import numpy as np
import faiss
from typing import Tuple, Dict, Any, Optional
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Avoid tokenizer parallelism warnings
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# ------------------ CONFIG ------------------ #
# Generation model (7B instruct) and retrieval embedding model.
LLM_MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct"
EMBED_MODEL_NAME = "BAAI/bge-large-en-v1.5"
# Prefer GPU when available; both models are loaded onto this device.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MAX_PROMPT_LENGTH = 28000 # Max characters to feed into the LLM
# ------------------ PROMPT TEMPLATES ------------------ #
# QnA template: {context} is the retrieved chunks, {question} the user query.
QA_PROMPT_TEMPLATE = (
"System: You are a helpful assistant. Answer the user's question based *only* on the provided context. "
"If the answer is not found in the context, state that clearly.\n\n"
"Context:\n---\n{context}\n---\n\nQuestion: {question}\n\nAnswer:"
)
# Summary templates keyed by the UI's "Summary Type" radio choices;
# each takes a single {text} placeholder with the full document text.
SUMMARY_PROMPTS = {
"Quick": (
"You are an expert academic summarizer. Provide a single, concise paragraph that summarizes the absolute key takeaway of the following document. "
"Be brief and direct.\n\nDocument:\n---\n{text}\n---\n\nQuick Summary:"
),
"Standard": (
"You are an expert academic summarizer. Provide a detailed, well-structured summary of the following document. "
"Cover the key points, methodology, findings, and conclusions.\n\n"
"Document:\n---\n{text}\n---\n\nStandard Summary:"
),
"Detailed": (
"You are an expert academic summarizer. Provide a highly detailed and comprehensive summary of the following document. "
"Go into depth on the methodology, specific results, limitations, and any mention of future work. Use multiple paragraphs for structure.\n\n"
"Document:\n---\n{text}\n---\n\nDetailed Summary:"
)
}
# ------------------ MEMORY & MODEL MANAGEMENT ------------------ #
class ModelManager:
    """Caches the LLM pipeline and the embedding model as class-level singletons.

    Only one of the two models is meant to be resident at a time: loading
    either one first evicts whatever is cached so GPU memory is freed.
    """

    # Cached instances; None means "not currently loaded".
    _llm_pipe = None
    _embed_model = None

    @classmethod
    def _clear_gpu_memory(cls):
        """Frees up GPU memory by dropping cached models and clearing the cache."""
        # Dropping the class-level references is what actually releases the
        # models; the previous code's `del` on a local loop alias freed nothing.
        cls._llm_pipe = None
        cls._embed_model = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print("[Memory] GPU Memory Cleared.")

    @classmethod
    def get_llm_pipeline(cls):
        """Loads and returns the LLM pipeline, ensuring no other models are loaded.

        Returns:
            The cached text-generation pipeline, or None if loading failed.
        """
        if cls._llm_pipe is None:
            cls._clear_gpu_memory()  # evict the embedding model first
            print("[LLM] Loading model...")
            try:
                tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)
                model = AutoModelForCausalLM.from_pretrained(
                    LLM_MODEL_NAME,
                    device_map=DEVICE,
                    torch_dtype=torch.bfloat16
                )
                cls._llm_pipe = pipeline(
                    "text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=1024,
                    temperature=0.2,
                    top_p=0.95,
                )
                print("[LLM] Model loaded successfully.")
            except Exception as e:
                print(f"[LLM] Failed to load model: {e}")
                return None
        return cls._llm_pipe

    @classmethod
    def get_embedding_model(cls):
        """Loads and returns the embedding model, ensuring the LLM is not loaded.

        Returns:
            The cached HuggingFaceEmbeddings instance, or None if loading failed.
        """
        # import locally to avoid import-time cost if not needed
        from langchain_huggingface import HuggingFaceEmbeddings
        if cls._embed_model is None:
            cls._clear_gpu_memory()  # evict the LLM first
            print("[Embed] Loading embedding model...")
            try:
                cls._embed_model = HuggingFaceEmbeddings(
                    model_name=EMBED_MODEL_NAME,
                    model_kwargs={"device": DEVICE},
                    encode_kwargs={"normalize_embeddings": True}
                )
                print("[Embed] Embedding model loaded successfully.")
            except Exception as e:
                print(f"[Embed] Failed to load model: {e}")
                return None
        return cls._embed_model
# ------------------ CORE LOGIC FUNCTIONS ------------------ #
@spaces.GPU
def invoke_llm(prompt_str: str) -> str:
    """Run the cached LLM pipeline on a prompt and return the generated text."""
    # Enforce the character budget before handing the prompt to the model.
    if len(prompt_str) > MAX_PROMPT_LENGTH:
        prompt_str = prompt_str[:MAX_PROMPT_LENGTH]
        print(f"[invoke_llm] Prompt truncated to {MAX_PROMPT_LENGTH} characters.")
    try:
        pipe = ModelManager.get_llm_pipeline()
        if not pipe:
            return "Error: LLM could not be loaded."
        with torch.no_grad():
            outputs = pipe(prompt_str)
        is_valid = isinstance(outputs, list) and outputs and "generated_text" in outputs[0]
        if not is_valid:
            return "No valid response was generated."
        # Strip the echoed prompt if the model repeated it verbatim.
        generated = outputs[0]["generated_text"]
        return generated.replace(prompt_str, "").strip()
    except Exception as e:
        print(f"[invoke_llm] Error: {e}")
        return f"LLM invocation failed: {e}"
@spaces.GPU
def process_pdf_and_index(pdf_path: str) -> Tuple[str, Optional[Dict[str, Any]]]:
    """Load a PDF, embed its text chunks, and persist a FAISS index to disk.

    Returns a (status message, state bundle) pair; the bundle carries the
    index file path and the raw chunk texts, or None on failure.
    """
    from langchain_community.document_loaders import PyMuPDFLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    if not pdf_path:
        return "No file path provided.", None
    try:
        print("[Process] Loading and splitting PDF...")
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
        pages = PyMuPDFLoader(pdf_path).load()
        # Keep only chunks that contain non-whitespace text.
        texts = [
            chunk.page_content
            for chunk in splitter.split_documents(pages)
            if chunk.page_content.strip()
        ]
        if not texts:
            return "No text could be extracted from the PDF.", None
        print(f"[Process] Extracted {len(texts)} text chunks.")
        embed_model = ModelManager.get_embedding_model()
        if not embed_model:
            return "Could not load embedding model.", None
        print(f"[Process] Creating embeddings...")
        vectors = np.array(embed_model.embed_documents(texts), dtype=np.float32)
        print("[Process] Building and saving FAISS index...")
        index = faiss.IndexFlatL2(vectors.shape[1])
        index.add(vectors)
        # Reserve a unique on-disk filename, then let faiss write into it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".faiss") as f:
            index_path = f.name
        faiss.write_index(index, index_path)
        bundle = {"index_path": index_path, "texts": texts}
        return f"Successfully processed and indexed {len(texts)} chunks.", bundle
    except Exception as e:
        print(f"[process_pdf] Exception: {e}")
        return f"Error processing PDF: {e}", None
@spaces.GPU
def retrieve_and_answer(question: str, state_bundle: Dict[str, Any]) -> Tuple[str, str]:
    """Answer a question from the indexed document; returns (answer, sources preview)."""
    if not (state_bundle and "index_path" in state_bundle):
        return "Please upload and process a PDF first.", ""
    try:
        embed_model = ModelManager.get_embedding_model()
        if not embed_model:
            return "Error loading embedding model.", ""
        texts = state_bundle.get("texts", [])
        index = faiss.read_index(state_bundle["index_path"])
        # Embed the query and pull the 5 nearest chunks from the index.
        query_vec = np.array([embed_model.embed_query(question)], dtype=np.float32)
        _, indices = index.search(query_vec, k=5)
        sources = []
        for idx in indices[0]:
            # Guard against out-of-range (or FAISS "no result" -1) indices.
            if 0 <= idx < len(texts):
                sources.append(texts[idx])
        if not sources:
            return "Could not find relevant information.", ""
        context = "\n\n---\n\n".join(sources)
        sources_preview = "\n\n---\n\n".join(s[:500] + "..." for s in sources)
        answer = invoke_llm(QA_PROMPT_TEMPLATE.format(context=context, question=question))
        return answer, sources_preview
    except Exception as e:
        print(f"[retrieve_and_answer] Error: {e}")
        return f"An error occurred: {e}", ""
@spaces.GPU
def summarize_document(state_bundle: Dict[str, Any], summary_type: str) -> Tuple[str, Optional[str]]:
    """Generates a summary of the document and saves it to a temporary file.

    Args:
        state_bundle: State produced by process_pdf_and_index; must hold "texts".
        summary_type: "Quick", "Standard", or "Detailed". Unknown values fall
            back to the "Standard" template.

    Returns:
        (summary text, path to a .txt file with the summary), or an error
        message and None when no document has been processed.
    """
    if not (state_bundle and "texts" in state_bundle):
        return "Please upload and process a PDF first.", None
    texts = state_bundle.get("texts", [])
    if not texts:
        return "No text available to summarize.", None
    full_text = "\n\n".join(texts)
    prompt_template = SUMMARY_PROMPTS.get(summary_type, SUMMARY_PROMPTS["Standard"])
    prompt = prompt_template.format(text=full_text)
    print(f"[Summarize] Generating '{summary_type}' summary...")
    final_summary = invoke_llm(prompt)
    # Save the summary for the UI download; the context manager guarantees
    # the handle is closed even if the write raises (the old code leaked it).
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".txt", mode="w", encoding="utf-8"
    ) as temp_file:
        temp_file.write(final_summary)
    return final_summary, temp_file.name
# ------------------ GRADIO UI ------------------ #
with gr.Blocks(title="PDF Summarizer & Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 PDF Summarizer & Q&A Assistant")
    gr.Markdown("Upload a PDF to generate a summary or ask questions about its content.")
    # Per-session state: {"index_path": str, "texts": list[str]} after processing.
    state = gr.State()
    with gr.Row():
        pdf_in = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")
        process_btn = gr.Button("Process PDF", variant="primary")
    status_output = gr.Textbox(label="Status", interactive=False)
    with gr.Tabs():
        with gr.TabItem("Summarization"):
            gr.Markdown("### Generate a Summary")
            gr.Markdown("Select the level of detail you want in the summary.")
            summary_type_radio = gr.Radio(
                choices=["Quick", "Standard", "Detailed"],
                value="Standard",
                label="Summary Type"
            )
            summary_btn = gr.Button("Generate Summary", variant="secondary")
            out_summary = gr.Textbox(label="Document Summary", lines=20, max_lines=25)
            download_btn = gr.DownloadButton("Download Summary", visible=False)
        with gr.TabItem("Question & Answer"):
            gr.Markdown("### Ask a Question")
            gr.Markdown("Ask a specific question about the document's content.")
            q_text = gr.Textbox(label="Your Question", placeholder="e.g., What was the main conclusion of the study?")
            q_btn = gr.Button("Get Answer", variant="secondary")
            q_out = gr.Textbox(label="Answer", lines=8)
            q_sources = gr.Textbox(label="Retrieved Sources", lines=8, max_lines=10)

    # Event Handlers - wrappers return exactly the outputs wired to the UI
    def handle_process(pdf_file):
        """Wrapper to handle PDF processing and clear old outputs."""
        if pdf_file is None:
            return "Please upload a file first.", None, "", "", "", "", gr.DownloadButton(visible=False)
        # gr.File(type="filepath") passes a plain str path to the handler,
        # so forward it directly (the old `pdf_file.name` raised
        # AttributeError on a str).
        status_msg, bundle = process_pdf_and_index(pdf_file)
        # return: status, state, out_summary, q_text, q_out, q_sources, download_btn
        return status_msg, bundle, "", "", "", "", gr.DownloadButton(visible=False)

    def handle_summary(bundle, summary_type):
        """Run summarization and reveal the download button once a file exists."""
        summary, file_path = summarize_document(bundle, summary_type)
        if file_path:
            # The button was created hidden; show it now that there is a file.
            return summary, gr.DownloadButton(value=file_path, visible=True)
        return summary, gr.DownloadButton(visible=False)

    process_btn.click(
        fn=handle_process,
        inputs=[pdf_in],
        outputs=[status_output, state, out_summary, q_text, q_out, q_sources, download_btn]
    )
    q_btn.click(
        fn=retrieve_and_answer,
        inputs=[q_text, state],
        outputs=[q_out, q_sources]
    )
    summary_btn.click(
        fn=handle_summary,
        inputs=[state, summary_type_radio],
        outputs=[out_summary, download_btn]
    )
if __name__ == "__main__":
    demo.launch(share=False, show_error=True)