Commit 2341eb2
Parent(s): 9d99321

Removed the logging code

app.py CHANGED
@@ -26,59 +26,6 @@ vector_db = VectorStore()
 topic_modeller = TopicModeller()
 
 
-def print_recent_logs(n: int = 5):
-    """
-    Print the last N log lines to the container logs for developer monitoring.
-    """
-    log_file = "semanticdala_log.csv"
-
-    if os.path.exists(log_file):
-        print(f"\n[SEMANTICDALA USAGE LOG - Last {n} Entries]")
-
-        with open(log_file, "r") as f:
-            lines = f.readlines()
-
-        for line in lines[-n:]:
-            print(line.strip())
-
-        print("[END LOG SNAPSHOT]\n")
-
-
-def log_submission(filename: str, num_chunks: int, start_time: float, status: str, session_id: str = "anonymous") -> None:
-    """
-    Basic logging utility to keep track of app usage.
-    """
-    log_file = "semanticdala_log.csv"
-    end_time = time.time()
-    duration = round(end_time - start_time, 2)
-
-    # Anonymise filename for privacy
-    anonymized_name = hashlib.sha256(filename.encode()).hexdigest()[:10]
-
-    # Get file size in bytes
-    file_size = os.path.getsize(filename) if os.path.exists(filename) else 0
-    file_size_mb = round(file_size / (1024 * 1024), 2)
-
-    log_entry = {
-        "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
-        "filename_hash": anonymized_name,
-        "file_size_mb": file_size_mb,
-        "num_chunks": num_chunks,
-        "processing_time_sec": duration,
-        "status": status,
-        "session_id": session_id
-    }
-
-    file_exists = os.path.isfile(log_file)
-
-    with open(log_file, mode="a", newline="") as f:
-        writer = csv.DictWriter(f, fieldnames=log_entry.keys())
-
-        if not file_exists:
-            writer.writeheader()
-
-        writer.writerow(log_entry)
-
 
 def extract_text(file: Any) -> str:
     """
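
print_recent_logs() was called from process_file — the next hunk removes two of its call sites — and log_submission presumably sat alongside it, though its call sites are not recoverable from this view. A rough smoke test of the deleted helpers, with hypothetical argument values; it assumes the two definitions above and app.py's os, csv, hashlib, time and datetime imports are in scope:

    import time

    start = time.time()
    # ... handle one upload here, counting its chunks ...
    log_submission("example.txt", num_chunks=12, start_time=start, status="success", session_id="demo")
    print_recent_logs(n=1)  # echoes the row just appended to semanticdala_log.csv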
@@ -105,45 +52,32 @@ def process_file(file: Any) -> Tuple[List[Tuple[str, int]], Any, Any]:
     Main file processing function, which will also chunk, transliterate and cluster
     the file contents, as well as plot the clusters.
     """
-
+    raw_text = extract_text(file)
+    chunks = chunk_text(raw_text)
 
-
-
-
+    # Transliterate, deduplicate and embed the chunks
+    translits = translit.batch_transliterate(chunks)
+    dedup_translits = deduplicate_chunks(translits, embedder)
+    embeddings = embedder.embed_batch(dedup_translits)
 
-
-
-
-    embeddings = embedder.embed_batch(dedup_translits)
+    # Clear previous entries before adding
+    vector_db.index.reset()
+    vector_db.metadata = []
 
-
-    vector_db.index.reset()
-    vector_db.metadata = []
+    metadata = [{"id": f"{file.name}_chunk{i}", "text": t} for i, t in enumerate(dedup_translits)]
 
-
-
-    vector_db.add(embeddings, metadata)
-
-    # Topic modelling
-    topics, fig, topic_labels, umap_fig = topic_modeller.fit(dedup_translits, embeddings)
-
-    # Get a list of rows for topic labels
-    overview_table = [[k, v] for k, v in topic_labels.items()]
-
-    # Zip back transliterated text with topic IDs
-    annotated = list(zip(dedup_translits, topics))
+    vector_db.add(embeddings, metadata)
 
-
-
-    print_recent_logs()
+    # Topic modelling
+    topics, fig, topic_labels, umap_fig = topic_modeller.fit(dedup_translits, embeddings)
 
-
+    # Get a list of rows for topic labels
+    overview_table = [[k, v] for k, v in topic_labels.items()]
 
-
-
-    print_recent_logs()
+    # Zip back transliterated text with topic IDs
+    annotated = list(zip(dedup_translits, topics))
 
-
+    return annotated, fig, overview_table, umap_fig
 
 
 def search_text(query: str):
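
A note on the vector store usage above: process_file calls vector_db.index.reset() and clears vector_db.metadata before add(), so each upload replaces the previous index rather than appending to it. The VectorStore class itself is outside this diff; below is a minimal sketch consistent with those calls, assuming a FAISS flat index and 384-dimensional float32 embeddings (the real class may well differ):

    # Hypothetical VectorStore matching the reset()/metadata/add() usage in process_file.
    import faiss
    import numpy as np

    class VectorStore:
        def __init__(self, dim: int = 384):
            self.index = faiss.IndexFlatL2(dim)  # .reset() drops all stored vectors
            self.metadata: list[dict] = []       # parallel list, cleared alongside the index

        def add(self, embeddings, metadata: list[dict]) -> None:
            self.index.add(np.asarray(embeddings, dtype="float32"))
            self.metadata.extend(metadata)

        def search(self, query_vec, k: int = 5):
            dists, ids = self.index.search(np.asarray([query_vec], dtype="float32"), k)
            return [(self.metadata[i], float(d)) for i, d in zip(ids[0], dists[0]) if i != -1]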
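
deduplicate_chunks(translits, embedder) is likewise not shown in this commit. Given that it takes the transliterated chunks together with the embedder, a plausible reading is near-duplicate removal by embedding similarity; a hypothetical sketch, with the 0.95 cosine threshold invented:

    import numpy as np

    def deduplicate_chunks(chunks: list[str], embedder, threshold: float = 0.95) -> list[str]:
        # Embed once, unit-normalise, then keep each chunk only if no earlier
        # kept chunk exceeds the cosine-similarity threshold.
        vecs = np.asarray(embedder.embed_batch(chunks), dtype="float32")
        vecs /= np.linalg.norm(vecs, axis=1, keepdims=True) + 1e-12
        kept, kept_vecs = [], []
        for text, vec in zip(chunks, vecs):
            if kept_vecs and float(np.max(np.stack(kept_vecs) @ vec)) >= threshold:
                continue  # near-duplicate of an already-kept chunk
            kept.append(text)
            kept_vecs.append(vec)
        return kept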