#!/usr/bin/env python3
# Spaces: Sleeping (page-status banner captured together with this file)
"""
Test BERTopic API endpoint on HF Spaces.
"""
import json
import traceback

import requests
# HF Spaces URL. Kept without a trailing slash because endpoint paths such as
# "/health" are appended to it directly.
API_URL = "https://zedwrkc-news-stance-detection.hf.space"
# Test data: 15 articles sent to the clustering endpoint, three per group.
# Each entry carries an integer "article_id" plus "title" and "summary" text.
#
# NOTE(review): the title/summary text looks like Korean UTF-8 that was decoded
# with the wrong codec. It is kept exactly as found; restore it from the
# original file if one is available, since garbled text will not cluster the
# way the real headlines would.
TEST_ARTICLES = [
    # Group 1 (ids 1-3) -- presumably real-estate policy; TODO confirm
    {
        "article_id": 1,
        "title": "๋ณต๊ธฐ์ \"15์ต ์ด์ ์ฃผํ์ ์๋ง\"...์๋ฏผ๋ค ๊ฐ์ด์ ๋ ๋ถ์ง๋ฅด๋",
        "summary": "๊ตญํ ๊ตญํ ๊ตํต์์ํ ์ฌ๋น ๊ฐ์ฌ์ธ ๋๋ถ์ด๋ฏผ์ฃผ๋น ๋ณต๊ธฐ์ ์์์ 10ยท15 ๋ถ๋์ฐ ๋์ฑ ์ '์ฌ๋ค๋ฆฌ ๊ฑท์ด์ฐจ๊ธฐ'๋ผ๊ณ ํ๋ ๊ฑด ์ค์ฒด๊ฐ ์๋ ๊ณต๊ฒฉ์ด๋ผ๊ณ ๋นํํ๋ค.",
    },
    {
        "article_id": 2,
        "title": "่๋ณต๊ธฐ์ \"15์ต์ด ์๋ฏผ ์ํํธ\"โฆ๊ตญํ \"์ง ๋ชป์ฐ ๋ ์ฒ๋ฏผ์ด๋\"",
        "summary": "๊ตญํ ๊ตญํ ๊ตํต์ ๋๋ถ์ด๋ฏผ์ฃผ๋น ๊ฐ์ฌ์ธ ๋ณต๊ธฐ์ ์์์ 23์ผ 10ยท15 ๋ถ๋์ฐ ๋์ฑ ์ ๋ํ '์ฌ๋ค๋ฆฌ ๊ฑท์ด์ฐจ๊ธฐ' ๋นํ์ ๋ฐ๋ฐํ๋ ๊ณผ์ ์์ '15์ต ์ ๋๋ ์๋ฏผ ์ํํธ'๋ผ๊ณ ๋ฐ์ธํ๋ค.",
    },
    {
        "article_id": 3,
        "title": "์ฌ์ผ, ๊ตญํ ์์ ์ด์๊ฒฝ '๋ถ๋์ฐ ๋ฐ์ธ' ์งํโฆ์ฌํด์ด๊ตฌ์์ ์ด๊ฒฌ",
        "summary": "23์ผ ๊ตญํ ๊ตญํ ๊ตํต์์ํ ๊ตญ์ ๊ฐ์ฌ์์ 10ยท15 ๋ถ๋์ฐ ๋์ฑ ๊ณผ ๊ด๋ จํด ๋ ผ๋์ด ๋ ์ด์๊ฒฝ ๊ตญํ ๊ตํต๋ถ ์ 1์ฐจ๊ด์ ๋ํด ์ฌ์ผ๊ฐ ์งํํ๋ค.",
    },
    # Group 2 (ids 4-6) -- presumably Korea-US tariff negotiations; TODO confirm
    {
        "article_id": 4,
        "title": "ๆ๋ํต๋ น \"๊ด์ธํ์, ์๊ฐ ๊ฑธ๋ฆฌ๋๋ผ๋ ํฉ๋ฆฌ์ ๊ฒฐ๊ณผ ๋๋ฌํ ๊ฒ\"",
        "summary": "์ด์ฌ๋ช ๋ํต๋ น์ ๊ฒฝ์ฃผ APEC ์ ์ํ์์์ ํ๋ฏธ ๊ด์ธํ์์ด ํ๊ฒฐ๋ ๊ฐ๋ฅ์ฑ์ ๋ํด ํฉ๋ฆฌ์ ์ธ ๊ฒฐ๊ณผ์ ์ด๋ฅด๊ฒ ๋ ๊ฒ์ด๋ผ๊ณ ํ์ ํ๋ค๊ณ ๋งํ๋ค.",
    },
    {
        "article_id": 5,
        "title": "\"ํยท๋ฏธ๋ ๋๋งน์ด๊ณ ์์ ๊ฐ๊ณ ์์ด, ๊ด์ธํ์์ ํฉ๋ฆฌ์ ๊ฒฐ๊ณผ ๋์ฌ ๊ฒ\"",
        "summary": "ํ๋ฏธ ์๊ตญ์ ๋๋งน ๊ด๊ณ์ด๋ฉฐ ์์์ ๊ฐ๊ณ ์์ด ๊ด์ธํ์์์ ํฉ๋ฆฌ์ ์ธ ๊ฒฐ๊ณผ๊ฐ ๋์ฌ ๊ฒ์ด๋ผ๋ ์ ๋ง์ด ๋์๋ค.",
    },
    {
        "article_id": 6,
        "title": "ํธ๋ผํ ๊ด์ธ ์ ์ฑ , ํ๋ฏธ ํ์ ๋ํญ ์์",
        "summary": "ํธ๋ผํ ํ์ ๋ถ์ ๊ด์ธ ์ ์ฑ ์ผ๋ก ์ธํด ํ๋ฏธ ๋ฌด์ญ ํ์์ด ๋ํญ์ ๊ฒช์ ๊ฒ์ผ๋ก ์์๋๋ค.",
    },
    # Group 3 (ids 7-9) -- presumably North Korean missile launches; TODO confirm
    {
        "article_id": 7,
        "title": "๋ถํ \"์๋ฌด๊ธฐ์ฒด๊ณ ๊ทน์ด์์ ๋นํ์ฒด ์ํ๋ฐ์ฌ\"...๊น์ ์ ์ฐธ๊ด ์ํด",
        "summary": "๋ถํ์ด ๋ฏธ์ฌ์ผ ๋ฌด๊ธฐ์ฒด๊ณ์ธ ๊ทน์ด์์ ๋ฏธ์ฌ์ผ์ ์ํ ๋ฐ์ฌํ๋ค๊ณ ๋ฐํ๋ค.",
    },
    {
        "article_id": 8,
        "title": "๋ถํ \"์๋ก์ด ๋ฌด๊ธฐ์ฒด๊ณ ๋ฐ์ฌ...๋ชฉํ์ ๊ฐํ\"",
        "summary": "๋ถํ์ด ์๋ก์ด ๋ฌด๊ธฐ์ฒด๊ณ๋ฅผ ๋ฐ์ฌํ์ผ๋ฉฐ ๋ชฉํ์ ์ ์ ํํ ๊ฐํํ๋ค๊ณ ๋ฐํํ๋ค.",
    },
    {
        "article_id": 9,
        "title": "๋ถ \"๊ทน์ด์์ ๋นํ์ฒด ์ํ๋ฐ์ฌ\"...๊น์ ์ ์ฐธ๊ด ์ ํด",
        "summary": "๋ถํ์ด ๊ทน์ด์์ ๋นํ์ฒด๋ฅผ ์ํ๋ฐ์ฌํ์ผ๋ ๊น์ ์ ์์์ฅ์ ์ฐธ๊ดํ์ง ์์๋ค.",
    },
    # Group 4 (ids 10-12) -- presumably the gender-ministry reorganisation; TODO confirm
    {
        "article_id": 10,
        "title": "'๋ณธ๋ถ'๋ก ๊ฒฉํ๋ ์ฌ๊ฐ๋ถ...\"๊ณต๋ฃก ๋ณต์ง๋ถ์์ ์ฑํ๋ฑ ์ ์ฑ ๋ฌปํ ๊ฒ\" ์ฐ๋ ค",
        "summary": "์ ๋ถ์กฐ์ง ๊ฐํธ์์ ๋ฐ๋ผ ์ฌ์ฑ๊ฐ์กฑ๋ถ๊ฐ ์ถ๋ฒ 21๋ ๋ง์ ๋ ๋ฆฝ๋ถ์ฒ์์ ๋ณด๊ฑด๋ณต์ง๋ถ ์ฐํ ๋ณธ๋ถ๋ก ๊ฒฉํ๋ ์๊ธฐ์ ์ฒํ๋ค.",
    },
    {
        "article_id": 11,
        "title": "์ฌ๊ฐ๋ถ ํ์ง ์ ๋ถ์กฐ์ง๊ฐํธ์ ํ์ ",
        "summary": "์ ๋ถ๊ฐ ์ฌ์ฑ๊ฐ์กฑ๋ถ๋ฅผ ํ์งํ๊ณ ๋ณด๊ฑด๋ณต์ง๋ถ ์ฐํ ๋ณธ๋ถ๋ก ๊ฒฉํํ๋ ์ ๋ถ์กฐ์ง๊ฐํธ์์ ํ์ ํ๋ค.",
    },
    {
        "article_id": 12,
        "title": "์ฌ์ฑ๊ณ, ์ฌ๊ฐ๋ถ ํ์ง ๋ฐ๋ ๋ชฉ์๋ฆฌ",
        "summary": "์ฌ์ฑ๊ณ์์ ์ฌ์ฑ๊ฐ์กฑ๋ถ ํ์ง์ ๋ํ ๋ฐ๋ ๋ชฉ์๋ฆฌ๊ฐ ๋์์ง๊ณ ์๋ค.",
    },
    # Group 5 (ids 13-15) -- presumably the navy carrier programme; TODO confirm
    {
        "article_id": 13,
        "title": "ํด๊ตฐ \"3๋งt๊ธ ํ๊ตญํ ์ ยท๋ฌด์ธ ์ ๋ ฅ๋ชจํจ ํ๋ณด ์ถ์ง\"",
        "summary": "ํด๊ตฐ์ด ์ ์ธ๊ธฐ ์ด์ฉ ์์ฃผ์ ๊ฒฝํญ๋ชจ ๋์ 3๋งํค๊ธ ํ๊ตญํ ์ ๋ฌด์ธ ์ ๋ ฅ๋ชจํจ์ 2030๋ ๋ ํ๋ฐ๊น์ง ํ๋ณดํ๋ ๊ณํ์ ๋ณด๊ณ ํ๋ค.",
    },
    {
        "article_id": 14,
        "title": "ํด๊ตฐ \"๊ฒฝํญ๋ชจ ๋์ 3๋ง ํค๊ธ ์ ๋ ฅ๋ชจํจ ํ๋ณด ์ถ์ง\"",
        "summary": "ํด๊ตฐ์ด ๊ฒฝํญ๋ชจ ๋์ ์ ๋ฌด์ธ ๊ฒธ์ฉ ์ ๋ ฅ๋ชจํจ ํ๋ณด๋ฅผ ์ถ์งํ๋ค๊ณ ๋ฐํ๋ค.",
    },
    {
        "article_id": 15,
        "title": "ํด๊ตฐ, ๊ฒฝํญ๋ชจ ๋์ ์ ๋ ฅ๋ชจํจ ๊ฑด์กฐ ์ถ์ง",
        "summary": "ํด๊ตฐ์ด ๊ฒฝํญ๋ชจ ๊ฑด์กฐ ๊ณํ์ ์ ๋ ฅ๋ชจํจ์ผ๋ก ๋ณ๊ฒฝํ์ฌ ์ถ์งํ๊ณ ์๋ค.",
    },
]
# NOTE(review): the non-ASCII characters in the messages below look like
# mis-decoded UTF-8 (emoji, box-drawing and Korean text). They are reproduced
# as found; restore them from the original file if one is available.
_BANNER = "=" * 100  # heavy rule printed around section titles
_RULE = "โ" * 100  # light rule printed around the topic list


def _check_health() -> bool:
    """Probe ``{API_URL}/health`` and return True only on an HTTP 200 answer."""
    print("\nโณ Checking API health...")
    try:
        response = requests.get(f"{API_URL}/health", timeout=10)
        if response.status_code != 200:
            print(f"โ Health check failed: {response.status_code}")
            return False
        print("โ API is healthy")
        health = response.json()
        print(f" Embedding model: {health.get('embedding_model')}")
    except (requests.RequestException, ValueError, AttributeError) as e:
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        print(f"โ Cannot connect to API: {e}")
        return False
    return True


def _print_topic(topic, articles_by_id) -> None:
    """Print one topic entry: header, keywords, article ids and titles."""
    topic_id = topic['topic_id']
    topic_title = topic['topic_title']
    count = topic['article_count']

    # Topic -1 is reported as the outlier bucket: ids only, no keywords.
    if topic_id == -1:
        print(f"\n๐ธ Topic -1 (Outliers): {count} articles")
        print(f" Article IDs: {topic['article_ids']}")
        return

    print(f"\n๐น Topic {topic_id}: {topic_title} ({count} articles)")

    if topic['keywords']:
        print(" Keywords:")
        for kw in topic['keywords']:
            print(f" - {kw['keyword']:30} (score: {kw['score']:.3f})")

    print(f" Article IDs: {topic['article_ids']}")
    for article_id in topic['article_ids']:
        article = articles_by_id.get(article_id)
        if not article:
            continue  # id returned by the API that is not in TEST_ARTICLES
        title = article['title']
        # Only mark the title with an ellipsis when it was actually cut.
        shown = title if len(title) <= 60 else f"{title[:60]}..."
        print(f" - [{article_id}] {shown}")


def _print_results(result) -> None:
    """Print the statistics block followed by every topic in ``result``."""
    print(f"\n{_BANNER}")
    print("RESULTS")
    print(_BANNER)
    print("\n๐ Statistics:")
    print(f" Total topics: {result['total_topics']}")
    print(f" Total articles: {result['total_articles']}")
    print(f" Outliers: {result['outliers']}")
    print(f" Processing time: {result['processing_time_seconds']}s")
    print(f"\n{_RULE}")
    print("Topics:")
    print(_RULE)

    # Index the fixtures once instead of scanning the list per article id;
    # setdefault keeps the first entry should an id ever be duplicated.
    articles_by_id = {}
    for article in TEST_ARTICLES:
        articles_by_id.setdefault(article['article_id'], article)

    for topic in result['topics']:
        _print_topic(topic, articles_by_id)


def _print_summary(result) -> None:
    """Print the closing summary: totals, expected topics, actual topics."""
    print(f"\n\n{_BANNER}")
    print("SUMMARY")
    print(_BANNER)
    print(f"\nโ BERTopic successfully clustered {result['total_articles']} articles")
    print(f" - {result['total_topics']} distinct topics found")
    print(f" - {result['outliers']} outliers (acceptable)")
    print(f" - Processing time: {result['processing_time_seconds']}s")
    print("\n๐ก Expected topics:")
    print(" - ๋ถ๋์ฐ ๋์ฑ ")
    print(" - ํ๋ฏธ ๊ด์ธ ํ์")
    print(" - ๋ถํ ๋ฏธ์ฌ์ผ ์ํ ๋ฐ์ฌ")
    print(" - ์ฌ๊ฐ๋ถ ํ์ง")
    print(" - ํด๊ตฐ ์ ๋ ฅ๋ชจํจ ํ๋ณด ์ถ์ง")
    print("\n๐ Actual topics:")
    for topic in result['topics']:
        if topic['topic_id'] != -1:
            print(f" - Topic {topic['topic_id']}: {topic['topic_title']}")
    print(_BANNER)


def test_bertopic_api() -> None:
    """Test BERTopic API endpoint.

    Checks ``/health`` first, then POSTs ``TEST_ARTICLES`` to
    ``/bertopic-clustering`` and prints the returned statistics and topics.
    Failures are reported on stdout; nothing is raised for HTTP, JSON or
    response-shape problems, and nothing is returned.
    """
    print(_BANNER)
    print("BERTopic API Test")
    print(_BANNER)
    print(f"\nAPI URL: {API_URL}")
    print(f"Test articles: {len(TEST_ARTICLES)}")

    # Check health first
    if not _check_health():
        return

    # Call BERTopic endpoint
    print("\nโณ Calling /bertopic-clustering...")
    payload = {
        "articles": TEST_ARTICLES,
        "min_topic_size": 2,
        "nr_topics": "auto",
    }
    try:
        response = requests.post(
            f"{API_URL}/bertopic-clustering",
            json=payload,
            timeout=120,
        )
        if response.status_code != 200:
            print(f"โ API call failed: {response.status_code}")
            print(f" Error: {response.text}")
            return
        result = response.json()
        print("โ BERTopic clustering completed!")
        _print_results(result)
        _print_summary(result)
    except (requests.RequestException, ValueError, LookupError, TypeError) as e:
        # Transport errors, a non-JSON body, or a payload missing the
        # expected keys/types: report with a traceback, do not crash.
        print(f"โ Error: {e}")
        traceback.print_exc()
if __name__ == "__main__":
    # Manual gate: give a freshly pushed Space time to finish building before
    # the test hits it.
    print("\nโณ Waiting for HF Spaces to build (2-3 minutes)...")
    print("Press Enter when build is complete...")
    try:
        input()
    except EOFError:
        # stdin is closed (CI, piped run): there is no one to wait for, so
        # go straight to the test instead of dying on the prompt.
        pass
    test_bertopic_api()