news-stance-detection / test_bertopic_api.py
ZedwrKc's picture
Initial deployment to Pro account
d12e82b
#!/usr/bin/env python3
"""
Test BERTopic API endpoint on HF Spaces.
"""
import requests
import json
# Base URL of the deployed Hugging Face Space; endpoint paths such as
# "/health" and "/bertopic-clustering" are appended to it.
API_URL = "https://zedwrkc-news-stance-detection.hf.space"

# Fixture: fifteen Korean news articles, three for each of five stories.
# Each entry carries the "article_id", "title" and "summary" fields that are
# sent to the clustering endpoint. The text must be stored as real Korean
# (UTF-8); mis-decoded text would make the clustering result meaningless.
TEST_ARTICLES = [
    # Real-estate policy
    {
        "article_id": 1,
        "title": "복기왕 \"15억 이상 주택은 욕망\"...서민들 가슴에 또 불지르나",
        "summary": "국회 국토교통위원회 여당 간사인 더불어민주당 복기왕 의원은 10·15 부동산 대책을 '사다리 걷어차기'라고 하는 건 실체가 없는 공격이라고 비판했다.",
    },
    {
        "article_id": 2,
        "title": "與복기왕 \"15억이 서민 아파트\"…국힘 \"집 못산 난 천민이냐\"",
        "summary": "국회 국토교통위 더불어민주당 간사인 복기왕 의원은 23일 10·15 부동산 대책에 대한 '사다리 걷어차기' 비판을 반박하는 과정에서 '15억 정도는 서민 아파트'라고 발언했다.",
    },
    {
        "article_id": 3,
        "title": "여야, 국토위서 이상경 '부동산 발언' 질타…사퇴촉구안엔 이견",
        "summary": "23일 국회 국토교통위원회 국정감사에서 10·15 부동산 대책과 관련해 논란이 된 이상경 국토교통부 제1차관에 대해 여야가 질타했다.",
    },
    # Korea-US tariff negotiations
    {
        "article_id": 4,
        "title": "李대통령 \"관세협상, 시간 걸리더라도 합리적 결과 도달할 것\"",
        "summary": "이재명 대통령은 경주 APEC 정상회의에서 한미 관세협상이 타결될 가능성에 대해 합리적인 결과에 이르게 될 것이라고 확신한다고 말했다.",
    },
    {
        "article_id": 5,
        "title": "\"한·미는 동맹이고 상식 갖고 있어, 관세협상서 합리적 결과 나올 것\"",
        "summary": "한미 양국은 동맹 관계이며 상식을 갖고 있어 관세협상에서 합리적인 결과가 나올 것이라는 전망이 나왔다.",
    },
    {
        "article_id": 6,
        "title": "트럼프 관세 정책, 한미 협상 난항 예상",
        "summary": "트럼프 행정부의 관세 정책으로 인해 한미 무역 협상이 난항을 겪을 것으로 예상된다.",
    },
    # North Korean missile launch
    {
        "article_id": 7,
        "title": "북한 \"새무기체계 극초음속 비행체 시험발사\"...김정은 참관 안해",
        "summary": "북한이 미사일 무기체계인 극초음속 미사일을 시험 발사했다고 밝혔다.",
    },
    {
        "article_id": 8,
        "title": "북한 \"새로운 무기체계 발사...목표점 강타\"",
        "summary": "북한이 새로운 무기체계를 발사했으며 목표점을 정확히 강타했다고 발표했다.",
    },
    {
        "article_id": 9,
        "title": "북 \"극초음속 비행체 시험발사\"...김정은 참관 안 해",
        "summary": "북한이 극초음속 비행체를 시험발사했으나 김정은 위원장은 참관하지 않았다.",
    },
    # Abolition of the Ministry of Gender Equality and Family
    {
        "article_id": 10,
        "title": "'본부'로 격하된 여가부...\"공룡 복지부에서 성평등 정책 묻힐 것\" 우려",
        "summary": "정부조직 개편안에 따라 여성가족부가 출범 21년 만에 독립부처에서 보건복지부 산하 본부로 격하될 위기에 처했다.",
    },
    {
        "article_id": 11,
        "title": "여가부 폐지 정부조직개편안 확정",
        "summary": "정부가 여성가족부를 폐지하고 보건복지부 산하 본부로 격하하는 정부조직개편안을 확정했다.",
    },
    {
        "article_id": 12,
        "title": "여성계, 여가부 폐지 반대 목소리",
        "summary": "여성계에서 여성가족부 폐지에 대한 반대 목소리가 높아지고 있다.",
    },
    # Navy manned/unmanned force carrier
    {
        "article_id": 13,
        "title": "해군 \"3만t급 한국형 유·무인 전력모함 확보 추진\"",
        "summary": "해군이 유인기 운용 위주의 경항모 대신 3만톤급 한국형 유무인 전력모함을 2030년대 후반까지 확보하는 계획을 보고했다.",
    },
    {
        "article_id": 14,
        "title": "해군 \"경항모 대신 3만 톤급 전력모함 확보 추진\"",
        "summary": "해군이 경항모 대신 유무인 겸용 전력모함 확보를 추진한다고 밝혔다.",
    },
    {
        "article_id": 15,
        "title": "해군, 경항모 대신 전력모함 건조 추진",
        "summary": "해군이 경항모 건조 계획을 전력모함으로 변경하여 추진하고 있다.",
    },
]
def _api_is_healthy():
    """Probe ``{API_URL}/health`` and print the outcome.

    Returns:
        bool: True when the endpoint answered HTTP 200 and its JSON body
        could be read; False on any other status code or on any error.
    """
    print("\n⏳ Checking API health...")
    try:
        response = requests.get(f"{API_URL}/health", timeout=10)
        if response.status_code != 200:
            print(f"❌ Health check failed: {response.status_code}")
            return False
        print("✅ API is healthy")
        health = response.json()
        print(f" Embedding model: {health.get('embedding_model')}")
    except Exception as e:
        # Diagnostic script: report any failure here and stop, never crash.
        print(f"❌ Cannot connect to API: {e}")
        return False
    return True


def _print_topics(topics):
    """Print every topic with its keywords, article ids and article titles."""
    # Build the id -> title lookup once instead of scanning TEST_ARTICLES for
    # every returned id; setdefault keeps the first entry for a repeated id.
    titles_by_id = {}
    for article in TEST_ARTICLES:
        titles_by_id.setdefault(article['article_id'], article['title'])

    for topic in topics:
        topic_id = topic['topic_id']
        count = topic['article_count']

        # Topic id -1 is reported as the outlier bucket: ids only.
        if topic_id == -1:
            print(f"\n🔸 Topic -1 (Outliers): {count} articles")
            print(f" Article IDs: {topic['article_ids']}")
            continue

        print(f"\n🔹 Topic {topic_id}: {topic['topic_title']} ({count} articles)")

        if topic['keywords']:
            print(" Keywords:")
            for kw in topic['keywords']:
                print(f" - {kw['keyword']:30} (score: {kw['score']:.3f})")

        print(f" Article IDs: {topic['article_ids']}")
        for article_id in topic['article_ids']:
            title = titles_by_id.get(article_id)
            if title is None:
                # The API returned an id that is not part of the fixture.
                continue
            # Only mark the title as shortened when it really was truncated.
            shown = title if len(title) <= 60 else f"{title[:60]}..."
            print(f" - [{article_id}] {shown}")


def _print_results(result):
    """Print the statistics header followed by the per-topic breakdown."""
    rule = "=" * 100
    thin_rule = "─" * 100

    print(f"\n{rule}")
    print("RESULTS")
    print(rule)

    print("\n📊 Statistics:")
    print(f" Total topics: {result['total_topics']}")
    print(f" Total articles: {result['total_articles']}")
    print(f" Outliers: {result['outliers']}")
    print(f" Processing time: {result['processing_time_seconds']}s")

    print(f"\n{thin_rule}")
    print("Topics:")
    print(thin_rule)
    _print_topics(result['topics'])


def _print_summary(result):
    """Print the closing summary: totals, expected topics and actual topics."""
    rule = "=" * 100

    print(f"\n\n{rule}")
    print("SUMMARY")
    print(rule)

    print(f"\n✅ BERTopic successfully clustered {result['total_articles']} articles")
    print(f" - {result['total_topics']} distinct topics found")
    print(f" - {result['outliers']} outliers (acceptable)")
    print(f" - Processing time: {result['processing_time_seconds']}s")

    # The five stories the fixture articles were drawn from.
    print("\n💡 Expected topics:")
    print(" - 부동산 대책")
    print(" - 한미 관세 협상")
    print(" - 북한 미사일 시험 발사")
    print(" - 여가부 폐지")
    print(" - 해군 전력모함 확보 추진")

    print("\n📊 Actual topics:")
    for topic in result['topics']:
        if topic['topic_id'] != -1:
            print(f" - Topic {topic['topic_id']}: {topic['topic_title']}")

    print(rule)


def test_bertopic_api():
    """Exercise the BERTopic clustering endpoint and print a report.

    Steps:
        1. GET ``{API_URL}/health``; stop if it does not answer HTTP 200.
        2. POST ``TEST_ARTICLES`` to ``{API_URL}/bertopic-clustering``.
        3. Print statistics, each topic with its keywords and articles, and
           a summary comparing the expected topics with the returned ones.

    The response is read through the keys ``total_topics``,
    ``total_articles``, ``outliers``, ``processing_time_seconds`` and
    ``topics``; each topic through ``topic_id``, ``topic_title``,
    ``article_count``, ``article_ids`` and ``keywords`` (entries with
    ``keyword`` and ``score``).

    Returns:
        None. Every failure (network error, non-200 status, unexpected
        response shape) is printed rather than raised.
    """
    rule = "=" * 100
    print(rule)
    print("BERTopic API Test")
    print(rule)
    print(f"\nAPI URL: {API_URL}")
    print(f"Test articles: {len(TEST_ARTICLES)}")

    if not _api_is_healthy():
        return

    print("\n⏳ Calling /bertopic-clustering...")
    # Clustering parameters forwarded to the endpoint together with the fixture.
    payload = {
        "articles": TEST_ARTICLES,
        "min_topic_size": 2,
        "nr_topics": "auto",
    }

    try:
        response = requests.post(
            f"{API_URL}/bertopic-clustering",
            json=payload,
            timeout=120,
        )
        if response.status_code != 200:
            print(f"❌ API call failed: {response.status_code}")
            print(f" Error: {response.text}")
            return

        result = response.json()
        print("✅ BERTopic clustering completed!")
        _print_results(result)
        _print_summary(result)
    except Exception as e:
        # Top-level boundary of a diagnostic script: show the full traceback
        # so a malformed response can be debugged, but do not propagate.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    # A freshly pushed Space needs time to build; let the operator decide
    # when it is ready before the endpoints are probed.
    print("\n⏳ Waiting for HF Spaces to build (2-3 minutes)...")
    print("Press Enter when build is complete...")
    try:
        input()
    except EOFError:
        # stdin is closed or not interactive (piped input, CI): there is
        # nobody to wait for, so run the test immediately.
        pass
    test_bertopic_api()