Vu Anh Claude committed on
Commit
3715499
·
1 Parent(s): f62e707

Add trained VNTC model and usage demonstration script

Browse files

- Upload sklearn_model.joblib (VNTC model, 92.33% accuracy) for Hugging Face Hub
- Add use_this_model.py to demonstrate model usage from Hub
- Model supports Vietnamese news classification across 10 categories

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show
  1. sklearn_model.joblib +3 -0
  2. use_this_model.py +198 -0
sklearn_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b25b914bfacc590165e0ce35e944815cf1fda52d9d2fadf79334c5bc2754b360
3
+ size 2393144
use_this_model.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demonstration script for using Sonar Core 1 models from Hugging Face Hub.
4
+ Shows how to download and use the pre-trained Vietnamese text classification model.
5
+ """
6
+
7
+ from huggingface_hub import hf_hub_download
8
+ import joblib
9
+ import numpy as np
10
+
11
+
12
def _find_local_model(runs_dir):
    """Return the path of a locally trained VNTC model under *runs_dir*, or None.

    Run directories are visited in reverse sorted order and the first model
    file whose name contains "VNTC" inside a run's ``models`` sub-directory
    wins.

    Args:
        runs_dir: ``pathlib.Path`` of the directory holding one sub-directory
            per training run.

    Returns:
        The ``Path`` of the selected model file, or ``None`` if nothing matches.
    """
    if not runs_dir.is_dir():
        return None

    # Reverse-sorted so the "largest" run name is tried first.
    # NOTE(review): presumably run names are timestamps, making this the
    # newest run - confirm against the training script.
    run_dirs = sorted((d for d in runs_dir.iterdir() if d.is_dir()), reverse=True)
    for run_dir in run_dirs:
        models_dir = run_dir / "models"
        if not models_dir.is_dir():
            continue
        # "*.pkl" is tried first to keep the original preference; "*.joblib"
        # is accepted as well because that is the extension of the published
        # artifact (sklearn_model.joblib).
        for pattern in ("*.pkl", "*.joblib"):
            # glob() order depends on the filesystem; sort for determinism.
            for model_file in sorted(models_dir.glob(pattern)):
                if "VNTC" in model_file.name:
                    return model_file
    return None


def load_model_from_hub():
    """Load the pre-trained model, preferring the Hugging Face Hub copy.

    The model file is downloaded from the ``undertheseanlp/sonar_core_1``
    repository and deserialized with joblib. If anything in that step fails,
    the function falls back to a locally trained VNTC model found under
    ``./runs``.

    Returns:
        The object deserialized by ``joblib.load``.

    Raises:
        FileNotFoundError: If the Hub copy could not be loaded and no local
            model exists. The Hub failure is attached as ``__cause__``.
    """
    from pathlib import Path

    try:
        print("Downloading model from Hugging Face Hub...")
        model_path = hf_hub_download("undertheseanlp/sonar_core_1", "sklearn_model.joblib")
        print(f"Model downloaded to: {model_path}")

        print("Loading model...")
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load artifacts from repositories you trust.
        return joblib.load(model_path)
    except Exception as e:
        # Deliberately broad: any Hub failure triggers the best-effort
        # local fallback instead of aborting the demo.
        print(f"Error downloading from Hub: {e}")
        print("\nFalling back to local model for demonstration...")

        model_file = _find_local_model(Path("runs"))
        if model_file is None:
            # Chain explicitly so the traceback shows why the Hub path failed.
            raise FileNotFoundError(
                "No model available. Please upload model to Hugging Face Hub or train locally."
            ) from e

        print(f"Using local model: {model_file}")
        return joblib.load(model_file)
42
+
43
+
44
def predict_vntc_examples(model):
    """Print predictions for ten sample Vietnamese news sentences.

    For every sample the text, the hand-assigned expected category and the
    model's predicted label are printed. When the model offers
    ``predict_proba`` the confidence (highest probability) is shown too, and
    when it also exposes ``classes_`` the three most probable labels are
    listed. A failure on one sample is reported and does not stop the others.

    Args:
        model: Object with a ``predict(list_of_texts)`` method; optionally
            ``predict_proba(list_of_texts)`` and a ``classes_`` sequence.

    Returns:
        None. All results are written to standard output.
    """
    print("\n" + "="*60)
    print("VIETNAMESE NEWS CLASSIFICATION EXAMPLES")
    print("="*60)

    # Vietnamese news examples for different categories
    examples = [
        ("Chính trị & Xã hội", "Chính phủ đã thông qua nghị định mới về chính sách xã hội"),
        ("Đời sống", "Xu hướng ăn uống lành mạnh đang được nhiều người quan tâm"),
        ("Khoa học", "Các nhà khoa học đã phát hiện ra loại vi khuẩn mới"),
        ("Kinh doanh", "Thị trường chứng khoán có nhiều biến động trong tuần qua"),
        ("Pháp luật", "Luật an toàn giao thông sẽ có hiệu lực từ tháng sau"),
        ("Sức khỏe", "Tiêm vaccine phòng chống COVID-19 đã đạt tỷ lệ cao"),
        ("Thế giới", "Hội nghị thượng đỉnh quốc tế sẽ diễn ra tại Geneva"),
        ("Thể thao", "Đội tuyển bóng đá Việt Nam giành chiến thắng 2-0"),
        ("Văn hóa", "Lễ hội truyền thống sẽ được tổ chức vào cuối tuần"),
        ("Vi tính", "Công nghệ trí tuệ nhân tạo đang phát triển mạnh mẽ"),
    ]

    print("Testing Vietnamese news classification:")
    print("-" * 60)

    # Probability support is a property of the model, not of the sample, so
    # detect it once. Classifiers without predict_proba still get their
    # predicted label printed instead of an error for every sample.
    has_proba = hasattr(model, "predict_proba")
    classes = getattr(model, "classes_", None)

    for expected_category, text in examples:
        try:
            prediction = model.predict([text])[0]
            probabilities = model.predict_proba([text])[0] if has_proba else None

            print(f"Text: {text}")
            print(f"Expected: {expected_category}")
            print(f"Predicted: {prediction}")

            if probabilities is not None:
                print(f"Confidence: {np.max(probabilities):.3f}")

                # Show top 3 predictions (needs labels to name the classes)
                if classes is not None:
                    # argsort is ascending: take the last three, then reverse
                    top_indices = np.argsort(probabilities)[-3:][::-1]
                    print("Top 3 predictions:")
                    for i, idx in enumerate(top_indices, 1):
                        print(f" {i}. {classes[idx]}: {probabilities[idx]:.3f}")

            print("-" * 60)

        except Exception as e:
            # Best effort: report the failing sample and keep going.
            print(f"Error predicting '{text[:30]}...': {e}")
            print("-" * 60)
92
+
93
+
94
def interactive_mode(model):
    """Classify texts typed by the user until they ask to stop.

    Each non-empty line read from standard input is passed to the model and
    the predicted label is printed. When the model offers ``predict_proba``
    the confidence is printed too, and when it also exposes ``classes_`` the
    three most probable labels are listed.

    The loop ends when the user types ``quit``, ``exit`` or ``q``, presses
    Ctrl-C, or when standard input reaches end-of-file (Ctrl-D or exhausted
    piped input).

    Args:
        model: Object with a ``predict(list_of_texts)`` method; optionally
            ``predict_proba(list_of_texts)`` and a ``classes_`` sequence.

    Returns:
        None. All results are written to standard output.
    """
    print("\n" + "="*60)
    print("INTERACTIVE MODE - VIETNAMESE TEXT CLASSIFICATION")
    print("="*60)
    print("Enter Vietnamese text to classify (type 'quit' to exit):")

    has_proba = hasattr(model, "predict_proba")
    classes = getattr(model, "classes_", None)

    while True:
        try:
            user_input = input("\nText: ").strip()
        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop: once stdin is exhausted every
            # further input() raises it again, so treating it as a regular
            # error would spin forever.
            print("\nExiting...")
            break

        if user_input.lower() in ('quit', 'exit', 'q'):
            break

        if not user_input:
            continue

        try:
            prediction = model.predict([user_input])[0]
            probabilities = model.predict_proba([user_input])[0] if has_proba else None

            print(f"Predicted category: {prediction}")

            if probabilities is not None:
                print(f"Confidence: {np.max(probabilities):.3f}")

                # Show top 3 predictions (needs labels to name the classes)
                if classes is not None:
                    # argsort is ascending: take the last three, then reverse
                    top_indices = np.argsort(probabilities)[-3:][::-1]
                    print("Top 3 predictions:")
                    for i, idx in enumerate(top_indices, 1):
                        print(f" {i}. {classes[idx]}: {probabilities[idx]:.3f}")

        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            # Best effort: a failing prediction should not end the session.
            print(f"Error: {e}")
132
+
133
+
134
def simple_usage_example():
    """Print a banner followed by a minimal code snippet for using the model.

    The snippet is only displayed, never executed. It shows how to download
    the model file from the Hub, load it with joblib and classify one
    sentence.
    """
    divider = "=" * 60

    # Kept as a plain (non-f) string so the braces are printed literally.
    snippet = """
from huggingface_hub import hf_hub_download
import joblib

# Download and load model
model = joblib.load(
hf_hub_download("undertheseanlp/sonar_core_1", "sklearn_model.joblib")
)

# Make prediction
text = "Việt Nam giành chiến thắng trong trận bán kết"
prediction = model.predict([text])[0]
probabilities = model.predict_proba([text])[0]

print(f"Predicted category: {prediction}")
print(f"Confidence: {max(probabilities):.3f}")
"""

    sections = (
        "\n" + divider,
        "SIMPLE USAGE EXAMPLE",
        divider,
        "Code example:",
        snippet,
    )
    for section in sections:
        print(section)
158
+
159
+
160
def main():
    """Run the full demonstration: load, show usage, predict, go interactive.

    Steps:
      1. Load the model via ``load_model_from_hub``; on failure print a
         message describing the loading problem and return.
      2. Print the usage snippet and run the built-in example predictions.
      3. If running in an interactive interpreter or attached to a terminal,
         offer the interactive mode.

    Errors are reported on standard output instead of being raised, so the
    function always returns ``None``.
    """
    import sys

    print("Sonar Core 1 - Hugging Face Hub Model Usage")
    print("=" * 60)

    # Only model loading is guarded by the "loading" messages, so they are
    # never printed for unrelated failures later in the demo.
    try:
        model = load_model_from_hub()
    except ImportError as e:
        # huggingface_hub is imported at module level, so an ImportError
        # reaching this point comes from deserializing the model (a package
        # it depends on is not installed). Name the module that is missing.
        missing = e.name or "a required package"
        print(f"Error: {missing} could not be imported: {e}")
        print("Install the missing dependency and try again.")
        return
    except Exception as e:
        print(f"Error loading model: {e}")
        print("\nMake sure you have internet connection and try again.")
        return

    try:
        # Show simple usage example
        simple_usage_example()

        # Run prediction examples
        predict_vntc_examples(model)

        # Offer interactive mode only where a prompt can be answered.
        try:
            stdin = sys.stdin  # may be None, e.g. when started without a console
            if hasattr(sys, 'ps1') or (stdin is not None and stdin.isatty()):
                response = input("\nWould you like to try interactive mode? (y/n): ")
                if response.strip().lower().startswith('y'):
                    interactive_mode(model)
        except (EOFError, OSError):
            print("\nInteractive mode not available in this environment.")
            print("Run this script in a regular terminal to use interactive mode.")
        except KeyboardInterrupt:
            # Ctrl-C at the prompt is treated as declining the offer.
            print("\nSkipping interactive mode.")

        print("\nDemonstration complete!")
    except Exception as e:
        # Top-level boundary of a demo script: report instead of crashing.
        print(f"Error during demonstration: {e}")


if __name__ == "__main__":
    main()