egumasa committed
Commit bb65e54 · Parent: a12eec8

Enhance GPU support with stronger enforcement

- Added spacy.require_gpu() for stronger GPU enforcement in base_analyzer.py
- Created gpu_init.py module to initialize GPU before any SpaCy imports
- Import gpu_init at the very start of app.py to ensure early GPU setup
- Enhanced GPU debugging scripts for HuggingFace Spaces

These changes ensure that, when deployed to HuggingFace Spaces with a GPU (see the sketch after this list):
1. GPU is initialized before any model loading
2. SpaCy is forced to use GPU with require_gpu()
3. All model components are explicitly moved to GPU
4. GPU usage is verified and reported
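
A minimal sketch of the import-order pattern these changes rely on (illustrative only: web_app.gpu_init and GPU_AVAILABLE are introduced in this commit, while main() and the captions are hypothetical glue):

    # Entry point: configure the GPU before anything that loads SpaCy models.
    # Importing gpu_init has the side effect of setting CUDA env vars and
    # calling spacy.require_gpu() (falling back to prefer_gpu()).
    from web_app.gpu_init import GPU_AVAILABLE

    import streamlit as st  # safe to import now; GPU is already configured

    def main() -> None:
        if GPU_AVAILABLE:
            st.caption("Models will run on GPU")
        else:
            st.caption("GPU unavailable - models will run on CPU")

    if __name__ == "__main__":
        main()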

test_current_gpu.py ADDED
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+"""Quick GPU diagnostic for current environment."""
+
+import sys
+import os
+
+print("=== GPU Diagnostic ===")
+print(f"Python: {sys.version}")
+print(f"Platform: {sys.platform}")
+print(f"Current directory: {os.getcwd()}")
+
+# Check PyTorch
+try:
+    import torch
+    print(f"\nPyTorch: {torch.__version__}")
+    print(f"CUDA available: {torch.cuda.is_available()}")
+
+    if torch.cuda.is_available():
+        print(f"CUDA version: {torch.version.cuda}")
+        print(f"GPU count: {torch.cuda.device_count()}")
+        for i in range(torch.cuda.device_count()):
+            print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
+            print(f"  Memory: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.2f} GB")
+    else:
+        print("Running on CPU")
+
+    # Check environment variables
+    print("\nRelevant environment variables:")
+    for var in ['CUDA_VISIBLE_DEVICES', 'CUDA_HOME', 'SPACES', 'SPACE_ID']:
+        print(f"  {var}: {os.environ.get(var, 'Not set')}")
+
+except ImportError as e:
+    print(f"PyTorch not available: {e}")
+
+# Check SpaCy
+print("\n--- SpaCy Configuration ---")
+try:
+    import spacy
+    print(f"SpaCy: {spacy.__version__}")
+
+    # Try to enable GPU
+    gpu_id = spacy.prefer_gpu()
+    print(f"spacy.prefer_gpu(): {gpu_id}")
+
+    # Check if we can require GPU (guarded: torch may have failed to import above)
+    if 'torch' in sys.modules and torch.cuda.is_available():
+        try:
+            spacy.require_gpu()
+            print("spacy.require_gpu(): Success")
+        except Exception as e:
+            print(f"spacy.require_gpu(): Failed - {e}")
+
+except ImportError as e:
+    print(f"SpaCy not available: {e}")
+except Exception as e:
+    print(f"SpaCy error: {e}")
test_debug_mode_gpu.py CHANGED
@@ -1,86 +1,186 @@
 #!/usr/bin/env python3
-"""
-Test script to verify the GPU status display in debug mode works correctly.
-This tests the functionality without running the full Streamlit app.
-"""
+"""Debug GPU issues in HuggingFace Spaces environment."""
 
-import sys
 import os
+import sys
 
-# Add parent directory to path
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-from web_app.debug_utils import show_gpu_status
-import streamlit as st
-
-# Mock streamlit components for testing
-class MockStreamlit:
-    """Mock Streamlit for testing without running the actual app."""
-
-    @staticmethod
-    def write(*args, **kwargs):
-        print(*args)
-
-    @staticmethod
-    def columns(n):
-        return [MockContext()] * n
-
-    @staticmethod
-    def expander(title, expanded=False):
-        print(f"\n=== {title} ===")
-        return MockContext()
-
-    @staticmethod
-    def info(text):
-        print(f"[INFO] {text}")
-
-    @staticmethod
-    def warning(text):
-        print(f"[WARNING] {text}")
-
-    @staticmethod
-    def error(text):
-        print(f"[ERROR] {text}")
-
-    class session_state:
-        analyzer = None
-        parser = None
-
-class MockContext:
-    """Mock context manager for with statements."""
-    def __enter__(self):
-        return self
-
-    def __exit__(self, *args):
-        pass
-
-    def write(self, *args, **kwargs):
-        print(*args)
-
-def test_gpu_status_display():
-    """Test the GPU status display functionality."""
-    print("Testing GPU Status Display Function")
-    print("=" * 50)
-
-    # Replace streamlit with mock for testing
-    import web_app.debug_utils
-    web_app.debug_utils.st = MockStreamlit()
-
-    # Import the function after mocking
-    from web_app.debug_utils import show_gpu_status
-
-    try:
-        # Test the function
-        show_gpu_status()
-        print("\n✅ GPU status display function executed successfully!")
-
-    except Exception as e:
-        print(f"\n❌ Error in GPU status display: {str(e)}")
-        import traceback
-        traceback.print_exc()
-
-    print("\n" + "=" * 50)
-    print("Test completed!")
-
-if __name__ == "__main__":
-    test_gpu_status_display()
+print("=== Debugging GPU in HuggingFace Spaces ===")
+
+# Set environment variables BEFORE any imports
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+os.environ['SPACY_PREFER_GPU'] = '1'
+
+# Now import libraries
+import torch
+import spacy
+
+print("\n1. Environment Check:")
+print(f"   Platform: {sys.platform}")
+print(f"   Python: {sys.version}")
+print(f"   Working dir: {os.getcwd()}")
+print(f"   CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', 'Not set')}")
+print(f"   SPACY_PREFER_GPU: {os.environ.get('SPACY_PREFER_GPU', 'Not set')}")
+print(f"   SPACES: {os.environ.get('SPACES', 'Not set')}")
+
+print("\n2. PyTorch GPU Status:")
+print(f"   PyTorch version: {torch.__version__}")
+print(f"   CUDA available: {torch.cuda.is_available()}")
+
+if torch.cuda.is_available():
+    print(f"   CUDA version: {torch.version.cuda}")
+    print(f"   GPU count: {torch.cuda.device_count()}")
+    print(f"   Current device: {torch.cuda.current_device()}")
+    print(f"   GPU 0: {torch.cuda.get_device_name(0)}")
+
+    # Force CUDA initialization
+    torch.cuda.init()
+    print("   CUDA initialized")
+
+    # Set default device
+    torch.cuda.set_device(0)
+    print("   Set default CUDA device to 0")
+
+print("\n3. SpaCy GPU Configuration:")
+print(f"   SpaCy version: {spacy.__version__}")
+
+# Try multiple methods to enable GPU
+print("\n   Attempting spacy.prefer_gpu()...")
+gpu_id = spacy.prefer_gpu(gpu_id=0)
+print(f"   Result: {gpu_id}")
+
+if torch.cuda.is_available():
+    print("\n   Attempting spacy.require_gpu()...")
+    try:
+        spacy.require_gpu(gpu_id=0)
+        print("   ✓ spacy.require_gpu() succeeded")
+    except Exception as e:
+        print(f"   ✗ spacy.require_gpu() failed: {e}")
+
+print("\n4. Test Model Loading:")
+try:
+    # Try loading a small model first
+    print("   Loading en_core_web_md...")
+    nlp_md = spacy.load("en_core_web_md")
+
+    # Check if components are on GPU
+    print("   Checking MD model components:")
+    for name, component in nlp_md.pipeline:
+        device = "Unknown"
+        if hasattr(component, 'model'):
+            if hasattr(component.model, 'device'):
+                device = str(component.model.device)
+            elif hasattr(component.model, 'parameters'):
+                try:
+                    param = next(component.model.parameters())
+                    device = str(param.device)
+                except Exception:
+                    pass
+        print(f"     {name}: {device}")
+
+    # Test processing
+    doc = nlp_md("Test sentence")
+    print(f"   MD model processed {len(doc)} tokens")
+
+except Exception as e:
+    print(f"   ✗ MD model failed: {e}")
+
+print("\n5. Test Transformer Model with GPU:")
+try:
+    # Force GPU before loading transformer
+    if torch.cuda.is_available():
+        torch.cuda.set_device(0)
+        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+
+    print("   Loading en_core_web_trf with GPU config...")
+
+    # Explicit GPU config (defined for reference; not passed to spacy.load below)
+    config = {
+        "nlp": {
+            "pipeline": ["transformer", "tagger", "parser", "ner", "lemmatizer"]
+        },
+        "components": {
+            "transformer": {
+                "model": {
+                    "mixed_precision": True,
+                    "@architectures": "spacy-transformers.TransformerModel.v3",
+                    "get_spans": {
+                        "@span_getters": "spacy-transformers.strided_spans.v1",
+                        "window": 128,
+                        "stride": 96
+                    }
+                }
+            }
+        }
+    }
+
+    nlp_trf = spacy.load("en_core_web_trf")
+
+    # Force components to GPU
+    print("   Forcing transformer components to GPU...")
+    for name, component in nlp_trf.pipeline:
+        if hasattr(component, 'model'):
+            if hasattr(component.model, 'to'):
+                try:
+                    component.model.to('cuda:0')
+                    print(f"   ✓ Moved {name} to GPU")
+                except Exception as e:
+                    print(f"   ✗ Failed to move {name}: {e}")
+
+    # Verify GPU usage
+    print("\n   Verifying GPU usage:")
+    for name, component in nlp_trf.pipeline:
+        on_gpu = False
+        device_info = "Unknown"
+
+        if hasattr(component, 'model'):
+            # Check parameters
+            if hasattr(component.model, 'parameters'):
+                try:
+                    for param in component.model.parameters():
+                        if param.is_cuda:
+                            on_gpu = True
+                            device_info = str(param.device)
+                            break
+                except Exception:
+                    pass
+
+            # Check device attribute
+            if hasattr(component.model, 'device'):
+                device_info = str(component.model.device)
+                on_gpu = 'cuda' in device_info
+
+        status = "✓ GPU" if on_gpu else "✗ CPU"
+        print(f"   {name}: {status} ({device_info})")
+
+    # Test processing with timing
+    print("\n   Testing transformer processing...")
+    import time
+
+    text = "The quick brown fox jumps over the lazy dog. " * 5
+    start = time.time()
+    doc = nlp_trf(text)
+    end = time.time()
+
+    print(f"   ✓ Processed {len(doc)} tokens in {end-start:.2f}s")
+
+    # Check memory usage
+    if torch.cuda.is_available():
+        mem_allocated = torch.cuda.memory_allocated(0) / 1024**3
+        mem_reserved = torch.cuda.memory_reserved(0) / 1024**3
+        print(f"\n   GPU Memory:")
+        print(f"     Allocated: {mem_allocated:.2f} GB")
+        print(f"     Reserved: {mem_reserved:.2f} GB")
+
+except Exception as e:
+    print(f"   Transformer model failed: {e}")
+    import traceback
+    traceback.print_exc()
+
+print("\n=== Summary ===")
+if torch.cuda.is_available():
+    print("✓ CUDA is available")
+    print("✓ PyTorch can see GPU")
+    print("→ Check if SpaCy models are using GPU above")
+else:
+    print("✗ No GPU detected in this environment")
+    print("→ This script should be run in HuggingFace Spaces with GPU")
text_analyzer/base_analyzer.py CHANGED
@@ -128,11 +128,17 @@ class BaseAnalyzer:
         torch.cuda.set_device(device_id)
         os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
 
-        # Force spaCy to use GPU
-        gpu_id = spacy.prefer_gpu(gpu_id=device_id)
-
-        if gpu_id is False:
-            raise RuntimeError("spacy.prefer_gpu() returned False despite GPU being available")
+        # Force spaCy to use GPU - use require_gpu for stronger enforcement
+        try:
+            spacy.require_gpu(gpu_id=device_id)
+            logger.info("Successfully enforced GPU usage with spacy.require_gpu()")
+        except Exception as e:
+            # Fall back to prefer_gpu() if require_gpu() fails
+            logger.warning(f"spacy.require_gpu() failed: {e}, trying prefer_gpu()")
+            gpu_id = spacy.prefer_gpu(gpu_id=device_id)
+
+            if gpu_id is False:
+                raise RuntimeError("spacy.prefer_gpu() returned False despite GPU being available")
 
         logger.info(f"GPU strongly configured for spaCy - using {device_name} (device {device_id})")
web_app/app.py CHANGED
@@ -5,7 +5,6 @@ Provides lexical sophistication analysis and POS/dependency parsing.
 Refactored version with modular architecture for better maintainability.
 """
 
-import streamlit as st
 import sys
 import os
 from pathlib import Path
@@ -13,6 +12,11 @@ from pathlib import Path
 # Add parent directory to path for imports
 sys.path.append(os.path.dirname(os.path.dirname(__file__)))
 
+# CRITICAL: Initialize GPU BEFORE any SpaCy/model imports
+from web_app.gpu_init import GPU_AVAILABLE
+
+import streamlit as st
+
 # Import custom modules
 from web_app.session_manager import SessionManager
 from web_app.components.ui_components import UIComponents
@@ -40,15 +44,11 @@ def main():
     st.title("�� Linguistic Data Analysis I - Text Analysis Tools")
     st.markdown("*Educational tools for lexical sophistication analysis, POS/dependency parsing, and word frequency visualization*")
 
-    # GPU verification on startup (following HuggingFace docs)
-    try:
-        import torch
-        if torch.cuda.is_available():
-            logger.info(f"CUDA is available: {torch.cuda.get_device_name(torch.cuda.current_device())}")
-        else:
-            logger.info("CUDA not available - running on CPU")
-    except ImportError:
-        logger.info("PyTorch not installed - GPU support unavailable")
+    # GPU status is already initialized in the gpu_init module
+    if GPU_AVAILABLE:
+        logger.info("GPU initialization successful - models will use GPU")
+    else:
+        logger.info("GPU not available - models will use CPU")
 
     # Initialize session state
     SessionManager.initialize_session_state()
web_app/gpu_init.py ADDED
@@ -0,0 +1,66 @@
+"""
+GPU initialization module that must be imported BEFORE any SpaCy modules.
+This ensures GPU is properly configured before SpaCy loads any models.
+"""
+
+import os
+import logging
+
+logger = logging.getLogger(__name__)
+
+def initialize_gpu_environment():
+    """
+    Initialize GPU environment variables and settings before SpaCy import.
+    This function should be called at the very beginning of the application.
+    """
+    # Set environment variables BEFORE any imports
+    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+    os.environ['SPACY_PREFER_GPU'] = '1'
+
+    try:
+        import torch
+
+        if torch.cuda.is_available():
+            # Force CUDA initialization
+            torch.cuda.init()
+
+            # Set default device
+            torch.cuda.set_device(0)
+
+            # Log GPU info
+            gpu_name = torch.cuda.get_device_name(0)
+            gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1024**3
+
+            logger.info(f"GPU initialized: {gpu_name} ({gpu_memory:.1f} GB)")
+            logger.info(f"CUDA version: {torch.version.cuda}")
+            logger.info(f"PyTorch version: {torch.__version__}")
+
+            # Pre-configure SpaCy for GPU
+            import spacy
+            try:
+                # Try require_gpu first for strong enforcement
+                spacy.require_gpu(gpu_id=0)
+                logger.info("SpaCy GPU enforced with require_gpu()")
+            except Exception as e:
+                # Fall back to prefer_gpu
+                logger.warning(f"require_gpu() failed: {e}, using prefer_gpu()")
+                gpu_id = spacy.prefer_gpu(gpu_id=0)
+                if gpu_id is not False:
+                    logger.info(f"SpaCy GPU enabled with prefer_gpu(): device {gpu_id}")
+                else:
+                    logger.error("SpaCy GPU initialization failed!")
+
+            return True
+        else:
+            logger.info("No CUDA device available - running on CPU")
+            return False
+
+    except ImportError:
+        logger.info("PyTorch not installed - GPU support unavailable")
+        return False
+    except Exception as e:
+        logger.error(f"GPU initialization error: {e}")
+        return False
+
+# Initialize GPU on module import
+GPU_AVAILABLE = initialize_gpu_environment()