magenta-retry-2345-test

Paused

App Files Files Community

adityas129 committed 10 days ago

Commit

cf5fc2d

verified ·

1 Parent(s): 7dcdc7e

Update app.py

Browse files

Files changed (1) hide show

app.py +436 -146

app.py CHANGED Viewed

@@ -205,6 +205,123 @@ _patch_t5x_for_gpu_coords()
 jam_registry: dict[str, JamWorker] = {}
 jam_lock = threading.Lock()
 @contextmanager
 def mrt_overrides(mrt, **kwargs):
     """Temporarily set attributes on MRT if they exist; restore after."""
@@ -687,43 +804,60 @@ def generate(
     loop_weight: float = Form(1.0),
     loudness_mode: str = Form("auto"),
     loudness_headroom_db: float = Form(1.0),
-    guidance_weight: float = Form(5.0),
-    temperature: float = Form(1.1),
-    topk: int = Form(40),
     target_sample_rate: int | None = Form(None),
     intro_bars_to_drop: int = Form(0),          # <— NEW
 ):
-    # Read file
-    data = loop_audio.file.read()
-    if not data:
-        return {"error": "Empty file"}
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        tmp.write(data)
-        tmp_path = tmp.name
-    # Parse styles + weights
-    extra_styles = [s for s in (styles.split(",") if styles else []) if s.strip()]
-    weights = [float(x) for x in style_weights.split(",")] if style_weights else None
-    mrt = get_mrt()  # warm once, in this worker thread
-    # Temporarily override MRT inference knobs for this request
-    with mrt_overrides(mrt,
-                       guidance_weight=guidance_weight,
-                       temperature=temperature,
-                       topk=topk):
-        wav, loud_stats = generate_loop_continuation_with_mrt(
-            mrt,
-            input_wav_path=tmp_path,
-            bpm=bpm,
-            extra_styles=extra_styles,
-            style_weights=weights,
-            bars=bars,
-            beats_per_bar=beats_per_bar,
-            loop_weight=loop_weight,
-            loudness_mode=loudness_mode,
-            loudness_headroom_db=loudness_headroom_db,
-            intro_bars_to_drop=intro_bars_to_drop,   # <— pass through
-        )
     # 1) Figure out the desired SR
     inp_info = sf.info(tmp_path)
@@ -771,9 +905,9 @@ def generate_style(
     beats_per_bar: int = Form(4),
     styles: str = Form("warmup"),
     style_weights: str = Form(""),
-    guidance_weight: float = Form(1.1),
-    temperature: float = Form(1.1),
-    topk: int = Form(40),
     target_sample_rate: int | None = Form(None),
     intro_bars_to_drop: int = Form(0),
 ):
@@ -781,26 +915,42 @@ def generate_style(
     Style-only, bar-aligned generation (no input audio).
     Seeds with 10s of silent context; outputs exactly `bars` at the requested BPM.
     """
-    mrt = get_mrt()
-    # Override sampling knobs just for this request
-    with mrt_overrides(mrt,
-                       guidance_weight=guidance_weight,
-                       temperature=temperature,
-                       topk=topk):
-        wav, _ = generate_style_only_with_mrt(
-            mrt,
-            bpm=bpm,
-            bars=bars,
-            beats_per_bar=beats_per_bar,
-            styles=styles,
-            style_weights=style_weights,
-            intro_bars_to_drop=intro_bars_to_drop,
-        )
-    # Determine target SR (defaults to model SR = 48k)
-    cur_sr = int(mrt.sample_rate)
-    target_sr = int(target_sample_rate or cur_sr)
     x = wav.samples if wav.samples.ndim == 2 else wav.samples[:, None]
     seconds_per_bar = (60.0 / float(bpm)) * int(beats_per_bar)
@@ -849,87 +999,102 @@ def jam_start(
     loudness_mode: str = Form("auto"),
     loudness_headroom_db: float = Form(1.0),
-    guidance_weight: float = Form(1.1),
-    temperature: float = Form(1.1),
-    topk: int = Form(40),
     target_sample_rate: int | None = Form(None),
 ):
-    asset_manager.ensure_assets_loaded(get_mrt())
-    # enforce single active jam per GPU
-    with jam_lock:
-        for sid, w in list(jam_registry.items()):
-            if w.is_alive():
-                raise HTTPException(status_code=429, detail="A jam is already running. Try again later.")
-    # read input + prep context/style (reuse your existing code)
-    data = loop_audio.file.read()
-    if not data: raise HTTPException(status_code=400, detail="Empty file")
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-        tmp.write(data); tmp_path = tmp.name
-    mrt = get_mrt()
-    loop = au.Waveform.from_file(tmp_path).resample(mrt.sample_rate).as_stereo()
-    # build tail context + style vec (tail-biased)
-    codec_fps = float(mrt.codec.frame_rate)
-    ctx_seconds = float(mrt.config.context_length_frames) / codec_fps
-    loop_tail = take_bar_aligned_tail(loop, bpm, beats_per_bar, ctx_seconds)
-    # Parse client style fields (preserves your semantics)
-    text_list = [s.strip() for s in (styles.split(",") if styles else []) if s.strip()]
-    try:
-        tw = [float(x) for x in style_weights.split(",")] if style_weights else []
-    except ValueError:
-        tw = []
-    try:
-        cw = [float(x) for x in centroid_weights.split(",")] if centroid_weights else []
-    except ValueError:
-        cw = []
-    # Compute loop-tail embed once (same as before)
-    loop_tail_embed = mrt.embed_style(loop_tail)
-    # Build final style vector:
-    # - identical to your previous mix when mean==0 and cw is empty
-    # - otherwise includes mean and centroid components (weights auto-normalized)
-    style_vec = build_style_vector(
-        mrt,
-        text_styles=text_list,
-        text_weights=tw,
-        loop_embed=loop_tail_embed,
-        loop_weight=float(loop_weight),
-        mean_weight=float(mean),
-        centroid_weights=cw,
-    ).astype(np.float32, copy=False)
-    # target SR (default input SR)
-    inp_info = sf.info(tmp_path)
-    input_sr = int(inp_info.samplerate)
-    target_sr = int(target_sample_rate or input_sr)
-    params = JamParams(
-        bpm=bpm,
-        beats_per_bar=beats_per_bar,
-        bars_per_chunk=bars_per_chunk,
-        target_sr=target_sr,
-        loudness_mode=loudness_mode,
-        headroom_db=loudness_headroom_db,
-        style_vec=style_vec,
-        ref_loop=loop_tail,                    # For loudness matching
-        combined_loop=loop,                    # NEW: Full loop for context setup
-        guidance_weight=guidance_weight,
-        temperature=temperature,
-        topk=topk
-    )
-    worker = JamWorker(mrt, params)
-    sid = str(uuid.uuid4())
-    with jam_lock:
-        jam_registry[sid] = worker
-    worker.start()
-    return {"session_id": sid}
 @app.get("/jam/next")
 def jam_next(session_id: str):
@@ -938,13 +1103,17 @@ def jam_next(session_id: str):
     This ensures chunks are delivered in order without gaps.
     """
     with jam_lock:
-        worker = jam_registry.get(session_id)
-    if worker is None or not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     # Get the next sequential chunk (this blocks until ready)
     chunk = worker.get_next_chunk()
     if chunk is None:
         raise HTTPException(status_code=408, detail="Chunk not ready within timeout")
@@ -963,12 +1132,16 @@ def jam_consume(session_id: str = Form(...), chunk_index: int = Form(...)):
     This helps the worker manage its buffer and generation flow.
     """
     with jam_lock:
-        worker = jam_registry.get(session_id)
-    if worker is None or not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     worker.mark_chunk_consumed(chunk_index)
     return {"consumed": chunk_index}
@@ -976,16 +1149,22 @@ def jam_consume(session_id: str = Form(...), chunk_index: int = Form(...)):
 @app.post("/jam/stop")
 def jam_stop(session_id: str = Body(..., embed=True)):
     with jam_lock:
-        worker = jam_registry.get(session_id)
-    if worker is None:
         raise HTTPException(status_code=404, detail="Session not found")
     worker.stop()
     worker.join(timeout=5.0)
     if worker.is_alive():
-        # It’s daemon=True, so it won’t block process exit, but report it
         print(f"⚠️ JamWorker {session_id} did not stop within timeout")
     with jam_lock:
         jam_registry.pop(session_id, None)
     return {"stopped": True}
@@ -994,13 +1173,19 @@ def jam_stop(session_id: str = Body(..., embed=True)):
 def jam_stop_all():
     """Force stop all active jam sessions (nuclear option for cleanup)"""
     stopped_sessions = []
     with jam_lock:
-        for session_id, worker in list(jam_registry.items()):
             if worker.is_alive():
                 worker.stop()
                 worker.join(timeout=2.0)
                 stopped_sessions.append(session_id)
             jam_registry.pop(session_id, None)
     return {"stopped_sessions": stopped_sessions, "count": len(stopped_sessions)}
@@ -1024,13 +1209,19 @@ def jam_update(
     mean: Optional[float]            = Form(None),
     centroid_weights: str            = Form(""),
 ):
-    asset_manager.ensure_assets_loaded(get_mrt())
     with jam_lock:
-        worker = jam_registry.get(session_id)
-    if worker is None or not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     # 1) fast knob updates
     if any(v is not None for v in (guidance_weight, temperature, topk)):
         worker.update_knobs(
@@ -1098,8 +1289,12 @@ def jam_update(
 @app.post("/jam/reseed")
 def jam_reseed(session_id: str = Form(...), loop_audio: UploadFile = File(None)):
     with jam_lock:
-        worker = jam_registry.get(session_id)
-    if worker is None or not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     # Option 1: use uploaded new “combined” bounce from the app
@@ -1129,8 +1324,13 @@ def jam_reseed_splice(
     anchor_bars: float = Form(2.0),              # how much of the original to re-inject
     combined_audio: UploadFile = File(None),     # preferred: Swift supplies the current combined mix
 ):
-    worker = jam_registry.get(session_id)
-    if worker is None or not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     # Build a waveform to reseed from
@@ -1160,11 +1360,12 @@ def jam_reseed_splice(
 @app.get("/jam/status")
 def jam_status(session_id: str):
     with jam_lock:
-        worker = jam_registry.get(session_id)
-    if worker is None:
         raise HTTPException(status_code=404, detail="Session not found")
     running = worker.is_alive()
     # Snapshot safely
@@ -1284,8 +1485,12 @@ async def ws_jam(websocket: WebSocket):
                 # attach or create
                 if sid:
                     with jam_lock:
-                        worker = jam_registry.get(sid)
-                    if worker is None or not worker.is_alive():
                         await send_json({"type":"error","error":"Session not found"})
                         continue
                 else:
@@ -1645,6 +1850,91 @@ def read_root():
         """
     return Response(content=html_content, media_type="text/html")
 @app.get("/lil_demo_540p.mp4")
 def demo_video():
     return FileResponse(Path(__file__).parent / "lil_demo_540p.mp4", media_type="video/mp4")

 jam_registry: dict[str, JamWorker] = {}
 jam_lock = threading.Lock()
+# ============================================================================
+# Global Generation Parameters
+# ============================================================================
+class GlobalGenParams:
+    """Thread-safe holder for the global sampling defaults.
+    Stores temperature, topk and guidance_weight. The values are applied
+    when the MRT pool is (re)initialized, so a change only reaches pooled
+    instances after a pool restart."""
+    def __init__(self):
+        # RLock rather than Lock: update() calls get() while holding it.
+        self._lock = threading.RLock()
+        self.temperature = 1.1
+        self.topk = 40
+        self.guidance_weight = 1.1
+    def get(self):
+        """Return a snapshot dict of the three current defaults."""
+        with self._lock:
+            return {
+                'temperature': self.temperature,
+                'topk': self.topk,
+                'guidance_weight': self.guidance_weight
+            }
+    def update(self, temperature=None, topk=None, guidance_weight=None):
+        """Update any subset of the defaults and return the new snapshot.
+        Arguments left as None are not changed. Every supplied value is
+        converted before anything is stored, so a value that fails
+        float()/int() conversion (ValueError/TypeError) leaves all defaults
+        untouched instead of applying a partial update.
+        Update requires MRT pool restart to take effect."""
+        converted = {}
+        if temperature is not None:
+            converted['temperature'] = float(temperature)
+        if topk is not None:
+            converted['topk'] = int(topk)
+        if guidance_weight is not None:
+            converted['guidance_weight'] = float(guidance_weight)
+        with self._lock:
+            for attr_name, attr_value in converted.items():
+                setattr(self, attr_name, attr_value)
+            return self.get()
+_GLOBAL_GEN_PARAMS = GlobalGenParams()
+# ============================================================================
+# MRT Instance Pool (for parallel requests)
+# ============================================================================
+# MRT instances appended by init_mrt_pool(); a slot is identified by its index.
+_MRT_POOL = []
+# Held by get_available_mrt() / release_mrt() while reading or flipping _MRT_AVAILABLE.
+_MRT_POOL_LOCK = threading.Lock()
+# Parallel to _MRT_POOL: True while the slot at the same index is free.
+_MRT_AVAILABLE = []
+# Set to True by ensure_pool_initialized() / reset_mrt_pool() once the pool has been built.
+_POOL_INITIALIZED = False
+# Held around pool construction in both the lazy-init and the reset path.
+_POOL_INIT_LOCK = threading.Lock()
+def init_mrt_pool(pool_size=2):
+    """Build `pool_size` MagentaRT instances from the current global params.
+    Clears _MRT_POOL and _MRT_AVAILABLE in place, then refills them, marking
+    every new slot as available. Takes no lock itself; the callers visible in
+    this file (ensure_pool_initialized, reset_mrt_pool) hold _POOL_INIT_LOCK."""
+    global _MRT_POOL, _MRT_AVAILABLE
+    defaults = _GLOBAL_GEN_PARAMS.get()
+    # The old pool is emptied before any new instance is created.
+    _MRT_POOL.clear()
+    _MRT_AVAILABLE.clear()
+    for i in range(pool_size):
+        ckpt_dir = CheckpointManager.resolve_checkpoint_dir()
+        # Model size comes from the MRT_SIZE env var (default "large");
+        # guidance_weight is passed through the constructor.
+        mrt = system.MagentaRT(
+            tag=os.getenv("MRT_SIZE", "large"),
+            guidance_weight=defaults['guidance_weight'],
+            device="gpu",
+            checkpoint_dir=ckpt_dir,
+            lazy=True
+        )
+        # Set other params
+        # (temperature and topk are assigned as attributes after construction)
+        mrt.temperature = defaults['temperature']
+        mrt.topk = defaults['topk']
+        # Load assets if configured
+        # Only attempted while the asset manager has neither a mean embed nor
+        # centroids; the repo is MRT_ASSETS_REPO, falling back to MRT_CKPT_REPO.
+        if asset_manager.mean_embed is None and asset_manager.centroids is None:
+            repo = os.getenv("MRT_ASSETS_REPO") or os.getenv("MRT_CKPT_REPO")
+            if repo:
+                asset_manager.load_finetune_assets_from_hf(repo, None)
+                _sync_assets_globals_from_manager()
+        _MRT_POOL.append(mrt)
+        _MRT_AVAILABLE.append(True)
+def ensure_pool_initialized():
+    """Build the MRT pool on first use; later calls return immediately."""
+    global _POOL_INITIALIZED
+    if _POOL_INITIALIZED:
+        return
+    with _POOL_INIT_LOCK:
+        # Re-check under the lock: another thread may have built the pool
+        # while this one was waiting for it.
+        if _POOL_INITIALIZED:
+            return
+        init_mrt_pool(pool_size=2)
+        _POOL_INITIALIZED = True
+def get_available_mrt():
+    """Claim the first free pool slot.
+    Returns (index, mrt) for the claimed slot, or (None, None) when every
+    slot is busy."""
+    with _MRT_POOL_LOCK:
+        slot = next(
+            (idx for idx, is_free in enumerate(_MRT_AVAILABLE) if is_free),
+            None,
+        )
+        if slot is None:
+            return (None, None)
+        # Mark the slot busy before handing the instance out.
+        _MRT_AVAILABLE[slot] = False
+        return (slot, _MRT_POOL[slot])
+def release_mrt(index: int):
+    """Mark pool slot `index` as free again; out-of-range indices are ignored."""
+    with _MRT_POOL_LOCK:
+        if index < 0 or index >= len(_MRT_AVAILABLE):
+            return
+        _MRT_AVAILABLE[index] = True
+def reset_mrt_pool():
+    """Rebuild the pool from the current global params.
+    Callers are expected to have stopped all sessions first; both pool locks
+    are held for the whole rebuild."""
+    global _POOL_INITIALIZED
+    # Same acquisition order as before: init lock first, then pool lock.
+    with _POOL_INIT_LOCK, _MRT_POOL_LOCK:
+        init_mrt_pool(pool_size=2)
+        _POOL_INITIALIZED = True
+# ============================================================================
+# Legacy single MRT support (for backward compatibility)
+# ============================================================================
+_MRT = None
+_MRT_LOCK = threading.Lock()
 @contextmanager
 def mrt_overrides(mrt, **kwargs):
     """Temporarily set attributes on MRT if they exist; restore after."""
     loop_weight: float = Form(1.0),
     loudness_mode: str = Form("auto"),
     loudness_headroom_db: float = Form(1.0),
+    guidance_weight: Optional[float] = Form(None),
+    temperature: Optional[float] = Form(None),
+    topk: Optional[int] = Form(None),
     target_sample_rate: int | None = Form(None),
     intro_bars_to_drop: int = Form(0),          # <— NEW
 ):
+    ensure_pool_initialized()
+    # Get available MRT from pool
+    mrt_index, mrt = get_available_mrt()
+    if mrt is None:
+        raise HTTPException(status_code=503, detail="All slots busy, retry shortly")
+    try:
+        # Apply global defaults if not specified
+        defaults = _GLOBAL_GEN_PARAMS.get()
+        guidance_weight = guidance_weight if guidance_weight is not None else defaults['guidance_weight']
+        temperature = temperature if temperature is not None else defaults['temperature']
+        topk = topk if topk is not None else defaults['topk']
+        # Read file
+        data = loop_audio.file.read()
+        if not data:
+            return {"error": "Empty file"}
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+            tmp.write(data)
+            tmp_path = tmp.name
+        # Parse styles + weights
+        extra_styles = [s for s in (styles.split(",") if styles else []) if s.strip()]
+        weights = [float(x) for x in style_weights.split(",")] if style_weights else None
+        # Temporarily override MRT inference knobs for this request
+        with mrt_overrides(mrt,
+                           guidance_weight=guidance_weight,
+                           temperature=temperature,
+                           topk=topk):
+            wav, loud_stats = generate_loop_continuation_with_mrt(
+                mrt,
+                input_wav_path=tmp_path,
+                bpm=bpm,
+                extra_styles=extra_styles,
+                style_weights=weights,
+                bars=bars,
+                beats_per_bar=beats_per_bar,
+                loop_weight=loop_weight,
+                loudness_mode=loudness_mode,
+                loudness_headroom_db=loudness_headroom_db,
+                intro_bars_to_drop=intro_bars_to_drop,   # <— pass through
+            )
+    finally:
+        # Always release MRT back to pool
+        release_mrt(mrt_index)
     # 1) Figure out the desired SR
     inp_info = sf.info(tmp_path)
     beats_per_bar: int = Form(4),
     styles: str = Form("warmup"),
     style_weights: str = Form(""),
+    guidance_weight: Optional[float] = Form(None),
+    temperature: Optional[float] = Form(None),
+    topk: Optional[int] = Form(None),
     target_sample_rate: int | None = Form(None),
     intro_bars_to_drop: int = Form(0),
 ):
     Style-only, bar-aligned generation (no input audio).
     Seeds with 10s of silent context; outputs exactly `bars` at the requested BPM.
     """
+    ensure_pool_initialized()
+    # Get available MRT from pool
+    mrt_index, mrt = get_available_mrt()
+    if mrt is None:
+        raise HTTPException(status_code=503, detail="All slots busy, retry shortly")
+    try:
+        # Apply global defaults if not specified
+        defaults = _GLOBAL_GEN_PARAMS.get()
+        guidance_weight = guidance_weight if guidance_weight is not None else defaults['guidance_weight']
+        temperature = temperature if temperature is not None else defaults['temperature']
+        topk = topk if topk is not None else defaults['topk']
+        # Override sampling knobs just for this request
+        with mrt_overrides(mrt,
+                           guidance_weight=guidance_weight,
+                           temperature=temperature,
+                           topk=topk):
+            wav, _ = generate_style_only_with_mrt(
+                mrt,
+                bpm=bpm,
+                bars=bars,
+                beats_per_bar=beats_per_bar,
+                styles=styles,
+                style_weights=style_weights,
+                intro_bars_to_drop=intro_bars_to_drop,
+            )
+        # Determine target SR (defaults to model SR = 48k)
+        cur_sr = int(mrt.sample_rate)
+        target_sr = int(target_sample_rate or cur_sr)
+    finally:
+        # Always release MRT back to pool
+        release_mrt(mrt_index)
     x = wav.samples if wav.samples.ndim == 2 else wav.samples[:, None]
     seconds_per_bar = (60.0 / float(bpm)) * int(beats_per_bar)
     loudness_mode: str = Form("auto"),
     loudness_headroom_db: float = Form(1.0),
+    guidance_weight: Optional[float] = Form(None),
+    temperature: Optional[float] = Form(None),
+    topk: Optional[int] = Form(None),
     target_sample_rate: int | None = Form(None),
 ):
+    ensure_pool_initialized()
+    # Get available MRT from pool
+    mrt_index, mrt = get_available_mrt()
+    if mrt is None:
+        raise HTTPException(status_code=429, detail="All slots busy (max 2 concurrent JAM sessions)")
+    try:
+        asset_manager.ensure_assets_loaded(mrt)
+        # Apply global defaults if not specified
+        defaults = _GLOBAL_GEN_PARAMS.get()
+        guidance_weight = guidance_weight if guidance_weight is not None else defaults['guidance_weight']
+        temperature = temperature if temperature is not None else defaults['temperature']
+        topk = topk if topk is not None else defaults['topk']
+        # read input + prep context/style (reuse your existing code)
+        data = loop_audio.file.read()
+        if not data: raise HTTPException(status_code=400, detail="Empty file")
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
+            tmp.write(data); tmp_path = tmp.name
+        loop = au.Waveform.from_file(tmp_path).resample(mrt.sample_rate).as_stereo()
+        # build tail context + style vec (tail-biased)
+        codec_fps = float(mrt.codec.frame_rate)
+        ctx_seconds = float(mrt.config.context_length_frames) / codec_fps
+        loop_tail = take_bar_aligned_tail(loop, bpm, beats_per_bar, ctx_seconds)
+        # Parse client style fields (preserves your semantics)
+        text_list = [s.strip() for s in (styles.split(",") if styles else []) if s.strip()]
+        try:
+            tw = [float(x) for x in style_weights.split(",")] if style_weights else []
+        except ValueError:
+            tw = []
+        try:
+            cw = [float(x) for x in centroid_weights.split(",")] if centroid_weights else []
+        except ValueError:
+            cw = []
+        # Compute loop-tail embed once (same as before)
+        loop_tail_embed = mrt.embed_style(loop_tail)
+        # Build final style vector:
+        # - identical to your previous mix when mean==0 and cw is empty
+        # - otherwise includes mean and centroid components (weights auto-normalized)
+        style_vec = build_style_vector(
+            mrt,
+            text_styles=text_list,
+            text_weights=tw,
+            loop_embed=loop_tail_embed,
+            loop_weight=float(loop_weight),
+            mean_weight=float(mean),
+            centroid_weights=cw,
+        ).astype(np.float32, copy=False)
+        # target SR (default input SR)
+        inp_info = sf.info(tmp_path)
+        input_sr = int(inp_info.samplerate)
+        target_sr = int(target_sample_rate or input_sr)
+        params = JamParams(
+            bpm=bpm,
+            beats_per_bar=beats_per_bar,
+            bars_per_chunk=bars_per_chunk,
+            target_sr=target_sr,
+            loudness_mode=loudness_mode,
+            headroom_db=loudness_headroom_db,
+            style_vec=style_vec,
+            ref_loop=loop_tail,                    # For loudness matching
+            combined_loop=loop,                    # NEW: Full loop for context setup
+            guidance_weight=guidance_weight,
+            temperature=temperature,
+            topk=topk
+        )
+        worker = JamWorker(mrt, params)
+        sid = str(uuid.uuid4())
+        with jam_lock:
+            jam_registry[sid] = {
+                'worker': worker,
+                'mrt_index': mrt_index
+            }
+        worker.start()
+        return {"session_id": sid, "slot": mrt_index}
+    except Exception as e:
+        # Release MRT back to pool on failure
+        release_mrt(mrt_index)
+        raise
 @app.get("/jam/next")
 def jam_next(session_id: str):
     This ensures chunks are delivered in order without gaps.
     """
     with jam_lock:
+        session_info = jam_registry.get(session_id)
+    if session_info is None:
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker = session_info['worker']
+    if not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     # Get the next sequential chunk (this blocks until ready)
     chunk = worker.get_next_chunk()
     if chunk is None:
         raise HTTPException(status_code=408, detail="Chunk not ready within timeout")
     This helps the worker manage its buffer and generation flow.
     """
     with jam_lock:
+        session_info = jam_registry.get(session_id)
+    if session_info is None:
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker = session_info['worker']
+    if not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     worker.mark_chunk_consumed(chunk_index)
     return {"consumed": chunk_index}
 @app.post("/jam/stop")
 def jam_stop(session_id: str = Body(..., embed=True)):
+    """Stop a jam session and return its MRT slot to the pool.
+    Raises 404 when the session id is unknown or is already being stopped
+    by another request."""
     with jam_lock:
+        # Remove the entry while holding the lock so exactly one caller owns
+        # the teardown. With a plain get(), two concurrent stops for the same
+        # session could both call release_mrt(), and the second call could
+        # free a slot that a new session had already claimed.
+        session_info = jam_registry.pop(session_id, None)
+    if session_info is None:
         raise HTTPException(status_code=404, detail="Session not found")
+    worker = session_info['worker']
+    mrt_index = session_info['mrt_index']
     worker.stop()
     worker.join(timeout=5.0)
     if worker.is_alive():
+        # It's daemon=True, so it won't block process exit, but report it
         print(f"⚠️ JamWorker {session_id} did not stop within timeout")
+    # Release MRT back to pool
+    # NOTE(review): the slot is released even when the worker is still alive
+    # after the timeout, so it may be handed out while still in use - confirm
+    # this is acceptable.
+    release_mrt(mrt_index)
     return {"stopped": True}
 def jam_stop_all():
     """Force stop all active jam sessions (nuclear option for cleanup)"""
     stopped_sessions = []
     with jam_lock:
+        for session_id, session_info in list(jam_registry.items()):
+            worker = session_info['worker']
+            mrt_index = session_info['mrt_index']
             if worker.is_alive():
                 worker.stop()
                 worker.join(timeout=2.0)
                 stopped_sessions.append(session_id)
+            # Release MRT back to pool
+            release_mrt(mrt_index)
             jam_registry.pop(session_id, None)
     return {"stopped_sessions": stopped_sessions, "count": len(stopped_sessions)}
     mean: Optional[float]            = Form(None),
     centroid_weights: str            = Form(""),
 ):
     with jam_lock:
+        session_info = jam_registry.get(session_id)
+    if session_info is None:
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker = session_info['worker']
+    if not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
+    # Get MRT from the worker's assigned instance
+    mrt = _MRT_POOL[session_info['mrt_index']]
+    asset_manager.ensure_assets_loaded(mrt)
     # 1) fast knob updates
     if any(v is not None for v in (guidance_weight, temperature, topk)):
         worker.update_knobs(
 @app.post("/jam/reseed")
 def jam_reseed(session_id: str = Form(...), loop_audio: UploadFile = File(None)):
     with jam_lock:
+        session_info = jam_registry.get(session_id)
+    if session_info is None:
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker = session_info['worker']
+    if not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     # Option 1: use uploaded new “combined” bounce from the app
     anchor_bars: float = Form(2.0),              # how much of the original to re-inject
     combined_audio: UploadFile = File(None),     # preferred: Swift supplies the current combined mix
 ):
+    with jam_lock:
+        session_info = jam_registry.get(session_id)
+    if session_info is None:
+        raise HTTPException(status_code=404, detail="Session not found")
+    worker = session_info['worker']
+    if not worker.is_alive():
         raise HTTPException(status_code=404, detail="Session not found")
     # Build a waveform to reseed from
 @app.get("/jam/status")
 def jam_status(session_id: str):
     with jam_lock:
+        session_info = jam_registry.get(session_id)
+    if session_info is None:
         raise HTTPException(status_code=404, detail="Session not found")
+    worker = session_info['worker']
     running = worker.is_alive()
     # Snapshot safely
                 # attach or create
                 if sid:
                     with jam_lock:
+                        session_info = jam_registry.get(sid)
+                    if session_info is None:
+                        await send_json({"type":"error","error":"Session not found"})
+                        continue
+                    worker = session_info['worker']
+                    if not worker.is_alive():
                         await send_json({"type":"error","error":"Session not found"})
                         continue
                 else:
         """
     return Response(content=html_content, media_type="text/html")
+# ============================================================================
+# Global Generation Configuration Endpoints
+# ============================================================================
+@app.get("/config/generation")
+async def get_generation_config():
+    """
+    Return the current global defaults for temperature, topk and
+    guidance_weight as a dict.
+    """
+    current_defaults = _GLOBAL_GEN_PARAMS.get()
+    return current_defaults
+@app.put("/config/generation")
+async def update_generation_config(
+    temperature: Optional[float] = None,
+    topk: Optional[int] = None,
+    guidance_weight: Optional[float] = None
+):
+    """
+    Store new global defaults for temperature, topk and guidance_weight.
+    Parameters left unset keep their current value. The stored values only
+    reach the MRT pool after POST /config/generation/apply; explicit
+    per-request parameters still take precedence over these defaults.
+    """
+    new_defaults = _GLOBAL_GEN_PARAMS.update(
+        temperature=temperature,
+        topk=topk,
+        guidance_weight=guidance_weight,
+    )
+    response = {"updated": new_defaults}
+    response["note"] = "Changes require pool restart. Call POST /config/generation/apply to apply."
+    return response
+@app.post("/config/generation/apply")
+async def apply_generation_config():
+    """
+    Rebuild the MRT pool so it picks up the current global parameters.
+    Responds with 409 while any registered JAM session still has a live
+    worker; otherwise the pool is recreated and the applied parameters are
+    returned.
+    """
+    with jam_lock:
+        # Collect the ids of sessions whose worker thread is still running.
+        active_sessions = [
+            sid
+            for sid, session_info in jam_registry.items()
+            if session_info['worker'].is_alive()
+        ]
+        if active_sessions:
+            raise HTTPException(
+                status_code=409,
+                detail=f"Cannot restart: {len(active_sessions)} active JAM session(s). Stop them first via /jam/stop"
+            )
+    # No live session was found: rebuild the pool.
+    reset_mrt_pool()
+    result = {"status": "applied"}
+    result["params"] = _GLOBAL_GEN_PARAMS.get()
+    result["message"] = "MRT pool restarted with new parameters"
+    return result
+@app.get("/config/generation/pool_status")
+async def get_pool_status():
+    """Report pool size, per-slot availability, init flag and global params."""
+    with _MRT_POOL_LOCK:
+        # Copy the flags so the response is a snapshot, not the live list.
+        availability = list(_MRT_AVAILABLE)
+        status = {
+            "pool_size": len(_MRT_POOL),
+            "available": availability,
+            "initialized": _POOL_INITIALIZED,
+            "params": _GLOBAL_GEN_PARAMS.get(),
+        }
+    return status
+# ============================================================================
+# Static Files
+# ============================================================================
 @app.get("/lil_demo_540p.mp4")
 def demo_video():
     return FileResponse(Path(__file__).parent / "lil_demo_540p.mp4", media_type="video/mp4")