# --- Merge Configuration: Linear Ramp with Mild Head/Input Blend ---
#
# SLERP merge of the two models listed below. Per the mergekit documentation,
# slerp returns `base_model` at t=0 and the other model at t=1, so here `t` is
# the share of Sunfall blended into DeepSeek-R1-Distill.
models:
  - model: Vdr1/L3-8B-Sunfall-v0.4-Lunar-Stheno
  - model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
merge_method: slerp
base_model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
dtype: bfloat16

# t-schedule over network depth: a small Sunfall share (0.2) at the input,
# a linear ramp up to 0.8, a short plateau at 0.8 in the middle, then a linear
# ramp back down to 0.2, which is held through the output end.
parameters:
  t: [0.2, 0.4, 0.6, 0.8, 0.8, 0.6, 0.4, 0.2, 0.2]

# Interpretation by depth (input -> output):
#
#   entry:  0    1    2    3    4    5    6    7    8
#   t:      0.2  0.4  0.6  0.8  0.8  0.6  0.4  0.2  0.2
#           |--- ramp up ---|plateau|-- ramp down --|hold
#
# NOTE(review): mergekit documents a list value as a gradient interpolated
# across the layers, so the nine entries are anchor points along the depth
# of the network rather than nine literal blocks. The first and last entries
# are presumed to govern the embedding and LM head respectively -- confirm
# against the mergekit version in use.
#
# Notes:
# 1. A slightly nonzero t=0.2 at both the embedding end and the LM-head end
#    is intended to keep scales aligned and limit representational drift
#    where Sunfall's mid-network contribution becomes stronger.
# 2. The linear ramp (0.2 -> 0.8) avoids a sudden "hard switch" between the
#    two models, which can cause instability or hallucination bubbles
#    concentrated in a single block.
# 3. The t=0.8 plateau (the two central entries) emphasizes Sunfall's style
#    without giving it 100% anywhere; this is expected to blend more
#    coherently than a full t=1.0 peak.
# 4. You can experiment with extending or contracting the plateau (e.g. 3 or
#    5 entries at 0.8 instead of 2) based on your qualitative outputs.
#
# The base model (DeepSeek-R1-Distill) stays dominant for input & output
# because t=0.2 at both ends. (An earlier wording of this note said "Hermes
# for input & output"; no Hermes model is part of this merge.)