# web-consultenv / test_integration.py
# aashish1904 · v2.2 · 4d62aeb
"""ConsultEnv Integration Tests — sub-task decomposition (3 calls per module)."""
import sys, os, unittest
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from server.consultenv_environment import ConsultEnvEnvironment as Env
from models import ConsultAction
# ─── helpers ──────────────────────────────────────────────────────────────────
def run_module(env, module, params_0=None, params_1=None, params_2=None):
    """Step ``module`` through its sub-tasks and return the last observation.

    ``params_0``/``params_1``/``params_2`` are the parameter dicts for the
    first, second and third step; a ``None`` (or empty) value is sent as ``{}``.

    For the ``"secondary"`` module, a missing ``params_0`` is replaced by
    ``{"method": "in_house"}``. If the observation from the first secondary
    step already reports a progress of 3 or more, that observation is returned
    and the remaining two steps are not issued.
    """
    is_secondary = module == "secondary"
    if is_secondary and params_0 is None:
        params_0 = {"method": "in_house"}

    def _advance(params):
        # One env step for this module; falsy params collapse to a fresh {}.
        return env.step(ConsultAction(action_type=module, parameters=params or {}))

    first = _advance(params_0)
    # Fast path (vendor/offshore) completes secondary in 1 step — skip ST1/ST2
    if is_secondary and first.module_progress.get("secondary", 0) >= 3:
        return first

    _advance(params_1)
    return _advance(params_2)
# ─── team costs ───────────────────────────────────────────────────────────────
class TestTeamCosts(unittest.TestCase):
    """Staffing checks on ``benchmarking_study``: reported roles and total cost."""

    SCENARIO = "benchmarking_study"

    def setUp(self):
        self.env = Env()

    def _staff(self, **roles):
        """Reset the scenario, staff ``roles`` and return that observation."""
        self.env.reset(self.SCENARIO)
        staffing = ConsultAction(action_type="staff_team", parameters=roles)
        return self.env.step(staffing)

    def test_minimal_team_cost(self):
        """Partner + manager staffing yields exactly those two roles."""
        result = self._staff(partner=True, manager=True)
        for expected_role in ("Partner", "Manager"):
            self.assertIn(expected_role, result.team.roles)
        self.assertEqual(len(result.team.roles), 2)

    def test_full_team_cost(self):
        """Staffing all seven roles reports seven roles and a positive cost."""
        result = self._staff(
            partner=True, manager=True,
            consultant=True, assoc_consultant=True, associate=True,
            offshore_analyst=True, industry_expert=True,
        )
        self.assertEqual(len(result.team.roles), 7)
        self.assertGreater(result.team.total_cost, 0)

    def test_team_cost_increases_with_roles(self):
        """Adding a consultant to a three-role team raises the total cost."""
        three_roles = dict(partner=True, manager=True, associate=True)
        smaller_cost = self._staff(**three_roles).team.total_cost
        larger_team = self._staff(**three_roles, consultant=True)
        self.assertGreater(larger_team.team.total_cost, smaller_cost)
# ─── module execution ─────────────────────────────────────────────────────────
class TestModuleExecution(unittest.TestCase):
    """Sub-task level checks for the secondary and benchmarking modules."""

    TEAM = (("partner", True), ("manager", True), ("associate", True))

    def setUp(self):
        self.env = Env()
        self._start("benchmarking_study")

    def _start(self, scenario):
        """Reset to ``scenario`` and staff the partner/manager/associate team."""
        self.env.reset(scenario)
        self.env.step(ConsultAction(action_type="staff_team", parameters=dict(self.TEAM)))

    def _do(self, action_type, **params):
        """Issue one step of ``action_type`` with ``params`` and return the observation."""
        return self.env.step(ConsultAction(action_type=action_type, parameters=params))

    def test_secondary_sub_task_0(self):
        """First secondary step produces a ``research_scoping`` output with quality > 0."""
        result = self._do("secondary")
        self.assertIsNotNone(result.latest_output)
        self.assertEqual(result.latest_output.sub_task, "research_scoping")
        self.assertGreater(result.latest_output.quality, 0)

    def test_secondary_ibisworld_at_sub_task_1(self):
        """Second secondary step with ibisworld is ``desk_research`` with quality > 0.5."""
        self._do("secondary")
        result = self._do("secondary", data_source="ibisworld")
        self.assertEqual(result.latest_output.sub_task, "desk_research")
        self.assertGreater(result.latest_output.quality, 0.5)

    def test_secondary_bloomberg_at_sub_task_1(self):
        """On the due-diligence scenario, bloomberg gives quality > 0.6 and cost > 10000."""
        self._start("commercial_due_diligence")
        self._do("secondary")
        result = self._do("secondary", data_source="bloomberg")
        self.assertGreater(result.latest_output.quality, 0.6)
        self.assertGreater(result.latest_output.external_cost, 10000)

    def test_benchmarking_after_secondary_complete(self):
        """After secondary is run, the first benchmarking step is ``peer_set_definition``."""
        run_module(self.env, "secondary")
        result = self._do("benchmarking")
        self.assertIsNotNone(result.latest_output)
        self.assertEqual(result.latest_output.sub_task, "peer_set_definition")

    def test_qc_boost_at_sub_task_2(self):
        """Third secondary step scores higher with ``qc=True`` than with ``qc=False``."""
        quality_by_qc = {}
        # Same order as before: the qc=False episode runs first, then qc=True.
        for qc_flag in (False, True):
            self._start("benchmarking_study")
            self._do("secondary")
            self._do("secondary")
            quality_by_qc[qc_flag] = self._do("secondary", qc=qc_flag).latest_output.quality
        self.assertGreater(quality_by_qc[True], quality_by_qc[False])
# ─── speed multipliers ────────────────────────────────────────────────────────
class TestSpeedMultipliers(unittest.TestCase):
    """Checks on days consumed by a step, plus a workshops smoke test."""

    def setUp(self):
        self.env = Env()

    def _staff(self, scenario, **roles):
        """Reset to ``scenario`` and staff the given roles."""
        self.env.reset(scenario)
        self.env.step(ConsultAction(action_type="staff_team", parameters=roles))

    def _first_secondary_days(self, **roles):
        """Days consumed by the first secondary step on ``benchmarking_study``."""
        self._staff("benchmarking_study", **roles)
        result = self.env.step(ConsultAction(action_type="secondary", parameters={}))
        return result.latest_output.days_consumed

    def test_associate_speeds_secondary(self):
        """The first secondary step takes fewer days when an associate is staffed."""
        days_without = self._first_secondary_days(partner=True, manager=True)
        days_with = self._first_secondary_days(partner=True, manager=True, associate=True)
        self.assertLess(days_with, days_without)

    def test_workshop_isolation(self):
        """After six modules on ``ops_transformation``, a workshops step yields an output."""
        self._staff(
            "ops_transformation",
            partner=True, manager=True, consultant=True,
            assoc_consultant=True, associate=True,
        )
        earlier_modules = (
            "secondary", "interviews", "benchmarking",
            "data_modelling", "insight_gen", "presentation",
        )
        for name in earlier_modules:
            run_module(self.env, name)
        result = self.env.step(ConsultAction(action_type="workshops", parameters={}))
        self.assertIsNotNone(result.latest_output)
# ─── quality boosts ───────────────────────────────────────────────────────────
class TestQualityBoosts(unittest.TestCase):
    """Compares first-step benchmarking quality across team compositions."""

    def setUp(self):
        self.env = Env()

    def _first_benchmarking_quality(self, **roles):
        """Staff ``roles``, run secondary, and return the first benchmarking step's quality."""
        self.env.reset("benchmarking_study")
        self.env.step(ConsultAction(action_type="staff_team", parameters=roles))
        run_module(self.env, "secondary")
        result = self.env.step(ConsultAction(action_type="benchmarking", parameters={}))
        return result.latest_output.quality

    def test_associate_boosts_benchmarking(self):
        """Quality is higher with an associate on the team than without one."""
        baseline = self._first_benchmarking_quality(partner=True, manager=True)
        boosted = self._first_benchmarking_quality(partner=True, manager=True, associate=True)
        self.assertGreater(boosted, baseline)
# ─── cascade ──────────────────────────────────────────────────────────────────
class TestCascade(unittest.TestCase):
    """Smoke checks that steps after earlier work still produce an output."""

    def setUp(self):
        self.env = Env()

    def _begin(self):
        """Reset ``benchmarking_study`` and staff partner, manager and associate."""
        self.env.reset("benchmarking_study")
        roles = {"partner": True, "manager": True, "associate": True}
        self.env.step(ConsultAction(action_type="staff_team", parameters=roles))

    def _step(self, action_type):
        """Issue one parameterless step of ``action_type``."""
        return self.env.step(ConsultAction(action_type=action_type, parameters={}))

    def test_cross_module_cascade_affects_downstream(self):
        """Benchmarking yields an output after secondary was run with bloomberg."""
        self._begin()
        run_module(self.env, "secondary", params_1={"data_source": "bloomberg"})
        downstream = self._step("benchmarking")
        self.assertIsNotNone(downstream.latest_output)

    def test_within_module_cascade_propagates(self):
        """The second secondary step yields an output with quality above zero."""
        self._begin()
        self._step("secondary")
        second = self._step("secondary")
        self.assertIsNotNone(second.latest_output)
        self.assertGreater(second.latest_output.quality, 0)
# ─── workshops ────────────────────────────────────────────────────────────────
class TestWorkshops(unittest.TestCase):
    """Compares the quality of the second workshops step across configurations."""

    PRE_WORKSHOP_MODULES = (
        "secondary", "interviews", "benchmarking",
        "data_modelling", "insight_gen", "presentation",
    )

    def setUp(self):
        self.env = Env()

    def _run_to_workshops_st1(self, team_params, ws_st1_params):
        """Play ``ops_transformation`` up to the second workshops step and return it.

        ``team_params`` is passed to ``staff_team``; ``ws_st1_params`` is passed
        to the second workshops step. The first workshops step gets ``{}``.
        """
        env = self.env
        env.reset("ops_transformation")
        env.step(ConsultAction(action_type="staff_team", parameters=team_params))
        for name in self.PRE_WORKSHOP_MODULES:
            run_module(env, name)
        env.step(ConsultAction(action_type="workshops", parameters={}))
        second_step = ConsultAction(action_type="workshops", parameters=ws_st1_params)
        return env.step(second_step)

    def test_coach_boosts_workshop(self):
        """``facilitator="agile_coach"`` gives a higher quality than no facilitator."""
        plain = self._run_to_workshops_st1({}, {}).latest_output.quality
        coached = self._run_to_workshops_st1({}, {"facilitator": "agile_coach"})
        self.assertGreater(coached.latest_output.quality, plain)

    def test_expert_boosts_workshop(self):
        """Staffing ``industry_expert`` gives a higher quality than an empty team."""
        plain = self._run_to_workshops_st1({}, {}).latest_output.quality
        with_expert = self._run_to_workshops_st1({"industry_expert": True}, {})
        self.assertGreater(with_expert.latest_output.quality, plain)
# ─── discovery ────────────────────────────────────────────────────────────────
class TestDiscovery(unittest.TestCase):
    """Checks ``discovery_found`` after the interviews module on ``cost_optimization``."""

    def setUp(self):
        self.env = Env()

    def _finish_interviews(self, interview_count, senior_ratio):
        """Run secondary, then three interview steps; return the last observation.

        Only the second interview step carries parameters
        (``interview_count`` and ``senior_ratio``).
        """
        env = self.env
        env.reset("cost_optimization")
        roles = {"partner": True, "manager": True, "associate": True}
        env.step(ConsultAction(action_type="staff_team", parameters=roles))
        run_module(env, "secondary")
        env.step(ConsultAction(action_type="interviews", parameters={}))
        design = {"interview_count": interview_count, "senior_ratio": senior_ratio}
        env.step(ConsultAction(action_type="interviews", parameters=design))
        return env.step(ConsultAction(action_type="interviews", parameters={}))

    def test_discovery_with_interviews(self):
        """Eight interviews at a 0.75 senior ratio report a discovery."""
        outcome = self._finish_interviews(8, 0.75)
        self.assertTrue(outcome.discovery_found)

    def test_no_discovery_low_seniors(self):
        """Four interviews at a 0.25 senior ratio report no discovery."""
        outcome = self._finish_interviews(4, 0.25)
        self.assertFalse(outcome.discovery_found)
# ─── sequencing ───────────────────────────────────────────────────────────────
class TestSequencing(unittest.TestCase):
    """Checks the reward signal for the first module chosen after staffing."""

    def setUp(self):
        self.env = Env()

    def _first_module_step(self, module):
        """Staff the standard team on ``benchmarking_study`` and step ``module`` once."""
        self.env.reset("benchmarking_study")
        roles = {"partner": True, "manager": True, "associate": True}
        self.env.step(ConsultAction(action_type="staff_team", parameters=roles))
        return self.env.step(ConsultAction(action_type=module, parameters={}))

    def test_correct_order_rewarded(self):
        """Starting with secondary earns a positive step reward."""
        result = self._first_module_step("secondary")
        self.assertGreater(result.reward, 0)

    def test_dep_violation_penalizes(self):
        """Starting with benchmarking flags ``dependency_violation`` in the last history entry."""
        result = self._first_module_step("benchmarking")
        last_entry = result.pipeline_history[-1]
        flagged = last_entry.reward_breakdown.get("dependency_violation", False)
        self.assertTrue(flagged)
# ─── full episodes ────────────────────────────────────────────────────────────
class TestFullEpisodes(unittest.TestCase):
    """End-to-end episodes: each must finish and clear a total-reward floor."""

    def setUp(self):
        self.env = Env()

    def _play(self, scenario, team, plan):
        """Reset, staff ``team``, run every ``(module, kwargs)`` in ``plan`` in order.

        ``kwargs`` are forwarded to ``run_module`` (``params_1`` / ``params_2``).
        Returns the observation from the last module in the plan.
        """
        self.env.reset(scenario)
        self.env.step(ConsultAction(action_type="staff_team", parameters=team))
        final = None
        for module, kwargs in plan:
            final = run_module(self.env, module, **kwargs)
        return final

    def _assert_finished_above(self, obs, floor):
        """Assert the episode is done and ``total_reward`` exceeds ``floor``."""
        self.assertTrue(obs.done)
        self.assertGreater(obs.total_reward, floor)

    def test_benchmarking_easy(self):
        final = self._play(
            "benchmarking_study",
            {"partner": True, "manager": True, "associate": True},
            [
                ("secondary", {"params_1": {"data_source": "ibisworld"}, "params_2": {"qc": True}}),
                ("benchmarking", {}),
                ("insight_gen", {}),
                ("presentation", {}),
            ],
        )
        self._assert_finished_above(final, 0.5)

    def test_cost_optimization_medium(self):
        final = self._play(
            "cost_optimization",
            {"partner": True, "manager": True, "assoc_consultant": True, "associate": True},
            [
                ("secondary", {"params_1": {"data_source": "ibisworld"}}),
                ("interviews", {
                    "params_1": {"interview_count": 8, "senior_ratio": 0.75},
                    "params_2": {"qc": True},
                }),
                ("benchmarking", {}),
                ("data_modelling", {"params_1": {"tool": "alteryx"}}),
                ("insight_gen", {"params_1": {"insight_method": "ai_assisted"}}),
                ("presentation", {}),
            ],
        )
        self._assert_finished_above(final, 0.3)

    def test_ops_hard(self):
        final = self._play(
            "ops_transformation",
            {"partner": True, "manager": True, "assoc_consultant": True, "associate": True},
            [
                ("secondary", {"params_1": {"data_source": "ibisworld"}}),
                ("interviews", {
                    "params_1": {"interview_count": 8, "senior_ratio": 0.5},
                    "params_2": {"qc": True},
                }),
                ("benchmarking", {}),
                ("data_modelling", {}),
                ("insight_gen", {}),
                ("presentation", {}),
                ("workshops", {
                    "params_1": {"facilitator": "agile_coach"},
                    "params_2": {"qc": True},
                }),
            ],
        )
        self._assert_finished_above(final, 0)

    def test_cdd_expert(self):
        final = self._play(
            "commercial_due_diligence",
            {
                "partner": True, "manager": True, "industry_expert": True,
                "consultant": True, "assoc_consultant": True, "associate": True,
            },
            [
                ("secondary", {"params_1": {"data_source": "bloomberg"}, "params_2": {"qc": True}}),
                ("interviews", {
                    "params_1": {"interview_count": 8, "senior_ratio": 0.5},
                    "params_2": {"qc": True},
                }),
                ("benchmarking", {}),
                ("data_modelling", {}),
                ("insight_gen", {}),
                ("presentation", {}),
                ("workshops", {
                    "params_1": {"facilitator": "agile_coach"},
                    "params_2": {"qc": True},
                }),
            ],
        )
        self._assert_finished_above(final, 0)
# ─── determinism ──────────────────────────────────────────────────────────────
class TestDeterminism(unittest.TestCase):
    """Replaying an identical action sequence must give the same total reward."""

    @staticmethod
    def _episode_total(env):
        """Play the fixed ``benchmarking_study`` episode on ``env``; return its total reward."""
        env.reset("benchmarking_study")
        roles = {"partner": True, "manager": True, "associate": True}
        env.step(ConsultAction(action_type="staff_team", parameters=roles))
        run_module(env, "secondary", params_1={"data_source": "ibisworld"}, params_2={"qc": True})
        run_module(env, "benchmarking")
        run_module(env, "insight_gen")
        return run_module(env, "presentation").total_reward

    def test_same_actions_same_scores(self):
        """Two runs on the same environment instance agree to three decimal places."""
        shared_env = Env()
        first_total = self._episode_total(shared_env)
        second_total = self._episode_total(shared_env)
        self.assertAlmostEqual(first_total, second_total, places=3)
# ─── error handling ───────────────────────────────────────────────────────────
class TestErrorHandling(unittest.TestCase):
    """Invalid actions and scenarios must raise the expected exception type."""

    @staticmethod
    def _team():
        """Fresh partner/manager/associate staffing parameters."""
        return {"partner": True, "manager": True, "associate": True}

    def _staffed_env(self):
        """New environment on ``benchmarking_study`` with the standard team staffed."""
        env = Env()
        env.reset("benchmarking_study")
        env.step(ConsultAction(action_type="staff_team", parameters=self._team()))
        return env

    def _assert_step_raises(self, env, exc_type, action_type, parameters):
        """Assert that stepping ``env`` with the given action raises ``exc_type``."""
        with self.assertRaises(exc_type):
            env.step(ConsultAction(action_type=action_type, parameters=parameters))

    def test_module_before_staff(self):
        """Running a module before staffing raises ``ValueError``."""
        env = Env()
        env.reset("benchmarking_study")
        self._assert_step_raises(env, ValueError, "secondary", {})

    def test_double_staff(self):
        """Staffing a second time raises ``ValueError``."""
        env = self._staffed_env()
        self._assert_step_raises(env, ValueError, "staff_team", self._team())

    def test_invalid_module(self):
        """Stepping ``workshops`` on ``benchmarking_study`` raises ``ValueError``."""
        env = self._staffed_env()
        self._assert_step_raises(env, ValueError, "workshops", {})

    def test_duplicate_module_after_completion(self):
        """Stepping secondary again after it has been run raises ``ValueError``."""
        env = self._staffed_env()
        run_module(env, "secondary")
        self._assert_step_raises(env, ValueError, "secondary", {})

    def test_skip_non_optional_raises(self):
        """Passing ``skip`` on the first secondary step raises ``ValueError``."""
        env = self._staffed_env()
        self._assert_step_raises(env, ValueError, "secondary", {"skip": True})

    def test_step_after_done(self):
        """Stepping after all four modules have been run raises ``RuntimeError``."""
        env = self._staffed_env()
        for name in ("secondary", "benchmarking", "insight_gen", "presentation"):
            run_module(env, name)
        self._assert_step_raises(env, RuntimeError, "secondary", {})

    def test_bad_scenario(self):
        """Resetting to an unknown scenario name raises ``ValueError``."""
        env = Env()
        with self.assertRaises(ValueError):
            env.reset("nonexistent")
# ─── reward range ─────────────────────────────────────────────────────────────
class TestRewardRange(unittest.TestCase):
    """Step and cumulative rewards must stay strictly between 0 and 1."""

    STEPS_PER_MODULE = 3

    def _assert_strictly_inside_unit(self, value):
        """Assert ``0 < value < 1`` as two separate comparisons."""
        self.assertGreater(value, 0)
        self.assertLess(value, 1)

    def test_rewards_in_range(self):
        """Check both reward fields after every step of a default-parameter episode."""
        env = Env()
        env.reset("benchmarking_study")
        roles = {"partner": True, "manager": True, "associate": True}
        env.step(ConsultAction(action_type="staff_team", parameters=roles))

        for name in ("secondary", "benchmarking", "insight_gen", "presentation"):
            for _ in range(self.STEPS_PER_MODULE):
                result = env.step(ConsultAction(action_type=name, parameters={}))
                self._assert_strictly_inside_unit(result.reward)
                self._assert_strictly_inside_unit(result.total_reward)
# ─── data modelling tools ─────────────────────────────────────────────────────
class TestDataModellingTools(unittest.TestCase):
    """Compares days consumed by the second data_modelling step across tools."""

    def setUp(self):
        self.env = Env()

    def _days_with_tool(self, tool):
        """Run ``cost_optimization`` up to the second data_modelling step using ``tool``.

        Returns ``days_consumed`` for that step.
        """
        env = self.env
        env.reset("cost_optimization")
        roles = {"partner": True, "manager": True, "associate": True}
        env.step(ConsultAction(action_type="staff_team", parameters=roles))
        for earlier in ("secondary", "interviews"):
            run_module(env, earlier)
        env.step(ConsultAction(action_type="data_modelling", parameters={}))
        # sub-task 1 is where the tool param applies
        tooled = env.step(ConsultAction(action_type="data_modelling", parameters={"tool": tool}))
        return tooled.latest_output.days_consumed

    def test_alteryx_faster(self):
        """The step takes fewer days with ``alteryx`` than with ``excel``."""
        excel_days = self._days_with_tool("excel")
        alteryx_days = self._days_with_tool("alteryx")
        self.assertLess(alteryx_days, excel_days)
# ─── budget nuclear ───────────────────────────────────────────────────────────
class TestBudgetNuclear(unittest.TestCase):
    """Runs a full seven-role team with bloomberg data through a whole episode."""

    def test_budget_exceeded_penalty(self):
        """The episode is reported as done after the last presentation step."""
        env = Env()
        env.reset("benchmarking_study")
        everyone = {
            "partner": True, "manager": True, "consultant": True,
            "assoc_consultant": True, "associate": True,
            "offshore_analyst": True, "industry_expert": True,
        }
        env.step(ConsultAction(action_type="staff_team", parameters=everyone))

        # Three steps per module; only the second secondary step carries parameters.
        schedule = [
            (name, index)
            for name in ("secondary", "benchmarking", "insight_gen", "presentation")
            for index in range(3)
        ]
        for name, index in schedule:
            if name == "secondary" and index == 1:
                step_params = {"data_source": "bloomberg"}
            else:
                step_params = {}
            obs = env.step(ConsultAction(action_type=name, parameters=step_params))
        self.assertTrue(obs.done)
# ─── sub-task cascade ─────────────────────────────────────────────────────────
class TestSubTaskCascade(unittest.TestCase):
    """Checks how an earlier sub-task affects the next one in the same module."""

    PRE_WORKSHOP_MODULES = (
        "secondary", "interviews", "benchmarking",
        "data_modelling", "insight_gen", "presentation",
    )

    def setUp(self):
        self.env = Env()

    def _workshops_st1_quality(self, team, st0_params):
        """Play ``ops_transformation`` to the second workshops step; return its quality.

        ``st0_params`` goes to the first workshops step; the second gets ``{}``.
        Secondary is run with ``data_source="ibisworld"`` on its second step.
        """
        env = self.env
        env.reset("ops_transformation")
        env.step(ConsultAction(action_type="staff_team", parameters=team))
        for name in self.PRE_WORKSHOP_MODULES:
            second_step = {"data_source": "ibisworld"} if name == "secondary" else None
            run_module(env, name, params_1=second_step)
        env.step(ConsultAction(action_type="workshops", parameters=st0_params))
        result = env.step(ConsultAction(action_type="workshops", parameters={}))
        return result.latest_output.quality

    def test_within_module_cascade_active(self):
        """The second secondary step yields an output with quality above zero."""
        self.env.reset("benchmarking_study")
        roles = {"partner": True, "manager": True, "associate": True}
        self.env.step(ConsultAction(action_type="staff_team", parameters=roles))
        self.env.step(ConsultAction(action_type="secondary", parameters={}))
        second = self.env.step(ConsultAction(action_type="secondary", parameters={}))
        self.assertIsNotNone(second.latest_output)
        self.assertGreater(second.latest_output.quality, 0)

    def test_skip_tanks_within_module_cascade(self):
        """Skipping the first workshops step lowers the quality of the second one."""
        team = {"associate": True, "assoc_consultant": True}
        # Path A: skip workshops ST0 (pre_read_and_agenda)
        skipped_quality = self._workshops_st1_quality(team, {"skip": True})
        # Path B: full run (ST0 runs, quality > 0.35 so cascade diverges)
        full_quality = self._workshops_st1_quality(team, {})
        self.assertGreater(full_quality, skipped_quality)
# ─── optional skip ────────────────────────────────────────────────────────────
class TestOptionalSkip(unittest.TestCase):
    """Skipping a module's third sub-task, and per-step progress tracking."""

    def setUp(self):
        self.env = Env()

    def _begin(self, scenario, **roles):
        """Reset to ``scenario`` and staff the given roles."""
        self.env.reset(scenario)
        self.env.step(ConsultAction(action_type="staff_team", parameters=roles))

    def _do(self, action_type, **params):
        """Issue one step of ``action_type`` with ``params``."""
        return self.env.step(ConsultAction(action_type=action_type, parameters=params))

    def test_skip_presentation_internal_review(self):
        """Skipping the third presentation step ends the episode with quality below 0.8."""
        self._begin("benchmarking_study", partner=True, manager=True, associate=True)
        for earlier in ("secondary", "benchmarking", "insight_gen"):
            run_module(self.env, earlier)
        self._do("presentation")
        self._do("presentation")
        final = self._do("presentation", skip=True)
        self.assertTrue(final.done)
        presentation_quality = self.env._state["module_qualities"]["presentation"]
        self.assertLess(presentation_quality, 0.8)

    def test_skip_scenario_and_sensitivity(self):
        """Skipping the third data_modelling step still marks the module complete."""
        self._begin("cost_optimization", partner=True, manager=True, associate=True)
        for earlier in ("secondary", "interviews"):
            run_module(self.env, earlier)
        self._do("data_modelling")
        self._do("data_modelling", tool="alteryx")
        skipped = self._do("data_modelling", skip=True)
        self.assertEqual(skipped.module_progress["data_modelling"], 3)
        self.assertIn("data_modelling", self.env._state["completed_modules"])

    def test_module_progress_advances(self):
        """Secondary progress goes 0, 1, 2, 3 across three steps, then is listed as completed."""
        self._begin("benchmarking_study", partner=True, manager=True)
        progress = self.env._state["module_progress"]
        self.assertEqual(progress["secondary"], 0)
        for expected in (1, 2, 3):
            self._do("secondary")
            # Re-read the state each time, as the original assertions did.
            self.assertEqual(self.env._state["module_progress"]["secondary"], expected)
        self.assertIn("secondary", self.env._state["completed_modules"])
if __name__ == "__main__":
    # Script entry point: run every test in this module with verbosity level 2.
    unittest.main(verbosity=2)