[ { "name": "Qwen2.5-0.5B", "avg_score": 3.0, "avg_time": 3.1588314941951205, "results": [ { "command": "show game state", "score": 4, "time": 4.286122560501099, "response": "Here is the response in JSON format:\n\n```json\n{\n " }, { "command": "move infantry to 100,200", "score": 4, "time": 4.499698638916016, "response": "In this case, the tool is \"move_units\" and the arg" }, { "command": "attack enemy tank at 300,150", "score": 5, "time": 4.139246940612793, "response": "In this case, the tool is \"attack_unit\" and the ar" }, { "command": "build barracks near 250,200", "score": 3, "time": 2.345900774002075, "response": ".\n```json\n{\n \"tool\": \"build_building\",\n \"args\": " }, { "command": "defend base with all units", "score": 1, "time": 1.6794795989990234, "response": ".\n```json\n{\n \"tool\": \"move_units\",\n \"args\": {\n " }, { "command": "harvest resources with harvester", "score": 2, "time": 1.0160574913024902, "response": ".\n```json\n{\n \"tool\": \"get_game_state\",\n \"args\": " }, { "command": "scout enemy positions", "score": 2, "time": 4.145314455032349, "response": "In this scenario, the user command is \"scout enemy" } ] }, { "name": "Qwen3-0.6B", "avg_score": 2.142857142857143, "avg_time": 7.839082138878958, "results": [ { "command": "show game state", "score": 4, "time": 8.01675820350647, "response": "If the user command is not available, respond with" }, { "command": "move infantry to 100,200", "score": 3, "time": 7.884659290313721, "response": "{\n \"tool\": \"move_units\",\n \"args\": {\n \"unit_id" }, { "command": "attack enemy tank at 300,150", "score": 3, "time": 7.239688873291016, "response": "{\n \"tool\": \"attack_unit\",\n \"args\": {\n \"attack" }, { "command": "build barracks near 250,200", "score": 1, "time": 7.6789350509643555, "response": "The correct answer is {\"tool\": \"build\", \"args\": {\"" }, { "command": "defend base with all units", "score": 1, "time": 8.047882318496704, "response": "Possible tool names: build, get_game_state, move, " }, { "command": "harvest resources with harvester", "score": 1, "time": 8.035732746124268, "response": "The correct answer is {\"tool\": \"harvest\", \"args\": " }, { "command": "scout enemy positions", "score": 2, "time": 7.969918489456177, "response": "The user's command is \"scout enemy positions\". Wha" } ] }, { "name": "Gemma-3-270M", "avg_score": 1.4285714285714286, "avg_time": 4.817336320877075, "results": [ { "command": "show game state", "score": 1, "time": 4.833978652954102, "response": "- show game state\n- move_units(unit_ids, target_x," }, { "command": "move infantry to 100,200", "score": 2, "time": 4.697685718536377, "response": "```\n`move infantry to 100,200`\n\n**Explanation:**\n\n" }, { "command": "attack enemy tank at 300,150", "score": 3, "time": 4.781869649887085, "response": "```\n**Explanation:**\n\n* `get_game_state()`: Retrie" }, { "command": "build barracks near 250,200", "score": 3, "time": 4.599961996078491, "response": "```\n`build barracks near 250,200`\n```\nJSON:\n```\n{\"" }, { "command": "defend base with all units", "score": 0, "time": 4.869190216064453, "response": "- `tool_name` is a string\n- `args` is a dictionary" }, { "command": "harvest resources with harvester", "score": 0, "time": 4.996307849884033, "response": "```\n- `harvest resources with harvester`\n- `harves" }, { "command": "scout enemy positions", "score": 1, "time": 4.942360162734985, "response": "```\n`scout enemy positions`\n- `tool`: \"scout\"\n- `a" } ] } ]