{
  "evaluation_type": "comprehensive_mcp_test",
  "total_models_tested": 9,
  "successful_models": 5,
  "results": [
    {
      "name": "Qwen2.5-0.5B",
      "file_size_mb": 408.8689880371094,
      "avg_score": 2.6,
      "avg_time": 2.6360722541809083,
      "efficiency": 0.9863159084036122,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 0,
          "time": 0.31192469596862793,
          "response": ""
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 0,
          "time": 0.18253064155578613,
          "response": ""
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 7,
          "time": 4.232211351394653,
          "response": "Where tool_name is the name of the tool used and args is a dictionary containing the arguments for t..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 4.225749492645264,
          "response": "Where tool_name is the name of the tool and args is a dictionary with the arguments. If no arguments..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 4,
          "time": 4.22794508934021,
          "response": "where tool_name is the name of the tool and args is a dictionary containing the arguments. If no too..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen3-0.6B",
      "file_size_mb": 609.8238830566406,
      "avg_score": 2.8,
      "avg_time": 8.223706769943238,
      "efficiency": 0.3404790659892809,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 7,
          "time": 8.638539791107178,
          "response": ".\n\nIf the user command is not supported by the available tools, respond with an empty array.\n\nNow, t..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 5,
          "time": 8.075484991073608,
          "response": ".\n\nMake sure to use the correct tool name and format the JSON correctly.\n\nIf the command is not poss..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 0,
          "time": 7.951770067214966,
          "response": ".\n\nMake sure to use the correct tool names and format the JSON correctly.\n\nNow, the game state is as..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 8.252855062484741,
          "response": ".\n\nMake sure to use the correct tool names and format the JSON properly.\n\nNow, the user is in a game..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 0,
          "time": 8.199883937835693,
          "response": ".\n\nMake sure to use the correct tool name and format the JSON.\n\nIf the command is not possible, retu..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Gemma-3-270M",
      "file_size_mb": 428.0401306152344,
      "avg_score": 0.0,
      "avg_time": 0.16690435409545898,
      "efficiency": 0.0,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 0,
          "time": 0.2941462993621826,
          "response": ""
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 0,
          "time": 0.13967180252075195,
          "response": ""
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 0,
          "time": 0.1264328956604004,
          "response": ""
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 0,
          "time": 0.14153170585632324,
          "response": ""
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 0,
          "time": 0.13273906707763672,
          "response": ""
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen3-1.7B",
      "file_size_mb": 1007.8267211914062,
      "avg_score": 3.0,
      "avg_time": 13.003729963302613,
      "efficiency": 0.23070303739513193,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 2,
          "time": 12.862720251083374,
          "response": ".\n\nYou must use the JSON format specified, without any additional text or explanation. The JSON must..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 2,
          "time": 12.972241401672363,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe game state is as follows:\n-..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 3,
          "time": 13.497555255889893,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe tool to use is attack_unit...."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 5,
          "time": 12.513315677642822,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe correct tool name is \"build..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 3,
          "time": 13.17281723022461,
          "response": ".\n\nYou can use the following tool definitions:\n- move_units: move units to a new position\n- attack_u..."
        }
      ],
      "type": "general"
    },
    {
      "name": "MCP-Instruct-v1",
      "file_size_mb": 697.0347290039062,
      "avg_score": 0.0,
      "avg_time": 0.1320805072784424,
      "efficiency": 0.0,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 0,
          "time": 0.6604025363922119,
          "response": ""
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 0,
          "time": 0,
          "error": "llama_decode returned -1"
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 0,
          "time": 0,
          "error": "llama_decode returned -1"
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 0,
          "time": 0,
          "error": "llama_decode returned -1"
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 0,
          "time": 0,
          "error": "llama_decode returned -1"
        }
      ],
      "type": "mcp_specialized"
    },
    {
      "name": "MCPR L-3B-Exa",
      "file_size_mb": 1215.7023620605469,
      "avg_score": 0.0,
      "avg_time": 22.14646472930908,
      "efficiency": 0.0,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 0,
          "time": 22.817347049713135,
          "response": "+\\),),“), and“““““““““““““““““““““““““”“”““““““““““”“““““““““““““““““““““““““““““““““““““““initializ..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 0,
          "time": 21.51675510406494,
          "response": "+\\),),“),3“”“”“”“),),““““““““““”“),),),), and“),),), and@@ the Sty□ Sty□ Sty□ Sty□ Sty□ Sty□ Sty□ St..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 0,
          "time": 22.118958473205566,
          "response": "+\\),),+\\),), and““”““““““““““““““““““““”“““““”“”“““““““““““““““““““““”“”““”““”““““““““““““““““““““““..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 0,
          "time": 22.297714471817017,
          "response": "+\\),),“), and@@ the Sty mini mini mini mini mini mini mini mini mini the““““”“),),+\\),),), and“),),)..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 0,
          "time": 21.98154854774475,
          "response": "and@@ Sty@@patterns@@ mini@@ Sty@@ Sty mini mini mini mini mini mini mini mini mini the“““““““““”“““..."
        }
      ],
      "type": "mcp_specialized"
    },
    {
      "name": "Gemma-3n-E2B-it",
      "file_size_mb": 1958.3001403808594,
      "avg_score": 0.0,
      "avg_time": 1.5714858055114747,
      "efficiency": 0.0,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 0,
          "time": 3.1773452758789062,
          "response": ""
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 0,
          "time": 1.1669323444366455,
          "response": ""
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 0,
          "time": 1.1747264862060547,
          "response": ""
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 0,
          "time": 1.2873260974884033,
          "response": ""
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 0,
          "time": 1.0510988235473633,
          "response": ""
        }
      ],
      "type": "mcp_specialized"
    },
    {
      "name": "Llama-Breeze2-3B",
      "file_size_mb": 1424.04345703125,
      "avg_score": 3.6,
      "avg_time": 14.693956804275512,
      "efficiency": 0.24499867856917243,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 3,
          "time": 3.5608396530151367,
          "response": "[get_game_state()]"
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 3,
          "time": 5.626140356063843,
          "response": "[move_units(unit_ids='infantry', target_x='150', target_y='200')]"
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 5,
          "time": 23.91610813140869,
          "response": "The tool used is \"get_game_state\" and the tool name is \"get_game_state\". The args for this tool is a..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 5,
          "time": 23.745216846466064,
          "response": "{\n  \"tool\": \"build_building\",\n  \"args\": {\n    \"building_type\": \"power plant\",\n    \"position_x\": 100,..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 2,
          "time": 16.621479034423828,
          "response": "The game is ready for action. You are the AI, and you are ready to assist. You have the ability to m..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen2.5-Coder-0.5B",
      "file_size_mb": 408.8690185546875,
      "avg_score": 4.4,
      "avg_time": 4.1166441440582275,
      "efficiency": 1.0688317585941343,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 5,
          "time": 4.25421667098999,
          "response": ".\n\nHere's a possible response:\n\n{\"tool\": \"get_game_state\", \"args\": {\"game_state\": {\"units\": [{\"id\": ..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 5,
          "time": 4.333646059036255,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 5,
          "time": 4.139528274536133,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 3.9508562088012695,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a JSON object containing the arguments ..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 5,
          "time": 3.9049735069274902,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        }
      ],
      "type": "code_specialized"
    }
  ],
  "ranking_by_score": [
    {
      "name": "Qwen2.5-Coder-0.5B",
      "file_size_mb": 408.8690185546875,
      "avg_score": 4.4,
      "avg_time": 4.1166441440582275,
      "efficiency": 1.0688317585941343,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 5,
          "time": 4.25421667098999,
          "response": ".\n\nHere's a possible response:\n\n{\"tool\": \"get_game_state\", \"args\": {\"game_state\": {\"units\": [{\"id\": ..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 5,
          "time": 4.333646059036255,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 5,
          "time": 4.139528274536133,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 3.9508562088012695,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a JSON object containing the arguments ..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 5,
          "time": 3.9049735069274902,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        }
      ],
      "type": "code_specialized"
    },
    {
      "name": "Llama-Breeze2-3B",
      "file_size_mb": 1424.04345703125,
      "avg_score": 3.6,
      "avg_time": 14.693956804275512,
      "efficiency": 0.24499867856917243,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 3,
          "time": 3.5608396530151367,
          "response": "[get_game_state()]"
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 3,
          "time": 5.626140356063843,
          "response": "[move_units(unit_ids='infantry', target_x='150', target_y='200')]"
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 5,
          "time": 23.91610813140869,
          "response": "The tool used is \"get_game_state\" and the tool name is \"get_game_state\". The args for this tool is a..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 5,
          "time": 23.745216846466064,
          "response": "{\n  \"tool\": \"build_building\",\n  \"args\": {\n    \"building_type\": \"power plant\",\n    \"position_x\": 100,..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 2,
          "time": 16.621479034423828,
          "response": "The game is ready for action. You are the AI, and you are ready to assist. You have the ability to m..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen3-1.7B",
      "file_size_mb": 1007.8267211914062,
      "avg_score": 3.0,
      "avg_time": 13.003729963302613,
      "efficiency": 0.23070303739513193,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 2,
          "time": 12.862720251083374,
          "response": ".\n\nYou must use the JSON format specified, without any additional text or explanation. The JSON must..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 2,
          "time": 12.972241401672363,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe game state is as follows:\n-..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 3,
          "time": 13.497555255889893,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe tool to use is attack_unit...."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 5,
          "time": 12.513315677642822,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe correct tool name is \"build..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 3,
          "time": 13.17281723022461,
          "response": ".\n\nYou can use the following tool definitions:\n- move_units: move units to a new position\n- attack_u..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen3-0.6B",
      "file_size_mb": 609.8238830566406,
      "avg_score": 2.8,
      "avg_time": 8.223706769943238,
      "efficiency": 0.3404790659892809,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 7,
          "time": 8.638539791107178,
          "response": ".\n\nIf the user command is not supported by the available tools, respond with an empty array.\n\nNow, t..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 5,
          "time": 8.075484991073608,
          "response": ".\n\nMake sure to use the correct tool name and format the JSON correctly.\n\nIf the command is not poss..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 0,
          "time": 7.951770067214966,
          "response": ".\n\nMake sure to use the correct tool names and format the JSON correctly.\n\nNow, the game state is as..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 8.252855062484741,
          "response": ".\n\nMake sure to use the correct tool names and format the JSON properly.\n\nNow, the user is in a game..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 0,
          "time": 8.199883937835693,
          "response": ".\n\nMake sure to use the correct tool name and format the JSON.\n\nIf the command is not possible, retu..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen2.5-0.5B",
      "file_size_mb": 408.8689880371094,
      "avg_score": 2.6,
      "avg_time": 2.6360722541809083,
      "efficiency": 0.9863159084036122,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 0,
          "time": 0.31192469596862793,
          "response": ""
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 0,
          "time": 0.18253064155578613,
          "response": ""
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 7,
          "time": 4.232211351394653,
          "response": "Where tool_name is the name of the tool used and args is a dictionary containing the arguments for t..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 4.225749492645264,
          "response": "Where tool_name is the name of the tool and args is a dictionary with the arguments. If no arguments..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 4,
          "time": 4.22794508934021,
          "response": "where tool_name is the name of the tool and args is a dictionary containing the arguments. If no too..."
        }
      ],
      "type": "general"
    }
  ],
  "ranking_by_efficiency": [
    {
      "name": "Qwen2.5-Coder-0.5B",
      "file_size_mb": 408.8690185546875,
      "avg_score": 4.4,
      "avg_time": 4.1166441440582275,
      "efficiency": 1.0688317585941343,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 5,
          "time": 4.25421667098999,
          "response": ".\n\nHere's a possible response:\n\n{\"tool\": \"get_game_state\", \"args\": {\"game_state\": {\"units\": [{\"id\": ..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 5,
          "time": 4.333646059036255,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 5,
          "time": 4.139528274536133,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 3.9508562088012695,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a JSON object containing the arguments ..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 5,
          "time": 3.9049735069274902,
          "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
        }
      ],
      "type": "code_specialized"
    },
    {
      "name": "Qwen2.5-0.5B",
      "file_size_mb": 408.8689880371094,
      "avg_score": 2.6,
      "avg_time": 2.6360722541809083,
      "efficiency": 0.9863159084036122,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 0,
          "time": 0.31192469596862793,
          "response": ""
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 0,
          "time": 0.18253064155578613,
          "response": ""
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 7,
          "time": 4.232211351394653,
          "response": "Where tool_name is the name of the tool used and args is a dictionary containing the arguments for t..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 4.225749492645264,
          "response": "Where tool_name is the name of the tool and args is a dictionary with the arguments. If no arguments..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 4,
          "time": 4.22794508934021,
          "response": "where tool_name is the name of the tool and args is a dictionary containing the arguments. If no too..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen3-0.6B",
      "file_size_mb": 609.8238830566406,
      "avg_score": 2.8,
      "avg_time": 8.223706769943238,
      "efficiency": 0.3404790659892809,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 7,
          "time": 8.638539791107178,
          "response": ".\n\nIf the user command is not supported by the available tools, respond with an empty array.\n\nNow, t..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 5,
          "time": 8.075484991073608,
          "response": ".\n\nMake sure to use the correct tool name and format the JSON correctly.\n\nIf the command is not poss..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 0,
          "time": 7.951770067214966,
          "response": ".\n\nMake sure to use the correct tool names and format the JSON correctly.\n\nNow, the game state is as..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 2,
          "time": 8.252855062484741,
          "response": ".\n\nMake sure to use the correct tool names and format the JSON properly.\n\nNow, the user is in a game..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 0,
          "time": 8.199883937835693,
          "response": ".\n\nMake sure to use the correct tool name and format the JSON.\n\nIf the command is not possible, retu..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Llama-Breeze2-3B",
      "file_size_mb": 1424.04345703125,
      "avg_score": 3.6,
      "avg_time": 14.693956804275512,
      "efficiency": 0.24499867856917243,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 3,
          "time": 3.5608396530151367,
          "response": "[get_game_state()]"
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 3,
          "time": 5.626140356063843,
          "response": "[move_units(unit_ids='infantry', target_x='150', target_y='200')]"
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 5,
          "time": 23.91610813140869,
          "response": "The tool used is \"get_game_state\" and the tool name is \"get_game_state\". The args for this tool is a..."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 5,
          "time": 23.745216846466064,
          "response": "{\n  \"tool\": \"build_building\",\n  \"args\": {\n    \"building_type\": \"power plant\",\n    \"position_x\": 100,..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 2,
          "time": 16.621479034423828,
          "response": "The game is ready for action. You are the AI, and you are ready to assist. You have the ability to m..."
        }
      ],
      "type": "general"
    },
    {
      "name": "Qwen3-1.7B",
      "file_size_mb": 1007.8267211914062,
      "avg_score": 3.0,
      "avg_time": 13.003729963302613,
      "efficiency": 0.23070303739513193,
      "results": [
        {
          "test": "Commande simple",
          "difficulty": "easy",
          "score": 2,
          "time": 12.862720251083374,
          "response": ".\n\nYou must use the JSON format specified, without any additional text or explanation. The JSON must..."
        },
        {
          "test": "Action avec coordonnées",
          "difficulty": "easy",
          "score": 2,
          "time": 12.972241401672363,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe game state is as follows:\n-..."
        },
        {
          "test": "Attaque spécifique",
          "difficulty": "medium",
          "score": 3,
          "time": 13.497555255889893,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe tool to use is attack_unit...."
        },
        {
          "test": "Construction",
          "difficulty": "medium",
          "score": 5,
          "time": 12.513315677642822,
          "response": ".\n\nYou must use the correct tool name and format the JSON properly.\n\nThe correct tool name is \"build..."
        },
        {
          "test": "Commande complexe",
          "difficulty": "hard",
          "score": 3,
          "time": 13.17281723022461,
          "response": ".\n\nYou can use the following tool definitions:\n- move_units: move units to a new position\n- attack_u..."
        }
      ],
      "type": "general"
    }
  ],
  "best_overall": {
    "name": "Qwen2.5-Coder-0.5B",
    "file_size_mb": 408.8690185546875,
    "avg_score": 4.4,
    "avg_time": 4.1166441440582275,
    "efficiency": 1.0688317585941343,
    "results": [
      {
        "test": "Commande simple",
        "difficulty": "easy",
        "score": 5,
        "time": 4.25421667098999,
        "response": ".\n\nHere's a possible response:\n\n{\"tool\": \"get_game_state\", \"args\": {\"game_state\": {\"units\": [{\"id\": ..."
      },
      {
        "test": "Action avec coordonnées",
        "difficulty": "easy",
        "score": 5,
        "time": 4.333646059036255,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
      },
      {
        "test": "Attaque spécifique",
        "difficulty": "medium",
        "score": 5,
        "time": 4.139528274536133,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
      },
      {
        "test": "Construction",
        "difficulty": "medium",
        "score": 2,
        "time": 3.9508562088012695,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a JSON object containing the arguments ..."
      },
      {
        "test": "Commande complexe",
        "difficulty": "hard",
        "score": 5,
        "time": 3.9049735069274902,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
      }
    ],
    "type": "code_specialized"
  },
  "most_efficient": {
    "name": "Qwen2.5-Coder-0.5B",
    "file_size_mb": 408.8690185546875,
    "avg_score": 4.4,
    "avg_time": 4.1166441440582275,
    "efficiency": 1.0688317585941343,
    "results": [
      {
        "test": "Commande simple",
        "difficulty": "easy",
        "score": 5,
        "time": 4.25421667098999,
        "response": ".\n\nHere's a possible response:\n\n{\"tool\": \"get_game_state\", \"args\": {\"game_state\": {\"units\": [{\"id\": ..."
      },
      {
        "test": "Action avec coordonnées",
        "difficulty": "easy",
        "score": 5,
        "time": 4.333646059036255,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
      },
      {
        "test": "Attaque spécifique",
        "difficulty": "medium",
        "score": 5,
        "time": 4.139528274536133,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
      },
      {
        "test": "Construction",
        "difficulty": "medium",
        "score": 2,
        "time": 3.9508562088012695,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a JSON object containing the arguments ..."
      },
      {
        "test": "Commande complexe",
        "difficulty": "hard",
        "score": 5,
        "time": 3.9049735069274902,
        "response": "where \"tool_name\" is the name of the tool used and \"args\" is a dictionary containing the arguments p..."
      }
    ],
    "type": "code_specialized"
  }
}