{ "all_results": [ { "name": "Qwen2.5-0.5B", "avg_score": 4.0, "avg_time": 3.52385942141215, "efficiency": 1.1351190617011169, "tests": [ { "test": "Commande simple", "score": 7, "time": 3.418940305709839, "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.8486745357513428, "response": "La commande \"move_units\" est utilisée pour déplace..." }, { "test": "Vitesse de réponse", "score": 0, "time": 3.3039634227752686, "response": ", je vais faire une tâche de base. Je vais essayer..." } ] }, { "name": "Qwen3-0.6B", "avg_score": 6.0, "avg_time": 6.404076337814331, "efficiency": 0.936903260283084, "tests": [ { "test": "Commande simple", "score": 7, "time": 6.516923427581787, "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." }, { "test": "Action avec paramètres", "score": 7, "time": 6.65591287612915, "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." }, { "test": "Vitesse de réponse", "score": 4, "time": 6.039392709732056, "response": ", but not too much. The user is asking for a respo..." } ] }, { "name": "Gemma-3-1B", "avg_score": 4.0, "avg_time": 6.960511525472005, "efficiency": 0.5746704082540475, "tests": [ { "test": "Commande simple", "score": 7, "time": 7.20223069190979, "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." }, { "test": "Action avec paramètres", "score": 5, "time": 6.998988628387451, "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." }, { "test": "Vitesse de réponse", "score": 0, "time": 6.680315256118774, "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." } ] }, { "name": "Gemma-3-270M", "avg_score": 4.666666666666667, "avg_time": 3.6990818977355957, "efficiency": 1.2615743029434903, "tests": [ { "test": "Commande simple", "score": 5, "time": 3.697866201400757, "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.690243721008301, "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." }, { "test": "Vitesse de réponse", "score": 4, "time": 3.7091357707977295, "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." } ] } ], "successful_models": [ { "name": "Qwen2.5-0.5B", "avg_score": 4.0, "avg_time": 3.52385942141215, "efficiency": 1.1351190617011169, "tests": [ { "test": "Commande simple", "score": 7, "time": 3.418940305709839, "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.8486745357513428, "response": "La commande \"move_units\" est utilisée pour déplace..." }, { "test": "Vitesse de réponse", "score": 0, "time": 3.3039634227752686, "response": ", je vais faire une tâche de base. Je vais essayer..." } ] }, { "name": "Qwen3-0.6B", "avg_score": 6.0, "avg_time": 6.404076337814331, "efficiency": 0.936903260283084, "tests": [ { "test": "Commande simple", "score": 7, "time": 6.516923427581787, "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." }, { "test": "Action avec paramètres", "score": 7, "time": 6.65591287612915, "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." }, { "test": "Vitesse de réponse", "score": 4, "time": 6.039392709732056, "response": ", but not too much. The user is asking for a respo..." } ] }, { "name": "Gemma-3-1B", "avg_score": 4.0, "avg_time": 6.960511525472005, "efficiency": 0.5746704082540475, "tests": [ { "test": "Commande simple", "score": 7, "time": 7.20223069190979, "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." }, { "test": "Action avec paramètres", "score": 5, "time": 6.998988628387451, "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." }, { "test": "Vitesse de réponse", "score": 0, "time": 6.680315256118774, "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." } ] }, { "name": "Gemma-3-270M", "avg_score": 4.666666666666667, "avg_time": 3.6990818977355957, "efficiency": 1.2615743029434903, "tests": [ { "test": "Commande simple", "score": 5, "time": 3.697866201400757, "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.690243721008301, "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." }, { "test": "Vitesse de réponse", "score": 4, "time": 3.7091357707977295, "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." } ] } ], "ranking_by_score": [ { "name": "Qwen3-0.6B", "avg_score": 6.0, "avg_time": 6.404076337814331, "efficiency": 0.936903260283084, "tests": [ { "test": "Commande simple", "score": 7, "time": 6.516923427581787, "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." }, { "test": "Action avec paramètres", "score": 7, "time": 6.65591287612915, "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." }, { "test": "Vitesse de réponse", "score": 4, "time": 6.039392709732056, "response": ", but not too much. The user is asking for a respo..." } ] }, { "name": "Gemma-3-270M", "avg_score": 4.666666666666667, "avg_time": 3.6990818977355957, "efficiency": 1.2615743029434903, "tests": [ { "test": "Commande simple", "score": 5, "time": 3.697866201400757, "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.690243721008301, "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." }, { "test": "Vitesse de réponse", "score": 4, "time": 3.7091357707977295, "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." } ] }, { "name": "Qwen2.5-0.5B", "avg_score": 4.0, "avg_time": 3.52385942141215, "efficiency": 1.1351190617011169, "tests": [ { "test": "Commande simple", "score": 7, "time": 3.418940305709839, "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.8486745357513428, "response": "La commande \"move_units\" est utilisée pour déplace..." }, { "test": "Vitesse de réponse", "score": 0, "time": 3.3039634227752686, "response": ", je vais faire une tâche de base. Je vais essayer..." } ] }, { "name": "Gemma-3-1B", "avg_score": 4.0, "avg_time": 6.960511525472005, "efficiency": 0.5746704082540475, "tests": [ { "test": "Commande simple", "score": 7, "time": 7.20223069190979, "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." }, { "test": "Action avec paramètres", "score": 5, "time": 6.998988628387451, "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." }, { "test": "Vitesse de réponse", "score": 0, "time": 6.680315256118774, "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." } ] } ], "ranking_by_efficiency": [ { "name": "Gemma-3-270M", "avg_score": 4.666666666666667, "avg_time": 3.6990818977355957, "efficiency": 1.2615743029434903, "tests": [ { "test": "Commande simple", "score": 5, "time": 3.697866201400757, "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.690243721008301, "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." }, { "test": "Vitesse de réponse", "score": 4, "time": 3.7091357707977295, "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." } ] }, { "name": "Qwen2.5-0.5B", "avg_score": 4.0, "avg_time": 3.52385942141215, "efficiency": 1.1351190617011169, "tests": [ { "test": "Commande simple", "score": 7, "time": 3.418940305709839, "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.8486745357513428, "response": "La commande \"move_units\" est utilisée pour déplace..." }, { "test": "Vitesse de réponse", "score": 0, "time": 3.3039634227752686, "response": ", je vais faire une tâche de base. Je vais essayer..." } ] }, { "name": "Qwen3-0.6B", "avg_score": 6.0, "avg_time": 6.404076337814331, "efficiency": 0.936903260283084, "tests": [ { "test": "Commande simple", "score": 7, "time": 6.516923427581787, "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." }, { "test": "Action avec paramètres", "score": 7, "time": 6.65591287612915, "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." }, { "test": "Vitesse de réponse", "score": 4, "time": 6.039392709732056, "response": ", but not too much. The user is asking for a respo..." } ] }, { "name": "Gemma-3-1B", "avg_score": 4.0, "avg_time": 6.960511525472005, "efficiency": 0.5746704082540475, "tests": [ { "test": "Commande simple", "score": 7, "time": 7.20223069190979, "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." }, { "test": "Action avec paramètres", "score": 5, "time": 6.998988628387451, "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." }, { "test": "Vitesse de réponse", "score": 0, "time": 6.680315256118774, "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." } ] } ], "best_overall": { "name": "Qwen3-0.6B", "avg_score": 6.0, "avg_time": 6.404076337814331, "efficiency": 0.936903260283084, "tests": [ { "test": "Commande simple", "score": 7, "time": 6.516923427581787, "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." }, { "test": "Action avec paramètres", "score": 7, "time": 6.65591287612915, "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." }, { "test": "Vitesse de réponse", "score": 4, "time": 6.039392709732056, "response": ", but not too much. The user is asking for a respo..." } ] }, "most_efficient": { "name": "Gemma-3-270M", "avg_score": 4.666666666666667, "avg_time": 3.6990818977355957, "efficiency": 1.2615743029434903, "tests": [ { "test": "Commande simple", "score": 5, "time": 3.697866201400757, "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." }, { "test": "Action avec paramètres", "score": 5, "time": 3.690243721008301, "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." }, { "test": "Vitesse de réponse", "score": 4, "time": 3.7091357707977295, "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." } ] } }