| 49041 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-150step |
HuggingFace |
1.30 |
unrated |
| 49042 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-15step |
HuggingFace |
1.30 |
unrated |
| 49043 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-30step |
HuggingFace |
1.30 |
unrated |
| 49044 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-45step |
HuggingFace |
1.30 |
unrated |
| 49045 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-60step |
HuggingFace |
1.30 |
unrated |
| 49046 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-75step |
HuggingFace |
1.30 |
unrated |
| 49047 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-90step |
HuggingFace |
1.30 |
unrated |
| 49048 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-105step |
HuggingFace |
1.30 |
unrated |
| 49049 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-120step |
HuggingFace |
1.30 |
unrated |
| 49050 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-135step |
HuggingFace |
1.30 |
unrated |
| 49051 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-150step |
HuggingFace |
1.30 |
unrated |
| 49052 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-15step |
HuggingFace |
1.30 |
unrated |
| 49053 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-30step |
HuggingFace |
1.30 |
unrated |
| 49054 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-45step |
HuggingFace |
1.30 |
unrated |
| 49055 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-60step |
HuggingFace |
1.30 |
unrated |
| 49056 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-75step |
HuggingFace |
1.30 |
unrated |
| 49057 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-90step |
HuggingFace |
1.30 |
unrated |
| 49058 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-105step |
HuggingFace |
1.30 |
unrated |
| 49059 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-120step |
HuggingFace |
1.30 |
unrated |
| 49060 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-135step |
HuggingFace |
1.30 |
unrated |