| 49061 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-150step |
HuggingFace |
1.30 |
unrated |
| 49062 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-15step |
HuggingFace |
1.30 |
unrated |
| 49063 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-30step |
HuggingFace |
1.30 |
unrated |
| 49064 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-45step |
HuggingFace |
1.30 |
unrated |
| 49065 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-60step |
HuggingFace |
1.30 |
unrated |
| 49066 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-75step |
HuggingFace |
1.30 |
unrated |
| 49067 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info400-90step |
HuggingFace |
1.30 |
unrated |
| 49068 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-105step |
HuggingFace |
1.30 |
unrated |
| 49069 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-120step |
HuggingFace |
1.30 |
unrated |
| 49070 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-135step |
HuggingFace |
1.30 |
unrated |
| 49071 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-150step |
HuggingFace |
1.30 |
unrated |
| 49072 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-15step |
HuggingFace |
1.30 |
unrated |
| 49073 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-30step |
HuggingFace |
1.30 |
unrated |
| 49074 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-45step |
HuggingFace |
1.30 |
unrated |
| 49075 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-60step |
HuggingFace |
1.30 |
unrated |
| 49076 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-75step |
HuggingFace |
1.30 |
unrated |
| 49077 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Llama-3.1-8B-Instruct-info5-90step |
HuggingFace |
1.30 |
unrated |
| 49078 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Qwen2.5-7B-Instruct-105step |
HuggingFace |
1.30 |
unrated |
| 49079 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Qwen2.5-7B-Instruct-120step |
HuggingFace |
1.30 |
unrated |
| 49080 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Qwen2.5-7B-Instruct-135step |
HuggingFace |
1.30 |
unrated |