| 49081 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-20step-v2-Qwen2.5-7B-Instruct-150step |
HuggingFace |
1.30 |
unrated |
| 49082 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-105step |
HuggingFace |
1.30 |
unrated |
| 49083 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-120step |
HuggingFace |
1.30 |
unrated |
| 49084 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-135step |
HuggingFace |
1.30 |
unrated |
| 49085 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-150step |
HuggingFace |
1.30 |
unrated |
| 49086 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-15step |
HuggingFace |
1.30 |
unrated |
| 49087 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-30step |
HuggingFace |
1.30 |
unrated |
| 49088 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-45step |
HuggingFace |
1.30 |
unrated |
| 49089 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-60step |
HuggingFace |
1.30 |
unrated |
| 49090 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-75step |
HuggingFace |
1.30 |
unrated |
| 49091 |
verl_agent_alfworld-GRPO-kl-0.01-from-webshop-40step-v2-Llama-3.2-3B-Instruct-90step |
HuggingFace |
1.30 |
unrated |
| 49092 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-105step |
HuggingFace |
1.30 |
unrated |
| 49093 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-120step |
HuggingFace |
1.30 |
unrated |
| 49094 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-135step |
HuggingFace |
1.30 |
unrated |
| 49095 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-150step |
HuggingFace |
1.30 |
unrated |
| 49096 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-15step |
HuggingFace |
1.30 |
unrated |
| 49097 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-30step |
HuggingFace |
1.30 |
unrated |
| 49098 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-45step |
HuggingFace |
1.30 |
unrated |
| 49099 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-60step |
HuggingFace |
1.30 |
unrated |
| 49100 |
verl-agent-sciworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-75step |
HuggingFace |
1.30 |
unrated |