| 49021 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-150step |
HuggingFace |
1.30 |
unrated |
| 49022 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-15step |
HuggingFace |
1.30 |
unrated |
| 49023 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-30step |
HuggingFace |
1.30 |
unrated |
| 49024 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-45step |
HuggingFace |
1.30 |
unrated |
| 49025 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-60step |
HuggingFace |
1.30 |
unrated |
| 49026 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-75step |
HuggingFace |
1.30 |
unrated |
| 49027 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-nothink-90step |
HuggingFace |
1.30 |
unrated |
| 49028 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-105step |
HuggingFace |
1.30 |
unrated |
| 49029 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-120step |
HuggingFace |
1.30 |
unrated |
| 49030 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-135step |
HuggingFace |
1.30 |
unrated |
| 49031 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-150step |
HuggingFace |
1.30 |
unrated |
| 49032 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-15step |
HuggingFace |
1.30 |
unrated |
| 49033 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-30step |
HuggingFace |
1.30 |
unrated |
| 49034 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-45step |
HuggingFace |
1.30 |
unrated |
| 49035 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-60step |
HuggingFace |
1.30 |
unrated |
| 49036 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-75step |
HuggingFace |
1.30 |
unrated |
| 49037 |
verl_agent_alfworld-GRPO-from-webshop-20step-v2-Qwen2.5-7B-Instruct-only16-think-90step |
HuggingFace |
1.30 |
unrated |
| 49038 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-105step |
HuggingFace |
1.30 |
unrated |
| 49039 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-120step |
HuggingFace |
1.30 |
unrated |
| 49040 |
verl_agent-alfworld-GRPO-kl0.01-from-sft-step100-Llama-3.1-8B-Instruct-nothink-135step |
HuggingFace |
1.30 |
unrated |