gpt-4-turbo-2024-04-09+cot
gpt-4o+cot
gpt-4-0613+cot
claude-3-opus-20240229+cot
llama3-405-cot
gpt-4-0613
gpt-4-turbo-2024-04-09
llama3-405
gpt-4o
claude-3-opus-20240229
gpt-3.5-turbo-0613+cot
codellama-34b+cot
codetulu-2-34b
gpt-3.5-turbo-0613
codellama-13b+cot
codellama-34b
phind
deepseek-base-33b
deepseek-instruct-33b
codellama-python-34b
wizard-34b
codellama-13b
deepseek-base-6.7b
magicoder-ds-7b
codellama-7b+cot
codellama-python-13b
mixtral-8x7b
deepseek-instruct-6.7b
codellama-python-7b
wizard-13b
codellama-7b
mistral-7b
phi-2
starcoderbase-16b
starcoderbase-7b
deepseek-base-1.3b
deepseek-instruct-1.3b
phi-1.5
phi-1
CRUXEval-input/112
CRUXEval-input/545
CRUXEval-input/391
CRUXEval-input/491
CRUXEval-input/570
CRUXEval-input/640
CRUXEval-input/517
CRUXEval-input/189
CRUXEval-input/399
CRUXEval-input/211
CRUXEval-input/703
CRUXEval-input/672
CRUXEval-input/555
CRUXEval-input/367
CRUXEval-input/776
CRUXEval-input/7
CRUXEval-input/775
CRUXEval-input/114
CRUXEval-input/643
CRUXEval-input/337
CRUXEval-input/71
CRUXEval-input/694
CRUXEval-input/388
CRUXEval-input/357
CRUXEval-input/18
CRUXEval-input/727
CRUXEval-input/35
CRUXEval-input/126
CRUXEval-input/658
CRUXEval-input/471
CRUXEval-input/751
CRUXEval-input/629
CRUXEval-input/593
CRUXEval-input/323
CRUXEval-input/1
CRUXEval-input/659
CRUXEval-input/441
CRUXEval-input/255
CRUXEval-input/0
CRUXEval-input/426
CRUXEval-input/169
CRUXEval-input/524
CRUXEval-input/353
CRUXEval-input/127
CRUXEval-input/589
CRUXEval-input/22
CRUXEval-input/725
CRUXEval-input/397
CRUXEval-input/632
CRUXEval-input/122
CRUXEval-input/605
CRUXEval-input/143
CRUXEval-input/519
CRUXEval-input/781
0
0.2
0.4
0.6
0.8
1
pass1_ex
model
example_id
plotly-logomark