gpt-4-turbo-2024-04-09+cot
claude-3-opus-20240229+cot
gpt-4-0613+cot
gpt-4o+cot
llama3-405-cot
gpt-4o
gpt-4-0613
gpt-4-turbo-2024-04-09
claude-3-opus-20240229
llama3-405
gpt-3.5-turbo-0613+cot
deepseek-instruct-33b
gpt-3.5-turbo-0613
deepseek-base-33b
codetulu-2-34b
magicoder-ds-7b
codellama-34b+cot
deepseek-base-6.7b
wizard-34b
codellama-34b
codellama-python-34b
wizard-13b
deepseek-instruct-6.7b
mixtral-8x7b
codellama-python-13b
codellama-13b
phind
codellama-13b+cot
codellama-python-7b
mistral-7b
codellama-7b
starcoderbase-16b
phi-2
starcoderbase-7b
deepseek-base-1.3b
codellama-7b+cot
deepseek-instruct-1.3b
phi-1.5
phi-1
CRUXEval-output/112
CRUXEval-output/340
CRUXEval-output/35
CRUXEval-output/136
CRUXEval-output/158
CRUXEval-output/240
CRUXEval-output/376
CRUXEval-output/159
CRUXEval-output/624
CRUXEval-output/58
CRUXEval-output/303
CRUXEval-output/712
CRUXEval-output/73
CRUXEval-output/598
CRUXEval-output/31
CRUXEval-output/17
CRUXEval-output/754
CRUXEval-output/53
CRUXEval-output/360
CRUXEval-output/50
CRUXEval-output/463
CRUXEval-output/132
CRUXEval-output/734
CRUXEval-output/45
CRUXEval-output/55
CRUXEval-output/583
CRUXEval-output/105
CRUXEval-output/743
CRUXEval-output/536
CRUXEval-output/356
CRUXEval-output/329
CRUXEval-output/508
CRUXEval-output/593
CRUXEval-output/52
CRUXEval-output/615
CRUXEval-output/47
CRUXEval-output/367
CRUXEval-output/16
CRUXEval-output/189
CRUXEval-output/96
CRUXEval-output/641
CRUXEval-output/56
CRUXEval-output/176
CRUXEval-output/63
CRUXEval-output/351
CRUXEval-output/675
CRUXEval-output/187
CRUXEval-output/428
CRUXEval-output/628
CRUXEval-output/116
CRUXEval-output/279
CRUXEval-output/431
CRUXEval-output/579
CRUXEval-output/7
0
0.2
0.4
0.6
0.8
1
pass1_ex
model
example_id
plotly-logomark