Spaces:
Sleeping
Sleeping
Update README.md
Browse files
README.md
CHANGED
|
@@ -43,7 +43,7 @@ The Code Eval metric calculates how good are predictions given a set of referenc
|
|
| 43 |
|
| 44 |
`predictions`: a list of candidates to evaluate. Each candidate should be a list of strings with several code candidates to solve the problem.
|
| 45 |
|
| 46 |
-
`references`: a list of
|
| 47 |
|
| 48 |
`k`: number of code candidates to consider in the evaluation. The default value is `[1, 10, 100]`.
|
| 49 |
|
|
@@ -54,7 +54,7 @@ The Code Eval metric calculates how good are predictions given a set of referenc
|
|
| 54 |
```python
|
| 55 |
from evaluate import load
|
| 56 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 57 |
-
references = [
|
| 58 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
| 59 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 60 |
```
|
|
@@ -86,7 +86,7 @@ Full match at `k=1`:
|
|
| 86 |
```python
|
| 87 |
from evaluate import load
|
| 88 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 89 |
-
references = [
|
| 90 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
| 91 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 92 |
print(pass_at_k)
|
|
@@ -98,7 +98,7 @@ No match for k = 1:
|
|
| 98 |
```python
|
| 99 |
from evaluate import load
|
| 100 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 101 |
-
references = [
|
| 102 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
| 103 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 104 |
print(pass_at_k)
|
|
@@ -110,7 +110,7 @@ Partial match at k=1, full match at k=2:
|
|
| 110 |
```python
|
| 111 |
from evaluate import load
|
| 112 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 113 |
-
references = [
|
| 114 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))", "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
| 115 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 116 |
print(pass_at_k)
|
|
|
|
| 43 |
|
| 44 |
`predictions`: a list of candidates to evaluate. Each candidate should be a list of strings with several code candidates to solve the problem.
|
| 45 |
|
| 46 |
+
`references`: a list of dicts (`Dict[str, str]`), one per problem. Each dict has two keys, `"input"` and `"reference_output"`, e.g. `[{"input": "1 2", "reference_output": "3"}]`.
|
| 47 |
|
| 48 |
`k`: number of code candidates to consider in the evaluation. The default value is `[1, 10, 100]`.
|
| 49 |
|
|
|
|
| 54 |
```python
|
| 55 |
from evaluate import load
|
| 56 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 57 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
| 58 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
| 59 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 60 |
```
|
|
|
|
| 86 |
```python
|
| 87 |
from evaluate import load
|
| 88 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 89 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
| 90 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))"]]
|
| 91 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 92 |
print(pass_at_k)
|
|
|
|
| 98 |
```python
|
| 99 |
from evaluate import load
|
| 100 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 101 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
| 102 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
| 103 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 104 |
print(pass_at_k)
|
|
|
|
| 110 |
```python
|
| 111 |
from evaluate import load
|
| 112 |
code_eval_stdio = load("hage2000/code_eval_stdio")
|
| 113 |
+
references = [{"input":"2 3", "reference_output":"5"}]
|
| 114 |
candidates = [[ "nums = list(map(int, input().split()))\nprint(sum(nums))", "nums = list(map(int, input().split()))\nprint(nums[0]*nums[1])"]]
|
| 115 |
pass_at_k, results = code_eval_stdio.compute(references=references, predictions=candidates, k=[1, 2])
|
| 116 |
print(pass_at_k)
|