Fsoft-AIC/RepoExec
数据集概述
RepoExec 是一个新颖的基准测试,以可执行性和正确性为重点,在仓库级别评估代码生成。该基准测试强调现实世界的适用性,并对代码功能进行全面评估,旨在为更可靠、更实用的 CodeLLMs(代码大语言模型)铺平道路。
支持的任务
RepoExec 支持仓库级别的代码生成任务,重点关注生成代码的可执行性、通过测试用例验证的正确性,以及对跨文件依赖上下文的利用。
语言
目前,RepoExec 支持 Python 仓库。
数据集结构
数据实例
json { "id": 0, "project": "test-apps/python-string-utils", "module": "string_utils.manipulation", "entry_point": "reverse", "solution": "def reverse(input_string: str) -> str: """ Returns the string with its chars reversed.
*Example:*
>>> reverse(hello) # returns olleh
:param input_string: String to revert.
:type input_string: str
:return: Reversed string.
"""
if not is_string(input_string):
raise InvalidInputError(input_string)
return input_string[::-1]",
"prompt": "import base64
import random import unicodedata import zlib from typing import Union from uuid import uuid4 from ._regex import * from .errors import InvalidInputError from .validation import is_snake_case, is_full_string, is_camel_case, is_integer, is_string
class InvalidInputError(TypeError): """ Custom error raised when received object is not a string as expected. """
def __init__(self, input_data: Any):
"""
:param input_data: Any received object
"""
type_name = type(input_data).__name__
msg = Expected "str", received "{}".format(type_name)
super().__init__(msg)
def is_string(obj: Any) -> bool: """ Checks if an object is a string.
*Example:*
>>> is_string(foo) # returns true
>>> is_string(bfoo) # returns false
:param obj: Object to test.
:return: True if string, false otherwise.
"""
return isinstance(obj, str)
def reverse(input_string: str) -> str: """ Returns the string with its chars reversed.
*Example:*
>>> reverse(hello) # returns olleh
:param input_string: String to revert.
:type input_string: str
:return: Reversed string.
"""
", "target_function_prompt": "def reverse(input_string: str) -> str: """ Returns the string with its chars reversed.
*Example:*
>>> reverse(hello) # returns olleh
:param input_string: String to revert.
:type input_string: str
:return: Reversed string.
"""
", "function_signature": "def reverse(input_string: str) -> str:", "docstring": " Returns the string with its chars reversed.
Example:
reverse(hello) # returns olleh
:param input_string: String to revert. :type input_string: str :return: Reversed string. ", "original_docstring": """" Returns the string with its chars reversed.
Example:
reverse(hello) # returns olleh
:param input_string: String to revert. :type input_string: str :return: Reversed string. """", "docstring_tokens": [ "Returns", "the", "string", "with", "its", "chars", "reversed", ".", "", "Example", ":", "", ">>>", "reverse", "(", "", "hello", "", ")", "#", "returns", "", "olleh", "", ":", "param", "input_string", ":", "String", "to", "revert", ".", ":", "type", "input_string", ":", "str", ":", "return", ":", "Reversed", "string", "." ], "cross_context": true, "isContained": false, "raw_solution": "def reverse(input_string: str) -> str: """ Returns the string with its chars reversed.
*Example:*
>>> reverse(hello) # returns olleh
:param input_string: String to revert.
:type input_string: str
:return: Reversed string.
"""
if not is_string(input_string):
raise InvalidInputError(input_string)
return input_string[::-1]",
"check": "
import sys sys.path.insert(1, "/input/test-apps/python-string-utils") import unittest, pytest import math import random import re import copy import datetime import itertools import collections import heapq import statistics import functools import hashlib import numpy import numpy as np import string from typing import * from collections import * import pickle import timeout_decorator
all = [ camel_case_to_snake, snake_case_to_camel, reverse, shuffle, strip_html, prettify, asciify, slugify, booleanize, strip_margin, compress, decompress, roman_encode, roman_decode, ]
import base64 import random import unicodedata import zlib from typing import Union from uuid import uuid4
from string_utils._regex import * from string_utils.errors import InvalidInputError from string_utils.validation import is_snake_case, is_full_string, is_camel_case, is_integer, is_string
class __RomanNumbers: # internal rule mappings for encode() __mappings = [ # units {1: I, 5: V}, # tens {1: X, 5: L}, # hundreds {1: C, 5: D}, # thousands {1: M}, ]
# swap key/value definitions for decode()
__reversed_mappings = [{v: k for k, v in m.items()} for m in __mappings]
@classmethod
def __encode_digit(cls, index: int, value: int) -> str:
# if digit is zero, there is no sign to display
if value == 0:
return
# from 1 to 3 we have just to repeat the sign N times (eg: III, XXX...)
if value <= 3:
return cls.__mappings[index][1] * value
# if 4 we have to add unit prefix
if value == 4:
return cls.__mappings[index][1] + cls.__mappings[index][5]
# if is 5, is a straight map
if value == 5:
return cls.__mappings[index][5]
# if 6, 7 or 8 we have to append unit suffixes
if value <= 8:
suffix = cls.__mappings[index][1] * (value - 5)
return cls.__mappings[index][5] + suffix
# if 9 we have to prepend current unit to next
return cls.__mappings[index][1] + cls.__mappings[index + 1][1]
@classmethod
def encode(cls, input_number: Union[str, int]) -> str:
# force input conversion to a string (we need it in order to iterate on each digit)
input_string = str(input_number)
if not is_integer(input_string):
raise ValueError(Invalid input, only strings or integers are allowed)
value = int(input_string)
if value < 1 or value > 3999:
raise ValueError(Input must be >= 1 and <= 3999)
input_len = len(input_string)
output =
# decode digits from right to left (start from units to thousands)
for index in range(input_len):
# get actual digit value as int
digit = int(input_string[input_len - index - 1])
# encode digit to roman string
encoded_digit = cls.__encode_digit(index, digit)
# prepend encoded value to the current output in order to have the final string sorted
# from thousands to units
output = encoded_digit + output
return output
@classmethod
def __index_for_sign(cls, sign: str) -> int:
for index, mapping in enumerate(cls.__reversed_mappings):
if sign in mapping:
return index
raise ValueError(Invalid token found: "{}".format(sign))
@classmethod
def decode(cls, input_string: str) -> int:
if not is_full_string(input_string):
raise ValueError(Input must be a non empty string)
# reverse the provided string so that we can start parsing from units to thousands
reversed_string = reverse(input_string.upper())
# track last used value
last_value = None
# computed number to return
output = 0
# for each sign in the string we get its numeric value and add or subtract it to the computed output
for sign in reversed_string:
# are we dealing with units, tens, hundreds or thousands?
index = cls.__index_for_sign(sign)
# its basically 1 or 5 (based on mapping rules definitions)
key_value = cls.__reversed_mappings[index][sign]
# Based on the level (tens, hundreds...) we have to add as many zeroes as the level into which we are
# in order to have the actual sign value.
# For instance, if we are at level 2 we are dealing with hundreds, therefore instead of 1 or 5, we will
# obtain 100 or 500 by adding 2 zeroes
sign_value = int(str(key_value) + 0 * index)
# increase total value if we are moving on with level
if last_value is None or sign_value >= last_value:
output += sign_value
# Decrease value if we are back to a previous level
# For instance, if we are parsing "IX", we first encounter "X" which is ten then "I" which is unit,
# So we have to do the following operation in order to get 9 (the final result): 10 - 1
else:
output -= sign_value
last_value = sign_value
return output
class __StringCompressor:
@staticmethod
def __require_valid_input_and_encoding(input_string: str, encoding: str):
if not is_string(input_string):
raise InvalidInputError(input_string)
if len(input_string) == 0:
raise ValueError(Input string cannot be empty)
if not is_string(encoding):
raise ValueError(Invalid encoding)
@classmethod
def compress(cls, input_string: str, encoding: str = utf-8, compression_level: int = 9) -> str:
cls.__require_valid_input_and_encoding(input_string, encoding)
if not isinstance(compression_level, int) or compression_level < 0 or compression_level > 9:
raise ValueError(Invalid compression_level: it must be an "int" between 0 and 9)
# turns input string into a sequence of bytes using provided encoding
original_bytes = input_string.encode(encoding)
# compress bytes using zlib library
compressed_bytes = zlib.compress(original_bytes, compression_level)
# encode compressed bytes using base64
# (this ensure that all characters will be available and that the output string can be used safely in any
# context such URLs)
encoded_bytes = base64.urlsafe_b64encode(compressed_bytes)
# finally turns base64 bytes into a string
output = encoded_bytes.decode(encoding)
return output
@classmethod
def decompress(cls, input_string: str, encoding: str = utf-8) -> str:
cls.__require_valid_input_and_encoding(input_string, encoding)
# turns input string into a sequence of bytes
# (the string is assumed to be a previously compressed string, therefore we have to decode it using base64)
input_bytes = base64.urlsafe_b64decode(input_string)
# decompress bytes using zlib
decompressed_bytes = zlib.decompress(input_bytes)
# decode the decompressed bytes to get the original string back
original_string = decompressed_bytes.decode(encoding)
return original_string
class __StringFormatter: def init(self, input_string): if not is_string(input_string): raise InvalidInputError(input_string)
self.input_string = input_string
def __uppercase_first_char(self, regex_match):
return regex_match.group(0).upper()
def __remove_duplicates(self, regex_match):
return regex_match.group(1)[0]
def __uppercase_first_letter_after_sign(self, regex_match):
match = regex_match.group(1)
return match[:-1] + match[2].upper()
def __ensure_right_space_only(self, regex_match):
return regex_match.group(1).strip() +
def __ensure_left_space_only(self, regex_match):
return + regex_match.group(1).strip()
def __ensure_spaces_around(self, regex_match):
return + regex_match.group(1).strip() +
def __remove_internal_spaces(self, regex_match):
return regex_match.group(1).strip()
def __fix_saxon_genitive(self, regex_match):
return regex_match.group(1).replace( , ) +
# generates a placeholder to inject temporary into the string, it will be replaced with the original
# value at the end of the process
@staticmethod
def __placeholder_key():
return $ + uuid4().hex + $
def format(self) -> str:
# map of temporary placeholders
placeholders = {}
out = self.input_string
# looks for url or email and updates placeholders map with found values
placeholders.update({self.__placeholder_key(): m[0] for m in URLS_RE.findall(out)})
placeholders.update({self.__placeholder_key(): m for m in EMAILS_RE.findall(out)})
# replace original value with the placeholder key
for p in



