Skip to content

Commit 4e5dc72

Browse files
authored
Update run_locate.py
1 parent ccefefc commit 4e5dc72

1 file changed

Lines changed: 1 addition & 29 deletions

File tree

sweagent/run/run_locate.py

Lines changed: 1 addition & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -95,18 +95,6 @@ def filter_instances_by_ids(instances: list[BatchInstance], target_ids: set[str]
95 95
return filtered_instances
96 96

97 97
def deduplicate_patches(sample_paths: list[Path], output_file: Path, logger, min_ratio: float = 0.5) -> dict:
98-
"""
99-
从多个sample的patch文件中去重
100-
去重规则:
101-
- 如果file_path相同且current_code的最长连续匹配行数 >= 较短代码长度的min_ratio,认为是重复
102-
Args:
103-
sample_paths: 样本目录列表
104-
output_file: 输出的jsonl文件路径
105-
logger: 日志记录器
106-
min_ratio: 最小匹配比例(默认0.5,即二分之一)
107-
Returns:
108-
去重统计信息
109-
"""
110 98
all_modifications = []
111 99
unique_modifications = []
112 100
total_count = 0
@@ -185,14 +173,6 @@ def are_modifications_similar(mod1: dict, mod2: dict, min_ratio: float = 0.5) ->
185 173
return is_similar, details
186 174

187 175
def find_longest_consecutive_match(code1: str, code2: str) -> tuple[int, int]:
188-
"""
189-
找到两段代码中最长的连续匹配行数
190-
Args:
191-
code1: 第一段代码
192-
code2: 第二段代码
193-
Returns:
194-
(最长连续匹配的行数, code1的总行数)
195-
"""
196 176
lines1 = get_code_lines(code1)
197 177
lines2 = get_code_lines(code2)
198 178
if not lines1 or not lines2:
@@ -209,14 +189,6 @@ def get_code_lines(code: str) -> list[str]:
209 189
return [line.strip() for line in code.strip().split('\n') if line.strip()]
210 190

211 191
def deduplicate_patches_by_first_path_and_count(sample_paths: list[Path], output_file: Path, logger, min_unique: int = 2) -> dict:
212-
"""
213-
直接从 sample1-sampleN 读取 .patch,按 (第一个 file_path, files_to_modify 数量) 去重。
214-
仅当去重后 unique >= min_unique 才写出 output_file,否则跳过不生成文件。
215-
输出 jsonl,每行:
216-
{"files_to_modify": [...], "source_sample": "xxx-sampleK"}
217-
Returns:
218-
统计信息 dict
219-
"""
220 192
all_patches = []
221 193
total_read = 0
222 194
invalid_count = 0
@@ -1074,4 +1046,4 @@ def run_from_cli(args: list[str] | None = None):
1074 1046
run_from_config(BasicCLI(RunBatchConfig, help_text=help_text).get_config(args))
1075 1047

1076 1048
if __name__ == "__main__":
1077-
run_from_cli()
1049+
run_from_cli()

0 commit comments

Comments
 (0)