Skip to content

Commit 834647f

Browse files
Merge pull request #87 from Erotemic/lint-and-autoformat
Lint and autoformat
2 parents 175a7b2 + 71074d8 commit 834647f

42 files changed

Lines changed: 2911 additions & 2191 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

every_eval_ever/__init__.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,12 @@
55
import importlib
66
from typing import Any
77

8-
__all__ = ["eval_types", "instance_level_types"]
8+
__all__ = ['eval_types', 'instance_level_types']
99

1010

1111
def __getattr__(name: str) -> Any:
12-
if name in {"eval_types", "instance_level_types"}:
13-
module = importlib.import_module(f".{name}", __name__)
12+
if name in {'eval_types', 'instance_level_types'}:
13+
module = importlib.import_module(f'.{name}', __name__)
1414
globals()[name] = module
1515
return module
16-
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
16+
raise AttributeError(f'module {__name__!r} has no attribute {name!r}')

every_eval_ever/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
from .cli import main
22

3-
if __name__ == "__main__":
3+
if __name__ == '__main__':
44
raise SystemExit(main())

every_eval_ever/check_duplicate_entries.py

Lines changed: 40 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4,30 +4,32 @@
44
import os
55
from typing import Any, Dict, List
66

7-
IGNORE_KEYS = {"retrieved_timestamp", "evaluation_id"}
7+
IGNORE_KEYS = {'retrieved_timestamp', 'evaluation_id'}
88

99

1010
def expand_paths(paths: List[str]) -> List[str]:
1111
"""Expand folders to file paths."""
1212
file_paths: List[str] = []
1313
for path in paths:
14-
if os.path.isfile(path) and path.endswith(".json"):
14+
if os.path.isfile(path) and path.endswith('.json'):
1515
file_paths.append(path)
1616
elif os.path.isdir(path):
1717
for root, _, file_names in os.walk(path):
1818
for file_name in file_names:
19-
if file_name.endswith(".json"):
19+
if file_name.endswith('.json'):
2020
file_paths.append(os.path.join(root, file_name))
2121
else:
22-
raise Exception(f"Could not find file or directory at path: {path}")
22+
raise Exception(f'Could not find file or directory at path: {path}')
2323
return file_paths
2424

2525

2626
def annotate_error(file_path: str, message: str, **kwargs) -> None:
2727
"""If run in GitHub Actions, annotate errors."""
28-
if os.environ.get("GITHUB_ACTION"):
29-
joined_kwargs = "".join(f",{key}={value}" for key, value in kwargs.items())
30-
print(f"::error file={file_path}{joined_kwargs}::{message}")
28+
if os.environ.get('GITHUB_ACTION'):
29+
joined_kwargs = ''.join(
30+
f',{key}={value}' for key, value in kwargs.items()
31+
)
32+
print(f'::error file={file_path}{joined_kwargs}::{message}')
3133

3234

3335
def normalize_list(items: List[Any]) -> List[Any]:
@@ -36,7 +38,7 @@ def normalize_list(items: List[Any]) -> List[Any]:
3638
return sorted(
3739
normalized_items,
3840
key=lambda item: json.dumps(
39-
item, sort_keys=True, separators=(",", ":"), ensure_ascii=True
41+
item, sort_keys=True, separators=(',', ':'), ensure_ascii=True
4042
),
4143
)
4244

@@ -58,80 +60,84 @@ def normalized_hash(payload: Dict[str, Any]) -> str:
5860
encoded = json.dumps(
5961
normalized,
6062
sort_keys=True,
61-
separators=(",", ":"),
63+
separators=(',', ':'),
6264
ensure_ascii=True,
6365
)
64-
return hashlib.sha256(encoded.encode("utf-8")).hexdigest()
66+
return hashlib.sha256(encoded.encode('utf-8')).hexdigest()
6567

6668

6769
def main(argv: List[str] | None = None) -> int:
6870
parser = argparse.ArgumentParser(
69-
prog="check_duplicate_entries",
70-
description="Detects duplicate evaluation entries ignoring scrape timestamp fields.",
71+
prog='check_duplicate_entries',
72+
description='Detects duplicate evaluation entries ignoring scrape timestamp fields.',
7173
)
7274
parser.add_argument(
73-
"paths", nargs="+", type=str, help="File or folder paths to JSON data"
75+
'paths', nargs='+', type=str, help='File or folder paths to JSON data'
7476
)
7577
args = parser.parse_args(argv)
7678

7779
file_paths = expand_paths(args.paths)
7880
print()
79-
print(f"Checking {len(file_paths)} JSON files for duplicates...")
81+
print(f'Checking {len(file_paths)} JSON files for duplicates...')
8082
print()
8183

8284
groups: Dict[str, List[Dict[str, Any]]] = {}
8385
for file_path in file_paths:
8486
try:
85-
with open(file_path, "r") as f:
87+
with open(file_path, 'r') as f:
8688
payload = json.load(f)
8789
except json.JSONDecodeError as e:
88-
message = f"JSONDecodeError: {str(e)}"
90+
message = f'JSONDecodeError: {str(e)}'
8991
annotate_error(
9092
file_path,
9193
message,
92-
title="JSONDecodeError",
94+
title='JSONDecodeError',
9395
col=e.colno,
9496
line=e.lineno,
9597
)
96-
print(f"{file_path}")
97-
print(" " + message)
98+
print(f'{file_path}')
99+
print(' ' + message)
98100
print()
99101
raise
100102

101103
entry_hash = normalized_hash(payload)
102104
groups.setdefault(entry_hash, []).append(
103105
{
104-
"path": file_path,
105-
"evaluation_id": payload.get("evaluation_id"),
106-
"retrieved_timestamp": payload.get("retrieved_timestamp"),
106+
'path': file_path,
107+
'evaluation_id': payload.get('evaluation_id'),
108+
'retrieved_timestamp': payload.get('retrieved_timestamp'),
107109
}
108110
)
109111

110-
duplicate_groups = [entries for entries in groups.values() if len(entries) > 1]
112+
duplicate_groups = [
113+
entries for entries in groups.values() if len(entries) > 1
114+
]
111115
if not duplicate_groups:
112-
print("No duplicates found.")
116+
print('No duplicates found.')
113117
print()
114118
return 0
115119

116-
ignore_label = ", ".join(f"`{key}`" for key in sorted(IGNORE_KEYS))
117-
print(f"Found duplicate entries (ignoring keys: {ignore_label}).")
120+
ignore_label = ', '.join(f'`{key}`' for key in sorted(IGNORE_KEYS))
121+
print(f'Found duplicate entries (ignoring keys: {ignore_label}).')
118122
print()
119123

120124
for index, entries in enumerate(duplicate_groups, start=1):
121-
print(f"Duplicate group {index} ({len(entries)} files):")
125+
print(f'Duplicate group {index} ({len(entries)} files):')
122126
for entry in entries:
123-
print(f" - {entry['path']}")
124-
print(f" evaluation_id: {entry.get('evaluation_id')}")
125-
print(f" retrieved_timestamp: {entry.get('retrieved_timestamp')}")
127+
print(f' - {entry["path"]}')
128+
print(f' evaluation_id: {entry.get("evaluation_id")}')
129+
print(
130+
f' retrieved_timestamp: {entry.get("retrieved_timestamp")}'
131+
)
126132
annotate_error(
127-
entry["path"],
128-
"Duplicate entry detected (ignoring `evaluation_id` and `retrieved_timestamp`).",
129-
title="DuplicateEntry",
133+
entry['path'],
134+
'Duplicate entry detected (ignoring `evaluation_id` and `retrieved_timestamp`).',
135+
title='DuplicateEntry',
130136
)
131137
print()
132138

133139
return 1
134140

135141

136-
if __name__ == "__main__":
142+
if __name__ == '__main__':
137143
raise SystemExit(main())

0 commit comments

Comments (0)