Skip to content

Commit 96528b1

Browse files
committed
added known list of modules to use for fuzzy matching
1 parent 0709173 commit 96528b1

1 file changed

Lines changed: 270 additions & 0 deletions

File tree

mypy/known_modules.py

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
"""Known Python module names for fuzzy matching import suggestions.
2+
3+
This module provides a curated list of popular Python package import names
4+
for suggesting corrections when a user mistypes an import statement.
5+
6+
Sources:
7+
- Python standard library (typeshed/stdlib/VERSIONS)
8+
- Top 200 PyPI packages by downloads (https://github.com/hugovk/top-pypi-packages)
9+
10+
Note: These are import names, not PyPI package names.
11+
"""
12+
13+
from __future__ import annotations
14+
15+
from typing import Final
16+
17+
from mypy.modulefinder import StdlibVersions
18+
19+
# Top-level *import* names of widely used third-party packages, grouped by
# domain for readability. Every entry must be a name that can appear directly
# after ``import``; PyPI distribution names that differ from the import name
# do not belong here (e.g. Pillow is listed as "PIL", PyJWT as "jwt",
# python-dotenv as "dotenv", dnspython as "dns"), because a distribution name
# offered as an import suggestion would never resolve.
POPULAR_THIRD_PARTY_MODULES: Final[frozenset[str]] = frozenset({
    # Cloud
    "boto3",
    "botocore",
    "aiobotocore",
    "s3transfer",
    "s3fs",
    "awscli",

    # HTTP / Networking
    "urllib3",
    "requests",
    "certifi",
    "idna",
    "charset_normalizer",
    "httpx",
    "httpcore",
    "aiohttp",
    "yarl",
    "multidict",
    "requests_oauthlib",
    "oauthlib",
    "websocket",
    "websockets",
    "h11",
    "sniffio",
    "requests_toolbelt",
    "httplib2",

    # Typing / Extensions
    "typing_extensions",
    "mypy_extensions",
    "annotated_types",
    "typing_inspection",

    # Core Utilities
    "setuptools",
    "packaging",
    "pip",
    "wheel",
    "virtualenv",
    "platformdirs",
    "filelock",
    "zipp",
    "importlib_metadata",
    "importlib_resources",
    "distlib",
    "distro",
    "appdirs",

    # Data Science / Numerical
    "numpy",
    "pandas",
    "scipy",
    "sklearn",
    "matplotlib",
    "pyarrow",
    "networkx",
    "joblib",
    "threadpoolctl",
    "kiwisolver",
    "fontTools",
    "dill",
    "cloudpickle",

    # Serialization / Config
    "yaml",
    "pydantic",
    "pydantic_core",
    "pydantic_settings",
    "attrs",
    "tomli",
    "tomlkit",
    "jsonschema",
    "jsonschema_specifications",
    "jsonpointer",
    "jmespath",
    "msgpack",
    "isodate",
    "ruamel",

    # Cryptography / Security
    "cryptography",
    "cffi",
    "pycparser",
    "rsa",
    "jwt",  # import name of the PyJWT distribution
    "pyasn1",
    "pyasn1_modules",
    "OpenSSL",
    "nacl",
    "bcrypt",
    "asn1crypto",
    "paramiko",
    "secretstorage",
    "msal",
    "msal_extensions",
    "keyring",

    # Date / Time
    "dateutil",
    "pytz",
    "tzdata",
    "tzlocal",

    # Google
    # "google" is the shared namespace package; it also covers the protobuf
    # and googleapis-common-protos distributions, which install under it.
    "google",
    "google_auth_oauthlib",
    "google_auth_httplib2",
    "google_crc32c",
    "googleapiclient",
    "grpc",
    "grpc_status",
    "grpc_tools",
    "proto",

    # Testing
    "pytest",
    "pluggy",
    "iniconfig",
    "coverage",
    "exceptiongroup",

    # CLI / Terminal
    "click",
    "typer",
    "colorama",
    "rich",
    "tqdm",
    "tabulate",
    "prompt_toolkit",
    "shellingham",
    "wcwidth",

    # Web Frameworks
    "flask",
    "werkzeug",
    "itsdangerous",
    "blinker",
    "fastapi",
    "starlette",
    "uvicorn",

    # Templates / Markup
    "jinja2",
    "markupsafe",
    "pygments",
    "markdown_it",
    "mdurl",
    "docutils",

    # Async
    "anyio",
    "greenlet",
    "aiosignal",
    "aiohappyeyeballs",
    "async_timeout",

    # Database
    "sqlalchemy",
    "alembic",
    "redis",
    "psycopg2",

    # Parsing / XML
    "lxml",
    "bs4",
    "soupsieve",
    "pyparsing",
    "regex",
    "et_xmlfile",

    # OpenTelemetry
    "opentelemetry",

    # Azure
    "azure",

    # Other Popular Modules
    "six",
    "fsspec",
    "wrapt",
    "propcache",
    "rpds",
    "pathspec",
    "PIL",  # import name of the Pillow distribution
    "psutil",
    "referencing",
    "trove_classifiers",
    "openpyxl",
    "tenacity",
    "more_itertools",
    "sortedcontainers",
    "decorator",
    "ptyprocess",
    "pexpect",
    "hatchling",
    "dotenv",  # import name of the python-dotenv distribution
    "huggingface_hub",
    "transformers",
    "openai",
    "langsmith",
    "dns",  # import name of the dnspython distribution
    "git",
    "gitdb",
    "smmap",
    "deprecated",
    "chardet",
    "backoff",
    "ruff",
    "setuptools_scm",
    "pyproject_hooks",
    "jiter",
    "yandexcloud",
    "aliyunsdkcore",
    "uritemplate",
    "kubernetes",
    "snowflake",
    "multipart",
})
245+
246+
247+
def get_stdlib_modules(
    stdlib_versions: StdlibVersions,
    python_version: tuple[int, int] | None = None,
) -> frozenset[str]:
    """Return the top-level names of the modules listed in *stdlib_versions*.

    Args:
        stdlib_versions: Mapping from a (possibly dotted) module name to a
            ``(min_version, max_version)`` pair. ``max_version`` may be
            ``None``, in which case no upper bound is applied.
        python_version: When given, only modules whose version range contains
            this version (both bounds inclusive) are kept. When ``None``,
            every module in the mapping is kept.

    Returns:
        The first dotted component of every kept module name, de-duplicated.
    """
    return frozenset(
        # Submodules such as "os.path" collapse onto their top-level package.
        dotted_name.partition(".")[0]
        for dotted_name, (first_version, last_version) in stdlib_versions.items()
        if python_version is None
        or (
            first_version <= python_version
            and (last_version is None or python_version <= last_version)
        )
    )
261+
262+
263+
def get_known_modules(
    stdlib_versions: StdlibVersions | None = None,
    python_version: tuple[int, int] | None = None,
) -> frozenset[str]:
    """Return every module name available for fuzzy matching.

    The result always contains ``POPULAR_THIRD_PARTY_MODULES``. When
    *stdlib_versions* is supplied, the names produced by
    ``get_stdlib_modules(stdlib_versions, python_version)`` are added too.

    Args:
        stdlib_versions: Optional standard-library version table, passed
            through to ``get_stdlib_modules``. ``None`` means third-party
            names only.
        python_version: Optional version passed through to
            ``get_stdlib_modules``; unused when *stdlib_versions* is ``None``.

    Returns:
        An immutable set of module names.
    """
    third_party = frozenset(POPULAR_THIRD_PARTY_MODULES)
    if stdlib_versions is None:
        return third_party
    stdlib_names = get_stdlib_modules(stdlib_versions, python_version)
    return third_party.union(stdlib_names)

0 commit comments

Comments
 (0)