-
Notifications
You must be signed in to change notification settings - Fork 56
Expand file tree
/
Copy pathglob.py
More file actions
149 lines (136 loc) · 5.23 KB
/
glob.py
File metadata and controls
149 lines (136 loc) · 5.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
"""
This module is a copy of `glob.translate` from the Python standard
library, together with the private `fnmatch._translate` helper it
depends on. It is included here because `glob.translate` was only added
in Python 3.13 and is not available in earlier versions.
https://github.com/python/cpython/blob/main/Lib/glob.py
https://github.com/python/cpython/blob/main/Lib/fnmatch.py
https://github.com/python/cpython/blob/main/Lib/functools.py
"""
import os
import re
from glob import fnmatch
def translate(pat, *, recursive=False, include_hidden=False, seps=None):
    """Convert a shell-style pathname pattern into a regular expression.

    The pattern is split on path separators and each segment is translated
    on its own, so ordinary wildcards never match across a separator.

    Args:
        pat: The glob pattern to translate.
        recursive: When true, a segment that is exactly '**' matches any
            number of path segments (including none).
        include_hidden: When true, wildcards may match segments that begin
            with a dot ('.'); otherwise such segments are excluded.
        seps: Separator characters used both to split `pat` and to match
            separators in the target path. When empty or None, os.path.sep
            and (if defined) os.path.altsep are used.

    Returns:
        A regular-expression string of the form ``(?s:...)\\Z``.
    """
    if not seps:
        seps = (os.path.sep, os.path.altsep) if os.path.altsep else os.path.sep

    escaped_seps = "".join(re.escape(sep) for sep in seps)
    # A single separator is matched literally; several need a character class.
    any_sep = escaped_seps if len(seps) <= 1 else f"[{escaped_seps}]"
    not_sep = f"[^{escaped_seps}]"

    if include_hidden:
        last_segment = f"{not_sep}+"
        segments = f"(?:.+{any_sep})?"
        last_segments = ".*"
    else:
        # The first character of every segment must not be a dot.
        last_segment = f"[^{escaped_seps}.]{not_sep}*"
        segments = f"(?:{last_segment}{any_sep})*"
        last_segments = f"{segments}(?:{last_segment})?"
    segment = f"{last_segment}{any_sep}"

    parts = re.split(any_sep, pat)
    final_idx = len(parts) - 1
    pieces = []
    for idx, part in enumerate(parts):
        is_last = idx == final_idx

        if part == "*":
            pieces.append(last_segment if is_last else segment)
            continue

        if recursive and part == "**":
            if is_last:
                pieces.append(last_segments)
            elif parts[idx + 1] != "**":
                # Consecutive '**' segments collapse: only the last of a run
                # contributes to the expression.
                pieces.append(segments)
            continue

        if part:
            if part[0] in "*?" and not include_hidden:
                # A leading wildcard must not match a hidden (dot) name.
                pieces.append(r"(?!\.)")
            pieces += _translate(part, f"{not_sep}*", not_sep)[0]
        if not is_last:
            pieces.append(any_sep)

    body = "".join(pieces)
    return rf"(?s:{body})\Z"
_re_setops_sub = re.compile(r"([&~|])").sub
def _translate(pat, star, question_mark):
res = []
add = res.append
star_indices = []
i, n = 0, len(pat)
while i < n:
c = pat[i]
i = i + 1
if c == "*":
# store the position of the wildcard
star_indices.append(len(res))
add(star)
# compress consecutive `*` into one
while i < n and pat[i] == "*":
i += 1
elif c == "?":
add(question_mark)
elif c == "[":
j = i
if j < n and pat[j] == "!":
j = j + 1
if j < n and pat[j] == "]":
j = j + 1
while j < n and pat[j] != "]":
j = j + 1
if j >= n:
add("\\[")
else:
stuff = pat[i:j]
if "-" not in stuff:
stuff = stuff.replace("\\", r"\\")
else:
chunks = []
k = i + 2 if pat[i] == "!" else i + 1
while True:
k = pat.find("-", k, j)
if k < 0:
break
chunks.append(pat[i:k])
i = k + 1
k = k + 3
chunk = pat[i:j]
if chunk:
chunks.append(chunk)
else:
chunks[-1] += "-"
# Remove empty ranges -- invalid in RE.
for k in range(len(chunks) - 1, 0, -1):
if chunks[k - 1][-1] > chunks[k][0]:
chunks[k - 1] = chunks[k - 1][:-1] + chunks[k][1:]
del chunks[k]
# Escape backslashes and hyphens for set difference (--).
# Hyphens that create ranges shouldn't be escaped.
stuff = "-".join(
s.replace("\\", r"\\").replace("-", r"\-") for s in chunks
)
i = j + 1
if not stuff:
# Empty range: never match.
add("(?!)")
elif stuff == "!":
# Negated empty range: match any character.
add(".")
else:
# Escape set operations (&&, ~~ and ||).
stuff = _re_setops_sub(r"\\\1", stuff)
if stuff[0] == "!":
stuff = "^" + stuff[1:]
elif stuff[0] in ("^", "["):
stuff = "\\" + stuff
add(f"[{stuff}]")
else:
add(re.escape(c))
assert i == n
return res, star_indices