Skip to content

Commit 22aadda

Browse files
committed
cleanup and add more utf8 tld's
1 parent add1cdc commit 22aadda

15 files changed

Lines changed: 402 additions & 874 deletions

.pylintrc

Lines changed: 0 additions & 637 deletions
This file was deleted.

Makefile.pypi

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,18 @@ export SHELL
99
MIN_PYTHON_VERSION := $(shell basename $$( ls /usr/bin/python3.[0-9][0-9] | awk '{print $0; exit}' ) )
1010
export MIN_PYTHON_VERSION
1111

12+
run: pypiTest
13+
1214
# ====================================================
1315
# uploading to pypi and pypiTestUpload
1416
# build a test-pypi and download the image in a venv and run a test
1517
pypiTest: pypiTestUpload testTestPypi
1618

1719
# this is only the upload now for pypi builders
18-
pypiTestUpload:
19-
./bin/upload_to_pypiTest.sh
20+
pypiTestUpload: # untested ./bin/upload_to_pypiTest.sh
21+
VERSION=$(shell cat ./work/version );
22+
ls -l ./dist/*$(VERSION)*
23+
twine upload -r testpypi dist/*$(VERSION)*
2024

2125
testTestPypi:
2226
./bin/testTestPyPiUpload.sh 2>tmp/$@-2 | tee tmp/$@-1
@@ -25,5 +29,7 @@ testTestPypi:
2529
# needs to be called manually, can be transferred to github action
2630
release: pypi
2731

28-
pypi:
29-
./bin/upload_to_pypi.sh
32+
pypi: # ./bin/upload_to_pypi.sh
33+
VERSION=$(shell cat ./work/version )
34+
ls -l ./dist/*$(VERSION)*
35+
twine upload --verbose dist/*$(VERSION)*

analizer/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ format:
6868
ruff format *.py
6969

7070
check:
71-
ls
71+
ruff check --fix *.py
7272

7373
test:
7474
$(COMMON_VENV) \

analizer/analizeIanaTld.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
#! /usr/bin/env python3
22

3-
"""
4-
Analyze all tld's currently in the iana root db
5-
"""
3+
# Analyze all tld's currently in the iana root db
64

75
import io
86
import re
@@ -11,6 +9,7 @@
119
LRUCache,
1210
Resolver,
1311
)
12+
1413
from ianaCrawler import IanaCrawler
1514
from ianaDatabase import IanaDatabase
1615
from pslGrabber import PslGrabber
@@ -29,7 +28,7 @@ def prepDb(
2928

3029
def prepResolver() -> Resolver:
3130
resolver: Resolver = Resolver()
32-
resolver.cache = LRUCache() # type: ignore
31+
resolver.cache = LRUCache()
3332

3433

3534
def updateAllIanaTldData(

analizer/get_registrars.py

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77

88
import requests
99
from tld import get_tld
10-
from Typing import (
11-
List,
12-
)
1310

1411
FILE_NAME: str = "registrar-ids-1.csv"
1512
FILE_URL: str = f"https://www.iana.org/assignments/registrar-ids/{FILE_NAME}"
@@ -19,16 +16,15 @@ def getFileFromUrl(fileName: str, url: str) -> None:
1916
r = requests.get(
2017
url,
2118
allow_redirects=True,
19+
timeout=300,
2220
)
23-
pathlib.Path(fileName).open("wb").write(
24-
r.content,
25-
)
21+
pathlib.Path(fileName).write_bytes(r.content)
2622
return fileName
2723

2824

2925
def readCsvFile(fileName: str):
30-
result: List[List[str]] = []
31-
with pathlib.Path(fileName).open() as csv_file:
26+
result: list[list[str]] = []
27+
with pathlib.Path(fileName).open("r", encoding="utf8") as csv_file:
3228
csv_reader = csv.reader(csv_file, delimiter=",")
3329

3430
line_count = 0
@@ -46,15 +42,15 @@ def readCsvFile(fileName: str):
4642

4743
def xMain() -> None:
4844
fileName: str = getFileFromUrl(FILE_NAME, FILE_URL)
49-
result: List[List[str]] = readCsvFile(fileName)
45+
result: list[list[str]] = readCsvFile(fileName)
5046

51-
rdapList: List[str] = []
52-
hostList: List[str] = []
53-
tldList: List[str] = []
54-
fldList: List[str] = []
47+
rdapList: list[str] = []
48+
hostList: list[str] = []
49+
tldList: list[str] = []
50+
fldList: list[str] = []
5551

5652
for row in result:
57-
if row[3].strip() == "":
53+
if not row[3].strip():
5854
continue
5955

6056
rdap = row[3]

analizer/ianaCrawler.py

100755100644
Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@ def _getPageFromUrlIntoSoupWithRetry(
6060
print(e, file=sys.stderr)
6161
time.sleep(15)
6262

63-
soup = BeautifulSoup(response.text, "html.parser")
64-
return soup
63+
return BeautifulSoup(response.text, "html.parser")
6564

6665
def _getAdditionalItem(
6766
self,
@@ -73,8 +72,8 @@ def _getAdditionalItem(
7372
z: str = f"{what}:"
7473
if z in data[i]:
7574
return data[i].replace(z, "").strip()
76-
except Exception as _:
77-
_ = _
75+
except Exception as e:
76+
_ = e
7877
return None
7978
return None
8079

@@ -105,7 +104,7 @@ def _resolveWhois(
105104
except Exception as e:
106105
print(whois, e, n, file=sys.stderr)
107106
time.sleep(5)
108-
n = n - 1
107+
n -= 1
109108

110109
for a in answer:
111110
s = str(a)
@@ -122,11 +121,14 @@ def extractInfoFromPageSoup(
122121
soup: BeautifulSoup,
123122
tldItem: list[Any],
124123
) -> None:
124+
# URL for registration services: http://www.aaa.com
125+
# WHOIS Server: whois.nic.aaa
126+
# RDAP Server: https://rdap.nic.aaa/
125127
zz = {
126128
"Whois": "WHOIS Server",
129+
"Rdap": "RDAP Server",
127130
"RegistrationUrl": "URL for registration services",
128131
}
129-
130132
for key, val in zz.items():
131133
regDataW: str | None = self._getTldParagraphWithString(soup, val)
132134
if not regDataW:
@@ -136,7 +138,8 @@ def extractInfoFromPageSoup(
136138
regDataW = regDataW.replace(val, key)
137139
regDataA = regDataW.split("\n")
138140
for s in [key]:
139-
tldItem.append(self._getAdditionalItem(s, regDataA))
141+
s = self._getAdditionalItem(s, regDataA)
142+
tldItem.append(s)
140143

141144
def doWhoisServerResolve_DoesItExist(
142145
self,
@@ -174,10 +177,12 @@ def _processOneTableData(self, trs: list[str]) -> list[str]:
174177
try:
175178
link = each.find("a")["href"]
176179
aa = link.split("/")
177-
record.append(aa[-1].replace(".html", ""))
178-
record.append(each.text.strip())
179-
except Exception as _:
180-
_ = _
180+
s = aa[-1].replace(".html", "")
181+
record.append(s)
182+
s = each.text.strip()
183+
record.append(s)
184+
except Exception as e:
185+
_ = f"{e}"
181186
record.append(each.text)
182187
return record
183188

@@ -194,10 +199,7 @@ def _processOneTableRow(self, tr: str) -> None:
194199
self.records.append(record)
195200

196201
def getTldInfoAllFromIanaUrl(self) -> None:
197-
"""
198-
Extract all current defined tld names from the main iana root db page
199-
200-
"""
202+
"""Extract all current defined tld names from the main iana root db page"""
201203
soup = self._getPageFromUrlIntoSoupWithRetry(self._getUrl())
202204
table: Any = soup.find("table") # the first table has the tld data
203205

@@ -217,6 +219,7 @@ def addInfoToAllTld(self) -> None:
217219
self.columns.insert(4, "Whois") # is there a whois server defined
218220
self.columns.insert(5, "RegistrationUrl") # is there a registration url defined
219221
self.columns.insert(6, "DnsResolve-A") # if we have a whois server does it actually resolve to something real
222+
self.columns.insert(7, "Rdap") # is there an rdap server defined
220223

221224
for tldItem in self.records: # tldItem is a list
222225
rr = self._addInfoToOneTld(tldItem)

analizer/ianaDatabase.py

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ def connectDb(
2727

2828
def testValidConnection(self) -> None:
2929
if self.conn is None:
30-
raise Exception("No valid connection to the database exist")
30+
msg = "No valid connection to the database exist"
31+
raise Exception(msg)
3132

3233
def selectSql(
3334
self,
@@ -38,14 +39,11 @@ def selectSql(
3839
cur: Any = self.conn.cursor()
3940

4041
try:
41-
if data:
42-
result = cur.execute(sql, data)
43-
else:
44-
result = cur.execute(sql)
42+
result = cur.execute(sql, data) if data else cur.execute(sql)
4543

4644
except Exception as e:
4745
print(sql, data, e, file=sys.stderr)
48-
exit(101)
46+
sys.exit(101)
4947
return result, cur
5048

5149
def doSql(
@@ -58,17 +56,14 @@ def doSql(
5856
cur: Any = self.conn.cursor()
5957

6058
try:
61-
if data:
62-
result = cur.execute(sql, data)
63-
else:
64-
result = cur.execute(sql)
59+
result = cur.execute(sql, data) if data else cur.execute(sql)
6560

6661
if withCommit:
6762
self.conn.commit()
6863

6964
except Exception as e:
7065
print(sql, e, file=sys.stderr)
71-
exit(101)
66+
sys.exit(101)
7267
return result
7368

7469
def createTableTld(self) -> None:
@@ -80,11 +75,11 @@ def createTableTld(self) -> None:
8075
TLD_Manager TEXT,
8176
Whois TEXT,
8277
'DnsResolve-A' TEXT,
83-
RegistrationUrl TEXT
78+
RegistrationUrl TEXT,
79+
Rdap TEXT
8480
);
8581
"""
86-
rr = self.doSql(sql)
87-
return rr
82+
return self.doSql(sql)
8883

8984
def createTablePsl(self) -> None:
9085
sql = """
@@ -97,8 +92,7 @@ def createTablePsl(self) -> None:
9792
PRIMARY KEY (Tld, Psl)
9893
);
9994
"""
100-
rr = self.doSql(sql)
101-
return rr
95+
return self.doSql(sql)
10296

10397
def prepData(
10498
self,

analizer/investigateTld.py

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
)
1010

1111
import idna as idna2
12+
1213
from ianaDatabase import IanaDatabase
1314

1415
# the next 2 belong together
@@ -70,7 +71,7 @@ def __init__(
7071

7172
def _normalizeRow(self) -> None:
7273
self.tld = self.row[0].replace("'", "")
73-
self.tld2 = "".join(map(lambda s: s and re.sub(r"[^\w\s]", "", s), self.row[1]))
74+
self.tld2 = "".join(map(lambda s: s and re.sub(r"[^\w\s]", "", s), self.row[1])) # noqa: C417
7475
self.tld3 = self.row[1].replace(".", "").replace("'", "").replace("\u200f", "").replace("\u200e", "")
7576
self.tld4 = self.tld3
7677

@@ -131,10 +132,7 @@ def _skipSpecialResolve(self):
131132
if "whois.centralnicregistry.com." in self.resolve and self._doCentralNic(serverHint, self.thisTld):
132133
return True
133134

134-
if "whois.donuts.co" in self.resolve and self._doCentralNic(serverHint, self.thisTld):
135-
return True
136-
137-
return False
135+
return "whois.donuts.co" in self.resolve and self._doCentralNic(serverHint, self.thisTld)
138136

139137
def _doUtf8Preparations(self):
140138
try:
@@ -144,10 +142,6 @@ def _doUtf8Preparations(self):
144142
return None
145143

146144
self.tld4 = self.tld4.encode("idna").decode()
147-
if self.tld != self.tld2:
148-
if 0 and self.tld2 not in self.ss:
149-
print("# idna", self.tld, self.tld2, self.tld3, self.tld4, self.tld.encode("idna"))
150-
151145
if self.tld != self.tld3:
152146
print(f"#SKIP {self.tld} {self.tld2} {self.tld3}")
153147
return True
@@ -158,10 +152,7 @@ def _skipKnowTld(self):
158152
if self.tld2 == self.tld and self.tld in self.allKnownTldDict:
159153
return True
160154

161-
if self.tld2 in self.allKnownTldDict and self.tld in self.allKnownTldDict:
162-
return True
163-
164-
return False
155+
return self.tld2 in self.allKnownTldDict and self.tld in self.allKnownTldDict
165156

166157
def _doNoManagerTld(self):
167158
if self.manager == "NULL":
@@ -194,14 +185,11 @@ def _doNoWhois(self):
194185
# unclear,
195186
# we have existing ns records indicating some self.tld's actually exist
196187
# but have no whois, let's skip for now
197-
# TODO add ns records
188+
# TO_DO add ns records
198189
if self.tld not in self.allKnownTldDict:
199190
print(f'# ZZ["{self.tld}"] = ' + '{"_privateRegistry": True} # noWhois ')
200191

201-
if self.w == "NULL":
202-
return True
203-
204-
return False
192+
return self.w == "NULL"
205193

206194
def _doCleanuphois(self):
207195
def xx(zz):
@@ -263,9 +251,9 @@ def processRow(
263251
def extractServers(aDict: dict[str, Any]) -> dict[str, Any]:
264252
servers: dict[str, Any] = {}
265253
k = "_server"
266-
for key in aDict:
267-
if k in aDict[key]:
268-
server = aDict[key][k]
254+
for key, value in aDict.items():
255+
if k in value:
256+
server = value[k]
269257
if server not in servers:
270258
servers[server] = []
271259
servers[server].append(key)
@@ -323,7 +311,7 @@ def xMain() -> None:
323311

324312
iad = IanaDatabase(verbose=verbose)
325313
iad.connectDb(dbFileName)
326-
rr, cur = getAllDataTld(iad)
314+
_, cur = getAllDataTld(iad)
327315
for row in cur:
328316
ot = OneTld(tld_regexpr.ZZ, verbose=verbose)
329317
ot.processRow(row, allTld, ss)

analizer/pslGrabber.py

100755100644
Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ def getData(
2929
self,
3030
url: str,
3131
) -> Any:
32-
response = self.Session.get(url)
33-
return response
32+
return self.Session.get(url)
3433

3534
def ColumnsPsl(self) -> list[str]:
3635
return [

0 commit comments

Comments
 (0)