This repository was archived by the owner on May 4, 2025. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathdevdocs_service.py
More file actions
188 lines (140 loc) · 5.55 KB
/
devdocs_service.py
File metadata and controls
188 lines (140 loc) · 5.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
""" DevDocs Module """
import json
import os
import time
import difflib
import re
import requests
# Base URL of the DevDocs website.
DEVDOCS_BASE_URL = 'https://devdocs.io'
# Endpoint listing metadata for every documentation set available on DevDocs.
DEVDOCS_INDEX_ALL_URL = 'https://devdocs.io/docs/docs.json'
# Per-documentation entries endpoint; the literal "%slug%" placeholder is
# substituted with a documentation slug before the request is made.
DEVDOCS_DOC_ENTRIES_URL = 'https://devdocs.io/docs/%slug%/index.json'
class DevDocsService():
    """ Service that handles everything related with devdocs.

    Downloads the documentation index and per-doc entry lists from
    devdocs.io and caches them as JSON files under ``cache_dir``.
    """

    # Timeout (seconds) for every HTTP request so a stalled connection
    # cannot hang the service indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, logger, cache_dir):
        """ Constructor method.

        :param logger: logger used for progress messages
        :param cache_dir: directory where the index and entries are cached
        """
        self.logger = logger
        self.cache_dir = cache_dir
        # Single JSON file holding the metadata of the selected docs.
        self.index_file = os.path.join(self.cache_dir, "index.json")
        # One "<slug>.json" file per documentation lives in this directory.
        self.entries_dir = os.path.join(self.cache_dir, "entries")
        self.docs_to_fetch = []
        self.ensure_cache_dirs()

    @staticmethod
    def get_base_url():
        """ Returns the DevDocs base url """
        return DEVDOCS_BASE_URL

    @staticmethod
    def get_index_cache_ttl():
        """ Returns the cache ttl in seconds (24 hours) """
        return 86400

    def ensure_cache_dirs(self):
        """ Creates all the necessary directories and files """
        # Creates the main "cache" dir (no-op when it already exists).
        os.makedirs(self.cache_dir, exist_ok=True)
        # Creates the main "index" file if it doesn't exist yet.
        if not os.path.exists(self.index_file):
            with open(self.index_file, 'w', encoding='utf-8') as f:
                json.dump([], f)
        # Creates the directory to save the entries.
        if not os.path.isdir(self.entries_dir):
            os.makedirs(self.entries_dir, mode=0o755, exist_ok=True)

    def set_docs_to_fetch(self, docs):
        """ Sets the list of docs that we want to fetch.

        :param docs: a list of slugs, or a JSON-encoded string of one.
                     A string that is not valid JSON resolves to [].
        """
        if isinstance(docs, str):
            try:
                docs = json.loads(docs)
            # json.JSONDecodeError subclasses ValueError; a bare except here
            # would also hide real bugs such as KeyboardInterrupt.
            except ValueError:
                docs = []
        docs = self.python_version_fallback(docs)
        self.docs_to_fetch = docs

    def index(self):
        """ Fetch Documentation from DevDocs website """
        self.logger.info("Start indexing documentation")
        # 1. Fetch the available documentation from DevDocs website and
        #    keep only the docs the caller asked for.
        r = requests.get(DEVDOCS_INDEX_ALL_URL, timeout=self.REQUEST_TIMEOUT)
        r.raise_for_status()
        docs = [x for x in r.json() if x['slug'] in self.docs_to_fetch]
        with open(self.index_file, 'w', encoding='utf-8') as f:
            json.dump(docs, f)
        # For each doc fetch its associated entries.
        for doc in docs:
            self.fetch_doc_entries(doc['slug'])
            # Be gentle with the remote server between requests.
            time.sleep(0.5)
        self.logger.info("Index Finished")

    def fetch_doc_entries(self, doc):
        """ Fetch all the entries from a single Documentation.

        :param doc: documentation slug (e.g. "python~3.12")
        """
        # Lazy %-args: the message is only formatted if the level is enabled.
        self.logger.info("Fetching Docs for %s ", doc)
        url = DEVDOCS_DOC_ENTRIES_URL.replace("%slug%", doc)
        r = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        r.raise_for_status()
        with open(os.path.join(self.entries_dir, doc + '.json'), 'w',
                  encoding='utf-8') as f:
            json.dump(r.json(), f)

    def get_docs(self, query=None):
        """ Returns the list of available documentation.

        :param query: optional case-insensitive substring filter on the name
        """
        with open(self.index_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        if query:
            needle = query.strip().lower()
            data = [x for x in data if needle in x['name'].lower()]
        return data

    def get_doc_entries(self, lang_slug, query=""):
        """ Returns a list of entries associated with a respective language.

        :param lang_slug: documentation slug whose entries file is read
        :param query: optional case-insensitive substring filter; matching
                      entries are ranked by similarity to the query
        :returns: a list of entry dicts, or [] when nothing is cached
        """
        entries_file = os.path.join(self.entries_dir, lang_slug + ".json")
        if not os.path.exists(entries_file):
            return []
        with open(entries_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # Tolerate a malformed cache file that lacks the "entries" key.
        entries = data.get('entries', [])
        if query:
            needle = query.strip().lower()
            entries = [x for x in entries if needle in x['name'].lower()]
            # Apply some stuff to get results with more relevance.
            # @see https://stackoverflow.com/questions/17903706/how-to-sort-list-of-strings-by-best-match-difflib-ratio
            entries = sorted(entries,
                             key=lambda x: difflib.SequenceMatcher(
                                 None, x['name'], query).ratio(),
                             reverse=True)
        return entries

    def get_doc_by_slug(self, doc_slug):
        """ Returns the documentation details based on a slug.

        :param doc_slug: slug to look up in the cached index
        :returns: the matching doc dict, or None when not found
        """
        with open(self.index_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for doc in data:
            if doc_slug == doc['slug']:
                return doc
        return None

    def python_version_fallback(self, docs_to_fetch):
        """
        If 'python' is present in docs_to_fetch (without version),
        automatically replace it with the highest python~X.Y version from devdocs.

        :param docs_to_fetch: list of documentation slugs
        :returns: the list with any bare "python" entry resolved; the input
                  is returned unchanged when no resolution is possible
        """
        # No bare "python" entry -> nothing to resolve; skip the network
        # round-trip entirely (the original fetched the index every time).
        if "python" not in docs_to_fetch:
            return docs_to_fetch
        try:
            r = requests.get(DEVDOCS_INDEX_ALL_URL,
                             timeout=self.REQUEST_TIMEOUT)
            r.raise_for_status()
            all_docs = r.json()
        except requests.RequestException:
            # Best effort: keep the list unchanged if devdocs is unreachable.
            return docs_to_fetch
        version_regex = re.compile(r'python~(\d+)\.(\d+)')
        versions = []
        for doc in all_docs:
            match = version_regex.match(doc['slug'])
            if match:
                versions.append((int(match.group(1)), int(match.group(2))))
        if not versions:
            return docs_to_fetch
        # Tuples compare lexicographically, so max() yields the newest X.Y.
        latest_major, latest_minor = max(versions)
        latest_python_slug = f"python~{latest_major}.{latest_minor}"
        return [
            latest_python_slug if slug == "python" else slug
            for slug in docs_to_fetch
        ]