Skip to content

Commit 4fb2b51

Browse files
authored
typing improvements (#57)
1 parent f0f773c commit 4fb2b51

2 files changed

Lines changed: 21 additions & 6 deletions

File tree

src/metadata_parser/__init__.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -692,7 +692,7 @@ def get_metadatas(
692692
field: str,
693693
strategy: "TYPES_STRATEGY" = None,
694694
encoder: Optional["TYPE_ENCODER"] = None,
695-
) -> Optional[Dict[str, Union[Dict, List]]]:
695+
) -> Optional[Dict[str, List[str]]]:
696696
"""
697697
looks for the field in various stores. defaults to the core
698698
strategy, though you may specify a certain item. if you search for
@@ -727,6 +727,17 @@ def encode(value: Union[str, dict], store:Optional[str]=None) -> str:
727727
simplify dc elements into just the 'content' text, not dict
728728
:type encoder:
729729
bool
730+
731+
EXAMPLE:
732+
733+
metadataParserPage.parsed_result.get_metadatas("title",strategy=None)
734+
>>> {'page': ['Title: Destination'],
735+
>>> 'og': ['meta.property=og:title'],
736+
>>> 'twitter': ['meta.name=twitter:title']}
737+
738+
metadataParserPage.parsed_result.get_metadatas("title",strategy=["og"])
739+
>>> {'og': ['meta.property=og:title']}
740+
730741
"""
731742
# normalize a strategy into a list of valid options.
732743
strategy = self._coerce_validate_strategy(strategy)
@@ -737,7 +748,7 @@ def encode(value: Union[str, dict], store:Optional[str]=None) -> str:
737748
elif encoder == "raw":
738749
encoder = None
739750

740-
def _lookup(store: str) -> Optional[List]:
751+
def _lookup(store: str) -> Optional[List[str]]:
741752
if field in self.metadata[store]:
742753
val = self.metadata[store][field]
743754
if not isinstance(val, list):
@@ -748,7 +759,7 @@ def _lookup(store: str) -> Optional[List]:
748759
return None
749760

750761
# returns List or None
751-
rval: Dict = {}
762+
rval: Dict[str, List[str]] = {}
752763
for store in strategy:
753764
if store in self.metadata:
754765
val = _lookup(store)
@@ -1833,7 +1844,9 @@ def get_url_canonical(
18331844
candidates = [c for c in candidates if c] if candidates else []
18341845
if not candidates:
18351846
return None
1836-
canonical = candidates[0]
1847+
canonical: Optional[str] = candidates[0]
1848+
if TYPE_CHECKING:
1849+
assert canonical
18371850

18381851
# does the canonical have valid characters?
18391852
# some websites, even BIG PROFESSIONAL ONES, will put html in here.
@@ -1911,7 +1924,9 @@ def get_url_opengraph(
19111924
candidates = [c for c in candidates if c] if candidates else []
19121925
if not candidates:
19131926
return None
1914-
og = candidates[0]
1927+
og: Optional[str] = candidates[0]
1928+
if TYPE_CHECKING:
1929+
assert og
19151930

19161931
# does the og have valid characters?
19171932
# some websites, even BIG PROFESSIONAL ONES, will put html in here.

tests/test_document_parsing.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -444,7 +444,7 @@ class _TestDocumentParsingCore:
444444

445445
def _MakeOne(self, filename):
446446
"""lazy cache of files as needed"""
447-
global CACHED_FILESYSTEM_DOCUMENTS
447+
# global CACHED_FILESYSTEM_DOCUMENTS
448448
if filename not in CACHED_FILESYSTEM_DOCUMENTS:
449449
CACHED_FILESYSTEM_DOCUMENTS[filename] = open(
450450
os.path.join(_examples_dir, filename)

0 commit comments

Comments
 (0)