-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathget_libris.py
More file actions
117 lines (84 loc) · 3.41 KB
/
get_libris.py
File metadata and controls
117 lines (84 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from dataclasses import dataclass
import urllib.request
import json
import logging
from urllib.parse import urlparse
logger = logging.getLogger(__name__)
@dataclass
class LibrisItem:
    """Plain record holding the metadata kept for one LIBRIS item.

    ``librisID`` and ``title`` are required; every other field is optional
    and defaults to an empty string.
    """

    # Required fields (no default).
    librisID: str
    title: str

    # Optional fields, empty string when not supplied.
    yearPublished: str = ""
    ISBN: str = ""
    publisher: str = ""
    creator: str = ""
    samlaID: str = ""
    relation: str = ""
    # page: str = ""
# LIBRIS xsearch requests for the query ZSER:(Sveriges kyrkor), asking for
# JSON output with n=200 hits per request. The first URL has no start
# parameter; the following ones continue at start=201, 401 and 601.
# NOTE(review): this is a set literal, so iteration order is not guaranteed
# to follow the order in which the URLs are written here.
searchURLs = {'http://libris.kb.se/xsearch?query=ZSER:(Sveriges%20kyrkor)&format=json&n=200',
'http://libris.kb.se/xsearch?query=ZSER:(Sveriges%20kyrkor)&format=json&n=200&start=201',
'http://libris.kb.se/xsearch?query=ZSER:(Sveriges%20kyrkor)&format=json&n=200&start=401',
'http://libris.kb.se/xsearch?query=ZSER:(Sveriges%20kyrkor)&format=json&n=200&start=601'}
# Python has no built-in function overloading, so the small per-field accessors below are a crude workaround.
def get_isbn(item):
    """Return the item's ``isbn`` entry, or ``''`` when it is absent."""
    isbn = item.get('isbn', '')
    return isbn
def get_creator(item):
    """Return the item's ``creator`` entry, or ``''`` when it is absent."""
    creator = item.get('creator', '')
    return creator
def get_publisher(item):
    """Return the item's ``publisher`` entry, or ``''`` when it is absent."""
    publisher = item.get('publisher', '')
    return publisher
def get_year_published(item):
    """Return the item's ``date`` entry, or ``''`` when it is absent."""
    year_published = item.get('date', '')
    return year_published
def get_relation(item):
    """Return the item's ``relation`` entry, or ``''`` when it is absent."""
    relation = item.get('relation', '')
    return relation
def in_samla(item):
    """Return True when any of the item's free URLs contains ``raa/samla``.

    The comparison is case-insensitive and each URL is converted with
    ``str()`` before matching.

    :param item: LIBRIS record (mapping) whose ``"free"`` entry holds the
        resource URLs.
    :return: True if at least one URL matches, otherwise False.
    :rtype: bool
    :raises KeyError: if the item has no ``"free"`` entry.
    """
    free_urls = item["free"]
    # A bare string is treated as one URL rather than being walked
    # character by character (which could never match).
    if isinstance(free_urls, str):
        free_urls = [free_urls]
    # Use a substring test: str.find() returns -1 (truthy) on a miss and
    # 0 (falsy) for a match at index 0, so it must not be used as a boolean.
    return any("raa/samla" in str(libris_url).lower()
               for libris_url in free_urls)
def samla_id(item):
    """Extract the RAÄ Samla id from the item's free URLs.

    The first URL whose lower-cased path contains ``raa/samla/html`` wins;
    the id is that lower-cased path with ``/raa/samla/html/`` removed.

    :param item: LIBRIS record (mapping) whose ``"free"`` entry holds the
        resource URLs.
    :return: the extracted id, or None when no URL matches.
    :raises KeyError: if the item has no ``"free"`` entry.
    """
    free_urls = item["free"]
    # A bare string is treated as one URL rather than being walked
    # character by character.
    if isinstance(free_urls, str):
        free_urls = [free_urls]
    for libris_url in free_urls:
        try:
            # Lower-case before testing so the check agrees with the
            # lower-cased replacement below.
            path = urlparse(libris_url).path.lower()
            is_samla = "raa/samla/html" in path
        except (AttributeError, TypeError, ValueError):
            # Best effort: a malformed or non-text URL is logged and skipped.
            logger.warning("libris_url : %s", item)
            continue
        if is_samla:
            return path.replace('/raa/samla/html/', '')
    return None
def get_libris_identifier(item):
    """Return the item's ``identifier`` with the LIBRIS bib URL prefix removed."""
    bib_prefix = "http://libris.kb.se/bib/"
    identifier = item["identifier"]
    return identifier.replace(bib_prefix, "")
def clean_libris_title(item):
    """Return the LIBRIS title without the media-type marker.

    The text `` [Elektronisk resurs]`` (including its leading space) is
    removed wherever it occurs; the title is converted with ``str()`` first.

    :rtype: str
    """
    raw_title = str(item["title"])
    return raw_title.replace(" [Elektronisk resurs]", "")
def get_LIBRIS_svenska_kyrka(libris_svenska_kyrkan):
    """Loop over the LIBRIS search URLs and collect the items found in Samla.

    Each URL in ``searchURLs`` is fetched and parsed as JSON. Every entry of
    ``data["xsearch"]["list"]`` that has a ``free`` key and passes
    ``in_samla`` is appended as a ``LibrisItem``; entries without ``free``
    are logged and skipped.

    :param libris_svenska_kyrkan: list-like collector; items are appended to
        it in place.
    :return: the same ``libris_svenska_kyrkan`` object.
    """
    # searchURLs is a set; sorting gives a reproducible fetch/result order.
    for search in sorted(searchURLs):
        logger.info("Search: %s", search)
        with urllib.request.urlopen(search) as url:
            data = json.loads(url.read().decode())
        # The response is fully read above, so the connection can be closed
        # before the items are processed.
        for item in data["xsearch"]["list"]:
            if 'free' not in item:
                logger.info("No 'Free' in LIBRIS item : %s", item)
                continue
            if not in_samla(item):
                continue
            libris_svenska_kyrkan.append(
                LibrisItem(
                    librisID=get_libris_identifier(item),
                    title=clean_libris_title(item),
                    yearPublished=get_year_published(item),
                    ISBN=get_isbn(item),
                    publisher=get_publisher(item),
                    creator=get_creator(item),
                    samlaID=samla_id(item),
                    relation=get_relation(item),
                )
            )
    return libris_svenska_kyrkan