-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathconfig.yml
More file actions
50 lines (47 loc) · 2.27 KB
/
config.yml
File metadata and controls
50 lines (47 loc) · 2.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# The list of repositories to be harvested. The name will be used in logging/tracing and to name the resulting file.
# The url denotes the so-called sitemap of the repository (at best it's really a sitemap).
# The sitemap specifies the parser for the sitemap url above. Currently two are available:
# - xml (for sitemaps conforming to the XML standard defined at https://sitemaps.org)
# - openagrar (the sitemap consists of a list of datasets; the ID of each dataset needs to be
#   extracted and appended to the base url for downloading datasets)
# metadata denotes the metadata parser. Currently two are available:
# - embedded_jsonld (for schema.org JSON-LD metadata embedded into HTML)
# - jsonld (for standalone schema.org JSON-LD files)
# One list entry per repository. Each entry is a mapping with `name`, `url`,
# `sitemap` (sitemap parser) and `metadata` (metadata parser).
sitemaps:
  # Standard XML sitemap; JSON-LD metadata embedded in the dataset HTML pages.
  - name: bonares
    url: https://maps.bonares.de/finder/resources/googleds/sitemap.xml
    sitemap: xml
    metadata: embedded_jsonld
  # Standard XML sitemap; JSON-LD metadata embedded in the dataset HTML pages.
  - name: edal
    url: https://doi.ipk-gatersleben.de/sitemap.xml
    sitemap: xml
    metadata: embedded_jsonld
  # Solr query returning a dataset list; handled by the `openagrar` sitemap parser.
  # The URL is quoted because it contains `&`, `%` and `:` characters.
  - name: openagrar_native
    url: "https://www.openagrar.de/servlets/solr/select?core=main&q=category.top%3A%22mir_genres%3Aresearch_data%22+AND+objectType%3Amods+AND+category.top%3A%22state%3Apublished%22&rows=1000&fl=id%2Cmods.identifier&wt=json&XSL.Style=xml"
    sitemap: openagrar
    metadata: embedded_jsonld
  # NOTE(review): `publisso` is not among the sitemap parsers listed in the
  # header comment, and this entry has no `metadata` key -- confirm both are
  # intended.
  - name: publisso_native
    url: "https://frl.publisso.de/find?q=contentType:researchData&from=0&until=200&format=json"
    sitemap: publisso
  # Disabled entry, kept for reference:
  # - name: thunen_atlas
  #   url: "https://atlas.thuenen.de/api/v2/resources?page_size=200&format=json"
  #   sitemap: thunen_atlas
# Configures the HTTP client used to download sitemaps and datasets
http_client:
  # NOTE(review): presumably the maximum number of simultaneous connections --
  # confirm against the client implementation.
  connection_limit: 20
  # NOTE(review): the timeout units are not stated in this file (presumably
  # seconds) -- confirm against the client implementation.
  receive_timeout: 120
  connect_timeout: 60
# Configures the git client
# Note on paths: relative paths refer to the current working directory.
# You can omit the ssh_key_path if you have a local git setup with your personal key.
# If branch is omitted, the main branch is used
git:
  repo_url: https://github.com/fairagro/middleware_repo.git
  # Optional; the main branch is used when this key is omitted.
  branch: main
  # Relative paths refer to the current working directory.
  local_path: output
  # NOTE(review): presumably the identity recorded on commits made by the
  # git client -- confirm against the consumer.
  user_name: "FAIRagro middleware"
  user_email: "middleware@fairagro.net"
  # ssh_key_path is not set here; it can be omitted when the local git setup
  # already provides a personal key.
# If you operate an opentelemetry endpoint you can configure it here
# The empty mapping `{}` leaves opentelemetry without any settings.
# To enable the keys sketched below, remove the `{}` and uncomment them as
# indented children of `opentelemetry:`; a flow mapping `{}` cannot be
# followed by block-style child keys.
opentelemetry: {}
  # endpoint:
  # insecure: true  # set to true if you do not use TLS