-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdataverse.ts
More file actions
80 lines (71 loc) · 2.6 KB
/
dataverse.ts
File metadata and controls
80 lines (71 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import { env } from "../../config";
import {
DataverseDatasetWithDetails,
} from "../schemas/dataverse";
import { CkanResource, PortalJsCloudDataset } from "../schemas/portaljs-cloud";
import { getDatasetDetails, listAllDatasets } from "../lib/dataverse";
import { BaseHarvester, BaseHarvesterConfig } from "./base";
import { Harvester } from ".";
/**
 * Harvester for a Dataverse source.
 *
 * Lists the datasets available at `config.source.url`, fetches the detailed
 * metadata for each one, and maps the combined record to a
 * `PortalJsCloudDataset`.
 */
@Harvester
class DataverseHarvester extends BaseHarvester<DataverseDatasetWithDetails> {
  constructor(args: BaseHarvesterConfig) {
    super(args);
  }

  /**
   * Lists every dataset at the configured source URL and attaches the result
   * of `getDatasetDetails` to each item under the `__details` key.
   *
   * Detail requests are issued concurrently through `Promise.all`, so a single
   * failed request rejects the whole call.
   *
   * @returns The listed datasets, each extended with `__details`.
   */
  async getSourceDatasets(): Promise<DataverseDatasetWithDetails[]> {
    const sourceUrl = this.config.source.url;
    const baseItems = await listAllDatasets(sourceUrl);
    return Promise.all(
      baseItems.map(async (ds) => {
        const metadata = await getDatasetDetails(sourceUrl, ds.global_id);
        return { ...ds, __details: metadata };
      })
    );
  }

  /**
   * Maps one detailed Dataverse dataset to the target dataset shape.
   *
   * - Each file of the latest version becomes a resource whose URL points at
   *   the source's `/api/access/datafile/{id}` endpoint.
   * - Keywords become tags; a dataset without keywords gets an empty tag list.
   * - Provenance and version information is stored in `extras`.
   *
   * @param ds Dataset as returned by `getSourceDatasets`.
   * @returns The dataset owned by `env.PORTALJS_CLOUD_MAIN_ORG`.
   */
  mapSourceDatasetToTarget(
    ds: DataverseDatasetWithDetails
  ): PortalJsCloudDataset {
    const owner_org = env.PORTALJS_CLOUD_MAIN_ORG;
    const sourceUrl = this.config.source.url;
    const version = ds.__details.latestVersion;

    // `keywords` may be absent; fall back to an empty list so `tags` is
    // always an array rather than `undefined`.
    const tags: { name: string }[] =
      ds.keywords?.map((k) => ({ name: k })) ?? [];

    // Add files as resources
    const resources: CkanResource[] = [];
    for (const file of version.files ?? []) {
      const df = file.dataFile;
      // Only treat the text after the last dot as an extension when the dot
      // is not the first character; names without an extension (e.g.
      // "README") and dotfiles (e.g. ".gitignore") fall back to "FILE".
      const dotIndex = df.filename.lastIndexOf(".");
      const extension = dotIndex > 0 ? df.filename.slice(dotIndex + 1) : "";
      resources.push({
        name: df.filename,
        url: `${sourceUrl}/api/access/datafile/${df.id}`,
        format: extension.toUpperCase() || "FILE",
      });
    }

    // Extras
    const extras: PortalJsCloudDataset["extras"] = [];
    extras.push({ key: "Source URL", value: ds.url });
    extras.push({ key: "Global ID", value: ds.global_id });
    extras.push({ key: "Last Harvested At", value: new Date().toISOString() });
    extras.push({
      key: "Version",
      value: `${ds.majorVersion}.${ds.minorVersion}`,
    });
    extras.push({ key: "Version State", value: ds.versionState });
    extras.push({
      key: "Version History URL",
      value: `${sourceUrl}/dataset.xhtml?persistentId=${ds.global_id}`,
    });
    extras.push({ key: "Publisher", value: ds.__details.publisher });
    extras.push({ key: "DOI", value: ds.global_id });

    // Build the dataset name from the global id: every character outside
    // [a-z0-9-_] (case-insensitive) is replaced with "-", then lowercased and
    // prefixed with the owning organization.
    const slug = ds.global_id.replace(/[^a-z0-9-_]/gi, "-").toLowerCase();

    return {
      owner_org,
      name: `${owner_org}--${slug}`,
      title: ds.name,
      notes: ds.description || "No description",
      author: ds.author,
      language: "EN", // dataverse does not natively support full multilingual dataset metadata
      resources,
      tags,
      extras,
    };
  }
}
export { DataverseHarvester };