-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathweb-rss.ts
More file actions
98 lines (86 loc) · 3.33 KB
/
Copy pathweb-rss.ts
File metadata and controls
98 lines (86 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import { createCache, memoryStore } from "cache-manager";
import Parser from "rss-parser";
import { ParsedEntry } from "./parsed-entry";
/**
 * Fetches RSS feeds and converts their most recent (non-pinned) entry
 * into a {@link ParsedEntry}.
 */
export default class WebRss {
  /**
   * Feed parser used for every request made by this instance.
   * NOTE(review): the `cache` option is handed to the Parser constructor as-is;
   * confirm that the installed rss-parser version actually honours it.
   */
  private readonly parser = new Parser({
    timeout: 9e3, // 9 seconds
    headers: { "User-Agent": "Axobot feedparser" },
    cache: createCache(memoryStore({
      max: 1_000,
      ttl: 60 * 60 * 1000, // 1 hour
    })),
  });

  /**
   * Matches the first http(s) URL ending with a common image extension
   * (jpg, jpeg, gif, png, webp), case-insensitively.
   * The path character class includes whitespace, so a match may span spaces.
   */
  private readonly imageUrlRegex = new RegExp(
    [
      "(http(s?):)",
      "([/|.\\w\\s-])*",
      "\\.(?:jpe?g|gif|png|webp)",
    ].join(""),
    "i"
  );

  /**
   * Matches an `<img>` tag carrying a non-empty `title="…"` (capture group 1)
   * or `alt="…"` (capture group 2) attribute, whichever appears first in the tag.
   */
  private readonly imageAltRegex = /<img\b[^>]*?(?:title="([^"]+)"|alt="([^"]+)")[^>]*?>/i;

  /**
   * Fetch a feed and return its most recent entry.
   * @param url URL of the RSS feed
   * @returns The parsed first entry of the feed (after pinned posts have been
   * skipped), or `undefined` when the feed could not be fetched or has no items
   */
  async getLastPost(url: string): Promise<ParsedEntry | undefined> {
    const feed = await this.getFeed(url);
    if (!feed) {
      return undefined;
    }
    const entry = feed.items[0];
    if (!entry) {
      // getFeed never returns an empty feed, but guard the index access anyway
      return undefined;
    }
    return this.parseEntry(feed, entry, url);
  }

  /**
   * Parse an RSS feed from a given URL
   * @param url URL of the RSS feed
   * @returns The parsed feed with leading pinned posts removed, or `null` when
   * fetching/parsing failed or the feed contains no items
   */
  private async getFeed(url: string): Promise<Parser.Output<unknown> | null> {
    let feed: Parser.Output<unknown>;
    try {
      feed = await this.parser.parseURL(url);
    } catch (err) {
      // Strip line breaks and truncate so the URL cannot forge or flood log lines
      const sanitizedUrl = url.replace(/[\n\r]/g, "").slice(0, 100);
      console.warn(`Error while fetching RSS feed from ${sanitizedUrl}: ${err}`);
      return null;
    }
    if (!feed || !feed.items || feed.items.length === 0) {
      console.debug(!feed ? "No feed found" : "No items found in feed");
      return null;
    }
    feed.items = await this.filterPinnedPosts(feed.items);
    return feed;
  }

  /**
   * Remove pinned posts from a feed entries
   *
   * A leading entry is considered pinned when it is strictly older than the
   * entry that follows it. When either date is missing or unparsable the order
   * of the feed is trusted and nothing more is removed, so feeds without dates
   * keep their first entry.
   * @param entries Feed entries, in feed order
   * @returns A new array without the leading pinned entries; the input array
   * is left untouched and at least one entry is kept when the input is non-empty
   */
  private async filterPinnedPosts(entries: Parser.Item[]): Promise<Parser.Item[]> {
    let firstRegularIndex = 0;
    while (firstRegularIndex + 1 < entries.length) {
      const currentTime = Date.parse(entries[firstRegularIndex]?.isoDate ?? "");
      const nextTime = Date.parse(entries[firstRegularIndex + 1]?.isoDate ?? "");
      // Any comparison involving NaN is false, so missing/invalid dates stop the scan
      if (!(currentTime < nextTime)) {
        break;
      }
      firstRegularIndex++;
    }
    return entries.slice(firstRegularIndex);
  }

  /**
   * Convert a feed item into a {@link ParsedEntry}.
   * @param feed The feed the entry belongs to, used for fallback link/title values
   * @param entry The feed item to convert
   * @param feedUrl URL the feed was fetched from, used as last-resort entry URL
   * @returns The converted entry; fields without any usable source are `null`
   */
  private parseEntry(feed: Parser.Output<unknown>, entry: Parser.Item, feedUrl: string): ParsedEntry {
    const content = entry.content || "";
    return {
      url: entry.link || feed.link || feedUrl,
      title: entry.title || "?",
      // NOTE(review): isoDate may be missing on some items; the assertion only silences the compiler
      pubDate: entry.isoDate!,
      entryId: entry.id || entry.guid || entry.title || null,
      author: entry.creator || entry.author || feed.title || null,
      channel: feed.title || null,
      // Image sources by priority: media thumbnail, image enclosure, first image URL in the content
      image: entry.mediaThumbnail?.url || this.extractImageFromEnclosure(entry) || this.extractFirstImageFromContent(content) || null,
      imageAlt: this.extractFirstImageAltFromContent(content) || null,
      postText: entry.contentSnippet || null,
      postDescription: entry.summary || null,
    };
  }

  /**
   * Get the enclosure URL of an entry when the enclosure is an image.
   * @returns The enclosure URL if its MIME type starts with `image/`, else `null`
   */
  private extractImageFromEnclosure(entry: Parser.Item): string | null {
    if (entry.enclosure?.type?.startsWith("image/")) {
      return entry.enclosure.url;
    }
    return null;
  }

  /**
   * Find the first image URL inside an entry content.
   * @returns The first substring matching the image URL pattern, or `null`
   */
  private extractFirstImageFromContent(content: string): string | null {
    return content.match(this.imageUrlRegex)?.[0] || null;
  }

  /**
   * Find the title or alt text of the first `<img>` tag that has one.
   * @returns The `title` or `alt` attribute value, or `null` when none is found
   */
  private extractFirstImageAltFromContent(content: string): string | null {
    const match = content.match(this.imageAltRegex);
    return match?.[1] || match?.[2] || null;
  }
}