Skip to content

Commit 51ef0df

Browse files
authored
feat(route): add 国际能源网 (in-en.com) news route (#21669)
* feat(route): add 国际能源网 (in-en.com) news route Add RSS route for in-en.com (国际能源网), a major Chinese energy news portal. Supported channels via /in-en/news/:type: - solar — 光伏太阳能 - wind — 风电 - chuneng — 储能 - h2 — 氢能 - chd — 充换电 - newenergy — 新能源综合 - power — 电力 - huanbao — 环保 Features: fetches article list from each subdomain's news page, then retrieves full article body via detail page (cached). Handles Chinese relative timestamps (X分钟/小时/天前). * fix(in-en): add maintainer, fix timezone double-shift, translate comments to English * fix(in-en): correct news paths for h2/chd/huanbao, drop date-only pubDate override * fix(in-en): sort imports, fix numeric separators, add category field, use trueUA * fix(in-en): address review comments - Remove config.trueUA, use default UA - Remove custom parseRelTime, use parseDate directly - Fix title text extraction without cloning element - Remove unnecessary .first() on pubDate - Fix author: handle both em.ly and plain-text source via .prompt > span - Simplify category field (remove redundant length check) - Fix description selector: .article-body → #article - Fix detail author selector: .article-meta .source a → p.source a - Remove try-catch around detail fetch * fix(in-en): fix author including category text and handle relative pubDate formats * fix(in-en): use parseRelativeDate, simplify selectors, drop redundant fallbacks - Replace hand-rolled relative-date logic with parseRelativeDate (covers "刚刚 / N分钟前 / N小时前 / 今天 HH:MM / 昨天 HH:MM / YYYY-MM-DD"). Fixes Invalid Date on the first few entries and resolves the two oxlint findings (no-useless-undefined, numeric-separators-style). - Drop the `.first()` on `.listTxt .prompt i` — each `ul.infoList > li` carries exactly one `<i>` (date), confirmed against https://solar.in-en.com/news/. - Author selector: `.prompt > span:first-of-type`, then strip leading "来源:". 
Cheerio's `.text()` flattens `<em class="ly">…</em>` into the same string, so this matches both shapes shown on https://solar.in-en.com/news/ (plain text vs. wrapped in em.ly). - Category selector: `.prompt > span:not(:first-of-type) em a`, scoped to the second span so author text never leaks in. - Drop `|| undefined` on author and `?? ''` on description (redundant — DataItem already accepts undefined). - Add an HTML-shape comment pointing to the live sample URL so future reviewers can verify the layout assumptions. * fix(in-en): drop dead default + use new-media category - Hono treats `:type` as compulsory (path is `/news/:type`), so users without it never reach the handler — the `?? 'solar'` fallback was unreachable. Replace with a non-null assertion to keep the type narrow. - categories: 'traditional-media' → 'new-media' per review suggestion.
1 parent 63157fb commit 51ef0df

2 files changed

Lines changed: 123 additions & 0 deletions

File tree

lib/routes/in-en/index.ts

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import { load } from 'cheerio';
2+
3+
import type { DataItem, Route } from '@/types';
4+
import cache from '@/utils/cache';
5+
import ofetch from '@/utils/ofetch';
6+
import { parseRelativeDate } from '@/utils/parse-date';
7+
8+
/** Settings for one in-en.com channel. */
interface ChannelConfig {
    /** Channel display name, used in the feed title. */
    name: string;
    /** Path of the channel's news list page on its subdomain. */
    newsPath: string;
}

// Supported channels, keyed by the `:type` route parameter (which is also the subdomain).
const CATEGORIES: Record<string, ChannelConfig> = {
    solar: {
        name: '光伏太阳能',
        newsPath: '/news/',
    },
    wind: {
        name: '风电',
        newsPath: '/windnews/',
    },
    chuneng: {
        name: '储能',
        newsPath: '/news/',
    },
    h2: {
        name: '氢能',
        newsPath: '/hydrogen/',
    },
    chd: {
        name: '充换电',
        newsPath: '/ChargingStation/',
    },
    newenergy: {
        name: '新能源',
        newsPath: '/news/',
    },
    power: {
        name: '电力',
        newsPath: '/news/',
    },
    huanbao: {
        name: '环保',
        newsPath: '/policy/',
    },
};
19+
20+
/**
 * Route definition for 国际能源网 (in-en.com) channel news.
 *
 * The handler fetches the news list page of the requested channel subdomain,
 * extracts title / link / author / category / date for each entry, then loads
 * every article page (through the cache) to attach the full body.
 */
export const route: Route = {
    path: '/news/:type',
    categories: ['new-media'],
    example: '/in-en/news/solar',
    parameters: {
        type: 'Channel type, see table below',
    },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    name: '新闻',
    maintainers: ['Harviewang'],
    description: `| 频道 | type 参数 |
| --- | --- |
| 光伏太阳能 | solar |
| 风电 | wind |
| 储能 | chuneng |
| 氢能 | h2 |
| 充换电 | chd |
| 新能源综合 | newenergy |
| 电力 | power |
| 环保 | huanbao |`,

    /**
     * Builds the feed for one channel.
     *
     * @param ctx - Request context; the `type` path parameter selects the channel.
     * @returns Feed title, list page link and the enriched items.
     * @throws Error when `type` is not one of the configured channel keys.
     */
    async handler(ctx) {
        const type = ctx.req.param('type');

        // Match own keys only. A plain `CATEGORIES[type]` truthiness check would accept
        // inherited names such as `constructor` or `toString`, skip the error below and
        // end up requesting a bogus host with an `undefined` path.
        const cat = type !== undefined && Object.keys(CATEGORIES).includes(type) ? CATEGORIES[type] : undefined;
        if (!cat) {
            throw new Error(`Unknown channel type: ${type}. Valid values: ${Object.keys(CATEGORIES).join(', ')}`);
        }

        const baseUrl = `https://${type}.in-en.com`;
        const listUrl = `${baseUrl}${cat.newsPath}`;

        const html = await ofetch(listUrl);
        const $ = load(html);

        // List entries always carry a link, so the detail step needs no non-null assertions.
        type ListItem = DataItem & { link: string };

        // Each `ul.infoList > li` carries a single `.prompt` block with this fixed shape:
        //   <i>{relative or absolute date}</i>
        //   <span>来源:{author or <em class="ly">{author}</em>}</span>
        //   <span><em>{<a>{category}</a>}+</em></span>
        // See https://solar.in-en.com/news/ for live samples.
        const list = $('ul.infoList > li')
            .toArray()
            .flatMap((el): ListItem[] => {
                const $el = $(el);
                const $a = $el.find('.listTxt h5 a');
                const href = $a.attr('href')?.trim();
                const title = $a.attr('title')?.trim() || $a.text().trim();

                // Entries without a usable title or link are dropped.
                if (!href || !title) {
                    return [];
                }

                const $prompt = $el.find('.listTxt .prompt');
                const pubDateRaw = $prompt.children('i').text().trim();

                // Strip the "来源" label. The colon is accepted in ASCII or full-width form
                // in case the page renders the latter (not verifiable from this file).
                const author = $prompt
                    .children('span')
                    .first()
                    .text()
                    .replace(/来源[:：]/, '')
                    .trim();

                // Categories live in the spans after the first one, so author text never leaks in.
                const category = $prompt
                    .children('span:not(:first-of-type)')
                    .find('em a')
                    .toArray()
                    .map((a) => $(a).text().trim())
                    .filter(Boolean);

                return [
                    {
                        title,
                        // Resolve against the channel origin so relative or protocol-relative
                        // hrefs still give an absolute link (also the cache key and fetch URL).
                        link: new URL(href, baseUrl).href,
                        author,
                        category,
                        pubDate: pubDateRaw ? parseRelativeDate(pubDateRaw) : undefined,
                    },
                ];
            });

        const items = await Promise.all(
            list.map((item) =>
                cache.tryGet(item.link, async () => {
                    const $d = load(await ofetch(item.link));

                    // Prefer the source named on the article page; fall back to the list value.
                    const detailAuthor = $d('p.source a').text().trim();

                    return {
                        ...item,
                        description: $d('#article').html() ?? undefined,
                        author: detailAuthor || item.author,
                    };
                })
            )
        );

        return {
            title: `国际能源网 · ${cat.name}`,
            link: listUrl,
            item: items as DataItem[],
        };
    },
};

lib/routes/in-en/namespace.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import type { Namespace } from '@/types';
2+
3+
/** Namespace metadata shared by the 国际能源网 (in-en.com) routes. */
export const namespace: Namespace = { name: '国际能源网', url: 'www.in-en.com', lang: 'zh-CN' };

0 commit comments

Comments
 (0)