-
-
Notifications
You must be signed in to change notification settings - Fork 108
Expand file tree
/
Copy pathfetch_news.rake
More file actions
124 lines (105 loc) · 4.2 KB
/
fetch_news.rake
File metadata and controls
124 lines (105 loc) · 4.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
require 'rss'
require 'net/http'
require 'uri'
require 'yaml'
require 'time'
require 'active_support/broadcast_logger'
# Reads a feed from a local file or an HTTP(S) endpoint and returns its raw body.
#
# @param url [String] a plain filesystem path, a file:// URI, or an http(s):// URL
# @return [String] the file contents or the HTTP response body
# @raise [RuntimeError] if the scheme is not supported, or the server answers
#   with a non-2xx status (redirects are not followed)
def safe_open(url)
  uri = URI.parse(url)

  # No scheme at all means the caller handed us a plain filesystem path.
  return File.read(url) if uri.scheme.nil?

  # For file:// URIs the filesystem path lives in uri.path; handing the whole
  # URI string to File.read can never resolve to an existing file.
  # Percent-escapes in the path are not decoded here.
  return File.read(uri.path) if uri.scheme == 'file'

  # URI::HTTPS is a subclass of URI::HTTP, so this single check covers both.
  raise "不正なURLです: #{url}" unless uri.is_a?(URI::HTTP)

  response = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(Net::HTTP::Get.new(uri))
  end

  # Without this check an error page (404, 500, ...) would be returned to the
  # caller as if it were the requested document.
  raise "HTTP #{response.code} #{response.message}: #{url}" unless response.is_a?(Net::HTTPSuccess)

  response.body
end
# Rake tasks that maintain the news list stored as YAML.
namespace :news do
  # news:fetch downloads the configured RSS feed(s), merges the entries into
  # the existing YAML news list (entries are matched by URL) and rewrites the
  # file sorted newest-first.
  #
  # Environment variables:
  #   NEWS_YAML_PATH - overrides the output file (default: db/news.yml)
  #   NEWS_RSS_PATH  - overrides the feed source handed to safe_open
  #
  # Without NEWS_RSS_PATH, the test and staging environments read the fixture
  # spec/fixtures/sample_news.rss; every other environment reads the live feed.
  desc 'RSS フィードから最新ニュースを取得し、db/news.yml に書き出す'
  task fetch: :environment do
    # Log to both log/news.log and the console.
    file_logger = ActiveSupport::Logger.new('log/news.log')
    console     = ActiveSupport::Logger.new(STDOUT)
    logger      = ActiveSupport::BroadcastLogger.new(file_logger, console)

    logger.info('==== START news:fetch ====')

    # The YAML destination can be overridden through the environment.
    yaml_path = ENV['NEWS_YAML_PATH'] ? Pathname.new(ENV['NEWS_YAML_PATH']) : Rails.root.join('db', 'news.yml')

    feed_urls =
      if ENV['NEWS_RSS_PATH']
        [ENV['NEWS_RSS_PATH']]
      elsif Rails.env.test? || Rails.env.staging?
        [Rails.root.join('spec', 'fixtures', 'sample_news.rss').to_s]
      else
        ['https://news.coderdojo.jp/feed/']
      end

    # Load the current list. An empty or non-mapping YAML document is treated
    # as "no news yet" instead of crashing on nil['news'].
    existing_news = []
    if File.exist?(yaml_path)
      loaded = YAML.safe_load(File.read(yaml_path), permitted_classes: [Time], aliases: true)
      existing_news = loaded['news'] || [] if loaded.is_a?(Hash)
    end

    # Fetch and parse every feed. A failing feed is logged and contributes
    # nothing, so one bad source does not abort the whole run.
    fetched_items = feed_urls.flat_map do |url|
      logger.info("Fetching RSS → #{url}")
      begin
        feed = RSS::Parser.parse(safe_open(url), false)

        # Items without a pubDate cannot be placed in the date-sorted list
        # (Time.parse('') raises), so they are skipped with a warning.
        dated, undated = feed.items.partition(&:pubDate)
        undated.each { |item| logger.warn("⚠️ Skipping item without pubDate: #{item.link}") }

        dated.map do |item|
          {
            'url'          => item.link,
            'title'        => item.title,
            'published_at' => item.pubDate.to_s
          }
        end
      rescue => e
        logger.warn("⚠️ Failed to fetch #{url}: #{e.message}")
        []
      end
    end

    # Index both sides by URL. Indexing the fetched items also collapses a URL
    # that appears more than once (the last occurrence wins), so it cannot
    # produce duplicate entries in the output.
    existing_by_url = existing_news.index_by { |item| item['url'] }
    fetched_by_url  = fetched_items.index_by { |item| item['url'] }

    # Split fetched items into those that refresh an existing entry and those
    # that are entirely new.
    known_items, truly_new_items = fetched_by_url.values.partition do |item|
      existing_by_url.key?(item['url'])
    end
    updated_items = known_items.map { |item| existing_by_url[item['url']].merge(item) }

    # Existing entries the feed did not mention are carried over untouched.
    unchanged_items = existing_news.reject { |item| fetched_by_url.key?(item['url']) }

    # Newest first. `.to_s` keeps Time.parse working when the YAML loader
    # returned a Time object rather than a String for published_at.
    sorted_items = (unchanged_items + updated_items + truly_new_items).sort_by do |item|
      Time.parse(item['published_at'].to_s)
    end.reverse

    # IDs are positional: 1 is the newest entry.
    # NOTE(review): this means IDs of existing entries shift whenever newer
    # items arrive — confirm that nothing relies on them being stable.
    formatted_items = sorted_items.each_with_index.map do |item, index|
      {
        'id'           => index + 1,
        'url'          => item['url'],
        'title'        => item['title'],
        'published_at' => item['published_at']
      }
    end

    # The payload is fully built before the file is opened, so an error while
    # building it cannot leave a truncated file behind.
    File.write(yaml_path, { 'news' => formatted_items }.to_yaml)

    logger.info("✅ Wrote #{sorted_items.size} items to #{yaml_path} (#{truly_new_items.size} new, #{updated_items.size} updated)")
    logger.info('==== END news:fetch ====')
  end
end