@@ -14,7 +14,7 @@ import sys
1414import time
1515import urllib .parse
1616from html .parser import HTMLParser
17- from typing import Any , Dict , List , Optional , Tuple
17+ from typing import Any , Dict , List
1818
1919import feedparser
2020from typing_extensions import override
@@ -187,6 +187,11 @@ def elide_subject(subject: str) -> str:
187187 return subject
188188
189189
def get_entry_time(entry: Any) -> tuple[float, bool]:
    """Return a feed entry's timestamp and whether it carried one at all.

    The published time is preferred; the updated time is used when the
    published one is missing *or* present but empty (for example ``None``
    because the date string could not be parsed).

    Args:
        entry: A mapping-like feed entry exposing ``.get()``; the time values
            are expected to be UTC time tuples accepted by ``calendar.timegm``.

    Returns:
        ``(seconds_since_epoch, True)`` when a time is available, otherwise
        ``(float("-inf"), False)`` so untimed entries compare as older than
        any timed entry.
    """
    # `or` (rather than a .get() default) also falls back when the
    # "published_parsed" key exists but holds a falsy value such as None.
    entry_time = entry.get("published_parsed") or entry.get("updated_parsed")
    if not entry_time:
        return float("-inf"), False
    return calendar.timegm(entry_time), True
193+
194+
190195def send_zulip (entry : Any , feed_name : str ) -> Dict [str , Any ]:
191196 body : str = entry .summary
192197 if opts .unwrap :
@@ -237,17 +242,13 @@ for feed_url in feed_urls:
237242 new_hashes : List [str ] = []
238243 data = feedparser .parse (feed_url )
239244 feed_name : str = data .feed .title or feed_url
245+ # Safeguard to not process older entries in unordered feeds
246+ entry_threshold = time .time () - opts .earliest_entry_age * 60 * 60 * 24
240247
241248 for entry in data .entries :
242249 entry_hash = compute_entry_hash (entry )
243- # An entry has either been published or updated.
244- entry_time : Optional [Tuple [int , int ]] = entry .get (
245- "published_parsed" , entry .get ("updated_parsed" )
246- )
247- if (
248- entry_time is not None
249- and time .time () - calendar .timegm (entry_time ) > opts .earliest_entry_age * 60 * 60 * 24
250- ):
250+ entry_time , is_time_tagged = get_entry_time (entry )
251+ if (is_time_tagged and entry_time < entry_threshold ) or entry_hash in old_feed_hashes :
251252 # As a safeguard against misbehaving feeds, don't try to process
252253 # entries older than some threshold.
253254 continue
0 commit comments