@@ -226,8 +226,6 @@ client: zulip.Client = zulip.Client(
     client="ZulipRSS/" + VERSION,
 )

-first_message = True
-
 for feed_url in feed_urls:
     feed_hashes_file = os.path.join(
         opts.data_dir, urllib.parse.urlparse(feed_url).netloc
@@ -239,7 +237,7 @@ for feed_url in feed_urls:
     except OSError:
         old_feed_hashes = {}

-    new_hashes: List[str] = []
+    unhashed_entries: List[tuple[Any, str, float]] = []
     data = feedparser.parse(feed_url)
     feed_name: str = data.feed.title or feed_url
     # Safeguard to not process older entries in unordered feeds
@@ -249,32 +247,32 @@ for feed_url in feed_urls:
         entry_hash = compute_entry_hash(entry)
         entry_time, is_time_tagged = get_entry_time(entry)
         if (is_time_tagged and entry_time < entry_threshold) or entry_hash in old_feed_hashes:
-            # As a safeguard against misbehaving feeds, don't try to process
-            # entries older than some threshold.
             continue
-        if entry_hash in old_feed_hashes:
-            # We've already seen this. No need to process any older entries.
-            break
-        if not old_feed_hashes and len(new_hashes) >= opts.max_batch_size:
-            # On a first run, pick up the n (= opts.max_batch_size) most recent entries.
-            # An RSS feed has entries in reverse chronological order.
-            break
-
-        response: Dict[str, Any] = send_zulip(entry, feed_name)
-        if response["result"] != "success":
-            logger.error("Error processing %s", feed_url)
-            logger.error("%s", response)
-            if first_message:
-                # This is probably some fundamental problem like the stream not
-                # existing or something being misconfigured, so bail instead of
-                # getting the same error for every RSS entry.
-                log_error_and_exit("Failed to process first message")
-        # Go ahead and move on -- perhaps this entry is corrupt.
-        new_hashes.append(entry_hash)
-        first_message = False
+        unhashed_entries.append((entry, entry_hash, entry_time))

-    with open(feed_hashes_file, "a") as f:
-        for hash in new_hashes:
-            f.write(hash + "\n")
+    # We process all entries to support unordered feeds,
+    # but post only the latest ones in chronological order.
+    sorted_entries = sorted(unhashed_entries, key=lambda x: x[2])[-opts.max_batch_size :]

-    logger.info("Sent zulips for %d %s entries", len(new_hashes), feed_url)
+    with open(feed_hashes_file, "a") as f:
+        for entry_tuple in sorted_entries:
+            entry, entry_hash, _ = entry_tuple
+
+            response: Dict[str, Any] = send_zulip(entry, feed_name)
+            if response["result"] != "success":
+                logger.error("Error processing %s", feed_url)
+                logger.error("%s", response)
+                if not old_feed_hashes and entry_tuple == sorted_entries[0]:
+                    # This is probably some fundamental problem like the stream not
+                    # existing or something being misconfigured, so bail instead of
+                    # getting the same error for every RSS entry.
+                    log_error_and_exit("Failed to process first message")
+            # Go ahead and move on -- perhaps this entry is corrupt.
+            f.write(entry_hash + "\n")
+
+    logger.info(
+        "Processed %d entries from %s and sent %d zulips",
+        len(unhashed_entries),
+        feed_url,
+        len(sorted_entries),
+    )