11# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
22# See the file 'LICENSE' for copying permission.
33import csv
4+ import hashlib
45import logging
6+ import urllib .parse
57from datetime import datetime , timedelta
68
79import feedparser
810import requests
911from django .conf import settings
1012from django .contrib .postgres .aggregates import ArrayAgg
11- from django .core .cache import cache
13+ from django .core .cache import cache , caches
1214from django .db .models import Count , F , Max , Min , Q , Sum , Value
1315from django .db .models .functions import JSONObject
1416from django .http import HttpResponse , HttpResponseBadRequest , StreamingHttpResponse
1921from api .serializers import FeedsRequestSerializer , parse_feed_types
2022from greedybear .consts import CACHE_KEY_GREEDYBEAR_NEWS , CACHE_TIMEOUT_SECONDS , RSS_FEED_URL
2123from greedybear .enums import IpReputation
22- from greedybear .models import IOC , GeneralHoneypot , Statistics
24+ from greedybear .models import IOC , Honeypot , Statistics
2325from greedybear .utils import is_ip_address , is_valid_domain
2426
2527logger = logging .getLogger (__name__ )
@@ -144,8 +146,8 @@ def get_valid_feed_types() -> frozenset[str]:
144146 Returns:
145147 frozenset[str]: An immutable set of valid feed type strings
146148 """
147- general_honeypots = GeneralHoneypot .objects .filter (active = True )
148- feed_types = ["all" ] + [hp .name .lower () for hp in general_honeypots ]
149+ honeypots = Honeypot .objects .filter (active = True )
150+ feed_types = ["all" ] + [hp .name .lower () for hp in honeypots ]
149151 return frozenset (feed_types )
150152
151153
@@ -226,13 +228,13 @@ def get_queryset(request, feed_params, valid_feed_types, is_aggregated=False, se
226228 if "all" not in feed_params .feed_types :
227229 type_filter = Q ()
228230 for ft in feed_params .feed_types :
229- type_filter |= Q (general_honeypot__name__iexact = ft )
231+ type_filter |= Q (honeypots__name__iexact = ft )
230232 iocs = iocs .filter (type_filter )
231233
232234 # aggregated feeds calculate metrics differently and need all rows to be accurate.
233235 if not is_aggregated :
234- iocs = iocs .filter (general_honeypot__active = True )
235- iocs = iocs .annotate (honeypots = ArrayAgg ("general_honeypot__name " , distinct = True ))
236+ iocs = iocs .filter (honeypots__active = True )
237+ iocs = iocs .annotate (honeypot_names = ArrayAgg ("honeypots__name " , distinct = True ))
236238 # Only annotate tags metadata when the response format needs it (e.g. JSON),
237239 # to avoid unnecessary joins and aggregation work for txt/csv feeds.
238240 if getattr (feed_params , "format" , "" ).lower () == "json" :
@@ -315,7 +317,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
315317 "login_attempts" ,
316318 "recurrence_probability" ,
317319 "expected_interactions" ,
318- "honeypots " , # used to build feed_type; removed from response
320+ "honeypot_names " , # used to build feed_type; removed from response
319321 "destination_ports" , # used to calculate destination_port_count
320322 "attacker_country" ,
321323 "autonomous_system" ,
@@ -344,7 +346,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
344346 else :
345347 iocs_iter = iocs .values (* required_fields ).iterator (chunk_size = 2000 )
346348 for ioc in iocs_iter :
347- ioc_feed_type = [hp .lower () for hp in ioc .get ("honeypots " , []) if hp ]
349+ ioc_feed_type = [hp .lower () for hp in ioc .get ("honeypot_names " , []) if hp ]
348350
349351 data_ = ioc | {
350352 "first_seen" : ioc ["first_seen" ].strftime ("%Y-%m-%d" ),
@@ -358,7 +360,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
358360 if not verbose :
359361 data_ .pop ("destination_ports" , None )
360362 data_ .pop ("autonomous_system" , None )
361- data_ .pop ("honeypots " , None )
363+ data_ .pop ("honeypot_names " , None )
362364 data_ .pop ("id" , None )
363365
364366 json_list .append (data_ )
@@ -386,7 +388,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
386388 "first_seen" ,
387389 "last_seen" ,
388390 "recurrence_probability" ,
389- "honeypots " ,
391+ "honeypot_names " ,
390392 "ip_reputation" ,
391393 }
392394 # Fetch fields from database
@@ -416,7 +418,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
416418 confidence = 90
417419
418420 # Labels
419- labels = [hp .lower () for hp in ioc .get ("honeypots " , []) if hp ]
421+ labels = [hp .lower () for hp in ioc .get ("honeypot_names " , []) if hp ]
420422 if ioc .get ("ip_reputation" ):
421423 labels .append (ioc ["ip_reputation" ])
422424
@@ -446,15 +448,33 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
446448
447449def asn_aggregated_queryset (iocs_qs , request , feed_params ):
448450 """
449- Perform DB-level aggregation grouped by ASN.
451+ Retrieve ASN aggregation data. Caches the heavy aggregation query
452+ since the data only updates during the extraction cronjob.
450453
451454 Args
452455 iocs_qs (QuerySet): Filtered IOC queryset from get_queryset;
453456 request (Request): The API request object;
454457 feed_params (FeedRequestParams): Validated parameter object
455458
456- Returns: A values-grouped queryset with annotated metrics and honeypot arrays.
459+ Returns: A list of dicts with aggregated metrics and honeypot arrays per ASN .
457460 """
461+
462+ # Build reliable cache key from query params
463+ sorted_params = sorted (request .query_params .lists ())
464+ params_string = urllib .parse .urlencode (sorted_params , doseq = True )
465+ param_hash = hashlib .sha256 (params_string .encode ("utf-8" )).hexdigest ()
466+
467+ # To prevent per-worker continuous RAM bloat, use the shared DB-backed cache
468+ # instead of the default LocMemCache, since the JSON response size can be large.
469+ # The extraction pipeline invalidates this cache by bumping the version counter.
470+ shared_cache = caches ["django-q" ]
471+ version = shared_cache .get ("asn_feeds_version" , 1 )
472+ cache_key = f"asn_feeds_v{ version } _{ param_hash } "
473+
474+ cached_result = shared_cache .get (cache_key )
475+ if cached_result is not None :
476+ return cached_result
477+
458478 asn_filter = request .query_params .get ("asn" )
459479 if asn_filter :
460480 iocs_qs = iocs_qs .filter (autonomous_system__asn = asn_filter )
@@ -480,31 +500,35 @@ def asn_aggregated_queryset(iocs_qs, request, feed_params):
480500 first_seen = Min ("first_seen" ),
481501 last_seen = Max ("last_seen" ),
482502 )
483- .order_by (ordering )
484503 )
504+ numeric_agg = numeric_agg .order_by (ordering )
485505
506+ # Honeypot names still require a lightweight aggregation because
507+ # they depend on the active flag which can change independently.
486508 honeypot_agg = (
487509 iocs_qs .exclude (autonomous_system__isnull = True )
488- .filter (general_honeypot__active = True )
510+ .filter (honeypots__active = True )
489511 .values (asn = F ("autonomous_system__asn" ))
490512 .annotate (
491- honeypots = ArrayAgg (
492- "general_honeypot__name " ,
513+ honeypot_names = ArrayAgg (
514+ "honeypots__name " ,
493515 distinct = True ,
494516 )
495517 )
496518 )
497519
498- hp_lookup = {row ["asn" ]: row ["honeypots " ] or [] for row in honeypot_agg }
520+ hp_lookup = {row ["asn" ]: row ["honeypot_names " ] or [] for row in honeypot_agg }
499521
500- # merging numeric aggregate with honeypot names for each asn
501522 result = []
502523 for row in numeric_agg :
503524 asn = row ["asn" ]
504525 row_dict = dict (row )
505526 row_dict ["honeypots" ] = sorted (hp_lookup .get (asn , []))
506527 result .append (row_dict )
507528
529+ # Set cache with a 60-minute timeout (max extraction interval length) to prevent memory bloat
530+ shared_cache .set (cache_key , result , timeout = 3600 )
531+
508532 return result
509533
510534
0 commit comments