Skip to content

Commit 971c43d

Browse files
authored
Merge pull request GreedyBear-Project#1203 from GreedyBear-Project/develop
3.3.1
2 parents ed08ca6 + 7d9ac5a commit 971c43d

62 files changed

Lines changed: 1034 additions & 613 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/FUNDING.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
open_collective: intelowl-project
2-
github: intelowlproject

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,12 @@ To install it locally, Please refer to our [installation guide](https://intelowl
5454

5555
Thanks to [The Honeynet Project](https://www.honeynet.org) we are providing free public feeds available [here](https://greedybear.honeynet.org).
5656

57-
#### DigitalOcean
57+
#### Google Summer of Code
58+
<a href="https://summerofcode.withgoogle.com/"> <img style="border: 0.2px solid black" width=150 height=89 src="static/gsoc_logo.png" alt="GSoC logo"> </a>
5859

59-
In 2022 we joined the official [DigitalOcean Open Source Program](https://www.digitalocean.com/open-source?utm_medium=opensource&utm_source=IntelOwl).
60+
In 2026 we started participating in the [Google Summer of Code](https://summerofcode.withgoogle.com/) (GSoC)!
6061

62+
If you are interested in participating in the next Google Summer of Code, check all the info available in the [dedicated repository](https://github.com/intelowlproject/gsoc)!
6163

6264
## Maintainers and Key Contributors
6365

api/serializers.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@
66
from rest_framework import serializers
77

88
from greedybear.consts import REGEX_DOMAIN
9-
from greedybear.models import IOC, GeneralHoneypot, Sensor, Tag
9+
from greedybear.models import IOC, Honeypot, Sensor, Tag
1010
from greedybear.utils import is_ip_address
1111

1212
logger = logging.getLogger(__name__)
1313

1414

15-
class GeneralHoneypotSerializer(serializers.ModelSerializer):
15+
class HoneypotSerializer(serializers.ModelSerializer):
1616
class Meta:
17-
model = GeneralHoneypot
17+
model = Honeypot
1818

1919
def to_representation(self, value):
2020
return value.name
@@ -33,7 +33,7 @@ class Meta:
3333

3434

3535
class IOCSerializer(serializers.ModelSerializer):
36-
general_honeypot = GeneralHoneypotSerializer(many=True, read_only=True)
36+
general_honeypot = HoneypotSerializer(many=True, read_only=True, source="honeypots")
3737
tags = TagSerializer(many=True, read_only=True)
3838
sensors = SensorSerializer(many=True, read_only=True)
3939

api/views/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from api.views.cowrie_session import *
33
from api.views.enrichment import *
44
from api.views.feeds import *
5-
from api.views.general_honeypot import *
65
from api.views.health import *
6+
from api.views.honeypots import *
77
from api.views.news import *
88
from api.views.statistics import *

api/views/health.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
IOC,
1616
CowrieSession,
1717
FireHolList,
18-
GeneralHoneypot,
18+
Honeypot,
1919
MassScanner,
2020
TorExitNode,
2121
)
@@ -71,8 +71,8 @@ def get_observables_overview(last_24h):
7171
)
7272

7373
honeypot_stats = {
74-
"total": GeneralHoneypot.objects.count(),
75-
"active": GeneralHoneypot.objects.filter(active=True).count(),
74+
"total": Honeypot.objects.count(),
75+
"active": Honeypot.objects.filter(active=True).count(),
7676
}
7777

7878
threat_list_stats = {
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from rest_framework.response import Response
77

88
from greedybear.consts import GET
9-
from greedybear.models import GeneralHoneypot
9+
from greedybear.models import Honeypot
1010

1111
logger = logging.getLogger(__name__)
1212

@@ -23,14 +23,14 @@ def general_honeypot_list(request):
2323
Response: A JSON response containing the list of general honeypots.
2424
"""
2525

26-
logger.info(f"Requested general honeypots list from {request.user}.")
26+
logger.info(f"Requested honeypots list from {request.user}.")
2727
active = request.query_params.get("onlyActive")
2828
honeypots = []
29-
general_honeypots = GeneralHoneypot.objects.all()
29+
honeypot_objs = Honeypot.objects.all()
3030
if active == "true":
31-
general_honeypots = general_honeypots.filter(active=True)
32-
logger.info(f"Requested only active general honeypots from {request.user}")
33-
honeypots.extend([hp.name for hp in general_honeypots])
31+
honeypot_objs = honeypot_objs.filter(active=True)
32+
logger.info(f"Requested only active honeypots from {request.user}")
33+
honeypots.extend([hp.name for hp in honeypot_objs])
3434

35-
logger.info(f"General honeypots: {honeypots} given back to user {request.user}")
35+
logger.info(f"Honeypots: {honeypots} given back to user {request.user}")
3636
return Response(honeypots)

api/views/statistics.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from rest_framework.decorators import action
1111
from rest_framework.response import Response
1212

13-
from greedybear.models import IOC, GeneralHoneypot, Statistics, ViewType
13+
from greedybear.models import IOC, Honeypot, Statistics, ViewType
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -92,7 +92,7 @@ def countries(self, request):
9292
qs = (
9393
IOC.objects.filter(last_seen__gte=delta)
9494
.exclude(attacker_country="")
95-
.filter(general_honeypot__active=True)
95+
.filter(honeypots__active=True)
9696
.values("attacker_country")
9797
.annotate(count=Count("id", distinct=True))
9898
.order_by("-count")
@@ -103,7 +103,7 @@ def countries(self, request):
103103
@action(detail=False, methods=["get"])
104104
def feeds_types(self, request):
105105
"""
106-
Retrieve statistics for different types of feeds using GeneralHoneypot M2M relationship.
106+
Retrieve statistics for different types of feeds using Honeypot M2M relationship.
107107
108108
Args:
109109
request: The incoming request object.
@@ -113,10 +113,10 @@ def feeds_types(self, request):
113113
"""
114114
# Build annotations for each active honeypot
115115
annotations = {}
116-
general_honeypots = GeneralHoneypot.objects.all().filter(active=True)
117-
for hp in general_honeypots:
116+
honeypots = Honeypot.objects.all().filter(active=True)
117+
for hp in honeypots:
118118
# Use M2M relationship instead of boolean fields
119-
annotations[hp.name] = Count("name", distinct=True, filter=Q(general_honeypot__name__iexact=hp.name))
119+
annotations[hp.name] = Count("name", distinct=True, filter=Q(honeypots__name__iexact=hp.name))
120120
return self.__aggregation_response_static_ioc(annotations)
121121

122122
def __aggregation_response_static_statistics(self, annotations: dict) -> Response:
@@ -147,7 +147,7 @@ def __aggregation_response_static_ioc(self, annotations: dict) -> Response:
147147

148148
qs = (
149149
IOC.objects.filter(last_seen__gte=delta)
150-
.exclude(general_honeypot__active=False)
150+
.exclude(honeypots__active=False)
151151
.annotate(date=Trunc("last_seen", basis))
152152
.values("date")
153153
.annotate(**annotations)

api/views/utils.py

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
# This file is a part of GreedyBear https://github.com/honeynet/GreedyBear
22
# See the file 'LICENSE' for copying permission.
33
import csv
4+
import hashlib
45
import logging
6+
import urllib.parse
57
from datetime import datetime, timedelta
68

79
import feedparser
810
import requests
911
from django.conf import settings
1012
from django.contrib.postgres.aggregates import ArrayAgg
11-
from django.core.cache import cache
13+
from django.core.cache import cache, caches
1214
from django.db.models import Count, F, Max, Min, Q, Sum, Value
1315
from django.db.models.functions import JSONObject
1416
from django.http import HttpResponse, HttpResponseBadRequest, StreamingHttpResponse
@@ -19,7 +21,7 @@
1921
from api.serializers import FeedsRequestSerializer, parse_feed_types
2022
from greedybear.consts import CACHE_KEY_GREEDYBEAR_NEWS, CACHE_TIMEOUT_SECONDS, RSS_FEED_URL
2123
from greedybear.enums import IpReputation
22-
from greedybear.models import IOC, GeneralHoneypot, Statistics
24+
from greedybear.models import IOC, Honeypot, Statistics
2325
from greedybear.utils import is_ip_address, is_valid_domain
2426

2527
logger = logging.getLogger(__name__)
@@ -144,8 +146,8 @@ def get_valid_feed_types() -> frozenset[str]:
144146
Returns:
145147
frozenset[str]: An immutable set of valid feed type strings
146148
"""
147-
general_honeypots = GeneralHoneypot.objects.filter(active=True)
148-
feed_types = ["all"] + [hp.name.lower() for hp in general_honeypots]
149+
honeypots = Honeypot.objects.filter(active=True)
150+
feed_types = ["all"] + [hp.name.lower() for hp in honeypots]
149151
return frozenset(feed_types)
150152

151153

@@ -226,13 +228,13 @@ def get_queryset(request, feed_params, valid_feed_types, is_aggregated=False, se
226228
if "all" not in feed_params.feed_types:
227229
type_filter = Q()
228230
for ft in feed_params.feed_types:
229-
type_filter |= Q(general_honeypot__name__iexact=ft)
231+
type_filter |= Q(honeypots__name__iexact=ft)
230232
iocs = iocs.filter(type_filter)
231233

232234
# aggregated feeds calculate metrics differently and need all rows to be accurate.
233235
if not is_aggregated:
234-
iocs = iocs.filter(general_honeypot__active=True)
235-
iocs = iocs.annotate(honeypots=ArrayAgg("general_honeypot__name", distinct=True))
236+
iocs = iocs.filter(honeypots__active=True)
237+
iocs = iocs.annotate(honeypot_names=ArrayAgg("honeypots__name", distinct=True))
236238
# Only annotate tags metadata when the response format needs it (e.g. JSON),
237239
# to avoid unnecessary joins and aggregation work for txt/csv feeds.
238240
if getattr(feed_params, "format", "").lower() == "json":
@@ -315,7 +317,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
315317
"login_attempts",
316318
"recurrence_probability",
317319
"expected_interactions",
318-
"honeypots", # used to build feed_type; removed from response
320+
"honeypot_names", # used to build feed_type; removed from response
319321
"destination_ports", # used to calculate destination_port_count
320322
"attacker_country",
321323
"autonomous_system",
@@ -344,7 +346,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
344346
else:
345347
iocs_iter = iocs.values(*required_fields).iterator(chunk_size=2000)
346348
for ioc in iocs_iter:
347-
ioc_feed_type = [hp.lower() for hp in ioc.get("honeypots", []) if hp]
349+
ioc_feed_type = [hp.lower() for hp in ioc.get("honeypot_names", []) if hp]
348350

349351
data_ = ioc | {
350352
"first_seen": ioc["first_seen"].strftime("%Y-%m-%d"),
@@ -358,7 +360,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
358360
if not verbose:
359361
data_.pop("destination_ports", None)
360362
data_.pop("autonomous_system", None)
361-
data_.pop("honeypots", None)
363+
data_.pop("honeypot_names", None)
362364
data_.pop("id", None)
363365

364366
json_list.append(data_)
@@ -386,7 +388,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
386388
"first_seen",
387389
"last_seen",
388390
"recurrence_probability",
389-
"honeypots",
391+
"honeypot_names",
390392
"ip_reputation",
391393
}
392394
# Fetch fields from database
@@ -416,7 +418,7 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
416418
confidence = 90
417419

418420
# Labels
419-
labels = [hp.lower() for hp in ioc.get("honeypots", []) if hp]
421+
labels = [hp.lower() for hp in ioc.get("honeypot_names", []) if hp]
420422
if ioc.get("ip_reputation"):
421423
labels.append(ioc["ip_reputation"])
422424

@@ -446,15 +448,33 @@ def feeds_response(request=None, iocs=None, feed_params=None, valid_feed_types=N
446448

447449
def asn_aggregated_queryset(iocs_qs, request, feed_params):
448450
"""
449-
Perform DB-level aggregation grouped by ASN.
451+
Retrieve ASN aggregation data. Caches the heavy aggregation query
452+
since the data only updates during the extraction cronjob.
450453
451454
Args
452455
iocs_qs (QuerySet): Filtered IOC queryset from get_queryset;
453456
request (Request): The API request object;
454457
feed_params (FeedRequestParams): Validated parameter object
455458
456-
Returns: A values-grouped queryset with annotated metrics and honeypot arrays.
459+
Returns: A list of dicts with aggregated metrics and honeypot arrays per ASN.
457460
"""
461+
462+
# Build reliable cache key from query params
463+
sorted_params = sorted(request.query_params.lists())
464+
params_string = urllib.parse.urlencode(sorted_params, doseq=True)
465+
param_hash = hashlib.sha256(params_string.encode("utf-8")).hexdigest()
466+
467+
# To prevent per-worker continuous RAM bloat, use the shared DB-backed cache
468+
# instead of the default LocMemCache, since the JSON response size can be large.
469+
# The extraction pipeline invalidates this cache by bumping the version counter.
470+
shared_cache = caches["django-q"]
471+
version = shared_cache.get("asn_feeds_version", 1)
472+
cache_key = f"asn_feeds_v{version}_{param_hash}"
473+
474+
cached_result = shared_cache.get(cache_key)
475+
if cached_result is not None:
476+
return cached_result
477+
458478
asn_filter = request.query_params.get("asn")
459479
if asn_filter:
460480
iocs_qs = iocs_qs.filter(autonomous_system__asn=asn_filter)
@@ -480,31 +500,35 @@ def asn_aggregated_queryset(iocs_qs, request, feed_params):
480500
first_seen=Min("first_seen"),
481501
last_seen=Max("last_seen"),
482502
)
483-
.order_by(ordering)
484503
)
504+
numeric_agg = numeric_agg.order_by(ordering)
485505

506+
# Honeypot names still require a lightweight aggregation because
507+
# they depend on the active flag which can change independently.
486508
honeypot_agg = (
487509
iocs_qs.exclude(autonomous_system__isnull=True)
488-
.filter(general_honeypot__active=True)
510+
.filter(honeypots__active=True)
489511
.values(asn=F("autonomous_system__asn"))
490512
.annotate(
491-
honeypots=ArrayAgg(
492-
"general_honeypot__name",
513+
honeypot_names=ArrayAgg(
514+
"honeypots__name",
493515
distinct=True,
494516
)
495517
)
496518
)
497519

498-
hp_lookup = {row["asn"]: row["honeypots"] or [] for row in honeypot_agg}
520+
hp_lookup = {row["asn"]: row["honeypot_names"] or [] for row in honeypot_agg}
499521

500-
# merging numeric aggregate with honeypot names for each asn
501522
result = []
502523
for row in numeric_agg:
503524
asn = row["asn"]
504525
row_dict = dict(row)
505526
row_dict["honeypots"] = sorted(hp_lookup.get(asn, []))
506527
result.append(row_dict)
507528

529+
# Set cache with a 60-minute timeout (max extraction interval length) to prevent memory bloat
530+
shared_cache.set(cache_key, result, timeout=3600)
531+
508532
return result
509533

510534

configuration/gunicorn/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
1-
import multiprocessing
1+
import os
22

33
# Server socket
44
bind = "unix:/run/gunicorn/main.sock"
55

66
# Worker processes
7-
workers = 2 * multiprocessing.cpu_count() + 1
7+
workers = 2 * len(os.sched_getaffinity(0)) + 1
88
max_requests = 1000
99
max_requests_jitter = 50
1010

docker/Dockerfile

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,30 +37,37 @@ ENV UV_PROJECT_ENVIRONMENT=/usr/local
3737

3838
WORKDIR $APP_ROOT
3939

40-
# Install runtime dependencies
41-
# - libgomp1 is required for model training
42-
# - curl is used for healthcheck
43-
RUN apt-get update && apt-get install -y --no-install-recommends \
44-
libgomp1 curl gosu \
40+
# Layer 1: stable runtime OS deps — cached across pyproject.toml/uv.lock changes.
41+
# libgomp1: model training; curl: healthcheck; gosu: entrypoint privilege drop
42+
# libpq5: runtime shared library required by the psycopg[c] C extension
43+
RUN apt-get update \
44+
&& apt-get install -y --no-install-recommends libgomp1 curl gosu libpq5 \
4545
&& rm -rf /var/lib/apt/lists/*
4646

47-
# Install python packages
47+
# Layer 2: Python packages — only re-runs when pyproject.toml/uv.lock change.
48+
# Build-only deps (gcc, python3-dev, libpq-dev) compile the psycopg[c] C
49+
# extension and are purged in the same layer to keep the final image lean.
4850
COPY pyproject.toml uv.lock ./
49-
RUN uv sync --no-dev --locked
51+
RUN apt-get update \
52+
&& apt-get install -y --no-install-recommends gcc python3-dev libpq-dev \
53+
&& uv sync --no-dev --locked \
54+
&& uv cache clean \
55+
&& apt-get purge -y gcc python3-dev libpq-dev \
56+
&& apt-get autoremove -y \
57+
&& rm -rf /var/lib/apt/lists/*
5058

5159
# Copy files
5260
COPY . $APP_ROOT
5361
COPY --from=frontend-build /app/build /var/www/reactapp
5462

55-
# separation is required to avoid to re-execute os installation in case of change of python requirements
63+
# Set up log directories, fix permissions, and remove frontend source (served from /var/www/reactapp)
5664
RUN mkdir -p ${LOG_PATH}/django ${LOG_PATH}/gunicorn \
5765
&& touch ${LOG_PATH}/django/api.log ${LOG_PATH}/django/api_errors.log \
5866
&& touch ${LOG_PATH}/django/greedybear.log ${LOG_PATH}/django/greedybear_errors.log \
5967
&& touch ${LOG_PATH}/django/django_q.log ${LOG_PATH}/django/django_q_errors.log \
6068
&& touch ${LOG_PATH}/django/django_errors.log ${LOG_PATH}/django/elasticsearch.log \
6169
&& touch ${LOG_PATH}/django/authentication.log ${LOG_PATH}/django/authentication_errors.log \
6270
&& mkdir -p ${APP_ROOT}/mlmodels \
63-
&& usermod -u 2000 www-data \
6471
&& chown -R www-data:www-data ${LOG_PATH} /opt/deploy/ ${APP_ROOT}/mlmodels/ \
6572
&& rm -rf frontend/
6673

0 commit comments

Comments
 (0)