Skip to content

Commit 1e26c3d

Browse files
authored
Improve logging and error handling (#105)
- Prevents an issue with loading event metadata from crashing ingestion.
- Changes the default logging level to improve debugging.
1 parent 5cabbd7 commit 1e26c3d

7 files changed

Lines changed: 36 additions & 72 deletions

File tree

compose.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ services:
99
environment:
1010
- SPLUNK_START_ARGS=--accept-license
1111
- SPLUNK_PASSWORD=a_password
12+
- SPLUNK_GENERAL_TERMS=--accept-sgt-current-at-splunk-com
1213
volumes:
1314
- ./output/flare:/opt/splunk/etc/apps/flare
1415
- ./splunk/default.yml:/tmp/defaults/default.yml
15-
- ./logs:/opt/splunk/var/log/splunk
16+
- ./logs:/opt/splunk/var/log/splunk

packages/flare/bin/cron_job_ingest_events.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ def main(
4747
ingest_full_event_data = get_ingest_full_event_data(
4848
storage_passwords=storage_passwords
4949
)
50+
number_of_days_to_backfill = get_number_of_days_to_backfill(
51+
storage_passwords=storage_passwords
52+
)
5053
severities_filter = get_severities_filter(storage_passwords=storage_passwords)
5154
source_types_filter = get_source_types_filter(storage_passwords=storage_passwords)
5255

@@ -61,9 +64,6 @@ def main(
6164
# for identifiers 30 days prior to the day a tenant was first configured.
6265
start_date = data_store.get_earliest_ingested_by_tenant(tenant_id)
6366
if not start_date:
64-
number_of_days_to_backfill = get_number_of_days_to_backfill(
65-
storage_passwords=storage_passwords
66-
)
6767
start_date = datetime.now(timezone.utc) - timedelta(
6868
days=number_of_days_to_backfill
6969
)
@@ -93,7 +93,7 @@ def main(
9393
logger.info(f"Fetched {events_fetched_count} events on tenant {tenant_id}")
9494
total_events_fetched_count += events_fetched_count
9595

96-
logger.info(f"Fetched {events_fetched_count} events across all tenants")
96+
logger.info(f"Fetched {total_events_fetched_count} events across all tenants")
9797

9898

9999
def fetch_feed(
@@ -163,14 +163,13 @@ def get_tenant_ids(storage_passwords: client.StoragePasswords) -> list[int]:
163163
stored_tenant_ids = get_storage_password_value(
164164
storage_passwords=storage_passwords, password_key=PasswordKeys.TENANT_IDS.value
165165
)
166+
tenant_ids = None
166167
try:
167-
tenant_ids: Optional[list[int]] = (
168-
json.loads(stored_tenant_ids) if stored_tenant_ids else None
169-
)
168+
tenant_ids = json.loads(stored_tenant_ids) if stored_tenant_ids else None
170169
except Exception:
171170
pass
172171

173-
if not tenant_ids:
172+
if tenant_ids is None:
174173
raise Exception("Tenant IDs not found")
175174
return tenant_ids
176175

packages/flare/bin/data_store.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ def _commit(self) -> None:
3333
def _sync(self) -> None:
3434
self._store.read(config_path)
3535

36+
def reset(self) -> None:
37+
self._store.clear()
38+
self._commit()
39+
3640
def get_last_fetch(self) -> Optional[datetime]:
3741
self._sync()
3842
last_fetched = self._store.get(

packages/flare/bin/flare.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,12 +78,18 @@ def fetch_feed_events(
7878
self.logger.debug(event_feed)
7979
next_token = event_feed["next"]
8080
for event in event_feed["items"]:
81-
if ingest_full_event_data:
82-
event = self._fetch_full_event_from_uid(
83-
uid=event["metadata"]["uid"]
84-
)
85-
time.sleep(1) # Don't hit rate limit
86-
yield (event, next_token)
81+
try:
82+
if ingest_full_event_data:
83+
event = self._fetch_full_event_from_uid(
84+
uid=event["metadata"]["uid"]
85+
)
86+
time.sleep(1) # Don't hit rate limit
87+
except:
88+
# There is already logging in the _fetch_full_event_from_uid
89+
# we want to continue getting the other events even if one fails.
90+
pass
91+
finally:
92+
yield (event, next_token)
8793

8894
def _fetch_event_feed_metadata(
8995
self,

packages/flare/bin/logger.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def __init__(self, *, class_name: str) -> None:
2424
if os.environ.get("FLARE_ENV") == "dev":
2525
self._logger.setLevel(logging.DEBUG)
2626
else:
27-
self._logger.setLevel(logging.ERROR)
27+
self._logger.setLevel(logging.INFO)
2828
formatter = logging.Formatter("%(asctime)s %(levelname)-5s %(message)s")
2929
handler = TimedRotatingFileHandler(
3030
log_filepath, when="d", interval=1, backupCount=5

packages/flare/tests/bin/test_flare_wrapper.py

Lines changed: 9 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
import pytest
21
import requests_mock
32

43
from conftest import FakeLogger
@@ -159,48 +158,7 @@ def test_flare_full_data_with_metadata(
159158
assert mock_full_event_2.called
160159

161160

162-
def test_flare_full_data_with_metadata_and_exception(
163-
logger: FakeLogger,
164-
disable_sleep: Any,
165-
) -> None:
166-
with requests_mock.Mocker() as mocker:
167-
mocker.register_uri(
168-
"POST",
169-
"https://api.flare.io/tokens/generate",
170-
status_code=200,
171-
json={"token": "access_token"},
172-
)
173-
174-
tenant_resp_page_1 = {
175-
"next": "some_next_value",
176-
"items": [
177-
{"not_metadata": {"uid": "some_uid_1"}},
178-
{"metadata": {"uid": "some_uid_2"}},
179-
],
180-
}
181-
182-
mocker.register_uri(
183-
"POST",
184-
"https://api.flare.io/firework/v4/events/tenant/_search",
185-
status_code=200,
186-
json=tenant_resp_page_1,
187-
)
188-
189-
flare_api = FlareAPI(api_key="some_key", tenant_id=111, logger=logger)
190-
191-
with pytest.raises(KeyError, match="metadata"):
192-
next(
193-
flare_api.fetch_feed_events(
194-
next=None,
195-
start_date=None,
196-
ingest_full_event_data=True,
197-
severities=[],
198-
source_types=[],
199-
)
200-
)
201-
202-
203-
def test_flare_full_data_retry_exception(
161+
def test_flare_full_data_retry_errors(
204162
logger: FakeLogger,
205163
disable_sleep: Any,
206164
) -> None:
@@ -235,19 +193,15 @@ def test_flare_full_data_retry_exception(
235193

236194
flare_api = FlareAPI(api_key="some_key", tenant_id=111, logger=logger)
237195

238-
with pytest.raises(
239-
Exception,
240-
match="failed to fetch full event data for some_uid_1 after 3 tries",
241-
):
242-
next(
243-
flare_api.fetch_feed_events(
244-
next=None,
245-
start_date=None,
246-
ingest_full_event_data=True,
247-
severities=[],
248-
source_types=[],
249-
)
196+
next(
197+
flare_api.fetch_feed_events(
198+
next=None,
199+
start_date=None,
200+
ingest_full_event_data=True,
201+
severities=[],
202+
source_types=[],
250203
)
204+
)
251205

252206
assert logger.messages == [
253207
"INFO: Failed to fetch event 1/3 retries: 500 Server Error: None for url: https://api.flare.io/firework/v2/activities/some_uid_1",

packages/flare/tests/bin/test_ingest_events.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,5 +139,5 @@ def test_main_expect_normal_run(
139139
"INFO: Fetched 2 events on tenant 11111",
140140
"INFO: Fetching tenant_id=22222, next=None, start_date=FakeDatetime(1999, 12, 2, 0, 0, tzinfo=datetime.timezone.utc)",
141141
"INFO: Fetched 2 events on tenant 22222",
142-
"INFO: Fetched 2 events across all tenants",
142+
"INFO: Fetched 4 events across all tenants",
143143
]

0 commit comments

Comments
 (0)