Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sdk/storage/azure-storage-blob-changefeed/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ This version and all future versions will require Python 3.9+. Python 3.8 is no

### Features Added

### Bugs Fixed
- Fixed an `IndexError` that occurred when listing change feed events on accounts where the
`$blobchangefeed/idx/segments/` hierarchy contains directory marker blobs (e.g.
`idx/segments/2026/02/20`). Such non-segment paths are now skipped instead of being parsed
as segment files.

## 12.0.0b5 (2024-04-16)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,10 @@ def _get_segment_paths(self, start_year=""):
while not start_year or start_year <= cur_year:
paths = self.client.list_blobs(name_starts_with=SEGMENT_COMMON_PATH + str(start_year))
for path in paths:
yield path.name
# Skip directory marker blobs that do not conform to the expected segment path shape.
# Azure Storage can return zero-length directory markers that are not real segment files.
if self._is_valid_segment_path(path.name):
yield path.name
Comment thread
weirongw23-msft marked this conversation as resolved.

# if not searching by prefix, all paths would have been iterated already, so it's time to yield None
if not start_year:
Expand All @@ -291,6 +294,13 @@ def _get_segment_paths(self, start_year=""):
start_year += 1
yield None

@staticmethod
def _is_valid_segment_path(segment_path):
    """Return whether ``segment_path`` has the shape of a change feed segment file.

    The expected layout is ``idx/segments/YYYY/MM/DD/HHMM/<file>``. Blob listings
    can also contain directory marker blobs (for example ``idx/segments/2026/02/20``)
    that stop short of the file component; those are reported as invalid so callers
    can skip them.

    :param str segment_path: Blob name returned when listing the segments prefix.
    :returns: ``True`` only when the path has at least seven delimiter-separated
        tokens, the year/month/day/HHMM tokens consist of ASCII digits, and the
        file token is non-empty; ``False`` otherwise.
    :rtype: bool
    """
    path_tokens = segment_path.split(PATH_DELIMITER)

    # Directory markers (year, month, day or HHMM level) have fewer than seven tokens.
    if len(path_tokens) < 7:
        return False

    # Tokens 2..5 are YYYY, MM, DD and HHMM. Requiring ASCII digits rejects malformed
    # names (e.g. a non-numeric year) as well as empty tokens produced by doubled
    # delimiters, since "".isdigit() is False.
    if not all(token.isascii() and token.isdigit() for token in path_tokens[2:6]):
        return False

    # A trailing delimiter leaves an empty file token. Coerce to a real bool so that
    # identity checks such as ``is True`` / ``is False`` behave as expected.
    return bool(path_tokens[6])

@staticmethod
def _parse_datetime_from_segment_path(segment_path):
path_tokens = segment_path.split("/")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from datetime import datetime, timedelta
from math import ceil
from time import sleep
from unittest.mock import Mock, patch

import pytest

Expand All @@ -23,11 +24,80 @@
# Then uncomment this import and comment out the other.
# from changefeed import ChangeFeedClient
from azure.storage.blob.changefeed import ChangeFeedClient
from azure.storage.blob.changefeed._models import ChangeFeed


def _build_change_feed(blob_names):
    """Create a ChangeFeed whose client is a mock that lists the given blob names.

    ``ChangeFeed._initialize`` is patched out while the object is constructed, so
    the constructor does not run that method.

    :param blob_names: Names to expose through the mock client's ``list_blobs``.
    :returns: A ``ChangeFeed`` built with the mock client and ``page_size=100``.
    """
    listed_blobs = []
    for blob_name in blob_names:
        # ``name`` is assigned after construction because Mock(name=...) configures
        # the mock's own repr name rather than creating a ``name`` attribute.
        fake_blob = Mock()
        fake_blob.name = blob_name
        listed_blobs.append(fake_blob)

    mock_client = Mock()
    mock_client.list_blobs.return_value = listed_blobs

    with patch.object(ChangeFeed, "_initialize"):
        change_feed = ChangeFeed(mock_client, page_size=100)
    return change_feed


@pytest.mark.playback_test_only
class TestStorageChangeFeed(StorageRecordedTestCase):

@pytest.mark.parametrize(
    "segment_path",
    [
        "idx/segments/2026/02/20/0000/meta.json",
        "idx/segments/2022/11/28/2300/meta.json",
        "idx/segments/1601/01/01/0000/meta.json",
    ],
)
def test_valid_segment_path_is_accepted(self, segment_path):
    """Well-formed segment file paths must be reported as valid (exactly ``True``)."""
    is_valid = ChangeFeed._is_valid_segment_path(segment_path)
    assert is_valid is True

@pytest.mark.parametrize(
    "segment_path",
    [
        "idx/segments/2026/02/20",  # day-level directory marker (the reported crash)
        "idx/segments/2026/02/20/0000",  # minute-level directory marker
        "idx/segments/2026/02",  # month-level directory marker
        "idx/segments/2026",  # year-level directory marker
        "idx/segments",  # prefix only
        "idx/segments/2026/02/20/0000/",  # trailing slash -> empty file token
        "idx/segments/abcd/02/20/0000/meta.json",  # non-numeric year
    ],
)
def test_directory_marker_or_malformed_path_is_rejected(self, segment_path):
    """Directory markers and malformed paths must be reported as invalid (exactly ``False``)."""
    is_valid = ChangeFeed._is_valid_segment_path(segment_path)
    assert is_valid is False

def test_parse_datetime_from_valid_segment_path(self):
    """A well-formed segment path parses to the datetime encoded in its tokens."""
    parsed = ChangeFeed._parse_datetime_from_segment_path("idx/segments/2026/02/20/0000/meta.json")
    expected = datetime(2026, 2, 20, 0)
    assert parsed == expected

def test_get_segment_paths_skips_directory_markers(self):
    """Only real segment files are yielded when the listing also contains directory markers."""
    day_marker = "idx/segments/2026/02/20"
    minute_marker = "idx/segments/2026/02/20/0000"
    real_segments = [
        "idx/segments/2026/02/20/0000/meta.json",
        "idx/segments/2026/02/20/0100/meta.json",
    ]
    feed = _build_change_feed([day_marker, minute_marker, *real_segments])

    produced = [*feed._get_segment_paths(start_year="")]

    # The final item is the None sentinel the generator emits once listing is exhausted.
    assert produced[-1] is None
    non_sentinel = list(filter(lambda item: item is not None, produced))
    assert non_sentinel == [
        "idx/segments/2026/02/20/0000/meta.json",
        "idx/segments/2026/02/20/0100/meta.json",
    ]

def test_get_segment_paths_does_not_raise_on_directory_markers(self):
    """Iterating a listing that contains a day-level marker completes and yields only the segment."""
    feed = _build_change_feed(
        [
            "idx/segments/2026/02/20",
            "idx/segments/2026/02/20/0000/meta.json",
        ]
    )

    # filter(None, ...) drops falsy items, which removes the trailing None sentinel.
    truthy_paths = list(filter(None, feed._get_segment_paths(start_year="")))
    assert truthy_paths == ["idx/segments/2026/02/20/0000/meta.json"]

# --Test cases for change feed -----------------------------------------
@ChangeFeedPreparer()
@recorded_by_proxy
Expand Down
Loading