Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions sdk/storage/azure-storage-blob-changefeed/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ This version and all future versions will require Python 3.9+. Python 3.8 is no

### Features Added

### Bugs Fixed
- Fixed an `IndexError` that occurred when listing change feed events on accounts where the
`$blobchangefeed/idx/segments/` hierarchy contains directory marker blobs (e.g.
`idx/segments/2026/02/20`). Such non-segment paths are now skipped instead of being parsed
as segment files.

## 12.0.0b5 (2024-04-16)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,10 @@ def _get_segment_paths(self, start_year=""):
while not start_year or start_year <= cur_year:
paths = self.client.list_blobs(name_starts_with=SEGMENT_COMMON_PATH + str(start_year))
for path in paths:
yield path.name
# Skip directory marker blobs that do not conform to the expected segment path shape.
Comment thread
weirongw23-msft marked this conversation as resolved.
Outdated
# Azure Storage can return zero-length directory markers that are not real segment files.
if self._is_valid_segment_path(path.name):
yield path.name
Comment thread
weirongw23-msft marked this conversation as resolved.

# if not searching by prefix, all paths would have been iterated already, so it's time to yield None
if not start_year:
Expand All @@ -291,6 +294,23 @@ def _get_segment_paths(self, start_year=""):
start_year += 1
yield None

@staticmethod
def _is_valid_segment_path(segment_path):
    """Return whether ``segment_path`` has the shape of a real segment blob.

    A segment blob is named ``idx/segments/YYYY/MM/DD/HHMM/<file>``, which is
    seven delimiter-separated tokens. Directory marker blobs found under the
    segments prefix (e.g. ``idx/segments/2026/02/20`` or
    ``idx/segments/2026/02/20/1800``) stop short of the ``<file>`` token; they
    are not segments and must be skipped rather than parsed.

    :param str segment_path: Blob name returned when listing the segments prefix.
    :returns: True if the path has a file token and numeric year, month, day
        and hour tokens; False otherwise.
    :rtype: bool
    """
    path_tokens = segment_path.split(PATH_DELIMITER)
    # Require the trailing <file> token too: a marker at the HHMM level has six
    # tokens, which is enough to parse a datetime from but is still not a segment.
    # An empty last token means the name ended with the delimiter (no file name).
    if len(path_tokens) < 7 or not path_tokens[-1]:
        return False
    try:
        int(path_tokens[2])  # year
        int(path_tokens[3])  # month
        int(path_tokens[4])  # day
        int(path_tokens[5][:2])  # hour (from HHMM)
    except ValueError:
        # Indexing cannot fail after the length check above, so only a
        # non-numeric token can bring us here.
        return False
    return True

@staticmethod
def _parse_datetime_from_segment_path(segment_path):
path_tokens = segment_path.split("/")
Expand Down
Loading