Skip to content

Commit 1b1ab74

Browse files
authored
Merge pull request #1052 from KelvinTegelaar/dev
[pull] dev from KelvinTegelaar:dev
2 parents 4740566 + 38b18bc commit 1b1ab74

18 files changed

Lines changed: 1236 additions & 26 deletions

Config/CIPPTimers.json

Lines changed: 2 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,34 +19,14 @@
1919
},
2020
{
2121
"Id": "44a40668-ed71-403c-8c26-b32e320086ad",
22-
"Command": "Start-AuditLogOrchestrator",
23-
"Description": "Orchestrator to download audit logs",
22+
"Command": "Start-AuditLogPlannerV2",
23+
"Description": "Audit log V2 planner: create searches, download and process",
2424
"Cron": "0 */15 * * * *",
2525
"Priority": 2,
2626
"RunOnProcessor": true,
2727
"PreferredProcessor": "auditlog",
2828
"IsSystem": true
2929
},
30-
{
31-
"Id": "01cd512a-15c4-44a9-b8cb-1e5d879cfd2d",
32-
"Command": "Start-AuditLogProcessingOrchestrator",
33-
"Description": "Orchestrator to process audit logs",
34-
"Cron": "0 */15 * * * *",
35-
"Priority": 3,
36-
"RunOnProcessor": true,
37-
"PreferredProcessor": "auditlog",
38-
"IsSystem": true
39-
},
40-
{
41-
"Id": "03475c86-4314-4d7b-90f2-5a0639e3899b",
42-
"Command": "Start-AuditLogSearchCreation",
43-
"Description": "Timer to create audit log searches",
44-
"Cron": "0 */30 * * * *",
45-
"Priority": 4,
46-
"RunOnProcessor": true,
47-
"PreferredProcessor": "auditlog",
48-
"IsSystem": true
49-
},
5030
{
5131
"Id": "5ff6c500-e420-4a3b-8532-ace2e4da4f7d",
5232
"Command": "Start-ApplicationOrchestrator",
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
function Get-CippAuditLogNextAttempt {
2+
<#
3+
.SYNOPSIS
4+
Compute the next-attempt UTC time for a retried audit-log coverage row (exponential backoff).
5+
.DESCRIPTION
6+
Used by the V2 audit-log pipeline to schedule retries of failed search creations and
7+
downloads in the AuditLogCoverage ledger. Exponential backoff with jitter, capped. Because
8+
the timers run every 15 minutes, small delays effectively mean "retry next run"; larger ones
9+
defer a persistently failing window before it is eventually dead-lettered by the caller.
10+
.PARAMETER Attempts
11+
The attempt count that has just been consumed (1 = first failure).
12+
.OUTPUTS
13+
[datetime] (UTC) when the row becomes eligible to retry.
14+
.FUNCTIONALITY
15+
Internal
16+
#>
17+
[CmdletBinding()]
18+
param(
19+
[int]$Attempts,
20+
[int]$BaseMinutes = 5,
21+
[int]$CapMinutes = 240
22+
)
23+
$exp = [Math]::Max(0, $Attempts - 1)
24+
$delay = [Math]::Min($BaseMinutes * [Math]::Pow(2, $exp), $CapMinutes)
25+
$jitter = Get-Random -Minimum 0.8 -Maximum 1.2
26+
return (Get-Date).ToUniversalTime().AddMinutes($delay * $jitter)
27+
}
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
function Get-CippAuditLogPlannedWindows {
2+
<#
3+
.SYNOPSIS
4+
Compute the 35-minute audit-log search windows a tenant is missing (gaps + the newest settled).
5+
.DESCRIPTION
6+
Pure helper for the V2 audit-log pipeline. Windows are 35 minutes long on a 30-minute stride,
7+
so consecutive windows overlap by 5 minutes (covers boundary stragglers; alerting dedups by
8+
record id). Window ENDS sit on the 30-minute grid minus the settle (i.e. :25 / :55), which is
9+
exactly `floor_to_30min(now) - settle`. With the planner timer firing at :00/:15/:30/:45 and a
10+
5-minute settle, a fresh window becomes creatable exactly at a :00/:30 tick - no tick delay -
11+
and the :15/:45 ticks naturally have no new window (they do retries + download/process).
12+
13+
Backfill of older gaps is bounded by -HorizonHours and capped at -MaxPerRun per call (oldest
14+
first). A brand-new tenant is seeded with only the newest settled window.
15+
.PARAMETER ExistingRows
16+
The tenant's current AuditLogCoverage rows. Reconciliation rows (RowKey 'RECON-*') are ignored
17+
here; only regular 14-digit window keys are considered.
18+
.PARAMETER Now
19+
Reference time (UTC). Defaults to now.
20+
.OUTPUTS
21+
Array of [pscustomobject]@{ RowKey; WindowStart; WindowEnd } sorted oldest-first.
22+
.FUNCTIONALITY
23+
Internal
24+
#>
25+
[CmdletBinding()]
26+
param(
27+
[object[]]$ExistingRows,
28+
[datetime]$Now = (Get-Date).ToUniversalTime(),
29+
[int]$SettleMinutes = 5,
30+
[int]$WindowMinutes = 35,
31+
[int]$StrideMinutes = 30,
32+
[int]$HorizonHours = 24,
33+
[int]$MaxPerRun = 6
34+
)
35+
36+
$Now = $Now.ToUniversalTime()
37+
38+
# Newest window end: floor to the 30-min grid, minus the settle (lands on :25 / :55).
39+
$FloorMinute = $Now.Minute - ($Now.Minute % $StrideMinutes)
40+
$Floor = [datetime]::new($Now.Year, $Now.Month, $Now.Day, $Now.Hour, $FloorMinute, 0, [System.DateTimeKind]::Utc)
41+
$NewestEnd = $Floor.AddMinutes(-$SettleMinutes)
42+
43+
$HorizonStart = $Now.AddHours(-$HorizonHours)
44+
45+
# Existing regular window keys (ignore reconciliation rows).
46+
$ExistingKeys = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
47+
$ExistingStarts = [System.Collections.Generic.List[datetime]]::new()
48+
foreach ($Row in $ExistingRows) {
49+
if ($Row.RowKey -notmatch '^\d{14}$') { continue }
50+
[void]$ExistingKeys.Add([string]$Row.RowKey)
51+
if ($null -ne $Row.WindowStart) {
52+
try { $ExistingStarts.Add(([datetimeoffset]$Row.WindowStart).UtcDateTime) } catch {}
53+
}
54+
}
55+
56+
# Brand-new tenant: seed only the newest settled window.
57+
if ($ExistingStarts.Count -eq 0) {
58+
$Start = $NewestEnd.AddMinutes(-$WindowMinutes)
59+
if ($Start -lt $HorizonStart) { return @() }
60+
return , ([pscustomobject]@{
61+
RowKey = $Start.ToString('yyyyMMddHHmmss')
62+
WindowStart = $Start
63+
WindowEnd = $NewestEnd
64+
})
65+
}
66+
67+
# Established tenant: backfill missing windows from the lower bound up to NewestEnd (oldest first).
68+
$EarliestExisting = ($ExistingStarts | Measure-Object -Minimum).Minimum
69+
$LowerEnd = if ($EarliestExisting -gt $HorizonStart) { $EarliestExisting.AddMinutes($WindowMinutes) } else { $HorizonStart.AddMinutes($WindowMinutes) }
70+
71+
$Owed = [System.Collections.Generic.List[object]]::new()
72+
$End = $NewestEnd
73+
while ($End -ge $LowerEnd) {
74+
$Start = $End.AddMinutes(-$WindowMinutes)
75+
$Key = $Start.ToString('yyyyMMddHHmmss')
76+
if (-not $ExistingKeys.Contains($Key)) {
77+
$Owed.Add([pscustomobject]@{ RowKey = $Key; WindowStart = $Start; WindowEnd = $End })
78+
}
79+
$End = $End.AddMinutes(-$StrideMinutes)
80+
}
81+
82+
# $Owed is newest-first from the loop; reorder to oldest-first ([0]=oldest, [-1]=newest).
83+
$Owed.Reverse()
84+
if ($Owed.Count -le $MaxPerRun) {
85+
return @($Owed)
86+
}
87+
88+
# Backlog exceeds the per-run cap: always include the NEWEST window so the live period can
89+
# be created promptly (current-first, see Push-AuditLogSearchCreationV2), plus the oldest
90+
# (MaxPerRun-1) so historical gaps still drain - oldest first - before they age out of the
91+
# horizon. Without seeding the newest here it would never be Planned during a backlog.
92+
$Newest = $Owed[$Owed.Count - 1]
93+
$Backfill = @($Owed[0..($MaxPerRun - 2)])
94+
return @($Backfill + $Newest)
95+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
function Get-CippAuditLogReconciliationWindows {
2+
<#
3+
.SYNOPSIS
4+
Compute the 12-hour reconciliation audit-log windows a tenant is missing.
5+
.DESCRIPTION
6+
The fast 35-minute path searches each period soon after it closes, so late-landing / backfilled
7+
audit events (Microsoft can publish them hours later) can be missed. This helper produces wide
8+
catch-all windows aligned to 00:00-12:00 and 12:00-00:00 UTC, each created 3 hours after the
9+
block closes (a generous settle so backfilled data has landed). They flow through the normal
10+
download/process path; alerting dedups by record id, so overlap with the fast path is harmless.
11+
.PARAMETER ExistingRows
12+
The tenant's current AuditLogCoverage rows. Only reconciliation rows (RowKey 'RECON-*') are
13+
considered when finding gaps.
14+
.PARAMETER Now
15+
Reference time (UTC). Defaults to now.
16+
.OUTPUTS
17+
Array of [pscustomobject]@{ RowKey; WindowStart; WindowEnd } sorted oldest-first. RowKey is
18+
'RECON-<windowStart yyyyMMddHHmmss>'.
19+
.FUNCTIONALITY
20+
Internal
21+
#>
22+
[CmdletBinding()]
23+
param(
24+
[object[]]$ExistingRows,
25+
[datetime]$Now = (Get-Date).ToUniversalTime(),
26+
[int]$SettleHours = 3,
27+
[int]$HorizonHours = 24,
28+
[int]$MaxPerRun = 6
29+
)
30+
31+
$Now = $Now.ToUniversalTime()
32+
33+
# Newest 12h block end (00:00 / 12:00 UTC) whose close is at least SettleHours in the past.
34+
$T = $Now.AddHours(-$SettleHours)
35+
$BoundaryHour = if ($T.Hour -lt 12) { 0 } else { 12 }
36+
$NewestEnd = [datetime]::new($T.Year, $T.Month, $T.Day, $BoundaryHour, 0, 0, [System.DateTimeKind]::Utc)
37+
$HorizonStart = $Now.AddHours(-$HorizonHours)
38+
39+
$ExistingKeys = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
40+
foreach ($Row in $ExistingRows) {
41+
if ($Row.RowKey -like 'RECON-*') { [void]$ExistingKeys.Add([string]$Row.RowKey) }
42+
}
43+
44+
# No reconciliation history: seed only the newest settled block (avoid a first-run backfill spike;
45+
# the fast 35-min path already covers recent history). Established tenants backfill gaps below.
46+
if ($ExistingKeys.Count -eq 0) {
47+
$Start = $NewestEnd.AddHours(-12)
48+
if ($Start -lt $HorizonStart) { return @() }
49+
return , ([pscustomobject]@{ RowKey = 'RECON-' + $Start.ToString('yyyyMMddHHmmss'); WindowStart = $Start; WindowEnd = $NewestEnd })
50+
}
51+
52+
$Owed = [System.Collections.Generic.List[object]]::new()
53+
$End = $NewestEnd
54+
while ($End -ge $HorizonStart) {
55+
$Start = $End.AddHours(-12)
56+
$Key = 'RECON-' + $Start.ToString('yyyyMMddHHmmss')
57+
if (-not $ExistingKeys.Contains($Key)) {
58+
$Owed.Add([pscustomobject]@{ RowKey = $Key; WindowStart = $Start; WindowEnd = $End })
59+
}
60+
$End = $End.AddHours(-12)
61+
}
62+
63+
$Owed.Reverse()
64+
if ($Owed.Count -gt $MaxPerRun) {
65+
return @($Owed[0..($MaxPerRun - 1)])
66+
}
67+
return @($Owed)
68+
}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
function New-CippAuditLogSearchV2 {
2+
<#
3+
.SYNOPSIS
4+
Create a Microsoft Graph audit-log search for the V2 pipeline and return a classified result.
5+
.DESCRIPTION
6+
Thin wrapper over New-GraphPOSTRequest (which now honours 429 backoff). Unlike the V1
7+
New-CippAuditLogSearch, this writes to NO table - the AuditLogCoverage ledger is updated by
8+
the caller. Failures are classified so the caller can decide whether to retry (transient) or
9+
stop (auditing disabled).
10+
.PARAMETER TenantFilter
11+
Tenant default domain or customerId.
12+
.PARAMETER StartTime
13+
Window start (inclusive).
14+
.PARAMETER EndTime
15+
Window end (exclusive).
16+
.PARAMETER RecordTypeFilters
17+
Record types to capture. Defaults to the four the V1 pipeline used.
18+
.OUTPUTS
19+
[pscustomobject]@{ Id; Status; Outcome; Message } Outcome in 'Created','AuditingDisabled','Transient'.
20+
.FUNCTIONALITY
21+
Internal
22+
#>
23+
[CmdletBinding(SupportsShouldProcess = $true)]
24+
param(
25+
[Parameter(Mandatory = $true)][string]$TenantFilter,
26+
[Parameter(Mandatory = $true)][datetime]$StartTime,
27+
[Parameter(Mandatory = $true)][datetime]$EndTime,
28+
[string[]]$RecordTypeFilters = @('exchangeAdmin', 'azureActiveDirectory', 'azureActiveDirectoryAccountLogon', 'azureActiveDirectoryStsLogon'),
29+
[int]$MaxAttempts = 3,
30+
[string]$DisplayName = ('CIPP Audit Search V2 - ' + (Get-Date).ToString('yyyy-MM-dd HH:mm:ss'))
31+
)
32+
33+
$Body = @{
34+
displayName = $DisplayName
35+
filterStartDateTime = $StartTime.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss')
36+
filterEndDateTime = $EndTime.ToUniversalTime().ToString('yyyy-MM-ddTHH:mm:ss')
37+
recordTypeFilters = @($RecordTypeFilters)
38+
} | ConvertTo-Json -Compress
39+
40+
if (-not $PSCmdlet.ShouldProcess($TenantFilter, 'Create audit log search')) {
41+
return [pscustomobject]@{ Id = $null; Status = 'WhatIf'; Outcome = 'Transient'; Message = 'WhatIf'; Throttled = $false }
42+
}
43+
44+
for ($Attempt = 1; $Attempt -le $MaxAttempts; $Attempt++) {
45+
try {
46+
# maxRetries 1 = no retry inside the Graph helper; this function owns retry/backoff.
47+
$Query = New-GraphPOSTRequest -uri 'https://graph.microsoft.com/beta/security/auditLog/queries' -body $Body -tenantid $TenantFilter -AsApp $true -maxRetries 1
48+
return [pscustomobject]@{ Id = $Query.id; Status = $Query.status; Outcome = 'Created'; Message = $null; Throttled = $false }
49+
} catch {
50+
$Raw = $_.Exception.Data['RawErrorBody']
51+
if (-not $Raw) { $Raw = $_.ErrorDetails.Message }
52+
if (-not $Raw) { $Raw = $_.Exception.Message }
53+
$Parsed = $null
54+
if ($Raw) { try { $Parsed = ([string]$Raw) | ConvertFrom-Json -ErrorAction Stop } catch {} }
55+
56+
# AuditingDisabledTenant can be top-level Status or nested as JSON inside error.message.
57+
$AuditStatus = $Parsed.Status
58+
if (-not $AuditStatus) {
59+
$Inner = $Parsed.error.message ?? $Parsed.message
60+
if ($Inner -is [string]) { try { $AuditStatus = ($Inner | ConvertFrom-Json -ErrorAction Stop).Status } catch {} }
61+
}
62+
if ($AuditStatus -eq 'AuditingDisabledTenant') {
63+
return [pscustomobject]@{ Id = $null; Status = 'AuditingDisabledTenant'; Outcome = 'AuditingDisabled'; Message = 'Unified auditing is disabled for this tenant.'; Throttled = $false }
64+
}
65+
66+
$Code = $Parsed.error.code ?? $Parsed.code
67+
$Msg = $Parsed.error.message ?? $Parsed.message ?? $_.Exception.Message
68+
$StatusCode = $null
69+
try { $StatusCode = [int]$_.Exception.Response.StatusCode } catch {}
70+
71+
# 429 = the tenant's ~10 concurrent-search cap is full. Retrying in-process won't clear it,
72+
# so return immediately and let the planner defer this + remaining windows to next cycle.
73+
if (($Code -eq 'TooManyRequests') -or ($StatusCode -eq 429)) {
74+
return [pscustomobject]@{ Id = $null; Status = ([string]($Code ?? 'TooManyRequests')); Outcome = 'Transient'; Message = [string]$Msg; Throttled = $true }
75+
}
76+
77+
# Other transient (UnknownError, 5xx, gateway, timeout): usually a momentary EXO-backend
78+
# blip that clears on a quick re-submit. Retry in-process with >1s jitter before giving up.
79+
if ($Attempt -lt $MaxAttempts) {
80+
Start-Sleep -Seconds (Get-Random -Minimum 1.5 -Maximum 4.0)
81+
continue
82+
}
83+
return [pscustomobject]@{ Id = $null; Status = ([string]($Code ?? 'Error')); Outcome = 'Transient'; Message = [string]$Msg; Throttled = $false }
84+
}
85+
}
86+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
function Start-AuditLogIngestionV2 {
2+
<#
3+
.SYNOPSIS
4+
V2 audit-log ingestion timer. Drives both download and processing, decoupled so that pending
5+
logs are processed even when there is nothing new to download.
6+
.DESCRIPTION
7+
Runs offset 15 minutes from the creation timer and fans out two kinds of work:
8+
9+
1. Download tenants - AuditLogCoverage rows in state 'Created' (a search was created and is
10+
awaiting download) and due (not in backoff). Each gets a per-tenant orchestrator:
11+
Batch = AuditLogDownloadV2 (download succeeded searches -> CacheWebhooks)
12+
PostExecution = AuditLogProcessV2 (enqueue processing if any cache rows are pending)
13+
14+
2. Process-only tenants - tenants that have rows sitting in CacheWebhooks (downloaded but
15+
not yet processed, e.g. left behind by a worker crash mid-processing) but no pending
16+
download. These get a processing orchestrator fanned out DIRECTLY, skipping the no-op
17+
download orchestration.
18+
19+
This makes processing self-healing: a crashed/partial processing run is retried on the next
20+
cycle off the cache contents, not gated behind a fresh download.
21+
.FUNCTIONALITY
22+
Entrypoint
23+
#>
24+
[CmdletBinding(SupportsShouldProcess = $true)]
25+
param()
26+
try {
27+
$Ledger = Get-CippTable -TableName 'AuditLogCoverage'
28+
$Now = (Get-Date).ToUniversalTime()
29+
30+
# --- Download tenants: searches awaiting download (State = Created, due) ---
31+
$Created = @(Get-CIPPAzDataTableEntity @Ledger -Filter "State eq 'Created'")
32+
$DueCreated = $Created | Where-Object { -not $_.NextAttemptUtc -or ([datetimeoffset]$_.NextAttemptUtc).UtcDateTime -le $Now }
33+
$DownloadTenants = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
34+
foreach ($Name in @($DueCreated | Group-Object PartitionKey | ForEach-Object { $_.Name })) {
35+
if ($Name) { [void]$DownloadTenants.Add([string]$Name) }
36+
}
37+
38+
# --- Process-only tenants: rows pending in the webhook cache (downloaded, not yet processed) ---
39+
$CacheTable = Get-CippTable -TableName 'CacheWebhooks'
40+
$CacheRows = @(Get-CIPPAzDataTableEntity @CacheTable -Property @('PartitionKey', 'RowKey'))
41+
$CacheTenants = [System.Collections.Generic.HashSet[string]]::new([System.StringComparer]::OrdinalIgnoreCase)
42+
foreach ($Name in @($CacheRows | Group-Object PartitionKey | ForEach-Object { $_.Name })) {
43+
if ($Name) { [void]$CacheTenants.Add([string]$Name) }
44+
}
45+
46+
if ($DownloadTenants.Count -eq 0 -and $CacheTenants.Count -eq 0) {
47+
Write-Information 'AuditLogV2: nothing to download or process'
48+
return
49+
}
50+
51+
# 1) Download tenants -> download + post-exec processing in one orchestration.
52+
foreach ($TenantFilter in $DownloadTenants) {
53+
if ($PSCmdlet.ShouldProcess($TenantFilter, 'Download + process audit logs')) {
54+
Start-CIPPOrchestrator -InputObject ([PSCustomObject]@{
55+
OrchestratorName = "AuditLogIngestV2-$TenantFilter"
56+
Batch = @([PSCustomObject]@{ FunctionName = 'AuditLogDownloadV2'; TenantFilter = $TenantFilter })
57+
PostExecution = @{ FunctionName = 'AuditLogProcessV2'; Parameters = @{ TenantFilter = $TenantFilter } }
58+
SkipLog = $true
59+
})
60+
}
61+
}
62+
63+
# 2) Process-only tenants (pending cache, no pending download) -> process directly.
64+
$ProcessOnlyCount = 0
65+
foreach ($TenantFilter in $CacheTenants) {
66+
if ($DownloadTenants.Contains($TenantFilter)) { continue }
67+
$ProcessOnlyCount++
68+
if ($PSCmdlet.ShouldProcess($TenantFilter, 'Process pending audit logs')) {
69+
Start-CIPPOrchestrator -InputObject ([PSCustomObject]@{
70+
OrchestratorName = "AuditLogProcessV2-$TenantFilter"
71+
QueueFunction = [PSCustomObject]@{ FunctionName = 'AuditLogProcessingBatchV2'; Parameters = @{ TenantFilter = $TenantFilter } }
72+
SkipLog = $true
73+
})
74+
}
75+
}
76+
77+
Write-Information "AuditLogV2: ingestion fan-out - $($DownloadTenants.Count) download tenant(s), $ProcessOnlyCount process-only tenant(s)"
78+
} catch {
79+
Write-LogMessage -API 'AuditLogV2' -message 'Error in audit log ingestion orchestrator (V2)' -sev Error -LogData (Get-CippException -Exception $_)
80+
Write-Information ('AuditLogV2 ingestion error {0} line {1} - {2}' -f $_.InvocationInfo.ScriptName, $_.InvocationInfo.ScriptLineNumber, $_.Exception.Message)
81+
}
82+
}

0 commit comments

Comments
 (0)