|
| 1 | +// Copyright 2024 Google LLC |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +import {BatchSelection} from './batch_selection'; |
| 16 | +import {hexStringToRgb, rgbToHexString} from './color_setter'; |
| 17 | +import {Batch} from './entry'; |
| 18 | +import {GeometricMean} from './geometric_mean'; |
| 19 | +import {FieldMetricStats, SourceCount} from './metric'; |
| 20 | + |
/**
 * Merges several batch selections that share a codec into a single synthetic
 * selection whose stats aggregate the stats of the inputs.
 *
 * The merged Batch takes its url, folderPath, index, codec, version and fields
 * from the first input (fields are shared by reference, not cloned), is named
 * after the codec, and gets the average of the input colors. Only the stats of
 * the BatchSelection are filled in; its other members are left as constructed.
 *
 * @param batches Selections to merge. Expected to share the same codec.
 * @returns The merged selection, or undefined when `batches` is empty, when
 *     the versions differ, when the field lists differ in length or in ids,
 *     when the stats arrays differ in length, or when there is at least one
 *     stat to merge but no matched data point at all.
 */
function mergeBatchesWithSameCodec(batches: BatchSelection[]): BatchSelection|
    undefined {
  // Nothing to merge. Bail out before dereferencing batches[0].
  if (batches.length === 0) return undefined;
  const first = batches[0];

  const mergedBatch = new Batch(first.batch.url, first.batch.folderPath);
  // The index is used to access fields through FieldMetric.fieldIndices.
  mergedBatch.index = first.batch.index;
  mergedBatch.codec = first.batch.codec;
  mergedBatch.name = mergedBatch.codec;
  mergedBatch.version = first.batch.version;
  for (const batch of batches) {
    if (batch.batch.version !== first.batch.version) return undefined;
  }

  // Use the time field as a way of signaling the aggregation.
  mergedBatch.time = undefined;
  mergedBatch.timeStringShort = 'aggregate of';
  for (const batch of batches) {
    mergedBatch.timeStringShort += ' ' + batch.batch.name;
  }
  mergedBatch.timeStringLong = mergedBatch.timeStringShort;

  // Shallow copy the fields and check their consistency across the batches of
  // the same codec: same number of fields, same id at each position.
  mergedBatch.fields = first.batch.fields;
  for (const batch of batches) {
    if (batch.batch.fields.length !== mergedBatch.fields.length) {
      return undefined;
    }
    for (let i = 0; i < mergedBatch.fields.length; i++) {
      if (batch.batch.fields[i].id !== mergedBatch.fields[i].id) {
        return undefined;
      }
    }
  }

  // Average the colors, channel by channel.
  let [rSum, gSum, bSum] = [0, 0, 0];
  for (const batch of batches) {
    const [r, g, b] = hexStringToRgb(batch.batch.color);
    rSum += r;
    gSum += g;
    bSum += b;
  }
  // NOTE(review): the averages may be fractional; this assumes rgbToHexString()
  // copes with non-integer channel values. Confirm against color_setter.
  mergedBatch.color = rgbToHexString(
      rSum / batches.length, gSum / batches.length, bSum / batches.length);

  // Ignore all BatchSelection fields but the stats.
  const mergedBatchSelection = new BatchSelection(mergedBatch);
  for (const batch of batches) {
    if (batch.stats.length !== first.stats.length) return undefined;
  }
  for (let stat = 0; stat < first.stats.length; stat++) {
    const mergedStats = new FieldMetricStats();
    mergedStats.minRatio = first.stats[stat].minRatio;
    mergedStats.maxRatio = first.stats[stat].maxRatio;
    mergedStats.arithmeticMean = 0;

    const geometricMean = new GeometricMean();
    let weightSum = 0;
    for (const batch of batches) {
      // Weigh each batch by its match count.
      const weight = batch.matchedDataPoints.rows.length;
      // TODO: Check the validity of the aggregation methods.
      // The batch-level geometric mean is fed once per matched row so that it
      // counts `weight` times in the merged geometric mean.
      for (let p = 0; p < weight; ++p) {
        geometricMean.add(batch.stats[stat].geometricMean);
      }
      mergedStats.minRatio =
          Math.min(mergedStats.minRatio, batch.stats[stat].minRatio);
      mergedStats.maxRatio =
          Math.max(mergedStats.maxRatio, batch.stats[stat].maxRatio);
      mergedStats.arithmeticMean += batch.stats[stat].arithmeticMean * weight;
      weightSum += weight;
    }

    // Without any matched data point the weighted means are undefined.
    if (weightSum === 0) return undefined;
    mergedStats.geometricMean = geometricMean.get();
    mergedStats.arithmeticMean /= weightSum;

    mergedBatchSelection.stats.push(mergedStats);
  }

  // No UI need for mergeHistograms() here. Skip it.

  return mergedBatchSelection;
}
| 107 | + |
/**
 * Combines the stats of the batches that share a codec.
 *
 * Selections are grouped by `batch.codec`. The whole group whose codec is the
 * one of `batches[skipIndex]` is left out (no group is left out when
 * `skipIndex` is out of range). A group of one is passed through as is; a
 * larger group is replaced by the result of mergeBatchesWithSameCodec().
 * Merged entries hold partial data with shallow copies of members.
 *
 * @param batches Selections to group and merge.
 * @param skipIndex Index in `batches` of the selection whose codec is ignored.
 * @returns One entry per remaining codec, in order of first appearance, or an
 *     empty array in case of error or if there is nothing to merge.
 */
export function mergeBatches(
    batches: BatchSelection[], skipIndex: number): BatchSelection[] {
  // Bucket the selections by codec, keeping first-seen order.
  const byCodec = new Map<string, BatchSelection[]>();
  for (const selection of batches) {
    const codec = selection.batch.codec;
    const group = byCodec.get(codec);
    if (group !== undefined) {
      group.push(selection);
    } else {
      byCodec.set(codec, [selection]);
    }
  }

  let skipCodec: string|undefined = undefined;
  if (skipIndex >= 0 && skipIndex < batches.length) {
    skipCodec = batches[skipIndex].batch.codec;
  }

  const result: BatchSelection[] = [];
  let mergedAny = false;
  for (const [codec, group] of byCodec) {
    if (codec === skipCodec) continue;

    if (group.length === 1) {
      // Lone batch for this codec: nothing to aggregate.
      result.push(group[0]);
      continue;
    }

    const merged = mergeBatchesWithSameCodec(group);
    if (merged === undefined) return [];  // One failed merge voids everything.
    result.push(merged);
    mergedAny = true;
  }

  if (!mergedAny) return [];
  return result;
}
| 144 | + |
/**
 * Aggregates histograms by sourceName.
 *
 * Entries with the same sourceName are folded into one new SourceCount whose
 * count is the sum of their counts. The first defined sourcePath and
 * previewPath seen for a name are kept; the input entries are not modified.
 *
 * @param histograms Histograms to aggregate.
 * @returns The aggregated entries sorted by decreasing count, or an empty
 *     array if two entries with the same sourceName carry different defined
 *     sourcePath or previewPath values.
 */
export function mergeHistograms(histograms: SourceCount[][]): SourceCount[] {
  const bySourceName = new Map<string, SourceCount>();

  for (const histogram of histograms) {
    for (const entry of histogram) {
      const existing = bySourceName.get(entry.sourceName);

      if (existing === undefined) {
        // First occurrence of this source: start from a copy of the entry.
        const created = new SourceCount();
        created.sourceName = entry.sourceName;
        created.sourcePath = entry.sourcePath;
        created.previewPath = entry.previewPath;
        created.count = entry.count;
        bySourceName.set(created.sourceName, created);
        continue;
      }

      // Keep the first set field/constant values and make sure they are
      // consistent across batches.
      if (existing.sourcePath === undefined) {
        existing.sourcePath = entry.sourcePath;
      } else if (!(entry.sourcePath === undefined ||
                   existing.sourcePath === entry.sourcePath)) {
        return [];  // Should not happen.
      }
      if (existing.previewPath === undefined) {
        existing.previewPath = entry.previewPath;
      } else if (!(entry.previewPath === undefined ||
                   existing.previewPath === entry.previewPath)) {
        return [];  // Should not happen.
      }

      existing.count += entry.count;
    }
  }

  // Sort by decreasing occurrences.
  const merged = Array.from(bySourceName.values());
  merged.sort((lhs, rhs) => rhs.count - lhs.count);
  return merged;
}
0 commit comments