/**
 * Description: This script fixes missing custom user properties in drill_meta
 * or adds new values to existing biglist properties.
 * It scans the app_users collection for the specified custom properties and
 * ensures they are present in drill_meta with correct types and values.
 * Path: $(countly dir)/bin/scripts/fix-data
 * Command: node fix_missing_custom_user_props.js
 */
| 9 | + |
| 10 | +const pluginManager = require('../../../plugins/pluginManager.js'); |
| 11 | + |
// ---------------------------------------------------------------------------
// Script configuration: edit these values before running.
// ---------------------------------------------------------------------------
const APP_ID = ""; // required: set the app ID to process
const PROPS = []; // required: specify custom property names to fix, e.g. ["myProp1", "myProp2"]
const START = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); // default: 30 days ago
const END = new Date(); // default: now
const dry_run = true;

// Defaults; the main flow replaces them with the drill plugin settings when those are stored in the database.
let LIST_LIMIT = 100;
let BIG_LIST_LIMIT = 1000;

// Required settings, checked in order: the first one that is missing aborts the script with exit status 1.
const requiredSettings = [
    {missing: !APP_ID, message: "Error: APP_ID is required. Please set it in the script."},
    {missing: !PROPS.length, message: "Error: PROPS is required. Please specify at least one property name to fix."}
];

for (const setting of requiredSettings) {
    if (setting.missing) {
        console.error(setting.message);
        process.exit(1);
    }
}
| 30 | + |
/**
 * Main flow.
 *
 * 1. Connects to the "countly" and "countly_drill" databases.
 * 2. Reads list_limit / big_list_limit from the "plugins" document when present.
 * 3. For every property in PROPS decides whether the main meta document
 *    (APP_ID + "_meta_up") needs a type entry and whether the per-property value
 *    document (APP_ID + "_meta_up_custom." + encoded name) must be created or extended.
 * 4. Prints the plan when dry_run is set, otherwise writes it.
 *
 * Connections are always closed before the script finishes, and any failure sets a
 * non-zero exit status so callers can detect it.
 */
Promise.all([
    pluginManager.dbConnection("countly"),
    pluginManager.dbConnection("countly_drill")
]).then(async function([countlyDb, drillDb]) {
    // Own-property lookup, so that property/value names such as "constructor" or
    // "toString" are not mistaken for existing entries inherited from Object.prototype.
    const ownValue = function(obj, key) {
        return Object.prototype.hasOwnProperty.call(obj, key) ? obj[key] : undefined;
    };

    try {
        console.log("Connected to databases...");
        console.log("Date range: " + START.toISOString() + " - " + END.toISOString());
        console.log("Properties to fix: " + PROPS.join(", "));

        // START/END are converted to epoch seconds for the lac range filter
        const lacMatch = {
            $gt: Math.round(START.getTime() / 1000),
            $lt: Math.round(END.getTime() / 1000)
        };
        const collection = "app_users" + APP_ID;
        const drillMeta = drillDb.collection("drill_meta");

        // Load drill config limits from the database if available
        const pluginsDoc = await countlyDb.collection("plugins").findOne({_id: "plugins"});
        if (pluginsDoc && pluginsDoc.drill) {
            if (pluginsDoc.drill.list_limit !== undefined) {
                LIST_LIMIT = parseInt(pluginsDoc.drill.list_limit, 10) || LIST_LIMIT;
            }
            if (pluginsDoc.drill.big_list_limit !== undefined) {
                BIG_LIST_LIMIT = parseInt(pluginsDoc.drill.big_list_limit, 10) || BIG_LIST_LIMIT;
            }
        }
        console.log("Limits: list_limit=" + LIST_LIMIT + ", big_list_limit=" + BIG_LIST_LIMIT);

        // Get existing drill_meta for user properties
        const metaDoc = await drillMeta.findOne({_id: APP_ID + "_meta_up"});
        const existingCustom = (metaDoc && metaDoc.custom) ? metaDoc.custom : {};
        console.log("Existing custom keys in drill_meta: " + Object.keys(existingCustom).length);

        // Load existing biglist documents for the requested props
        const existingBigLists = {};
        for (const name of PROPS) {
            const bigListDoc = await drillMeta.findOne({_id: APP_ID + "_meta_up_custom." + encodeKey(name)});
            if (bigListDoc && bigListDoc.values) {
                existingBigLists[name] = bigListDoc.values;
            }
        }

        // Planned changes, filled in while processing each property
        const mainDocUpdate = {};
        const bigListUpdates = [];
        const bigListCreates = [];

        for (let pi = 0; pi < PROPS.length; pi++) {
            const prop = PROPS[pi];
            const fieldPath = "custom." + prop;
            const typeKey = "custom." + prop + ".type";
            const existingMeta = ownValue(existingCustom, prop);

            console.log("[" + (pi + 1) + "/" + PROPS.length + "] Processing \"" + prop + "\"...");

            // If property already exists and is not a list type, skip entirely
            if (existingMeta && existingMeta.type !== "l" && existingMeta.type !== "a" && existingMeta.type !== "bl") {
                console.log(" [SKIP] Already exists with type: " + existingMeta.type + " (not a list type, nothing to update)");
                continue;
            }

            if (!existingMeta) {
                // New property: determine type from a small sample first
                const samples = await countlyDb.collection(collection).aggregate([
                    {$match: {lac: lacMatch, [fieldPath]: {$exists: true}}},
                    {$project: {_id: 0, val: "$" + fieldPath}},
                    {$limit: 100}
                ]).toArray();

                if (samples.length === 0) {
                    console.log(" No users with this property in the given date range.");
                    continue;
                }

                const isArray = samples.some(function(sample) {
                    return Array.isArray(sample.val);
                });

                // Flatten array values so every element takes part in type detection
                const sampleValues = [];
                for (const sample of samples) {
                    const items = Array.isArray(sample.val) ? sample.val : [sample.val];
                    for (const item of items) {
                        sampleValues.push(item + "");
                    }
                }

                let type = determineType(sampleValues, isArray);

                if (type === "n" || type === "d") {
                    console.log(" [NEW] type: " + type + " (determined from " + samples.length + " samples, no values to collect)");
                    mainDocUpdate[typeKey] = type;
                    continue;
                }

                // List/array type: need full distinct values aggregation
                console.log(" [NEW] type: " + type + ", collecting distinct values...");
                const distinctValues = await aggregateDistinctValues(countlyDb, collection, fieldPath, lacMatch, isArray);
                const totalDistinct = Object.keys(distinctValues).length;
                console.log(" Found " + totalDistinct + " distinct value(s)");

                // Apply limits
                if (totalDistinct > BIG_LIST_LIMIT) {
                    // Too many values: convert to string type, no values tracked
                    console.log(" [WARNING] " + totalDistinct + " values exceeds big_list_limit (" + BIG_LIST_LIMIT + "). Setting type to 's' (string). Values will not be tracked.");
                    mainDocUpdate[typeKey] = "s";
                    continue;
                }

                if (totalDistinct > LIST_LIMIT) {
                    // Between list_limit and big_list_limit: use bl type
                    console.log(" [INFO] " + totalDistinct + " values exceeds list_limit (" + LIST_LIMIT + "). Setting type to 'bl' (big list) instead of '" + type + "'.");
                    type = "bl";
                }

                mainDocUpdate[typeKey] = type;

                const createValues = {};
                for (const rawValue of Object.keys(distinctValues)) {
                    const encoded = encodeKey(rawValue).trim();
                    if (encoded !== "") {
                        createValues[encoded] = true;
                    }
                }
                bigListCreates.push({
                    _id: APP_ID + "_meta_up_custom." + encodeKey(prop),
                    app_id: APP_ID,
                    type: "up",
                    e: "custom",
                    biglist: true,
                    values: createValues
                });
            }
            else {
                // Existing list type: collect distinct values and find new ones
                console.log(" Existing type: " + existingMeta.type + ", collecting distinct values...");

                const isArrayExisting = existingMeta.type === "a";
                const distinctValuesExisting = await aggregateDistinctValues(countlyDb, collection, fieldPath, lacMatch, isArrayExisting);
                const existingValues = ownValue(existingBigLists, prop) || {};
                const existingCount = Object.keys(existingValues).length;

                const newValues = {};
                for (const rawValue of Object.keys(distinctValuesExisting)) {
                    const encoded = encodeKey(rawValue).trim();
                    if (encoded !== "" && !ownValue(existingValues, encoded)) {
                        newValues[encoded] = true;
                    }
                }

                const newCount = Object.keys(newValues).length;
                if (newCount === 0) {
                    console.log(" [SKIP] No new values (all " + Object.keys(distinctValuesExisting).length + " already in biglist)");
                    continue;
                }

                const totalAfterUpdate = existingCount + newCount;

                // Check if adding new values would exceed big_list_limit
                if (totalAfterUpdate > BIG_LIST_LIMIT) {
                    console.log(" [WARNING] Adding " + newCount + " new values would bring total to " + totalAfterUpdate + ", exceeding big_list_limit (" + BIG_LIST_LIMIT + "). Converting type to 's' (string). Values will no longer be tracked.");
                    mainDocUpdate[typeKey] = "s";
                    // Don't add values: the biglist doc will become stale but that's consistent
                    // with how drill.js handles this (checkListsInMeta deletes it later)
                    continue;
                }

                // Check if adding new values would cross the list_limit threshold (l -> bl)
                if (existingMeta.type === "l" && totalAfterUpdate > LIST_LIMIT) {
                    console.log(" [INFO] Adding " + newCount + " new values would bring total to " + totalAfterUpdate + ", exceeding list_limit (" + LIST_LIMIT + "). Upgrading type from 'l' to 'bl' (big list).");
                    mainDocUpdate[typeKey] = "bl";
                }

                console.log(" [UPDATE] " + newCount + " new value(s) to add (existing: " + existingCount + ", after: " + totalAfterUpdate + ")");
                bigListUpdates.push({
                    id: APP_ID + "_meta_up_custom." + encodeKey(prop),
                    propKey: prop,
                    newValues: newValues
                });
            }
        }

        const mainUpdateCount = Object.keys(mainDocUpdate).length;
        const bigListOpCount = bigListCreates.length + bigListUpdates.length;

        // Check if there's anything to do
        if (mainUpdateCount === 0 && bigListOpCount === 0) {
            console.log("\nNothing to update.");
            return;
        }

        if (dry_run) {
            if (mainUpdateCount > 0) {
                console.log("\nDRY RUN: Would update main meta document (" + APP_ID + "_meta_up) with:");
                console.log(JSON.stringify(mainDocUpdate, null, 2));
            }
            if (bigListCreates.length > 0) {
                console.log("DRY RUN: Would create " + bigListCreates.length + " biglist document(s):");
                for (const planned of bigListCreates) {
                    console.log(" _id: " + planned._id + " (" + Object.keys(planned.values).length + " values)");
                }
            }
            if (bigListUpdates.length > 0) {
                console.log("DRY RUN: Would update " + bigListUpdates.length + " existing biglist document(s):");
                for (const planned of bigListUpdates) {
                    console.log(" _id: " + planned.id + " (+" + Object.keys(planned.newValues).length + " new values)");
                }
            }
            return;
        }

        // Update main meta_up document for new properties
        if (mainUpdateCount > 0) {
            await drillMeta.updateOne(
                {_id: APP_ID + "_meta_up"},
                {$set: mainDocUpdate},
                {upsert: true}
            );
            console.log("Updated main meta document.");
        }

        if (bigListOpCount > 0) {
            const bulk = drillMeta.initializeUnorderedBulkOp();

            // Create new biglist documents (the planned document is left untouched;
            // _id goes into the filter, everything else into $set)
            for (const planned of bigListCreates) {
                const {_id: docId, ...fields} = planned;
                bulk.find({_id: docId}).upsert().updateOne({$set: fields});
            }

            // Update existing biglist documents with new values
            for (const planned of bigListUpdates) {
                const setObj = {};
                for (const valueKey of Object.keys(planned.newValues)) {
                    setObj["values." + valueKey] = true;
                }
                bulk.find({_id: planned.id}).upsert().updateOne({
                    $set: setObj,
                    // If the value document does not exist yet the upsert inserts it;
                    // give it the same metadata fields the create path writes.
                    $setOnInsert: {app_id: APP_ID, type: "up", e: "custom", biglist: true}
                });
            }

            await bulk.execute();
            console.log("Executed " + bigListOpCount + " biglist operation(s).");
        }
    }
    catch (err) {
        console.error("Error:", err);
        // Report failure to the caller instead of finishing with status 0
        process.exitCode = 1;
    }
    finally {
        // Close both connections; a failure to close one must not prevent closing the other
        for (const connection of [countlyDb, drillDb]) {
            try {
                await connection.close();
            }
            catch (closeErr) {
                console.error("Error while closing database connection:", closeErr);
            }
        }
        console.log("\nDone.");
    }
}).catch(function(err) {
    // Reached when a database connection could not be established
    console.error("Error: failed to connect to databases:", err);
    // Exit explicitly: one of the two connections may have succeeded and would keep the process alive
    process.exit(1);
});
| 291 | + |
/**
 * Collect the distinct values of one custom property via a MongoDB aggregation.
 *
 * The pipeline matches documents whose lac is inside the given range and that have
 * the field, optionally unwinds the field, then groups by the field value.
 * null/undefined group keys are ignored; every other key is stringified.
 * @param {Db} db - database connection
 * @param {string} collectionName - app_users collection name
 * @param {string} fieldPath - dot-notation field path (e.g. "custom.myProp")
 * @param {object} lacMatch - lac range filter ({$gt, $lt})
 * @param {boolean} isArray - whether to unwind array values
 * @returns {object} map of distinct values {value: true, ...}
 */
async function aggregateDistinctValues(db, collectionName, fieldPath, lacMatch, isArray) {
    const fieldRef = "$" + fieldPath;

    const matchStage = {$match: {lac: lacMatch, [fieldPath]: {$exists: true}}};
    const unwindStage = {$unwind: {path: fieldRef, preserveNullAndEmptyArrays: false}};
    const groupStage = {$group: {_id: fieldRef}};

    // The unwind stage is only included for array-valued properties
    const stages = isArray ? [matchStage, unwindStage, groupStage] : [matchStage, groupStage];

    const rows = await db.collection(collectionName).aggregate(stages, {allowDiskUse: true}).toArray();

    return rows.reduce(function(distinct, row) {
        if (row._id !== null && row._id !== undefined) {
            distinct[row._id + ""] = true;
        }
        return distinct;
    }, {});
}
| 321 | + |
/**
 * Work out the drill type code for a property from its sampled values.
 *
 * - "a": the property was seen as an array (checked first, values are not inspected)
 * - "l": at least one value is not numeric or is longer than 16 characters
 * - "d": every value is numeric and is exactly 10 or 13 characters long
 * - "n": every value is numeric, but not all of them have a 10/13 character length
 * @param {string[]} values - array of string-encoded values
 * @param {boolean} isArray - whether any user had this as an array value
 * @returns {string} type code
 */
function determineType(values, isArray) {
    if (isArray) {
        return "a";
    }

    let everyValueDateSized = true;

    for (const candidate of values) {
        // A single non-numeric (or overly long) value makes the whole property a list
        if (!isNumeric(candidate) || candidate.length > 16) {
            return "l";
        }
        if (candidate.length !== 10 && candidate.length !== 13) {
            everyValueDateSized = false;
        }
    }

    return everyValueDateSized ? "d" : "n";
}
| 356 | + |
/**
 * Tell whether a value counts as numeric.
 *
 * Any value of type number qualifies (NaN included, since only the type is checked).
 * A string qualifies when it is not blank and Number() can convert it.
 * Everything else is not numeric.
 * @param {*} val - value to check
 * @returns {boolean} true if numeric
 */
function isNumeric(val) {
    switch (typeof val) {
    case "number":
        return true;
    case "string":
        return val.trim() !== "" && !Number.isNaN(Number(val));
    default:
        return false;
    }
}
| 371 | + |
/**
 * Encode a key so it can be used as a MongoDB field name / dotted update path.
 *
 * A leading "$" becomes "&#36;" and every "." becomes "&#46;". Without this, a value
 * such as "a.b" used in an update path like "values." + key would be interpreted by
 * MongoDB as a nested path instead of a single key. A "$" that is not the first
 * character is left as is.
 * @param {string} key - key to encode (non-strings are converted to string first)
 * @returns {string} encoded key
 */
function encodeKey(key) {
    return (key + "").replace(/^\$/, "&#36;").replace(/\./g, "&#46;");
}
0 commit comments