Skip to content

Commit 35a84af

Browse files
authored
Fix too many bins protection for histogram distribution back end calls (#1745)
* get jest setup working again * override transformIgnorePatterns to also ignore d3 * add test for too many bins failure and fix * renamed and reused max bins constant
1 parent 63b5248 commit 35a84af

6 files changed

Lines changed: 458 additions & 36 deletions

File tree

packages/libs/eda/config-overrides.js

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,25 @@ module.exports = function override(config, env) {
5858
},
5959
};
6060
};
61+
62+
/*
63+
* Jest (test runner) config overrides, applied by react-app-rewired.
64+
*
65+
* Several modules in this package transitively import the d3 family of
66+
* packages (e.g. d3, d3-scale), which ship as untransformed ES modules.
67+
* CRA's default `transformIgnorePatterns` skips everything in node_modules,
68+
* so Jest chokes on the `export` syntax with "Unexpected token 'export'".
69+
*
70+
* react-app-rewired *concatenates* array overrides from package.json onto
71+
* CRA's defaults, so we can't relax the pattern there - the broad default
72+
* still matches. Instead we replace `transformIgnorePatterns` outright here,
73+
* using a negative lookahead so the d3 packages (and their deps) ARE
74+
* transformed while everything else in node_modules is still skipped.
75+
*/
76+
module.exports.jest = function (config) {
77+
config.transformIgnorePatterns = [
78+
'[/\\\\]node_modules[/\\\\](?!(?:d3|d3-[\\w-]+|internmap|delaunator|robust-predicates)[/\\\\]).+\\.(js|jsx|mjs|cjs|ts|tsx)$',
79+
'^.+\\.module\\.(css|sass|scss)$',
80+
];
81+
return config;
82+
};

packages/libs/eda/package.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"react-cool-dimensions": "^2.0.7",
2020
"react-draggable": "^4.4.3",
2121
"react-resizable": "^3.0.4",
22+
"react-test-renderer": "^18.0.0",
2223
"uuid": "^8.3.2"
2324
},
2425
"peerDependencies": {
@@ -65,7 +66,7 @@
6566
"@tanstack/react-query-devtools": "^4.35.3",
6667
"@testing-library/jest-dom": "^5.16.5",
6768
"@testing-library/react": "^11.1.0",
68-
"@testing-library/react-hooks": "^5.0.3",
69+
"@testing-library/react-hooks": "^8.0.1",
6970
"@testing-library/user-event": "^12.1.10",
7071
"@types/date-arithmetic": "^4.1.1",
7172
"@types/debounce-promise": "^3.1.3",
@@ -79,6 +80,7 @@
7980
"@types/react-resizable": "^1.7.2",
8081
"@types/react-router-dom": "^5.3.3",
8182
"@types/react-table": "^7.7.12",
83+
"@types/react-test-renderer": "^18",
8284
"@types/uuid": "^8.3.4",
8385
"@veupathdb/browserslist-config": "workspace:^",
8486
"@veupathdb/eslint-config": "workspace:^",
@@ -90,6 +92,7 @@
9092
"buffer": "^6.0.3",
9193
"http-proxy-middleware": "^2.0.6",
9294
"ify-loader": "^1.1.0",
95+
"jest-watch-typeahead": "^3.0.1",
9396
"notistack": "^3.0.1",
9497
"path-browserify": "^1.0.1",
9598
"postcss-normalize": "^10.0.1",
@@ -109,5 +112,11 @@
109112
"stream-browserify": "^3.0.0",
110113
"typescript": "^4.3.4",
111114
"web-vitals": "^0.2.4"
115+
},
116+
"jest": {
117+
"testPathIgnorePatterns": [
118+
"/node_modules/",
119+
"useAnalysis\\.test\\.tsx"
120+
]
112121
}
113122
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
2+
* The maximum number of bins the backend will produce for a single
3+
* `distribution` request. Exceeding it yields a bad-request response:
4+
* "Variable ... Maximum number of allowed bins (2000) exceeded."
5+
*
6+
* Front-end code that computes histogram ranges/bin widths must keep
7+
* `(rangeMax - rangeMin) / binWidth` at or below this limit.
8+
*/
9+
export const MAX_DISTRIBUTION_BINS = 2000;
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { numberDateDefaultAxisRange } from './default-axis-range';
2+
import { NumberVariable } from '../types/study';
3+
import { NumberRange } from '../types/general';
4+
import { MAX_DISTRIBUTION_BINS } from '../constants';
5+
6+
function makeIntegerVariable(
7+
distributionDefaults: NumberVariable['distributionDefaults']
8+
): NumberVariable {
9+
return {
10+
id: 'VAR_TEST',
11+
providerLabel: 'test',
12+
displayName: 'Test integer variable',
13+
hideFrom: [],
14+
type: 'integer',
15+
units: '',
16+
dataShape: 'continuous',
17+
distinctValuesCount: 5,
18+
isTemporal: false,
19+
isFeatured: false,
20+
isMergeKey: false,
21+
isMultiValued: false,
22+
distributionDefaults,
23+
};
24+
}
25+
26+
describe('numberDateDefaultAxisRange - max bins protection', () => {
27+
// Real-life bug: a year-like integer variable with values 2000..2004,
28+
// continuous, binWidth 1. Because rangeMin (2000) !== rangeMax (2004),
29+
// the histogram's lower bound was set to 0, producing 2004 bins for a
30+
// displayRangeMin..displayRangeMax of 0..2004, exceeding the backend's
31+
// 2000-bin limit. https://github.com/VEuPathDB/web-monorepo (year integers)
32+
it('does not produce more than the max allowed bins for a large-valued integer variable', () => {
33+
const binWidth = 1;
34+
const variable = makeIntegerVariable({
35+
rangeMin: 2000,
36+
rangeMax: 2004,
37+
binWidth,
38+
});
39+
40+
const range = numberDateDefaultAxisRange(
41+
variable,
42+
undefined,
43+
undefined,
44+
undefined
45+
) as NumberRange;
46+
47+
const binCount = (range.max - range.min) / binWidth;
48+
expect(binCount).toBeLessThanOrEqual(MAX_DISTRIBUTION_BINS);
49+
50+
// and the range must still cover the data
51+
expect(range.min).toBeLessThanOrEqual(2000);
52+
expect(range.max).toBeGreaterThanOrEqual(2004);
53+
});
54+
55+
it('still starts ordinary positive integer variables at zero', () => {
56+
const variable = makeIntegerVariable({
57+
rangeMin: 5,
58+
rangeMax: 100,
59+
binWidth: 1,
60+
});
61+
62+
const range = numberDateDefaultAxisRange(
63+
variable,
64+
undefined,
65+
undefined,
66+
undefined
67+
) as NumberRange;
68+
69+
expect(range.min).toBe(0);
70+
expect(range.max).toBe(100);
71+
});
72+
});

packages/libs/eda/src/lib/core/utils/default-axis-range.ts

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import { NumberDistributionDefaults, Variable } from '../types/study';
22
import { NumberOrDateRange } from '@veupathdb/components/lib/types/general';
33
// type of computedVariableMetadata for computation apps such as alphadiv and abundance
44
import { VariableMapping } from '../api/DataClient/types';
5+
import { MAX_DISTRIBUTION_BINS } from '../constants';
56
import { min, max } from 'lodash';
67

78
export function numberDateDefaultAxisRange(
@@ -138,19 +139,19 @@ export function numberDateDefaultAxisRange(
138139
} else return undefined;
139140
}
140141

141-
const MAX_EDGE_CASE_BINS = 2000;
142-
143142
/**
144143
* The basic functionality is to return `min([ displayRangeMin ?? 0, rangeMin, observedMin ])`
145144
* The zero is targeted at histograms which should start at zero even if rangeMin > 0.
146145
*
147-
* However, there is an edge case when rangeMin === rangeMax. (When the default binWidth defaults to 1.)
146+
* However, there is an edge case for variables whose values are clustered far from zero
147+
* (e.g. a year-like integer with values 2000..2004 and a default `binWidth` of 1).
148148
* If we are making a `distribution` histogram request in this scenario, we do NOT want
149-
* to request `{ min: 0, max: rangeMax, binWidth: 1 }`
150-
* because this will cause an OOM error on the backend if `rangeMax` is large.
149+
* to anchor the lower bound at zero and request `{ min: 0, max: rangeMax, binWidth: 1 }`
150+
* because the bin count (`rangeMax / binWidth`) would exceed the backend's limit and
151+
* produce a "Maximum number of allowed bins exceeded" error.
151152
*
152-
* The solution is to check for the edge case and return
153-
* `rangeMin - binWidth` for the lower bound if the number of bins would be excessive.
153+
* The solution is to detect when anchoring at zero would create an excessive number of
154+
* bins and instead use `rangeMin - binWidth` for the lower bound.
154155
*/
155156
function getSafeLowerBound(
156157
{ displayRangeMin, rangeMin, rangeMax, binWidth }: NumberDistributionDefaults,
@@ -162,9 +163,9 @@ function getSafeLowerBound(
162163
if (displayRangeMin != null) {
163164
lowerBound = displayRangeMin;
164165
} else if (
165-
// check for the edge-case
166-
rangeMin === rangeMax &&
167-
rangeMax / (binWidth ?? 1) > MAX_EDGE_CASE_BINS
166+
// would anchoring the lower bound at zero create too many bins?
167+
rangeMax / (binWidth ?? 1) >
168+
MAX_DISTRIBUTION_BINS
168169
) {
169170
lowerBound = rangeMin - (binWidth ?? 1);
170171
}

0 commit comments

Comments
 (0)