Skip to content

Commit 34cf61f

Browse files
authored
Merge pull request #1693 from pelias/fix-layer-and-source-combination
Handle sources and layers param with address filter
2 parents 52a6928 + 404d3de commit 34cf61f

2 files changed

Lines changed: 79 additions & 47 deletions

File tree

sanitizer/_address_layer_filter.js

Lines changed: 59 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,51 @@ const nonEmptyString = (v) => _.isString(v) && !_.isEmpty(v);
2525

2626
const ADDRESS_FILTER_WARNING = 'performance optimization: excluding \'address\' layer';
2727

28+
/**
 * Decide whether the 'address' layer can be excluded for this request.
 *
 * The text that is inspected is 'clean.text' unless a parser produced a
 * non-empty 'clean.parsed_text', in which case the parsed portion is used.
 *
 * note: callers are expected to have verified that 'clean.text' is a
 * non-empty string before calling this function.
 *
 * @param {object} clean - the sanitized request ('text' and, optionally, 'parsed_text')
 * @returns {boolean} true when fewer than two words were specified, or when
 *   no numeral is present outside of the parsed street name
 */
function can_remove_addresses(clean) {
  const parsed = clean.parsed_text;

  // start from the full 'clean.text'
  // note: this should already have superfluous characters removed
  let queryText = clean.text;

  // when a parser has removed tokens, prefer the parsed text, this is the
  // text which will be queried against the 'name.default' field.
  // @todo: this logic is duplicated from 'query/text_parser.js' and may
  // be subject to change.
  if (_.isObject(parsed) && !_.isEmpty(parsed)) {
    const hasHousenumber = parsed.hasOwnProperty('housenumber');
    const looksLikeStreetAddress = hasHousenumber && parsed.hasOwnProperty('street');

    if (_.has(clean, 'parsed_text.subject')) {
      // $subject is used where available (pelias parser)
      queryText = parsed.subject;
    } else if (looksLikeStreetAddress) {
      // 'pelias_parser' or 'libpostal' identified input as a street address
      queryText = parsed.housenumber + ' ' + parsed.street;
    } else if (nonEmptyString(parsed.admin_parts) && nonEmptyString(parsed.name)) {
      // the 'naive parser' was used, input is equal to 'name'
      queryText = parsed.name;
    }
  }

  // the number of whitespace-delimited, non-empty words specified
  const words = queryText.split(/\s+/).filter(nonEmptyString);

  // numeric street names, such as '26 st', are not considered by the
  // numeral check: the street is stripped from the text before testing.
  const numeralScope = _.has(clean, 'parsed_text.street') ?
    queryText.replace(parsed.street, '') :
    queryText;
  const containsNumeral = /\d/.test(numeralScope);

  // the layer filter is only unsafe when two or more words were
  // specified and at least one numeral is present
  return !(words.length >= 2 && containsNumeral);
}
72+
2873
function _setup(tm) {
2974

3075
return {
@@ -38,54 +83,12 @@ function _setup(tm) {
3883
return messages;
3984
}
4085

41-
// default to using the full 'clean.text'
42-
// note: this should already have superfluous characters removed
43-
let input = clean.text;
44-
4586
// do nothing if no input text specified in the request
46-
if (!nonEmptyString(input)) {
87+
if (!nonEmptyString(clean.text)) {
4788
return messages;
4889
}
4990

50-
// if a parser has removed tokens, use the parsed text instead, this
51-
// is the text which will be queried against the 'name.default' field.
52-
// @todo: this logic is duplicated from 'query/text_parser.js' and may
53-
// be subject to change.
54-
if (_.isObject(clean.parsed_text) && !_.isEmpty(clean.parsed_text)) {
55-
56-
var isStreetAddress = clean.parsed_text.hasOwnProperty('housenumber') && clean.parsed_text.hasOwnProperty('street');
57-
58-
// use $subject where available (pelias parser)
59-
if (_.has(clean, 'parsed_text.subject')) {
60-
input = clean.parsed_text.subject;
61-
}
62-
63-
// if 'pelias_parser' or 'libpostal' identified input as a street address
64-
else if (isStreetAddress) {
65-
input = clean.parsed_text.housenumber + ' ' + clean.parsed_text.street;
66-
}
67-
68-
// else if the 'naive parser' was used, input is equal to 'name'
69-
else if (nonEmptyString(clean.parsed_text.admin_parts) && nonEmptyString(clean.parsed_text.name)) {
70-
input = clean.parsed_text.name;
71-
}
72-
}
73-
74-
// count the number of words specified
75-
let totalWords = input.split(/\s+/).filter(nonEmptyString).length;
76-
77-
// check that at least one numeral was specified
78-
let hasNumeral = /\d/.test(input);
79-
80-
// do not consider numeric street names, such as '26 st' in numeric check.
81-
if( _.has(clean, 'parsed_text.street') ){
82-
hasNumeral = /\d/.test(input.replace(clean.parsed_text.street, ''));
83-
}
84-
85-
// if less than two words were specified /or no numeral is present
86-
// then it is safe to apply the layer filter
87-
if (totalWords < 2 || !hasNumeral) {
88-
91+
if (can_remove_addresses(clean)) {
8992
// handle the common case where neither sources nor (positive) layers were specified
9093
if (!_.isArray(clean.sources) || _.isEmpty(clean.sources)) {
9194
// if there are no layers already set, start with the list of all of them
@@ -100,7 +103,6 @@ function _setup(tm) {
100103

101104
// handle the case where 'sources' were explicitly specified
102105
else if (_.isArray(clean.sources)) {
103-
104106
// we need to create a list of layers for the specified sources
105107
let sourceLayers = clean.sources.reduce((l, key) => l.concat(tm.layers_by_source[key] || []), []);
106108
sourceLayers = _.uniq(sourceLayers); // dedupe
@@ -111,8 +113,18 @@ function _setup(tm) {
111113
return messages;
112114
}
113115

114-
// target all layers for the sources specified except 'address'
115-
clean.layers = sourceLayers.filter(item => item !== 'address'); // exclude 'address'
116+
// create a list of all "possible layers": layers from the specified sources, minus address layer
117+
const possibleLayers = sourceLayers.filter(item => item !== 'address');
118+
119+
// intersect the possible layers with any already specified layer preferences
120+
if (_.isArray(clean.layers) && clean.layers.length > 1) {
121+
// layers already exist, intersect
122+
clean.layers = _.intersection(clean.layers, possibleLayers);
123+
} else {
124+
// no layers already, use all possible layers
125+
clean.layers = possibleLayers;
126+
}
127+
116128
messages.warnings.push(ADDRESS_FILTER_WARNING);
117129
}
118130
}

test/unit/sanitizer/_address_layer_filter.js

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,26 @@ module.exports.tests.sanitize = function (test, common) {
117117
t.end();
118118
});
119119

120+
test('sanitize - exclude addresses when negative layers and sources are specified', (t) => {
121+
// select all layers except venue to simulate value of clean.layers from targets sanitizer
122+
const clean_layers = real_type_mapping.getCanonicalLayers().filter(layer => layer !== 'venue').sort();
123+
124+
let clean = { text: 'foo',
125+
layers: clean_layers,
126+
negative_layers: ['venue'],
127+
positive_layers: [],
128+
sources: ['openstreetmap', 'openaddresses','whosonfirst'],
129+
negative_sources: ['geonames'],
130+
positive_sources: []
131+
};
132+
133+
const expected_layers = clean_layers.filter(layer => layer !== 'address').sort();
134+
135+
t.deepEqual(real_sanitizer.sanitize(null, clean), STD_MESSAGES);
136+
t.deepEqual(clean.layers.sort(), expected_layers, 'layer list is reduced to exclude addresses');
137+
t.end();
138+
});
139+
120140
test('sanitize - exclude addresses when negative layers other than address are specified', (t) => {
121141
// select all layers except venue to simulate value of clean.layers from targets sanitizer
122142
const clean_layers = real_type_mapping.getCanonicalLayers().filter(layer => layer !== 'venue').sort();

0 commit comments

Comments
 (0)