diff --git a/src/data/override.ts b/src/data/override.ts index 0284982..7d7b79b 100644 --- a/src/data/override.ts +++ b/src/data/override.ts @@ -1,5 +1,9 @@ import type { NZGBSourceData, Ref } from '../types'; +export const ALLOW_INCONSISTENT_DIACRITICS = new Set([ + // +]); + export const OVERRIDES: Record> = { '7006;6879': { name: 'Whanganui' }, 47427: { name: 'Whanganui East' }, diff --git a/src/stage2-preprocess/fetchSouthPoleOsmFeatures.overpassql b/src/stage2-preprocess/fetchSouthPoleOsmFeatures.overpassql index af2a30d..4c523f0 100644 --- a/src/stage2-preprocess/fetchSouthPoleOsmFeatures.overpassql +++ b/src/stage2-preprocess/fetchSouthPoleOsmFeatures.overpassql @@ -1,7 +1,7 @@ // everything with a LINZ ref south of the -85th parallel [out:json][timeout:25]; ( - nwr["ref:linz:place_id"](-90,-180,-85,180); + nwr["ref:linz:place_id"](-90,-180,-75,180); // other queries are injected dynamically: /* DO NOT EDIT THIS COMMENT */ ); diff --git a/src/stage2-preprocess/maybeTeReoName.ts b/src/stage2-preprocess/maybeTeReoName.ts index 68cbdb3..4de7d5f 100644 --- a/src/stage2-preprocess/maybeTeReoName.ts +++ b/src/stage2-preprocess/maybeTeReoName.ts @@ -44,7 +44,7 @@ export function removeEnglishPrefixesAndSuffixes( const newName = name .replace(englishPrefixesRegExp, '') .replace(englishSuffixesRegExp, '') - .replace(/ Pa$/, ' Pā') + .replace(/\bPa\b/, 'Pā') .replace(/\bMaori\b/, 'Māori') .trim(); diff --git a/src/stage2-preprocess/preprocesOSM.ts b/src/stage2-preprocess/preprocesOSM.ts index 5906f05..0ea7f2f 100644 --- a/src/stage2-preprocess/preprocesOSM.ts +++ b/src/stage2-preprocess/preprocesOSM.ts @@ -89,7 +89,8 @@ function osmToJson( for (const topLevelTag of topLevelTags) { if (id) { - if (out[topLevelTag].withRef[id]) { + const existing = out[topLevelTag].withRef[id]; + if (existing && existing.osmId !== feature.osmId) { duplicates.add(id); } out[topLevelTag].withRef[id] = feature; diff --git a/src/stage3-conflate/compareFeatures/__tests__/compareFeatures.test.ts b/src/stage3-conflate/compareFeatures/__tests__/compareFeatures.test.ts index e80e376..b64f0f5 100644 --- a/src/stage3-conflate/compareFeatures/__tests__/compareFeatures.test.ts +++ b/src/stage3-conflate/compareFeatures/__tests__/compareFeatures.test.ts @@ -54,8 +54,7 @@ describe('compareFeatures', () => { ).toBeUndefined(); }); - // TODO: not sure why this test is failing - it.skip('does override name:mi if `name` is being changed', () => { + it('does override name:mi if `name` is being changed', () => { expect( conflateTags( { name: 'Ōtāhuhu Creek', nameMi: 'Ōtāhuhu' }, @@ -104,10 +103,25 @@ describe('compareFeatures', () => { ).toBeUndefined(); }); + it('allows the OSM feature to have more macrons if the NZGB has no official name', () => { + expect( + conflateTags( + { name: 'Ōtuwharekai', official: undefined }, + { name: 'Ōtūwharekai' }, + ), + ).toBeUndefined(); + }); + it('does not apply the above exception for features that have an official name', () => { expect( conflateTags({ name: 'Puhoi', official: true }, { name: 'Pūhoi' }), ).toStrictEqual({ name: 'Puhoi' }); + expect( + conflateTags( + { name: 'Ōtuwharekai', official: true }, + { name: 'Ōtūwharekai' }, + ), + ).toStrictEqual({ name: 'Ōtuwharekai' }); }); it('allows OSM to use a slash instead of the word "or"', () => { @@ -378,6 +392,15 @@ describe('compareFeatures', () => { ), ).toStrictEqual({ natural: 'spring' }); // default preset is suggested }); + + it('accepts lifecycle prefixes that duplicate a normal key', () => { + expect( + conflateTags( + { name: 'X', type: 'Island' }, + { name: 'X', place: 'suburb', 'not:place': 'island' }, + ), + ).toBeUndefined(); + }); }); describe('exceptions', () => { @@ -385,6 +408,8 @@ describe('compareFeatures', () => { nzgb | osm ${'Saint Martin'} | ${'St Martin'} ${'St Martin'} | ${'Saint Martin'} + ${'St. Martin'} | ${'Saint Martin'} + ${'St. Martin'} | ${'St Martin'} ${'Mt Martin'} | ${'Mount Martin'} ${'Mount martin'} | ${'Mt martin'} `('accepts an inconsistency between $nzgb & $osm', ({ nzgb, osm }) => { @@ -395,7 +420,7 @@ describe('compareFeatures', () => { it('allows dual names in the name tag', () => { expect( conflateTags( - { name: 'Ōmanawa Falls' }, + { name: 'Omanawa Falls' }, { name: 'Te Rere o Ōmanawa / Ōmanawa Falls', 'name:mi': 'Te Rere o Ōmanawa', diff --git a/src/stage3-conflate/compareFeatures/checkTagsFromFeaturePreset.ts b/src/stage3-conflate/compareFeatures/checkTagsFromFeaturePreset.ts index 789a560..9f2a8d7 100644 --- a/src/stage3-conflate/compareFeatures/checkTagsFromFeaturePreset.ts +++ b/src/stage3-conflate/compareFeatures/checkTagsFromFeaturePreset.ts @@ -32,10 +32,24 @@ function osmRemoveLifecyclePrefix(key: string) { return key; } +/** + * This converts an object like: + * ```js + * { place: 'suburb', 'not:place': 'island' } + * ``` + * into: + * ```js + * { place: ['suburb', 'island'] } + * ``` + */ function stripAllLifecyclePrefixes(tags: Tags) { - const out: Tags = {}; + const out: Record> = {}; for (const [key, value] of Object.entries(tags)) { - out[osmRemoveLifecyclePrefix(key)] = value; + if (value) { + const stripedKey = osmRemoveLifecyclePrefix(key); + out[stripedKey] ||= new Set(); + out[stripedKey].add(value); + } } return out; } @@ -60,11 +74,13 @@ export function checkTagsFromFeaturePreset( const tagChanges: Tags = {}; for (const [key, value] of Object.entries(presetToCheck)) { + const osmTagValues = [...(cleanedOsmTags[key] || [])]; const isTagWrong = - value === '*' ? !cleanedOsmTags[key] : cleanedOsmTags[key] !== value; + !osmTagValues.length || + (value !== '*' && osmTagValues.every((v) => v !== value)); if (isTagWrong) { - if (key === 'seamark:type' && cleanedOsmTags[key]) { + if (key === 'seamark:type' && !!osmTagValues.length) { // don't try to change seamark:type if it already has a more useful value // e.g. seamark:type=obstruction cf. seamark:type=sea_area // but if it's missing, we will add it diff --git a/src/stage3-conflate/compareFeatures/exceptions.ts b/src/stage3-conflate/compareFeatures/exceptions.ts index 0104c08..2146eeb 100644 --- a/src/stage3-conflate/compareFeatures/exceptions.ts +++ b/src/stage3-conflate/compareFeatures/exceptions.ts @@ -1,3 +1,4 @@ +import { ALLOW_INCONSISTENT_DIACRITICS } from '../../data'; import type { NZGBFeature, OSMFeature } from '../../types'; /** @@ -21,6 +22,26 @@ export function nameHasSlashForOldName(nzgb: NZGBFeature, osm: OSMFeature) { ); } +/** @internal @returns true if the names are similar enough */ +function isPrettyMuchEqual(nzgbName: string, osmName: string) { + // the number of charcaters must be the same + if (nzgbName.length !== osmName.length) return false; + + for (let index = 0; index < nzgbName.length; index++) { + // compare letter-by-letter + const nzgbChar = nzgbName[index]; + const osmChar = osmName[index]; + + const isOkay = + nzgbChar === osmChar || + nzgbChar === osmChar.normalize('NFD').replaceAll(/\p{Diacritic}/gu, ''); + + if (!isOkay) return false; + } + + return true; +} + /** * Special case to allow OSM to have macrons, if the NZGB name is unofficial. * @returns true if this is an exception and we shouldn't change the name @@ -29,11 +50,15 @@ export function isUnofficialAndOsmHasMacrons( nzgb: NZGBFeature, osm: OSMFeature, ) { - if (nzgb.official) return false; + // for official names, an inconsistency is only + // acceptable if there's an override for this ref. + const shouldAllowInconsistency = nzgb.official + ? ALLOW_INCONSISTENT_DIACRITICS.has(osm.tags['ref:linz:place_id']!) + : true; return ( - nzgb.name?.normalize('NFD').replaceAll(/\p{Diacritic}/gu, '') === - osm.tags.name?.normalize('NFD').replaceAll(/\p{Diacritic}/gu, '') + shouldAllowInconsistency && + isPrettyMuchEqual(nzgb.name, osm.tags.name || '') ); } @@ -48,7 +73,10 @@ export function allowSlashInsteadOfOr(nzgb: NZGBFeature, osm: OSMFeature) { /** @internal */ function normaliseTrivialNameDifferences(name: string) { - return name.replace(/\bMount\b/, 'Mt').replace(/\bSaint\b/, 'St'); + return name + .replace(/\bMount\b/, 'Mt') + .replace(/\bSaint\b/, 'St') + .replace(/\bSt\./, 'St'); } /** @@ -73,7 +101,9 @@ export function allowDualNames(nzgb: NZGBFeature, osm: OSMFeature) { return ( nameSegments?.length === 2 && nameSegments.every( - (segment) => segment === nzgb.name || segment === osm.tags['name:mi'], + (segment) => + isPrettyMuchEqual(nzgb.name, segment) || + segment === osm.tags['name:mi'], ) ); } diff --git a/src/stage3-conflate/index.ts b/src/stage3-conflate/index.ts index f879669..4969fde 100644 --- a/src/stage3-conflate/index.ts +++ b/src/stage3-conflate/index.ts @@ -46,7 +46,10 @@ function processOneType( // for on land vs subsea, then we merge two categories together const presets = 'tags' in categoryDefinition - ? findTopLevelTags(categoryDefinition.tags) + ? [ + ...findTopLevelTags(categoryDefinition.tags), + ...(categoryDefinition.acceptTags?.flatMap(findTopLevelTags) || []), + ] : [ ...findTopLevelTags(categoryDefinition.onLandTags), ...findTopLevelTags(categoryDefinition.subseaTags), @@ -191,7 +194,7 @@ const trivialKeys = new Set([ 'ref:linz:place_id', 'wikidata', 'wikipedia', - 'name:etymology', // only if there is also name:ety:wikidata + 'name:etymology', 'name:etymology:wikidata', 'source:name', 'source', @@ -260,10 +263,7 @@ async function main() { // if the only thing being editted is the ref tag or wikidata or wikipedia tag const isTrivial = f.properties.__action === 'edit' && - Object.keys(f.properties).every((key) => trivialKeys.has(key)) && - // either both name:ety & wikidata, or niether - !!f.properties['name:etymology'] === - !!f.properties['name:etymology:wikidata']; + Object.keys(f.properties).every((key) => trivialKeys.has(key)); if (isTrivial) { extraLayersObject[trivialLayerName].features.push(f); diff --git a/src/stage4-report/Report.tsx b/src/stage4-report/Report.tsx index b335120..2a5c719 100644 --- a/src/stage4-report/Report.tsx +++ b/src/stage4-report/Report.tsx @@ -115,7 +115,7 @@ export const Report: React.FC<{ data: StatsFile; css: string }> = ({ stats.addNodeCount + stats.addWayCount + stats.editCount; if (remaining === 0) { layerCount.okayCount += 1; // complete - } else if (remaining / stats.okayCount > 0.9) { + } else if (remaining / (stats.okayCount + remaining) < 0.1) { layerCount.editCount += 1; // >90% complete } else { layerCount.addNodeCount += 1; // incomplete