From 64c86e7e389634f5bdfa5add61cfe02d5fe30365 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:30:56 +0200 Subject: [PATCH] Refactor - auto update credential provider script (#22606) * Refactor - auto update credential provider script * fix: pull missing file AddressParser.sys.mjs --------- Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: Issam Mani --- .../AddressMetaDataExtension.sys.mjs | 1 + .../Assets/CC_Script/AddressParser.sys.mjs | 316 ++++++++++++++++++ .../Assets/CC_Script/CC_Python_Update.py | 1 + .../CC_Script/FormAutofillHandler.sys.mjs | 16 + .../CC_Script/FormAutofillHeuristics.sys.mjs | 36 +- .../CC_Script/FormAutofillUtils.sys.mjs | 13 + .../Assets/CC_Script/HeuristicsRegExp.sys.mjs | 6 +- 7 files changed, 375 insertions(+), 14 deletions(-) create mode 100644 firefox-ios/Client/Assets/CC_Script/AddressParser.sys.mjs diff --git a/firefox-ios/Client/Assets/CC_Script/AddressMetaDataExtension.sys.mjs b/firefox-ios/Client/Assets/CC_Script/AddressMetaDataExtension.sys.mjs index da13b667849d..f4dca18dde14 100644 --- a/firefox-ios/Client/Assets/CC_Script/AddressMetaDataExtension.sys.mjs +++ b/firefox-ios/Client/Assets/CC_Script/AddressMetaDataExtension.sys.mjs @@ -254,6 +254,7 @@ export const AddressMetaDataExtension = { }, "data/DE": { alpha_3_code: "DEU", + address_reversed: true, }, "data/GH": { alpha_3_code: "GHA", diff --git a/firefox-ios/Client/Assets/CC_Script/AddressParser.sys.mjs b/firefox-ios/Client/Assets/CC_Script/AddressParser.sys.mjs new file mode 100644 index 000000000000..e3d4c0e6cde0 --- /dev/null +++ b/firefox-ios/Client/Assets/CC_Script/AddressParser.sys.mjs @@ -0,0 +1,316 @@ +/* eslint-disable no-useless-concat */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// NamedCaptureGroup class represents a named capturing group in a regular expression +class NamedCaptureGroup { + // The name of this capturing group + #name = null; + + // The capturing group + #capture = null; + + // The matched result + #match = null; + + constructor(name, capture) { + this.#name = name; + this.#capture = capture; + } + + get name() { + return this.#name; + } + + get capture() { + return this.#capture; + } + + get match() { + return this.#match; + } + + // Setter for the matched result based on the match groups + setMatch(matchGroups) { + this.#match = matchGroups[this.#name]; + } +} + +// Base class for different parts of a street address regular expression. +// The regular expression is constructed with prefix, pattern, suffix +// and separator to extract the "value" part. +// For example, when we write "apt 4." for an apartment number, its prefix is `apt`, +// its suffix is `.` and the value representing the apartment number is `4`. +class StreetAddressPartRegExp extends NamedCaptureGroup { + constructor(name, prefix, pattern, suffix, sep, optional = false) { + prefix = prefix ?? ""; + suffix = suffix ?? ""; + super( + name, + `((?:${prefix})(?<${name}>${pattern})(?:${suffix})(?:${sep})+)${ + optional ? "?" : "" + }` + ); + } +} + +// A regular expression to match the street number portion of a street address, +class StreetNumberRegExp extends StreetAddressPartRegExp { + static PREFIX = "((no|°|º|number)(\\.|-|\\s)*)?"; // From chromium source + + static PATTERN = "\\d+\\w?"; + + // TODO: possible suffix : (th\\.|\\.)? 
+ static SUFFIX = null; + + constructor(sep, optional) { + super( + StreetNumberRegExp.name, + StreetNumberRegExp.PREFIX, + StreetNumberRegExp.PATTERN, + StreetNumberRegExp.SUFFIX, + sep, + optional + ); + } +} + +// A regular expression to match the street name portion of a street address, +class StreetNameRegExp extends StreetAddressPartRegExp { + static PREFIX = null; + + static PATTERN = "(?:[^\\s,]+(?:[^\\S\\r\\n]+[^\\s,]+)*?)"; // From chromium source + + // TODO: Should we consider suffix like (ave|st)? + static SUFFIX = null; + + constructor(sep, optional) { + super( + StreetNameRegExp.name, + StreetNameRegExp.PREFIX, + StreetNameRegExp.PATTERN, + StreetNameRegExp.SUFFIX, + sep, + optional + ); + } +} + +// A regular expression to match the apartment number portion of a street address, +class ApartmentNumberRegExp extends StreetAddressPartRegExp { + static keyword = "apt|apartment|wohnung|apto|-" + "|unit|suite|ste|#|room"; // From chromium source // Firefox specific + static PREFIX = `(${ApartmentNumberRegExp.keyword})(\\.|\\s|-)*`; + + static PATTERN = "\\w*([-|\\/]\\w*)?"; + + static SUFFIX = "(\\.|\\s|-)*(ª)?"; // From chromium source + + constructor(sep, optional) { + super( + ApartmentNumberRegExp.name, + ApartmentNumberRegExp.PREFIX, + ApartmentNumberRegExp.PATTERN, + ApartmentNumberRegExp.SUFFIX, + sep, + optional + ); + } +} + +// A regular expression to match the floor number portion of a street address, +class FloorNumberRegExp extends StreetAddressPartRegExp { + static keyword = + "floor|flur|fl|og|obergeschoss|ug|untergeschoss|geschoss|andar|piso|º" + // From chromium source + "|level|lvl"; // Firefox specific + static PREFIX = `(${FloorNumberRegExp.keyword})?(\\.|\\s|-)*`; // TODO + static PATTERN = "\\d{1,3}\\w?"; + static SUFFIX = `(st|nd|rd|th)?(\\.|\\s|-)*(${FloorNumberRegExp.keyword})?`; // TODO + + constructor(sep, optional) { + super( + FloorNumberRegExp.name, + FloorNumberRegExp.PREFIX, + FloorNumberRegExp.PATTERN, + 
FloorNumberRegExp.SUFFIX, + sep, + optional + ); + } +} + +/** + * Class represents a street address with the following fields: + * - street number + * - street name + * - apartment number + * - floor number + */ +export class StructuredStreetAddress { + #street_number = null; + #street_name = null; + #apartment_number = null; + #floor_number = null; + + // If name_first is true, then the street name is given first, + // otherwise the street number is given first. + constructor( + name_first, + street_number, + street_name, + apartment_number, + floor_number + ) { + this.#street_number = name_first + ? street_name?.toString() + : street_number?.toString(); + this.#street_name = name_first + ? street_number?.toString() + : street_name?.toString(); + this.#apartment_number = apartment_number?.toString(); + this.#floor_number = floor_number?.toString(); + } + + get street_number() { + return this.#street_number; + } + + get street_name() { + return this.#street_name; + } + + get apartment_number() { + return this.#apartment_number; + } + + get floor_number() { + return this.#floor_number; + } + + toString() { + return ` + street number: ${this.#street_number}\n + street name: ${this.#street_name}\n + apartment number: ${this.#apartment_number}\n + floor number: ${this.#floor_number}\n + `; + } +} + +export class AddressParser { + /** + * Parse street address with the following pattern. + * street number, street name, apartment number(optional), floor number(optional) + * For example, 2 Harrison St #175 floor 2 + * + * @param {string} address The street address to be parsed. 
+ * @returns {StructuredStreetAddress} + */ + static parseStreetAddress(address) { + if (!address) { + return null; + } + + const separator = "(\\s|,|$)"; + + const regexpes = [ + new StreetNumberRegExp(separator), + new StreetNameRegExp(separator), + new ApartmentNumberRegExp(separator, true), + new FloorNumberRegExp(separator, true), + ]; + + if (AddressParser.parse(address, regexpes)) { + return new StructuredStreetAddress( + false, + ...regexpes.map(regexp => regexp.match) + ); + } + + // Swap the street number and name. + const regexpesReverse = [ + regexpes[1], + regexpes[0], + regexpes[2], + regexpes[3], + ]; + + if (AddressParser.parse(address, regexpesReverse)) { + return new StructuredStreetAddress( + true, + ...regexpesReverse.map(regexp => regexp.match) + ); + } + + return null; + } + + static parse(address, regexpes) { + const options = { + trim: true, + merge_whitespace: true, + }; + address = AddressParser.normalizeString(address, options); + + const match = address.match( + new RegExp(`^(${regexpes.map(regexp => regexp.capture).join("")})$`, "i") + ); + if (!match) { + return null; + } + + regexpes.forEach(regexp => regexp.setMatch(match.groups)); + return regexpes.reduce((acc, current) => { + return { ...acc, [current.name]: current.match }; + }, {}); + } + + static normalizeString(s, options) { + if (typeof s != "string") { + return s; + } + + if (options.ignore_case) { + s = s.toLowerCase(); + } + + // process punctuation before whitespace because if a punctuation + // is replaced with whitespace, we might want to merge it later + if (options.remove_punctuation) { + s = AddressParser.replacePunctuation(s, ""); + } else if ("replace_punctuation" in options) { + const replace = options.replace_punctuation; + s = AddressParser.replacePunctuation(s, replace); + } + + // process whitespace + if (options.merge_whitespace) { + s = AddressParser.mergeWhitespace(s); + } else if (options.remove_whitespace) { + s = AddressParser.removeWhitespace(s); + } + + 
return s.trim(); + } + + static replacePunctuation(s, replace) { + const regex = /\p{Punctuation}/gu; + return s?.replace(regex, replace); + } + + static removePunctuation(s) { + return s?.replace(/[.,\/#!$%\^&\*;:{}=\-_~()]/g, ""); + } + + static replaceControlCharacters(s) { + return s?.replace(/[\t\n\r]/g, " "); + } + + static removeWhitespace(s) { + return s?.replace(/[\s]/g, ""); + } + + static mergeWhitespace(s) { + return s?.replace(/\s{2,}/g, " "); + } +} diff --git a/firefox-ios/Client/Assets/CC_Script/CC_Python_Update.py b/firefox-ios/Client/Assets/CC_Script/CC_Python_Update.py index f6abbd36cb6a..f50f0e5c6248 100644 --- a/firefox-ios/Client/Assets/CC_Script/CC_Python_Update.py +++ b/firefox-ios/Client/Assets/CC_Script/CC_Python_Update.py @@ -23,6 +23,7 @@ "toolkit/components/formautofill/shared/AddressMetaDataExtension.sys.mjs", "toolkit/components/formautofill/shared/AddressMetaDataLoader.sys.mjs", "toolkit/components/formautofill/shared/AddressRecord.sys.mjs", + "toolkit/components/formautofill/shared/AddressParser.sys.mjs", "toolkit/components/formautofill/shared/AutofillTelemetry.sys.mjs", "toolkit/components/formautofill/shared/FormAutofillHandler.sys.mjs", "toolkit/components/formautofill/shared/FormAutofillHeuristics.sys.mjs", diff --git a/firefox-ios/Client/Assets/CC_Script/FormAutofillHandler.sys.mjs b/firefox-ios/Client/Assets/CC_Script/FormAutofillHandler.sys.mjs index 5d9b4613704a..2a071acc06f3 100644 --- a/firefox-ios/Client/Assets/CC_Script/FormAutofillHandler.sys.mjs +++ b/firefox-ios/Client/Assets/CC_Script/FormAutofillHandler.sys.mjs @@ -7,6 +7,7 @@ import { FormAutofillUtils } from "resource://gre/modules/shared/FormAutofillUti const lazy = {}; ChromeUtils.defineESModuleGetters(lazy, { + AddressParser: "resource://gre/modules/shared/AddressParser.sys.mjs", AutofillFormFactory: "resource://gre/modules/shared/AutofillFormFactory.sys.mjs", CreditCard: "resource://gre/modules/CreditCard.sys.mjs", @@ -823,6 +824,21 @@ export class 
FormAutofillHandler { } } } + + // If a house number field exists, split the address up into house number + // and street name. + if (this.getFieldDetailByName("address-housenumber")) { + let address = lazy.AddressParser.parseStreetAddress( + profile["street-address"] + ); + if (address) { + profile["address-housenumber"] = address.street_number; + let field = this.getFieldDetailByName("address-line1") + ? "address-line1" + : "street-address"; + profile[field] = address.street_name; + } + } } /** diff --git a/firefox-ios/Client/Assets/CC_Script/FormAutofillHeuristics.sys.mjs b/firefox-ios/Client/Assets/CC_Script/FormAutofillHeuristics.sys.mjs index 5cab79884948..0a371cf53514 100644 --- a/firefox-ios/Client/Assets/CC_Script/FormAutofillHeuristics.sys.mjs +++ b/firefox-ios/Client/Assets/CC_Script/FormAutofillHeuristics.sys.mjs @@ -296,13 +296,28 @@ export const FormAutofillHeuristics = { "address-line3", ]; + let houseNumberFields = 0; + + // We need to build a list of the address fields. A list of the indicies + // is also needed as the fields with a given name can change positions + // during the update. const fields = []; + const fieldIndicies = []; for (let idx = scanner.parsingIndex; !scanner.parsingFinished; idx++) { const detail = scanner.getFieldDetailByIndex(idx); + + // Skip over any house number fields. There should only be zero or one, + // but we'll skip over them all anyway. 
+ if (detail?.fieldName == "address-housenumber") { + houseNumberFields++; + continue; + } + if (!INTERESTED_FIELDS.includes(detail?.fieldName)) { break; } fields.push(detail); + fieldIndicies.push(idx); } if (!fields.length) { @@ -315,7 +330,7 @@ export const FormAutofillHeuristics = { fields[0].reason != "autocomplete" && ["address-line2", "address-line3"].includes(fields[0].fieldName) ) { - scanner.updateFieldName(scanner.parsingIndex, "address-line1"); + scanner.updateFieldName(fieldIndicies[0], "address-line1"); } break; case 2: @@ -325,27 +340,22 @@ export const FormAutofillHeuristics = { (fields[1].fieldName == "address-line2" || fields[1].reason != "autocomplete") ) { - scanner.updateFieldName( - scanner.parsingIndex, - "address-line1", - true - ); + scanner.updateFieldName(fieldIndicies[0], "address-line1", true); } } else { - scanner.updateFieldName(scanner.parsingIndex, "address-line1"); + scanner.updateFieldName(fieldIndicies[0], "address-line1"); } - - scanner.updateFieldName(scanner.parsingIndex + 1, "address-line2"); + scanner.updateFieldName(fieldIndicies[1], "address-line2"); break; case 3: default: - scanner.updateFieldName(scanner.parsingIndex, "address-line1"); - scanner.updateFieldName(scanner.parsingIndex + 1, "address-line2"); - scanner.updateFieldName(scanner.parsingIndex + 2, "address-line3"); + scanner.updateFieldName(fieldIndicies[0], "address-line1"); + scanner.updateFieldName(fieldIndicies[1], "address-line2"); + scanner.updateFieldName(fieldIndicies[2], "address-line3"); break; } - scanner.parsingIndex += fields.length; + scanner.parsingIndex += fields.length + houseNumberFields; return true; }, diff --git a/firefox-ios/Client/Assets/CC_Script/FormAutofillUtils.sys.mjs b/firefox-ios/Client/Assets/CC_Script/FormAutofillUtils.sys.mjs index d1143be59882..c5f1dc86de54 100644 --- a/firefox-ios/Client/Assets/CC_Script/FormAutofillUtils.sys.mjs +++ b/firefox-ios/Client/Assets/CC_Script/FormAutofillUtils.sys.mjs @@ -135,6 +135,10 @@ 
FormAutofillUtils = { "address-line3": "address", "address-level1": "address", "address-level2": "address", + // DE addresses are often split into street name and house number; + // combined they form address-line1 + "address-streetname": "address", + "address-housenumber": "address", "postal-code": "address", country: "address", "country-name": "address", @@ -380,6 +384,15 @@ FormAutofillUtils = { return addressParts.join(this.getAddressSeparator()); }, + /** + * Returns false if an address is written <house number> <street name> + * and true if an address is written <street name> <house number>. In the future, this + * can be expanded to format an address + */ + getAddressReversed(region) { + return this.getCountryAddressData(region).address_reversed; + }, + /** * In-place concatenate tel-related components into a single "tel" field and * delete unnecessary fields. diff --git a/firefox-ios/Client/Assets/CC_Script/HeuristicsRegExp.sys.mjs b/firefox-ios/Client/Assets/CC_Script/HeuristicsRegExp.sys.mjs index 49985d16414d..2a01b881bd26 100644 --- a/firefox-ios/Client/Assets/CC_Script/HeuristicsRegExp.sys.mjs +++ b/firefox-ios/Client/Assets/CC_Script/HeuristicsRegExp.sys.mjs @@ -10,6 +10,7 @@ export const HeuristicsRegExp = { tel: undefined, "street-address": undefined, "address-line1": undefined, + "address-housenumber": undefined, "address-line2": undefined, "address-line3": undefined, "address-level2": undefined, @@ -388,7 +389,7 @@ export const HeuristicsRegExp = { "address-line1": "^address$|address[_-]?line(one)?|address1|addr1|street" + "|(?:shipping|billing)address$" + - "|strasse|straße|hausnummer|housenumber" + // de-DE + "|strasse|straße" + // de-DE "|house.?name" + // en-GB "|direccion|dirección" + // es "|adresse" + // fr-FR @@ -466,6 +467,9 @@ export const HeuristicsRegExp = { "|((\\b|_|\\*)(eyalet|[şs]ehir|[İii̇]l(imiz)?|kent)(\\b|_|\\*))" + // tr "|^시[·・]?도", // ko-KR + "address-housenumber": + "housenumber|hausnummer|haus", + "postal-code": "zip|postal|post.*code|pcode" + "|pin.?code" + // en-IN