From 90f4d7b0cc0af70d4391c6a7e5110bc2019fab75 Mon Sep 17 00:00:00 2001 From: Timo Reimann Date: Tue, 20 Aug 2024 22:34:27 +0200 Subject: [PATCH] WIP --- go.mod | 1 + go.sum | 2 + isolated/slack_test.go | 23 + slack.go | 93 +- vendor/modules.txt | 3 + vendor/mvdan.cc/xurls/v2/.gitattributes | 2 + vendor/mvdan.cc/xurls/v2/.gitignore | 3 + vendor/mvdan.cc/xurls/v2/LICENSE | 27 + vendor/mvdan.cc/xurls/v2/README.md | 37 + vendor/mvdan.cc/xurls/v2/schemes.go | 375 ++++++ vendor/mvdan.cc/xurls/v2/tlds.go | 1500 +++++++++++++++++++++++ vendor/mvdan.cc/xurls/v2/tlds_pseudo.go | 24 + vendor/mvdan.cc/xurls/v2/unicode.go | 7 + vendor/mvdan.cc/xurls/v2/xurls.go | 200 +++ 14 files changed, 2294 insertions(+), 3 deletions(-) create mode 100644 isolated/slack_test.go create mode 100644 vendor/mvdan.cc/xurls/v2/.gitattributes create mode 100644 vendor/mvdan.cc/xurls/v2/.gitignore create mode 100644 vendor/mvdan.cc/xurls/v2/LICENSE create mode 100644 vendor/mvdan.cc/xurls/v2/README.md create mode 100644 vendor/mvdan.cc/xurls/v2/schemes.go create mode 100644 vendor/mvdan.cc/xurls/v2/tlds.go create mode 100644 vendor/mvdan.cc/xurls/v2/tlds_pseudo.go create mode 100644 vendor/mvdan.cc/xurls/v2/unicode.go create mode 100644 vendor/mvdan.cc/xurls/v2/xurls.go diff --git a/go.mod b/go.mod index d1b969b..84b77e9 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/slack-go/slack v0.6.3 github.com/urfave/cli/v2 v2.1.1 gopkg.in/yaml.v2 v2.2.8 + mvdan.cc/xurls/v2 v2.5.0 ) require ( diff --git a/go.sum b/go.sum index c74c7f3..4c8412d 100644 --- a/go.sum +++ b/go.sum @@ -54,3 +54,5 @@ gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +mvdan.cc/xurls/v2 v2.5.0 h1:lyBNOm8Wo71UknhUs4QTFUNNMyxy2JEIaKKo0RWOh+8= 
+mvdan.cc/xurls/v2 v2.5.0/go.mod h1:yQgaGQ1rFtJUzkmKiHYSSfuQxqfYmd//X6PxvholpeE= diff --git a/isolated/slack_test.go b/isolated/slack_test.go new file mode 100644 index 0000000..3a63db5 --- /dev/null +++ b/isolated/slack_test.go @@ -0,0 +1,23 @@ +package isolated + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/slack-go/slack" +) + +func TestTest(t *testing.T) { + obj := slack.NewTextBlockObject("plain_text", "here is a url: golang.org", false, false) + block := slack.NewSectionBlock(obj, nil, nil) + msg := slack.NewBlockMessage(block) + + b, err := json.MarshalIndent(msg, "", " ") + if err != nil { + t.Fatal(err) + } + + fmt.Println(string(b)) + +} diff --git a/slack.go b/slack.go index 39873b3..4b19904 100644 --- a/slack.go +++ b/slack.go @@ -4,18 +4,20 @@ import ( "context" "errors" "fmt" + "net/url" "strings" "time" + "github.com/PagerDuty/go-pagerduty" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/matryer/try" - - "github.com/PagerDuty/go-pagerduty" - "github.com/slack-go/slack" + "mvdan.cc/xurls/v2" ) +var rxRelaxed = xurls.Relaxed() + type slackUsers []slackUser func (users slackUsers) findByPDUser(pdUser pagerduty.User) *slackUser { @@ -276,6 +278,91 @@ func (metaClient *slackMetaClient) updateTopic(ctx context.Context, channelID st return nil } +func escapeText(txt string) (string, error) { + obj := slack.NewTextBlockObject("mrkdown", "foobarbar", false, false) + block := slack.NewSectionBlock(obj, nil, nil) + msg := slack.NewBlockMessage(block) + msg. 
+ + remainingTxt := escapeAmpersands(txt) + + for { + idxPair := rxRelaxed.FindStringIndex(remainingTxt) + if idxPair == nil { + break + } + + startIdx := idxPair[0] + if startIdx > 0 && startIdx != '<' && startIdx != '|' { + continue + } + + rxRelaxed.ReplaceAllString() + + match := remainingTxt[startIdx:indexPair[1]] + parsedURL, err := url.Parse(match) + if err != nil { + return "", fmt.Errorf("failed to parse URL %q: %s", match, err) + } + if parsedURL.Scheme == "" { + strings.ReplaceAll(txt, match, fmt.Sprintf("", match, match)) + } else { + strings.ReplaceAll(txt, match, fmt.Sprintf("", match, match)) + } + } + +} + +func escapeAmpersands(txt string) string { + parts := strings.Split(txt, "&") + if len(parts) == 1 { + return txt + } + + const ampSuffix = "amp;" + + var b strings.Builder + for i, part := range parts { + b.WriteString(part) + if i == len(parts)-1 { + break + } + b.WriteRune('&') + nextIdx := i + 1 + if strings.HasPrefix(parts[nextIdx], ampSuffix) { + b.WriteString(ampSuffix) + parts[nextIdx] = strings.TrimPrefix(parts[nextIdx], ampSuffix) + } + } + + return b.String() +} + +func isEncodedURL(txt string, pair []int) bool { + left := pair[0] - 1 + if left == 0 { + return false + } + right := pair[1] + 1 + if right == len(txt)-1 { + return false + } + return txt[left] == '<' && txt[right] == '>' +} + +func textMatchesAt(txt string, i int, substr string) bool { + if len(txt) < i { + return false + } + + suffix := txt[i:] + lenSubstr := len(substr) + if len(suffix) < lenSubstr { + return false + } + return suffix[:lenSubstr] == substr +} + func createSlackUser(apiUser slack.User) slackUser { return slackUser{ id: apiUser.ID, diff --git a/vendor/modules.txt b/vendor/modules.txt index ed08eb8..20f276d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -48,3 +48,6 @@ golang.org/x/xerrors/internal # gopkg.in/yaml.v2 v2.2.8 ## explicit gopkg.in/yaml.v2 +# mvdan.cc/xurls/v2 v2.5.0 +## explicit; go 1.19 +mvdan.cc/xurls/v2 diff --git 
a/vendor/mvdan.cc/xurls/v2/.gitattributes b/vendor/mvdan.cc/xurls/v2/.gitattributes new file mode 100644 index 0000000..6f95229 --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/.gitattributes @@ -0,0 +1,2 @@ +# To prevent CRLF breakages on Windows for fragile files, like testdata. +* -text diff --git a/vendor/mvdan.cc/xurls/v2/.gitignore b/vendor/mvdan.cc/xurls/v2/.gitignore new file mode 100644 index 0000000..663c8cb --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/.gitignore @@ -0,0 +1,3 @@ +cmd/xurls/xurls +generate/tldsgen/tldsgen +generate/regexgen/regexgen diff --git a/vendor/mvdan.cc/xurls/v2/LICENSE b/vendor/mvdan.cc/xurls/v2/LICENSE new file mode 100644 index 0000000..7d71d51 --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2015, Daniel Martí. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/mvdan.cc/xurls/v2/README.md b/vendor/mvdan.cc/xurls/v2/README.md new file mode 100644 index 0000000..3065d13 --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/README.md @@ -0,0 +1,37 @@ +# xurls + +[![Go Reference](https://pkg.go.dev/badge/mvdan.cc/xurls/v2.svg)](https://pkg.go.dev/mvdan.cc/xurls/v2) + +Extract urls from text using regular expressions. Requires Go 1.19 or later. + +```go +import "mvdan.cc/xurls/v2" + +func main() { + rxRelaxed := xurls.Relaxed() + rxRelaxed.FindString("Do gophers live in golang.org?") // "golang.org" + rxRelaxed.FindString("This string does not have a URL") // "" + + rxStrict := xurls.Strict() + rxStrict.FindAllString("must have scheme: http://foo.com/.", -1) // []string{"http://foo.com/"} + rxStrict.FindAllString("no scheme, no match: foo.com", -1) // []string{} +} +``` + +Since API is centered around [regexp.Regexp](https://golang.org/pkg/regexp/#Regexp), +many other methods are available, such as finding the [byte indexes](https://golang.org/pkg/regexp/#Regexp.FindAllIndex) +for all matches. + +The regular expressions are compiled when the API is first called. +Any subsequent calls will use the same regular expression pointers. + +#### cmd/xurls + +To install the tool globally: + + go install mvdan.cc/xurls/v2/cmd/xurls@latest + +```shell +$ echo "Do gophers live in http://golang.org?" 
| xurls +http://golang.org +``` diff --git a/vendor/mvdan.cc/xurls/v2/schemes.go b/vendor/mvdan.cc/xurls/v2/schemes.go new file mode 100644 index 0000000..3be6a19 --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/schemes.go @@ -0,0 +1,375 @@ +// Generated by schemesgen + +package xurls + +// Schemes is a sorted list of all IANA assigned schemes. +// +// Source: https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv +var Schemes = []string{ + `aaa`, + `aaas`, + `about`, + `acap`, + `acct`, + `acd`, + `acr`, + `adiumxtra`, + `adt`, + `afp`, + `afs`, + `aim`, + `amss`, + `android`, + `appdata`, + `apt`, + `ar`, + `ark`, + `attachment`, + `aw`, + `barion`, + `bb`, + `beshare`, + `bitcoin`, + `bitcoincash`, + `blob`, + `bolo`, + `browserext`, + `cabal`, + `calculator`, + `callto`, + `cap`, + `cast`, + `casts`, + `chrome`, + `chrome-extension`, + `cid`, + `coap`, + `coap+tcp`, + `coap+ws`, + `coaps`, + `coaps+tcp`, + `coaps+ws`, + `com-eventbrite-attendee`, + `content`, + `content-type`, + `crid`, + `cstr`, + `cvs`, + `dab`, + `dat`, + `data`, + `dav`, + `diaspora`, + `dict`, + `did`, + `dis`, + `dlna-playcontainer`, + `dlna-playsingle`, + `dns`, + `dntp`, + `doi`, + `dpp`, + `drm`, + `drop`, + `dtmi`, + `dtn`, + `dvb`, + `dvx`, + `dweb`, + `ed2k`, + `eid`, + `elsi`, + `embedded`, + `ens`, + `ethereum`, + `example`, + `facetime`, + `fax`, + `feed`, + `feedready`, + `fido`, + `file`, + `filesystem`, + `finger`, + `first-run-pen-experience`, + `fish`, + `fm`, + `ftp`, + `fuchsia-pkg`, + `geo`, + `gg`, + `git`, + `gitoid`, + `gizmoproject`, + `go`, + `gopher`, + `graph`, + `grd`, + `gtalk`, + `h323`, + `ham`, + `hcap`, + `hcp`, + `http`, + `https`, + `hxxp`, + `hxxps`, + `hydrazone`, + `hyper`, + `iax`, + `icap`, + `icon`, + `im`, + `imap`, + `info`, + `iotdisco`, + `ipfs`, + `ipn`, + `ipns`, + `ipp`, + `ipps`, + `irc`, + `irc6`, + `ircs`, + `iris`, + `iris.beep`, + `iris.lwz`, + `iris.xpc`, + `iris.xpcs`, + `isostore`, + `itms`, + `jabber`, + `jar`, + `jms`, + 
`keyparc`, + `lastfm`, + `lbry`, + `ldap`, + `ldaps`, + `leaptofrogans`, + `lorawan`, + `lpa`, + `lvlt`, + `magnet`, + `mailserver`, + `mailto`, + `maps`, + `market`, + `matrix`, + `message`, + `microsoft.windows.camera`, + `microsoft.windows.camera.multipicker`, + `microsoft.windows.camera.picker`, + `mid`, + `mms`, + `modem`, + `mongodb`, + `moz`, + `ms-access`, + `ms-appinstaller`, + `ms-browser-extension`, + `ms-calculator`, + `ms-drive-to`, + `ms-enrollment`, + `ms-excel`, + `ms-eyecontrolspeech`, + `ms-gamebarservices`, + `ms-gamingoverlay`, + `ms-getoffice`, + `ms-help`, + `ms-infopath`, + `ms-inputapp`, + `ms-lockscreencomponent-config`, + `ms-media-stream-id`, + `ms-meetnow`, + `ms-mixedrealitycapture`, + `ms-mobileplans`, + `ms-newsandinterests`, + `ms-officeapp`, + `ms-people`, + `ms-project`, + `ms-powerpoint`, + `ms-publisher`, + `ms-remotedesktop-launch`, + `ms-restoretabcompanion`, + `ms-screenclip`, + `ms-screensketch`, + `ms-search`, + `ms-search-repair`, + `ms-secondary-screen-controller`, + `ms-secondary-screen-setup`, + `ms-settings`, + `ms-settings-airplanemode`, + `ms-settings-bluetooth`, + `ms-settings-camera`, + `ms-settings-cellular`, + `ms-settings-cloudstorage`, + `ms-settings-connectabledevices`, + `ms-settings-displays-topology`, + `ms-settings-emailandaccounts`, + `ms-settings-language`, + `ms-settings-location`, + `ms-settings-lock`, + `ms-settings-nfctransactions`, + `ms-settings-notifications`, + `ms-settings-power`, + `ms-settings-privacy`, + `ms-settings-proximity`, + `ms-settings-screenrotation`, + `ms-settings-wifi`, + `ms-settings-workplace`, + `ms-spd`, + `ms-stickers`, + `ms-sttoverlay`, + `ms-transit-to`, + `ms-useractivityset`, + `ms-virtualtouchpad`, + `ms-visio`, + `ms-walk-to`, + `ms-whiteboard`, + `ms-whiteboard-cmd`, + `ms-word`, + `msnim`, + `msrp`, + `msrps`, + `mss`, + `mt`, + `mtqp`, + `mumble`, + `mupdate`, + `mvn`, + `news`, + `nfs`, + `ni`, + `nih`, + `nntp`, + `notes`, + `num`, + `ocf`, + `oid`, + `onenote`, + 
`onenote-cmd`, + `opaquelocktoken`, + `openpgp4fpr`, + `otpauth`, + `p1`, + `pack`, + `palm`, + `paparazzi`, + `payment`, + `payto`, + `pkcs11`, + `platform`, + `pop`, + `pres`, + `prospero`, + `proxy`, + `pwid`, + `psyc`, + `pttp`, + `qb`, + `query`, + `quic-transport`, + `redis`, + `rediss`, + `reload`, + `res`, + `resource`, + `rmi`, + `rsync`, + `rtmfp`, + `rtmp`, + `rtsp`, + `rtsps`, + `rtspu`, + `sarif`, + `secondlife`, + `secret-token`, + `service`, + `session`, + `sftp`, + `sgn`, + `shc`, + `sieve`, + `simpleledger`, + `simplex`, + `sip`, + `sips`, + `skype`, + `smb`, + `smp`, + `sms`, + `smtp`, + `snews`, + `snmp`, + `soap.beep`, + `soap.beeps`, + `soldat`, + `spiffe`, + `spotify`, + `ssb`, + `ssh`, + `starknet`, + `steam`, + `stun`, + `stuns`, + `submit`, + `svn`, + `swh`, + `swid`, + `swidpath`, + `tag`, + `taler`, + `teamspeak`, + `tel`, + `teliaeid`, + `telnet`, + `tftp`, + `things`, + `thismessage`, + `tip`, + `tn3270`, + `tool`, + `turn`, + `turns`, + `tv`, + `udp`, + `unreal`, + `upt`, + `urn`, + `ut2004`, + `uuid-in-package`, + `v-event`, + `vemmi`, + `ventrilo`, + `ves`, + `videotex`, + `vnc`, + `view-source`, + `vscode`, + `vscode-insiders`, + `vsls`, + `w3`, + `wais`, + `web3`, + `wcr`, + `webcal`, + `web+ap`, + `wifi`, + `wpid`, + `ws`, + `wss`, + `wtai`, + `wyciwyg`, + `xcon`, + `xcon-userid`, + `xfire`, + `xmlrpc.beep`, + `xmlrpc.beeps`, + `xmpp`, + `xri`, + `ymsgr`, + `z39.50`, + `z39.50r`, + `z39.50s`, +} diff --git a/vendor/mvdan.cc/xurls/v2/tlds.go b/vendor/mvdan.cc/xurls/v2/tlds.go new file mode 100644 index 0000000..51e66f1 --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/tlds.go @@ -0,0 +1,1500 @@ +// Generated by tldsgen + +package xurls + +// TLDs is a sorted list of all public top-level domains. 
+// +// Sources: +// - https://data.iana.org/TLD/tlds-alpha-by-domain.txt +// - https://publicsuffix.org/list/effective_tld_names.dat +var TLDs = []string{ + `aaa`, + `aarp`, + `abarth`, + `abb`, + `abbott`, + `abbvie`, + `abc`, + `able`, + `abogado`, + `abudhabi`, + `ac`, + `academy`, + `accenture`, + `accountant`, + `accountants`, + `aco`, + `actor`, + `ad`, + `ads`, + `adult`, + `ae`, + `aeg`, + `aero`, + `aetna`, + `af`, + `afl`, + `africa`, + `ag`, + `agakhan`, + `agency`, + `ai`, + `aig`, + `airbus`, + `airforce`, + `airtel`, + `akdn`, + `al`, + `alfaromeo`, + `alibaba`, + `alipay`, + `allfinanz`, + `allstate`, + `ally`, + `alsace`, + `alstom`, + `am`, + `amazon`, + `americanexpress`, + `americanfamily`, + `amex`, + `amfam`, + `amica`, + `amsterdam`, + `analytics`, + `android`, + `anquan`, + `anz`, + `ao`, + `aol`, + `apartments`, + `app`, + `apple`, + `aq`, + `aquarelle`, + `ar`, + `arab`, + `aramco`, + `archi`, + `army`, + `arpa`, + `art`, + `arte`, + `as`, + `asda`, + `asia`, + `associates`, + `at`, + `athleta`, + `attorney`, + `au`, + `auction`, + `audi`, + `audible`, + `audio`, + `auspost`, + `author`, + `auto`, + `autos`, + `avianca`, + `aw`, + `aws`, + `ax`, + `axa`, + `az`, + `azure`, + `ba`, + `baby`, + `baidu`, + `banamex`, + `bananarepublic`, + `band`, + `bank`, + `bar`, + `barcelona`, + `barclaycard`, + `barclays`, + `barefoot`, + `bargains`, + `baseball`, + `basketball`, + `bauhaus`, + `bayern`, + `bb`, + `bbc`, + `bbt`, + `bbva`, + `bcg`, + `bcn`, + `bd`, + `be`, + `beats`, + `beauty`, + `beer`, + `bentley`, + `berlin`, + `best`, + `bestbuy`, + `bet`, + `bf`, + `bg`, + `bh`, + `bharti`, + `bi`, + `bible`, + `bid`, + `bike`, + `bing`, + `bingo`, + `bio`, + `biz`, + `bj`, + `black`, + `blackfriday`, + `blockbuster`, + `blog`, + `bloomberg`, + `blue`, + `bm`, + `bms`, + `bmw`, + `bn`, + `bnpparibas`, + `bo`, + `boats`, + `boehringer`, + `bofa`, + `bom`, + `bond`, + `boo`, + `book`, + `booking`, + `bosch`, + `bostik`, + `boston`, + `bot`, + 
`boutique`, + `box`, + `br`, + `bradesco`, + `bridgestone`, + `broadway`, + `broker`, + `brother`, + `brussels`, + `bs`, + `bt`, + `build`, + `builders`, + `business`, + `buy`, + `buzz`, + `bv`, + `bw`, + `by`, + `bz`, + `bzh`, + `ca`, + `cab`, + `cafe`, + `cal`, + `call`, + `calvinklein`, + `cam`, + `camera`, + `camp`, + `canon`, + `capetown`, + `capital`, + `capitalone`, + `car`, + `caravan`, + `cards`, + `care`, + `career`, + `careers`, + `cars`, + `casa`, + `case`, + `cash`, + `casino`, + `cat`, + `catering`, + `catholic`, + `cba`, + `cbn`, + `cbre`, + `cbs`, + `cc`, + `cd`, + `center`, + `ceo`, + `cern`, + `cf`, + `cfa`, + `cfd`, + `cg`, + `ch`, + `chanel`, + `channel`, + `charity`, + `chase`, + `chat`, + `cheap`, + `chintai`, + `christmas`, + `chrome`, + `church`, + `ci`, + `cipriani`, + `circle`, + `cisco`, + `citadel`, + `citi`, + `citic`, + `city`, + `cityeats`, + `ck`, + `cl`, + `claims`, + `cleaning`, + `click`, + `clinic`, + `clinique`, + `clothing`, + `cloud`, + `club`, + `clubmed`, + `cm`, + `cn`, + `co`, + `coach`, + `codes`, + `coffee`, + `college`, + `cologne`, + `com`, + `comcast`, + `commbank`, + `community`, + `company`, + `compare`, + `computer`, + `comsec`, + `condos`, + `construction`, + `consulting`, + `contact`, + `contractors`, + `cooking`, + `cookingchannel`, + `cool`, + `coop`, + `corsica`, + `country`, + `coupon`, + `coupons`, + `courses`, + `cpa`, + `cr`, + `credit`, + `creditcard`, + `creditunion`, + `cricket`, + `crown`, + `crs`, + `cruise`, + `cruises`, + `cu`, + `cuisinella`, + `cv`, + `cw`, + `cx`, + `cy`, + `cymru`, + `cyou`, + `cz`, + `dabur`, + `dad`, + `dance`, + `data`, + `date`, + `dating`, + `datsun`, + `day`, + `dclk`, + `dds`, + `de`, + `deal`, + `dealer`, + `deals`, + `degree`, + `delivery`, + `dell`, + `deloitte`, + `delta`, + `democrat`, + `dental`, + `dentist`, + `desi`, + `design`, + `dev`, + `dhl`, + `diamonds`, + `diet`, + `digital`, + `direct`, + `directory`, + `discount`, + `discover`, + `dish`, + `diy`, + `dj`, 
+ `dk`, + `dm`, + `dnp`, + `do`, + `docs`, + `doctor`, + `dog`, + `domains`, + `dot`, + `download`, + `drive`, + `dtv`, + `dubai`, + `dunlop`, + `dupont`, + `durban`, + `dvag`, + `dvr`, + `dz`, + `earth`, + `eat`, + `ec`, + `eco`, + `edeka`, + `edu`, + `education`, + `ee`, + `eg`, + `email`, + `emerck`, + `energy`, + `engineer`, + `engineering`, + `enterprises`, + `epson`, + `equipment`, + `er`, + `ericsson`, + `erni`, + `es`, + `esq`, + `estate`, + `et`, + `etisalat`, + `eu`, + `eurovision`, + `eus`, + `events`, + `exchange`, + `expert`, + `exposed`, + `express`, + `extraspace`, + `fage`, + `fail`, + `fairwinds`, + `faith`, + `family`, + `fan`, + `fans`, + `farm`, + `farmers`, + `fashion`, + `fast`, + `fedex`, + `feedback`, + `ferrari`, + `ferrero`, + `fi`, + `fiat`, + `fidelity`, + `fido`, + `film`, + `final`, + `finance`, + `financial`, + `fire`, + `firestone`, + `firmdale`, + `fish`, + `fishing`, + `fit`, + `fitness`, + `fj`, + `fk`, + `flickr`, + `flights`, + `flir`, + `florist`, + `flowers`, + `fly`, + `fm`, + `fo`, + `foo`, + `food`, + `foodnetwork`, + `football`, + `ford`, + `forex`, + `forsale`, + `forum`, + `foundation`, + `fox`, + `fr`, + `free`, + `fresenius`, + `frl`, + `frogans`, + `frontdoor`, + `frontier`, + `ftr`, + `fujitsu`, + `fun`, + `fund`, + `furniture`, + `futbol`, + `fyi`, + `ga`, + `gal`, + `gallery`, + `gallo`, + `gallup`, + `game`, + `games`, + `gap`, + `garden`, + `gay`, + `gb`, + `gbiz`, + `gd`, + `gdn`, + `ge`, + `gea`, + `gent`, + `genting`, + `george`, + `gf`, + `gg`, + `ggee`, + `gh`, + `gi`, + `gift`, + `gifts`, + `gives`, + `giving`, + `gl`, + `glass`, + `gle`, + `global`, + `globo`, + `gm`, + `gmail`, + `gmbh`, + `gmo`, + `gmx`, + `gn`, + `godaddy`, + `gold`, + `goldpoint`, + `golf`, + `goo`, + `goodyear`, + `goog`, + `google`, + `gop`, + `got`, + `gov`, + `gp`, + `gq`, + `gr`, + `grainger`, + `graphics`, + `gratis`, + `green`, + `gripe`, + `grocery`, + `group`, + `gs`, + `gt`, + `gu`, + `guardian`, + `gucci`, + `guge`, + 
`guide`, + `guitars`, + `guru`, + `gw`, + `gy`, + `hair`, + `hamburg`, + `hangout`, + `haus`, + `hbo`, + `hdfc`, + `hdfcbank`, + `health`, + `healthcare`, + `help`, + `helsinki`, + `here`, + `hermes`, + `hgtv`, + `hiphop`, + `hisamitsu`, + `hitachi`, + `hiv`, + `hk`, + `hkt`, + `hm`, + `hn`, + `hockey`, + `holdings`, + `holiday`, + `homedepot`, + `homegoods`, + `homes`, + `homesense`, + `honda`, + `horse`, + `hospital`, + `host`, + `hosting`, + `hot`, + `hoteles`, + `hotels`, + `hotmail`, + `house`, + `how`, + `hr`, + `hsbc`, + `ht`, + `hu`, + `hughes`, + `hyatt`, + `hyundai`, + `ibm`, + `icbc`, + `ice`, + `icu`, + `id`, + `ie`, + `ieee`, + `ifm`, + `ikano`, + `il`, + `im`, + `imamat`, + `imdb`, + `immo`, + `immobilien`, + `in`, + `inc`, + `industries`, + `infiniti`, + `info`, + `ing`, + `ink`, + `institute`, + `insurance`, + `insure`, + `int`, + `international`, + `intuit`, + `investments`, + `io`, + `ipiranga`, + `iq`, + `ir`, + `irish`, + `is`, + `ismaili`, + `ist`, + `istanbul`, + `it`, + `itau`, + `itv`, + `jaguar`, + `java`, + `jcb`, + `je`, + `jeep`, + `jetzt`, + `jewelry`, + `jio`, + `jll`, + `jm`, + `jmp`, + `jnj`, + `jo`, + `jobs`, + `joburg`, + `jot`, + `joy`, + `jp`, + `jpmorgan`, + `jprs`, + `juegos`, + `juniper`, + `kaufen`, + `kddi`, + `ke`, + `kerryhotels`, + `kerrylogistics`, + `kerryproperties`, + `kfh`, + `kg`, + `kh`, + `ki`, + `kia`, + `kids`, + `kim`, + `kinder`, + `kindle`, + `kitchen`, + `kiwi`, + `km`, + `kn`, + `koeln`, + `komatsu`, + `kosher`, + `kp`, + `kpmg`, + `kpn`, + `kr`, + `krd`, + `kred`, + `kuokgroup`, + `kw`, + `ky`, + `kyoto`, + `kz`, + `la`, + `lacaixa`, + `lamborghini`, + `lamer`, + `lancaster`, + `lancia`, + `land`, + `landrover`, + `lanxess`, + `lasalle`, + `lat`, + `latino`, + `latrobe`, + `law`, + `lawyer`, + `lb`, + `lc`, + `lds`, + `lease`, + `leclerc`, + `lefrak`, + `legal`, + `lego`, + `lexus`, + `lgbt`, + `li`, + `lidl`, + `life`, + `lifeinsurance`, + `lifestyle`, + `lighting`, + `like`, + `lilly`, + `limited`, + 
`limo`, + `lincoln`, + `link`, + `lipsy`, + `live`, + `living`, + `lk`, + `llc`, + `llp`, + `loan`, + `loans`, + `locker`, + `locus`, + `lol`, + `london`, + `lotte`, + `lotto`, + `love`, + `lpl`, + `lplfinancial`, + `lr`, + `ls`, + `lt`, + `ltd`, + `ltda`, + `lu`, + `lundbeck`, + `luxe`, + `luxury`, + `lv`, + `ly`, + `ma`, + `madrid`, + `maif`, + `maison`, + `makeup`, + `man`, + `management`, + `mango`, + `map`, + `market`, + `marketing`, + `markets`, + `marriott`, + `marshalls`, + `maserati`, + `mattel`, + `mba`, + `mc`, + `mckinsey`, + `md`, + `me`, + `med`, + `media`, + `meet`, + `melbourne`, + `meme`, + `memorial`, + `men`, + `menu`, + `merckmsd`, + `mg`, + `mh`, + `miami`, + `microsoft`, + `mil`, + `mini`, + `mint`, + `mit`, + `mitsubishi`, + `mk`, + `ml`, + `mlb`, + `mls`, + `mm`, + `mma`, + `mn`, + `mo`, + `mobi`, + `mobile`, + `moda`, + `moe`, + `moi`, + `mom`, + `monash`, + `money`, + `monster`, + `mormon`, + `mortgage`, + `moscow`, + `moto`, + `motorcycles`, + `mov`, + `movie`, + `mp`, + `mq`, + `mr`, + `ms`, + `msd`, + `mt`, + `mtn`, + `mtr`, + `mu`, + `museum`, + `music`, + `mutual`, + `mv`, + `mw`, + `mx`, + `my`, + `mz`, + `na`, + `nab`, + `nagoya`, + `name`, + `natura`, + `navy`, + `nba`, + `nc`, + `ne`, + `nec`, + `net`, + `netbank`, + `netflix`, + `network`, + `neustar`, + `new`, + `news`, + `next`, + `nextdirect`, + `nexus`, + `nf`, + `nfl`, + `ng`, + `ngo`, + `nhk`, + `ni`, + `nico`, + `nike`, + `nikon`, + `ninja`, + `nissan`, + `nissay`, + `nl`, + `no`, + `nokia`, + `northwesternmutual`, + `norton`, + `now`, + `nowruz`, + `nowtv`, + `np`, + `nr`, + `nra`, + `nrw`, + `ntt`, + `nu`, + `nyc`, + `nz`, + `obi`, + `observer`, + `office`, + `okinawa`, + `olayan`, + `olayangroup`, + `oldnavy`, + `ollo`, + `om`, + `omega`, + `one`, + `ong`, + `onion`, + `onl`, + `online`, + `ooo`, + `open`, + `oracle`, + `orange`, + `org`, + `organic`, + `origins`, + `osaka`, + `otsuka`, + `ott`, + `ovh`, + `pa`, + `page`, + `panasonic`, + `paris`, + `pars`, + 
`partners`, + `parts`, + `party`, + `passagens`, + `pay`, + `pccw`, + `pe`, + `pet`, + `pf`, + `pfizer`, + `pg`, + `ph`, + `pharmacy`, + `phd`, + `philips`, + `phone`, + `photo`, + `photography`, + `photos`, + `physio`, + `pics`, + `pictet`, + `pictures`, + `pid`, + `pin`, + `ping`, + `pink`, + `pioneer`, + `pizza`, + `pk`, + `pl`, + `place`, + `play`, + `playstation`, + `plumbing`, + `plus`, + `pm`, + `pn`, + `pnc`, + `pohl`, + `poker`, + `politie`, + `porn`, + `post`, + `pr`, + `pramerica`, + `praxi`, + `press`, + `prime`, + `pro`, + `prod`, + `productions`, + `prof`, + `progressive`, + `promo`, + `properties`, + `property`, + `protection`, + `pru`, + `prudential`, + `ps`, + `pt`, + `pub`, + `pw`, + `pwc`, + `py`, + `qa`, + `qpon`, + `quebec`, + `quest`, + `racing`, + `radio`, + `re`, + `read`, + `realestate`, + `realtor`, + `realty`, + `recipes`, + `red`, + `redstone`, + `redumbrella`, + `rehab`, + `reise`, + `reisen`, + `reit`, + `reliance`, + `ren`, + `rent`, + `rentals`, + `repair`, + `report`, + `republican`, + `rest`, + `restaurant`, + `review`, + `reviews`, + `rexroth`, + `rich`, + `richardli`, + `ricoh`, + `ril`, + `rio`, + `rip`, + `ro`, + `rocher`, + `rocks`, + `rodeo`, + `rogers`, + `room`, + `rs`, + `rsvp`, + `ru`, + `rugby`, + `ruhr`, + `run`, + `rw`, + `rwe`, + `ryukyu`, + `sa`, + `saarland`, + `safe`, + `safety`, + `sakura`, + `sale`, + `salon`, + `samsclub`, + `samsung`, + `sandvik`, + `sandvikcoromant`, + `sanofi`, + `sap`, + `sarl`, + `sas`, + `save`, + `saxo`, + `sb`, + `sbi`, + `sbs`, + `sc`, + `sca`, + `scb`, + `schaeffler`, + `schmidt`, + `scholarships`, + `school`, + `schule`, + `schwarz`, + `science`, + `scot`, + `sd`, + `se`, + `search`, + `seat`, + `secure`, + `security`, + `seek`, + `select`, + `sener`, + `services`, + `seven`, + `sew`, + `sex`, + `sexy`, + `sfr`, + `sg`, + `sh`, + `shangrila`, + `sharp`, + `shaw`, + `shell`, + `shia`, + `shiksha`, + `shoes`, + `shop`, + `shopping`, + `shouji`, + `show`, + `showtime`, + `si`, + `silk`, 
+ `sina`, + `singles`, + `site`, + `sj`, + `sk`, + `ski`, + `skin`, + `sky`, + `skype`, + `sl`, + `sling`, + `sm`, + `smart`, + `smile`, + `sn`, + `sncf`, + `so`, + `soccer`, + `social`, + `softbank`, + `software`, + `sohu`, + `solar`, + `solutions`, + `song`, + `sony`, + `soy`, + `spa`, + `space`, + `sport`, + `spot`, + `sr`, + `srl`, + `ss`, + `st`, + `stada`, + `staples`, + `star`, + `statebank`, + `statefarm`, + `stc`, + `stcgroup`, + `stockholm`, + `storage`, + `store`, + `stream`, + `studio`, + `study`, + `style`, + `su`, + `sucks`, + `supplies`, + `supply`, + `support`, + `surf`, + `surgery`, + `suzuki`, + `sv`, + `swatch`, + `swiss`, + `sx`, + `sy`, + `sydney`, + `systems`, + `sz`, + `tab`, + `taipei`, + `talk`, + `taobao`, + `target`, + `tatamotors`, + `tatar`, + `tattoo`, + `tax`, + `taxi`, + `tc`, + `tci`, + `td`, + `tdk`, + `team`, + `tech`, + `technology`, + `tel`, + `temasek`, + `tennis`, + `teva`, + `tf`, + `tg`, + `th`, + `thd`, + `theater`, + `theatre`, + `tiaa`, + `tickets`, + `tienda`, + `tiffany`, + `tips`, + `tires`, + `tirol`, + `tj`, + `tjmaxx`, + `tjx`, + `tk`, + `tkmaxx`, + `tl`, + `tm`, + `tmall`, + `tn`, + `to`, + `today`, + `tokyo`, + `tools`, + `top`, + `toray`, + `toshiba`, + `total`, + `tours`, + `town`, + `toyota`, + `toys`, + `tr`, + `trade`, + `trading`, + `training`, + `travel`, + `travelchannel`, + `travelers`, + `travelersinsurance`, + `trust`, + `trv`, + `tt`, + `tube`, + `tui`, + `tunes`, + `tushu`, + `tv`, + `tvs`, + `tw`, + `tz`, + `ua`, + `ubank`, + `ubs`, + `ug`, + `uk`, + `unicom`, + `university`, + `uno`, + `uol`, + `ups`, + `us`, + `uy`, + `uz`, + `va`, + `vacations`, + `vana`, + `vanguard`, + `vc`, + `ve`, + `vegas`, + `ventures`, + `verisign`, + `vermögensberater`, + `vermögensberatung`, + `versicherung`, + `vet`, + `vg`, + `vi`, + `viajes`, + `video`, + `vig`, + `viking`, + `villas`, + `vin`, + `vip`, + `virgin`, + `visa`, + `vision`, + `viva`, + `vivo`, + `vlaanderen`, + `vn`, + `vodka`, + `volkswagen`, + `volvo`, + 
`vote`, + `voting`, + `voto`, + `voyage`, + `vu`, + `vuelos`, + `wales`, + `walmart`, + `walter`, + `wang`, + `wanggou`, + `watch`, + `watches`, + `weather`, + `weatherchannel`, + `webcam`, + `weber`, + `website`, + `wed`, + `wedding`, + `weibo`, + `weir`, + `wf`, + `whoswho`, + `wien`, + `wiki`, + `williamhill`, + `win`, + `windows`, + `wine`, + `winners`, + `wme`, + `wolterskluwer`, + `woodside`, + `work`, + `works`, + `world`, + `wow`, + `ws`, + `wtc`, + `wtf`, + `xbox`, + `xerox`, + `xfinity`, + `xihuan`, + `xin`, + `xxx`, + `xyz`, + `yachts`, + `yahoo`, + `yamaxun`, + `yandex`, + `ye`, + `yodobashi`, + `yoga`, + `yokohama`, + `you`, + `youtube`, + `yt`, + `yun`, + `za`, + `zappos`, + `zara`, + `zero`, + `zip`, + `zm`, + `zone`, + `zuerich`, + `zw`, + `ελ`, + `ευ`, + `бг`, + `бел`, + `дети`, + `ею`, + `католик`, + `ком`, + `мкд`, + `мон`, + `москва`, + `онлайн`, + `орг`, + `рус`, + `рф`, + `сайт`, + `срб`, + `укр`, + `қаз`, + `հայ`, + `ישראל`, + `קום`, + `ابوظبي`, + `اتصالات`, + `ارامكو`, + `الاردن`, + `البحرين`, + `الجزائر`, + `السعودية`, + `السعوديه`, + `السعودیة`, + `السعودیۃ`, + `العليان`, + `المغرب`, + `اليمن`, + `امارات`, + `ايران`, + `ایران`, + `بارت`, + `بازار`, + `بيتك`, + `بھارت`, + `تونس`, + `سودان`, + `سوريا`, + `سورية`, + `شبكة`, + `عراق`, + `عرب`, + `عمان`, + `فلسطين`, + `قطر`, + `كاثوليك`, + `كوم`, + `مصر`, + `مليسيا`, + `موريتانيا`, + `موقع`, + `همراه`, + `پاكستان`, + `پاکستان`, + `ڀارت`, + `कॉम`, + `नेट`, + `भारत`, + `भारतम्`, + `भारोत`, + `संगठन`, + `বাংলা`, + `ভারত`, + `ভাৰত`, + `ਭਾਰਤ`, + `ભારત`, + `ଭାରତ`, + `இந்தியா`, + `இலங்கை`, + `சிங்கப்பூர்`, + `భారత్`, + `ಭಾರತ`, + `ഭാരതം`, + `ලංකා`, + `คอม`, + `ไทย`, + `ລາວ`, + `გე`, + `みんな`, + `アマゾン`, + `クラウド`, + `グーグル`, + `コム`, + `ストア`, + `セール`, + `ファッション`, + `ポイント`, + `世界`, + `中信`, + `中国`, + `中國`, + `中文网`, + `亚马逊`, + `企业`, + `佛山`, + `信息`, + `健康`, + `八卦`, + `公司`, + `公益`, + `台湾`, + `台灣`, + `商城`, + `商店`, + `商标`, + `嘉里`, + `嘉里大酒店`, + `在线`, + `大拿`, + `天主教`, + `娱乐`, + `家電`, + `广东`, + `微博`, + `慈善`, + `我爱你`, 
+ `手机`, + `招聘`, + `政务`, + `政府`, + `新加坡`, + `新闻`, + `时尚`, + `書籍`, + `机构`, + `淡马锡`, + `游戏`, + `澳門`, + `澳门`, + `点看`, + `移动`, + `组织机构`, + `网址`, + `网店`, + `网站`, + `网络`, + `联通`, + `臺灣`, + `谷歌`, + `购物`, + `通販`, + `集团`, + `電訊盈科`, + `飞利浦`, + `食品`, + `餐厅`, + `香格里拉`, + `香港`, + `닷넷`, + `닷컴`, + `삼성`, + `한국`, +} diff --git a/vendor/mvdan.cc/xurls/v2/tlds_pseudo.go b/vendor/mvdan.cc/xurls/v2/tlds_pseudo.go new file mode 100644 index 0000000..0f346ca --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/tlds_pseudo.go @@ -0,0 +1,24 @@ +// Copyright (c) 2015, Daniel Martí +// See LICENSE for licensing information + +package xurls + +// PseudoTLDs is a sorted list of some widely used unofficial TLDs. +// +// Sources: +// - https://en.wikipedia.org/wiki/Pseudo-top-level_domain +// - https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains +// - https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00 +// - https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml +var PseudoTLDs = []string{ + `bit`, // Namecoin + `example`, // Example domain + `exit`, // Tor exit node + `gnu`, // GNS by public key + `i2p`, // I2P network + `invalid`, // Invalid domain + `local`, // Local network + `localhost`, // Local network + `test`, // Test domain + `zkey`, // GNS domain name +} diff --git a/vendor/mvdan.cc/xurls/v2/unicode.go b/vendor/mvdan.cc/xurls/v2/unicode.go new file mode 100644 index 0000000..68944c9 --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/unicode.go @@ -0,0 +1,7 @@ +// Generated by unicodegen + +package xurls + +const allowedUcsChar = "¡-ᙿᚁ-\u1fff\u200b-‧\u202a-\u202e‰-⁞\u2060-\u2fff、-\ud7ff豈-\ufdcfﷰ-\uffef𐀀-\U0001fffd𠀀-\U0002fffd𰀀-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e1000-\U000efffd" + +const allowedUcsCharMinusPunc = 
"¢-¦¨-µ¸-¾À-ͽͿ-ΆΈ-ՙՠ-ֈ֊-ֿׁ-ׂׄ-ׇׅ-ײ\u05f5-؈؋؎-ؚ\u061c-\u061dؠ-٩ٮ-ۓە-ۿ\u070e-߶ߺ-\u082f\u083f-\u085d\u085f-ॣ०-९ॱ-ৼ৾-ੵ\u0a77-૯૱-\u0c76౸-ಃಅ-ෳ\u0df5-๎๐-๙\u0e5c-༃༓༕-྄྆-࿏࿕-࿘\u0fdb-၉ၐ-ჺჼ-፟፩-᙭ᙯ-ᙿᚁ-ᛪᛮ-᜴\u1737-៓ៗ៛-\u17ff᠆᠋-\u1943᥆-\u1a1dᨠ-\u1a9fᪧ\u1aae-᭙᭡-\u1bfbᰀ-\u1c3a᱀-ᱽᲀ-Ჿ\u1cc8-᳔᳒-\u1fff\u200b-―‘-‟\u202a-\u202e‹-›‿-⁀⁄-⁆⁒⁔\u2060-\u2cf8⳽ⴀ-ⵯ\u2d71-ⷿ⸂-⸅⸉-⸊⸌-⸍⸗⸚⸜-⸝⸠-⸩ⸯ⸺-⸻⹀⹂⹐-⹑\u2e53-\u2fff〄-〼〾-ヺー-ꓽꔀ-ꘌꘐ-꙲ꙴ-꙽ꙿ-꛱\ua6f8-ꡳ\ua878-\ua8cd꣐-ꣷꣻꣽ-꤭ꤰ-\ua95eꥠ-꧀\ua9ce-\ua9ddꧠ-\uaa5bꩠ-ꫝꫠ-ꫯꫲ-ꯪ꯬-\ud7ff豈-\ufdcfﷰ-️︗-︘\ufe1a-︯︱-﹄﹇-﹈﹍-﹏\ufe53﹘-﹞﹢-\ufe67﹩\ufe6c-\uff00$(-)+-0-9<->A-[]-⦆「-」ヲ-\uffef𐀀-\U000100ff\U00010103-\U0001039e𐎠-𐏏𐏑-\U0001056e\U00010570-\U00010856𐡘-\U0001091e𐤠-\U0001093e\U00010940-\U00010a4f\U00010a59-𐩾𐪀-𐫯\U00010af7-\U00010b38𐭀-\U00010b98\U00010b9d-𐽔\U00010f5a-𑁆\U0001104e-𑂺\U000110bd\U000110c2-𑄿𑅄-𑅳𑅶-𑇄𑇉-𑇌𑇎-𑇚𑇜\U000111e0-𑈷𑈾-𑊨\U000112aa-𑑊𑑐-𑑙\U0001145c𑑞-𑓅𑓇-𑗀𑗘-𑙀𑙄-\U0001165f\U0001166d-𑜻𑜿-𑠺\U0001183c-𑥃\U00011947-𑧡𑧣-𑨾𑩇-𑪙𑪝\U00011aa3-𑱀\U00011c46-\U00011c6f𑱲-𑻶\U00011ef9-\U00011ffe𒀀-\U0001246f\U00012475-\U00016a6d\U00016a70-𖫴\U00016af6-𖬶𖬼-𖭃𖭅-𖺖\U00016e9b-𖿡𖿣-𛲞\U0001bca0-𝪆\U0001da8c-\U0001e95d\U0001e960-\U0001fffd𠀀-\U0002fffd𰀀-\U0003fffd\U00040000-\U0004fffd\U00050000-\U0005fffd\U00060000-\U0006fffd\U00070000-\U0007fffd\U00080000-\U0008fffd\U00090000-\U0009fffd\U000a0000-\U000afffd\U000b0000-\U000bfffd\U000c0000-\U000cfffd\U000d0000-\U000dfffd\U000e1000-\U000efffd" diff --git a/vendor/mvdan.cc/xurls/v2/xurls.go b/vendor/mvdan.cc/xurls/v2/xurls.go new file mode 100644 index 0000000..4113b07 --- /dev/null +++ b/vendor/mvdan.cc/xurls/v2/xurls.go @@ -0,0 +1,200 @@ +// Copyright (c) 2015, Daniel Martí +// See LICENSE for licensing information + +// Package xurls extracts urls from plain text using regular expressions. 
+package xurls
+
+import (
+	"regexp"
+	"strings"
+	"sync"
+	"unicode/utf8"
+)
+
+//go:generate go run ./generate/tldsgen
+//go:generate go run ./generate/schemesgen
+//go:generate go run ./generate/unicodegen
+
+// Regular expression fragments from which the exported matchers are assembled.
+// Every fragment is a plain string; nothing is compiled until Strict, Relaxed
+// or StrictMatchingScheme is called.
+const (
+	// pathCont matches the part of a URL that follows the scheme or host.
+	// It is based on https://www.rfc-editor.org/rfc/rfc3987#section-2.2
+	// but does not match separators anywhere or most punctuation in final position.
+	// This avoids asymmetric matches in marked-up text: in
+	// `Did you know that **https://example.com/** is reserved for documentation?`
+	// the match ends at `https://example.com/` instead of swallowing the closing `**`.
+	//
+	// The "mid" character classes may appear anywhere in the path, while the
+	// "end" classes are the subset accepted as its final character.
+	// Parentheses, brackets and braces are not part of either class; they are
+	// only matched as balanced pairs (wellParen, wellBrack, wellBrace), with at
+	// most one further pair nested inside.
+	unreservedChar      = `a-zA-Z0-9\-._~`
+	endUnreservedChar   = `a-zA-Z0-9\-_~`
+	midSubDelimChar     = `!$&'*+,;=`
+	endSubDelimChar     = `$&+=`
+	midIPathSegmentChar = unreservedChar + `%` + midSubDelimChar + `:@` + allowedUcsChar
+	endIPathSegmentChar = endUnreservedChar + `%` + endSubDelimChar + allowedUcsCharMinusPunc
+	iPrivateChar        = `\x{E000}-\x{F8FF}\x{F0000}-\x{FFFFD}\x{100000}-\x{10FFFD}`
+	midIChar            = `/?#\\` + midIPathSegmentChar + iPrivateChar
+	endIChar            = `/#` + endIPathSegmentChar + iPrivateChar
+	wellParen           = `\((?:[` + midIChar + `]|\([` + midIChar + `]*\))*\)`
+	wellBrack           = `\[(?:[` + midIChar + `]|\[[` + midIChar + `]*\])*\]`
+	wellBrace           = `\{(?:[` + midIChar + `]|\{[` + midIChar + `]*\})*\}`
+	wellAll             = wellParen + `|` + wellBrack + `|` + wellBrace
+	pathCont            = `(?:[` + midIChar + `]*(?:` + wellAll + `|[` + endIChar + `]))+`
+
+	// Host name fragments. iri matches a single label that starts and ends
+	// with a letter, mark or number and may contain hyphens in between;
+	// subdomain is one or more such labels, each followed by a dot.
+	// octet matches a decimal number from 0 to 255 without leading zeros.
+	letter    = `\p{L}`
+	mark      = `\p{M}`
+	number    = `\p{N}`
+	iriChar   = letter + mark + number
+	iri       = `[` + iriChar + `](?:[` + iriChar + `\-]*[` + iriChar + `])?`
+	subdomain = `(?:` + iri + `\.)+`
+	octet     = `(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
+	ipv4Addr  = octet + `\.` + octet + `\.` + octet + `\.` + octet
+
+	// ipv6Addr is based on https://datatracker.ietf.org/doc/html/rfc4291#section-2.2
+	// with a specific alternative for each valid count of leading 16-bit hexadecimal "chomps"
+	// that have not been replaced with a `::` elision.
+	// h4 is one such chomp: one to four hexadecimal digits.
+	h4                 = `[0-9a-fA-F]{1,4}`
+	ipv6AddrMinusEmpty = `(?:` +
+		// 7 colon-terminated chomps, followed by a final chomp or the rest of an elision.
+		`(?:` + h4 + `:){7}(?:` + h4 + `|:)|` +
+		// 6 chomps, followed by an IPv4 address or elision with final chomp or final elision.
+		`(?:` + h4 + `:){6}(?:` + ipv4Addr + `|:` + h4 + `|:)|` +
+		// 5 chomps, followed by an elision with optional IPv4 or up to 2 final chomps.
+		`(?:` + h4 + `:){5}(?::` + ipv4Addr + `|(?::` + h4 + `){1,2}|:)|` +
+		// 4 chomps, followed by an elision with optional IPv4 (optionally preceded by a chomp) or
+		// up to 3 final chomps.
+		`(?:` + h4 + `:){4}(?:(?::` + h4 + `){0,1}:` + ipv4Addr + `|(?::` + h4 + `){1,3}|:)|` +
+		// 3 chomps, followed by an elision with optional IPv4 (preceded by up to 2 chomps) or
+		// up to 4 final chomps.
+		`(?:` + h4 + `:){3}(?:(?::` + h4 + `){0,2}:` + ipv4Addr + `|(?::` + h4 + `){1,4}|:)|` +
+		// 2 chomps, followed by an elision with optional IPv4 (preceded by up to 3 chomps) or
+		// up to 5 final chomps.
+		`(?:` + h4 + `:){2}(?:(?::` + h4 + `){0,3}:` + ipv4Addr + `|(?::` + h4 + `){1,5}|:)|` +
+		// 1 chomp, followed by an elision with optional IPv4 (preceded by up to 4 chomps) or
+		// up to 6 final chomps.
+		`(?:` + h4 + `:){1}(?:(?::` + h4 + `){0,4}:` + ipv4Addr + `|(?::` + h4 + `){1,6}|:)|` +
+		// elision, followed by optional IPv4 (preceded by up to 5 chomps) or
+		// up to 7 final chomps.
+		// `:` is an intentionally omitted alternative, to avoid matching `::`.
+		`:(?:(?::` + h4 + `){0,5}:` + ipv4Addr + `|(?::` + h4 + `){1,7})` +
+		`)`
+	// ipv6Addr additionally accepts the all-elided address `::`.
+	ipv6Addr = `(?:` + ipv6AddrMinusEmpty + `|::)`
+	// ipAddrMinusEmpty is any IPv6 address other than `::`, or an IPv4 address
+	// delimited by word boundaries.
+	ipAddrMinusEmpty = `(?:` + ipv6AddrMinusEmpty + `|\b` + ipv4Addr + `\b)`
+	// port is an optional colon followed by zero or more decimal digits.
+	port = `(?::[0-9]*)?`
+)
+
+// AnyScheme can be passed to StrictMatchingScheme to match any possibly valid
+// scheme, and not just the known ones.
+//
+// It accepts either an arbitrary scheme name followed by "://", or one of
+// SchemesNoAuthority followed by ":".
+var AnyScheme = `(?:[a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)`
+
+// SchemesNoAuthority is a sorted list of some well-known url schemes that are
+// followed by ":" instead of "://". The list includes both officially
+// registered and unofficial schemes.
+var SchemesNoAuthority = []string{
+	`bitcoin`, // Bitcoin
+	`cid`,     // Content-ID
+	`file`,    // Files
+	`magnet`,  // Torrent magnets
+	`mailto`,  // Mail
+	`mid`,     // Message-ID
+	`sms`,     // SMS
+	`tel`,     // Telephone
+	`xmpp`,    // XMPP
+}
+
+// SchemesUnofficial is a sorted list of some well-known url schemes which
+// aren't officially registered just yet. They tend to correspond to software.
+//
+// Mostly collected from https://en.wikipedia.org/wiki/List_of_URI_schemes#Unofficial_but_common_URI_schemes.
+var SchemesUnofficial = []string{
+	`gemini`,        // Gemini
+	`jdbc`,          // Java Database Connectivity
+	`moz-extension`, // Firefox extension
+	`postgres`,      // PostgreSQL (short form)
+	`postgresql`,    // PostgreSQL
+	`slack`,         // Slack
+	`zoommtg`,       // Zoom (desktop)
+	`zoomus`,        // Zoom (mobile)
+}
+
+// The regular expressions are compiled when the API is first called.
+// Any subsequent calls will use the same regular expression pointers.
+//
+// We do not need to make a copy of them for each API call,
+// as Copy is now only useful if one copy calls Longest but not another,
+// and we always call Longest after compiling the regular expression.
+var (
+	strictRe   *regexp.Regexp
+	strictInit sync.Once
+
+	relaxedRe   *regexp.Regexp
+	relaxedInit sync.Once
+)
+
+// anyOf returns a non-capturing group that matches any one of strs literally.
+// Each string is escaped with regexp.QuoteMeta, so regexp metacharacters in
+// the inputs carry no special meaning. With no arguments the result is `(?:)`.
+func anyOf(strs ...string) string {
+	var b strings.Builder
+	b.WriteString("(?:")
+	for i, s := range strs {
+		if i != 0 {
+			b.WriteByte('|')
+		}
+		b.WriteString(regexp.QuoteMeta(s))
+	}
+	b.WriteByte(')')
+	return b.String()
+}
+
+// strictExp returns the expression compiled by Strict: a known scheme followed
+// by pathCont. Entries of Schemes and SchemesUnofficial must be followed by
+// "://", entries of SchemesNoAuthority by ":".
+func strictExp() string {
+	// (?i) is set right after the outer group opens, so it stays in effect for
+	// both alternatives of that group and ends when the group closes, leaving
+	// pathCont case-sensitive.
+	schemes := `(?:(?i)(?:` + anyOf(Schemes...) + `|` + anyOf(SchemesUnofficial...) + `)://|` + anyOf(SchemesNoAuthority...) + `:)`
+	return schemes + pathCont
+}
+
+// relaxedExp returns the expression compiled by Relaxed. It is an alternation
+// of, in order: the strict expression, a scheme-less web URL (host name or
+// bracketed IPv6 or IPv4 address, optional port, optional path), an email
+// address, and a bare IPv6 address other than `::`.
+func relaxedExp() string {
+	// Split TLDs at the first entry whose leading byte is not ASCII. This
+	// assumes every ASCII entry precedes the first non-ASCII one.
+	// NOTE(review): if TLDs had no non-ASCII entry, both slices would stay nil
+	// and the ASCII TLDs would be left out of the expression.
+	var asciiTLDs, unicodeTLDs []string
+	for i, tld := range TLDs {
+		if tld[0] >= utf8.RuneSelf {
+			// The three-index slice caps the capacity at i, so the append
+			// below copies into a new array instead of overwriting TLDs[i:].
+			asciiTLDs = TLDs[:i:i]
+			unicodeTLDs = TLDs[i:]
+			break
+		}
+	}
+	punycode := `xn--[a-z0-9-]+`
+
+	// Use \b to make sure ASCII TLDs are immediately followed by a word break.
+	// We can't do that with unicode TLDs, as they don't see following
+	// whitespace as a word break.
+	tlds := `(?:(?i)` + punycode + `|` + anyOf(append(asciiTLDs, PseudoTLDs...)...) + `\b|` + anyOf(unicodeTLDs...) + `)`
+	domain := subdomain + tlds
+
+	hostName := `(?:` + domain + `|\[` + ipv6Addr + `\]|\b` + ipv4Addr + `\b)`
+	webURL := hostName + port + `(?:/` + pathCont + `|/)?`
+	email := `[a-zA-Z0-9._%\-+]+@` + domain
+	return strictExp() + `|` + webURL + `|` + email + `|` + ipv6AddrMinusEmpty
+}
+
+// Strict produces a regexp that matches any URL with a scheme in either the
+// Schemes, SchemesUnofficial or SchemesNoAuthority lists.
+//
+// The expression is compiled on the first call and the same *regexp.Regexp,
+// switched to leftmost-longest matching, is returned on every call.
+func Strict() *regexp.Regexp {
+	strictInit.Do(func() {
+		strictRe = regexp.MustCompile(strictExp())
+		strictRe.Longest()
+	})
+	return strictRe
+}
+
+// Relaxed produces a regexp that matches any URL matched by Strict, plus any
+// URL with no scheme or email address, as well as bare IP addresses
+// (bracketed IPv6, IPv4, or unbracketed IPv6 other than `::`).
+//
+// The expression is compiled on the first call and the same *regexp.Regexp,
+// switched to leftmost-longest matching, is returned on every call.
+func Relaxed() *regexp.Regexp {
+	relaxedInit.Do(func() {
+		relaxedRe = regexp.MustCompile(relaxedExp())
+		relaxedRe.Longest()
+	})
+	return relaxedRe
+}
+
+// StrictMatchingScheme produces a regexp similar to Strict, but requiring that
+// the scheme match the given regular expression. See AnyScheme too.
+//
+// exp is matched case-insensitively; the flag is cleared again before
+// pathCont. A new regexp is compiled on every call, and an error is returned
+// if the combined expression does not compile.
+func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
+	strictMatching := `(?i)(?:` + exp + `)(?-i)` + pathCont
+	re, err := regexp.Compile(strictMatching)
+	if err != nil {
+		return nil, err
+	}
+	re.Longest()
+	return re, nil
+}