diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..8d70389 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,15 @@ +# Base +FROM golang:1.23.2-alpine AS builder +RUN apk add --no-cache build-base +WORKDIR /app +COPY . /app +RUN go mod download +RUN go build ./cmd/urlfinder + +# Release +FROM alpine:3.20.3 +RUN apk -U upgrade --no-cache \ + && apk add --no-cache bind-tools ca-certificates +COPY --from=builder /app/urlfinder /usr/local/bin/ + +ENTRYPOINT ["urlfinder"] \ No newline at end of file diff --git a/README.md b/README.md index e00d1a5..bc929bf 100644 --- a/README.md +++ b/README.md @@ -66,8 +66,8 @@ INPUT: -d, -list string[] target domain or list of domains SOURCE: - -s, -sources string[] specific sources for discovery (e.g., -s censys,dnsrepo) - -es, -exclude-sources string[] sources to exclude (e.g., -es censys,dnsrepo) + -s, -sources string[] specific sources for discovery (e.g., -s alienvault,commoncrawl) + -es, -exclude-sources string[] sources to exclude (e.g., -es alienvault,commoncrawl) -all use all sources (may be slower) FILTER: diff --git a/go.mod b/go.mod index 3f7092e..41912e9 100644 --- a/go.mod +++ b/go.mod @@ -9,10 +9,10 @@ require ( github.com/pkg/errors v0.9.1 github.com/projectdiscovery/dnsx v1.1.6 github.com/projectdiscovery/fdmax v0.0.4 - github.com/projectdiscovery/goflags v0.1.64 - github.com/projectdiscovery/gologger v1.1.30 - github.com/projectdiscovery/ratelimit v0.0.26 - github.com/projectdiscovery/utils v0.2.18 + github.com/projectdiscovery/goflags v0.1.65 + github.com/projectdiscovery/gologger v1.1.32 + github.com/projectdiscovery/ratelimit v0.0.62 + github.com/projectdiscovery/utils v0.2.19 github.com/rs/xid v1.5.0 golang.org/x/exp v0.0.0-20240119083558-1b970713d09a gopkg.in/yaml.v3 v3.0.1 @@ -66,7 +66,7 @@ require ( github.com/projectdiscovery/blackrock v0.0.1 // indirect github.com/projectdiscovery/cdncheck v1.0.9 // indirect github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 // indirect - github.com/projectdiscovery/retryabledns v1.0.83 // indirect + github.com/projectdiscovery/retryabledns v1.0.85 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect github.com/shirou/gopsutil/v3 v3.23.7 // indirect @@ -93,6 +93,7 @@ require ( golang.org/x/sys v0.25.0 // indirect golang.org/x/term v0.24.0 // indirect golang.org/x/text v0.18.0 // indirect + golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/protobuf v1.33.0 // indirect diff --git a/go.sum b/go.sum index ac89ece..b6e316a 100644 --- a/go.sum +++ b/go.sum @@ -151,18 +151,18 @@ github.com/projectdiscovery/dnsx v1.1.6 h1:QdKVKC0n/fpgaB4q3s6A2wn+qqg75CY0XxNkU github.com/projectdiscovery/dnsx v1.1.6/go.mod h1:9rkLQzJHxQ26qiD1PhfoJDrhqCVN8lKLsxiAON1uDxM= github.com/projectdiscovery/fdmax v0.0.4 h1:K9tIl5MUZrEMzjvwn/G4drsHms2aufTn1xUdeVcmhmc= github.com/projectdiscovery/fdmax v0.0.4/go.mod h1:oZLqbhMuJ5FmcoaalOm31B1P4Vka/CqP50nWjgtSz+I= -github.com/projectdiscovery/goflags v0.1.64 h1:FDfwdt9N97Hi8OuhbkDlKtVttpc/CRMIWQVa08VsHsI= -github.com/projectdiscovery/goflags v0.1.64/go.mod h1:3FyHIVQtnycNOc1LE3O1jj/XR5XuMdF9QfHd0ujhnX4= -github.com/projectdiscovery/gologger v1.1.30 h1:Qc3tYcDwl7nh0IVotctAeXurp12sf+PQ3zu00AmLUds= -github.com/projectdiscovery/gologger v1.1.30/go.mod h1:XCNafm7f4TSOX8pgowmO4RQYNdL3qFFejPN/BBzxXPo= +github.com/projectdiscovery/goflags v0.1.65 h1:rjoj+5lP/FDzgeM0WILUTX9AOOnw0J0LXtl8P1SVeGE= +github.com/projectdiscovery/goflags v0.1.65/go.mod h1:cg6+yrLlaekP1hnefBc/UXbH1YGWa0fuzEW9iS1aG4g= +github.com/projectdiscovery/gologger v1.1.32 h1:j2Y2cxypELi9zbj/7UxDTdv9UWQl7ALJrwc7wV5snuY= +github.com/projectdiscovery/gologger v1.1.32/go.mod h1:w62+CIcwygjSpSnV/3Xh+jj4bgv6lfL7kx2kA/Bl09U= github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 h1:ZScLodGSezQVwsQDtBSMFp72WDq0nNN+KE/5DHKY5QE= github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983/go.mod h1:3G3BRKui7nMuDFAZKR/M2hiOLtaOmyukT20g88qRQjI= -github.com/projectdiscovery/ratelimit v0.0.26 h1:sxZCh72lMpQ1YNnJOWrJ+uZE9GFWdVE58LOArOc6c+4= -github.com/projectdiscovery/ratelimit v0.0.26/go.mod h1:2NHqfqqb9xAnqW+Ztd8AzzNi+JP38Kcdhb8cnbfX9sI= -github.com/projectdiscovery/retryabledns v1.0.83 h1:qevo2GBDr/BSSb5W9PxudXhu477uOz1mH1Kk0maGfVY= -github.com/projectdiscovery/retryabledns v1.0.83/go.mod h1:t9DWqUdr3wdc1iIuAozefhDpPfIqDnu0q9BpJO4Mq5M= -github.com/projectdiscovery/utils v0.2.18 h1:uV5JIYKIq8gXdu9wrCeUq3yqPiSCokTrKuLuZwXMSSw= -github.com/projectdiscovery/utils v0.2.18/go.mod h1:gcKxBTK1eNF+K8vzD62sMMVFf1eJoTgEiS81mp7CQjI= +github.com/projectdiscovery/ratelimit v0.0.62 h1:HmZ78oYWTbthHIImirdpgBwAr6r5pKjgoYBvz5c+ZAM= +github.com/projectdiscovery/ratelimit v0.0.62/go.mod h1:qY6G9Rrzov931loxqGRHYWzY/EUvbpNW+FxlIsBfDVA= +github.com/projectdiscovery/retryabledns v1.0.85 h1:9aLPWu0bcmtK8bPm/JJyfts28hgWf74UPsSG0KMXrqo= +github.com/projectdiscovery/retryabledns v1.0.85/go.mod h1:cZe0rydjby+ns2oIY7JmywHvtkwWxPzp3PuQz1rV50E= +github.com/projectdiscovery/utils v0.2.19 h1:vN1RGixpRVsn6VCnOACTLZyntV1o5FYqUElYpQUYND0= +github.com/projectdiscovery/utils v0.2.19/go.mod h1:M/uyy352fsl9juI8YHZY+r79auR2y1U9lSdkQv6lZpc= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= @@ -294,6 +294,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= +golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= diff --git a/internal/runner/banners.go b/internal/runner/banners.go index eb11c25..fdbc177 100644 --- a/internal/runner/banners.go +++ b/internal/runner/banners.go @@ -16,7 +16,7 @@ const banner = ` const ToolName = `urlfinder` // Version is the current version of urlfinder -const version = `v0.0.1` +const version = `v0.0.2` // showBanner is used to show the banner to the user func showBanner() { diff --git a/internal/runner/enumerate.go b/internal/runner/enumerate.go index e4e5623..617859b 100644 --- a/internal/runner/enumerate.go +++ b/internal/runner/enumerate.go @@ -48,6 +48,9 @@ func (r *Runner) EnumerateSingleQueryWithCtx(ctx context.Context, query string, case source.Error: gologger.Warning().Msgf("Could not run source %s: %s\n", result.Source, result.Error) case source.Url: + if result.Value == "" { + continue + } url := replacer.Replace(result.Value) diff --git a/internal/runner/options.go b/internal/runner/options.go index 960f29e..a2265e8 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -77,8 +77,8 @@ func ParseOptions() *Options { ) flagSet.CreateGroup("source", "Source", - flagSet.StringSliceVarP(&options.Sources, "sources", "s", nil, "specific sources to use for discovery (-s censys,dnsrepo). Use -ls to display all available sources.", goflags.NormalizedStringSliceOptions), - flagSet.StringSliceVarP(&options.ExcludeSources, "exclude-sources", "es", nil, "sources to exclude from enumeration (-es censys,dnsrepo)", goflags.NormalizedStringSliceOptions), + flagSet.StringSliceVarP(&options.Sources, "sources", "s", nil, "specific sources to use for discovery (-s alienvault,commoncrawl). Use -ls to display all available sources.", goflags.NormalizedStringSliceOptions), + flagSet.StringSliceVarP(&options.ExcludeSources, "exclude-sources", "es", nil, "sources to exclude from enumeration (-es alienvault,commoncrawl)", goflags.NormalizedStringSliceOptions), flagSet.BoolVar(&options.All, "all", false, "use all sources for enumeration (slow)"), ) diff --git a/pkg/agent/registry.go b/pkg/agent/registry.go index 15e729b..4ed5900 100644 --- a/pkg/agent/registry.go +++ b/pkg/agent/registry.go @@ -5,6 +5,7 @@ import ( "github.com/projectdiscovery/urlfinder/pkg/source/alienvault" "github.com/projectdiscovery/urlfinder/pkg/source/commoncrawl" "github.com/projectdiscovery/urlfinder/pkg/source/urlscan" + "github.com/projectdiscovery/urlfinder/pkg/source/virustotal" "github.com/projectdiscovery/urlfinder/pkg/source/waybackarchive" mapsutil "github.com/projectdiscovery/utils/maps" ) @@ -14,6 +15,7 @@ var AllSources = map[string]source.Source{ "commoncrawl": &commoncrawl.Source{}, "urlscan": &urlscan.Source{}, "waybackarchive": &waybackarchive.Source{}, + "virustotal": &virustotal.Source{}, } var sourceWarnings = mapsutil.NewSyncLockMap[string, string]( diff --git a/pkg/extractor/regex_extractor.go b/pkg/extractor/regex_extractor.go index 71b1213..787bb00 100644 --- a/pkg/extractor/regex_extractor.go +++ b/pkg/extractor/regex_extractor.go @@ -11,8 +11,9 @@ type RegexUrlExtractor struct { } // NewRegexUrlExtractor creates a new regular expression to extract urls -func NewRegexUrlExtractor() (*RegexUrlExtractor, error) { - extractor, err := regexp.Compile(`(?:http|https)?://(?:www\.)?[a-zA-Z0-9./?=_%:-]*`) +func NewRegexUrlExtractor(query string) (*RegexUrlExtractor, error) { + query = regexp.QuoteMeta(query) + extractor, err := regexp.Compile(`^(?:http://|https://)?(?:www\.)?(?:[a-zA-Z0-9-]+\.)*` + query + `(?:/.*)?$`) if err != nil { return nil, err } diff --git a/pkg/session/session.go b/pkg/session/session.go index aafe3a5..469586a 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -75,7 +75,7 @@ func NewSession(query string, proxy string, multiRateLimiter *ratelimit.MultiLim session.MultiRateLimiter = multiRateLimiter // Create a new extractor object for the current url - extractor, err := extractor.NewRegexUrlExtractor() + extractor, err := extractor.NewRegexUrlExtractor(query) session.Extractor = extractor return session, err diff --git a/pkg/source/alienvault/alienvault.go b/pkg/source/alienvault/alienvault.go index 1cdf23b..14f573d 100644 --- a/pkg/source/alienvault/alienvault.go +++ b/pkg/source/alienvault/alienvault.go @@ -63,8 +63,10 @@ func (s *Source) Run(ctx context.Context, rootUrl string, sess *session.Session) resp.Body.Close() for _, record := range response.URLList { - results <- source.Result{Source: s.Name(), Value: record.URL, Reference: apiURL} - s.results++ + for _, extractedURL := range sess.Extractor.Extract(record.URL) { + results <- source.Result{Source: s.Name(), Value: extractedURL, Reference: apiURL} + s.results++ + } } if !response.HasNext { diff --git a/pkg/source/urlscan/urlscan.go b/pkg/source/urlscan/urlscan.go index 40a5d97..23cd18a 100644 --- a/pkg/source/urlscan/urlscan.go +++ b/pkg/source/urlscan/urlscan.go @@ -89,8 +89,10 @@ func (s *Source) Run(ctx context.Context, rootUrl string, sess *session.Session) } for _, url := range data.Results { - results <- source.Result{Source: s.Name(), Value: url.Page.Url, Reference: apiURL} - s.results++ + for _, extractedURL := range sess.Extractor.Extract(url.Page.Url) { + results <- source.Result{Source: s.Name(), Value: extractedURL, Reference: apiURL} + s.results++ + } } if len(data.Results) > 0 { lastResult := data.Results[len(data.Results)-1] diff --git a/pkg/source/virustotal/virustotal.go b/pkg/source/virustotal/virustotal.go new file mode 100644 index 0000000..928c296 --- /dev/null +++ b/pkg/source/virustotal/virustotal.go @@ -0,0 +1,113 @@ +package virustotal + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/projectdiscovery/urlfinder/pkg/session" + "github.com/projectdiscovery/urlfinder/pkg/source" + "github.com/projectdiscovery/urlfinder/pkg/utils" +) + +type response struct { + DetectedUrls []struct { + URL string `json:"url"` + } `json:"detected_urls"` + Subdomains []string `json:"subdomains"` + UndetectedUrls [][]interface{} `json:"undetected_urls"` +} + +type Source struct { + apiKeys []string + timeTaken time.Duration + errors int + results int +} + +func (s *Source) Run(ctx context.Context, rootUrl string, sess *session.Session) <-chan source.Result { + results := make(chan source.Result) + s.errors = 0 + s.results = 0 + + go func() { + defer func(startTime time.Time) { + s.timeTaken = time.Since(startTime) + close(results) + }(time.Now()) + + randomApiKey := utils.PickRandom(s.apiKeys, s.Name()) + if randomApiKey == "" { + return + } + + searchURL := fmt.Sprintf("https://www.virustotal.com/vtapi/v2/domain/report?apikey=%s&domain=%s", randomApiKey, rootUrl) + resp, err := sess.SimpleGet(ctx, searchURL) + if err != nil { + results <- source.Result{Source: s.Name(), Type: source.Error, Error: err} + s.errors++ + sess.DiscardHTTPResponse(resp) + return + } + defer resp.Body.Close() + + var data response + err = json.NewDecoder(resp.Body).Decode(&data) + if err != nil { + results <- source.Result{Source: s.Name(), Type: source.Error, Error: err} + s.errors++ + return + } + + for _, detectedUrl := range data.DetectedUrls { + for _, extractedURL := range sess.Extractor.Extract(detectedUrl.URL) { + results <- source.Result{Source: s.Name(), Value: extractedURL} + s.results++ + } + } + for _, subdomain := range data.Subdomains { + for _, extractedURL := range sess.Extractor.Extract(subdomain) { + results <- source.Result{Source: s.Name(), Value: extractedURL} + s.results++ + } + } + + for _, undetectedUrl := range data.UndetectedUrls { + if len(undetectedUrl) > 0 { + if urlString, ok := undetectedUrl[0].(string); ok { + for _, extractedURL := range sess.Extractor.Extract(urlString) { + results <- source.Result{Source: s.Name(), Value: extractedURL} + s.results++ + } + } + } + } + + }() + return results +} + +func (s *Source) Name() string { + return "virustotal" +} + +func (s *Source) IsDefault() bool { + return true +} + +func (s *Source) NeedsKey() bool { + return true +} + +func (s *Source) AddApiKeys(keys []string) { + s.apiKeys = keys +} + +func (s *Source) Statistics() source.Statistics { + return source.Statistics{ + Errors: s.errors, + Results: s.results, + TimeTaken: s.timeTaken, + } +}