Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

urlfinder v0.0.2 #30

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Build stage: compile the urlfinder binary inside a pinned Go toolchain image.
FROM golang:1.23.2-alpine AS builder
# Install the build-base package before compiling.
RUN apk add --no-cache build-base
WORKDIR /app
COPY . /app
# Download module dependencies, then build ./cmd/urlfinder; with WORKDIR set to
# /app the resulting binary is /app/urlfinder, which the release stage copies.
RUN go mod download
RUN go build ./cmd/urlfinder

# Release stage: pinned Alpine image that receives the binary built above.
FROM alpine:3.20.3
# Upgrade the installed packages, then add bind-tools and CA certificates.
RUN apk -U upgrade --no-cache \
&& apk add --no-cache bind-tools ca-certificates
COPY --from=builder /app/urlfinder /usr/local/bin/

# Run urlfinder as the container's entrypoint.
ENTRYPOINT ["urlfinder"]
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ INPUT:
-d, -list string[] target domain or list of domains

SOURCE:
-s, -sources string[] specific sources for discovery (e.g., -s censys,dnsrepo)
-es, -exclude-sources string[] sources to exclude (e.g., -es censys,dnsrepo)
-s, -sources string[] specific sources for discovery (e.g., -s alienvault,commoncrawl)
-es, -exclude-sources string[] sources to exclude (e.g., -es alienvault,commoncrawl)
-all use all sources (may be slower)

FILTER:
Expand Down
11 changes: 6 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ require (
github.com/pkg/errors v0.9.1
github.com/projectdiscovery/dnsx v1.1.6
github.com/projectdiscovery/fdmax v0.0.4
github.com/projectdiscovery/goflags v0.1.64
github.com/projectdiscovery/gologger v1.1.30
github.com/projectdiscovery/ratelimit v0.0.26
github.com/projectdiscovery/utils v0.2.18
github.com/projectdiscovery/goflags v0.1.65
github.com/projectdiscovery/gologger v1.1.32
github.com/projectdiscovery/ratelimit v0.0.62
github.com/projectdiscovery/utils v0.2.19
github.com/rs/xid v1.5.0
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a
gopkg.in/yaml.v3 v3.0.1
Expand Down Expand Up @@ -66,7 +66,7 @@ require (
github.com/projectdiscovery/blackrock v0.0.1 // indirect
github.com/projectdiscovery/cdncheck v1.0.9 // indirect
github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 // indirect
github.com/projectdiscovery/retryabledns v1.0.83 // indirect
github.com/projectdiscovery/retryabledns v1.0.85 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
github.com/shirou/gopsutil/v3 v3.23.7 // indirect
Expand All @@ -93,6 +93,7 @@ require (
golang.org/x/sys v0.25.0 // indirect
golang.org/x/term v0.24.0 // indirect
golang.org/x/text v0.18.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.33.0 // indirect
Expand Down
22 changes: 12 additions & 10 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -151,18 +151,18 @@ github.com/projectdiscovery/dnsx v1.1.6 h1:QdKVKC0n/fpgaB4q3s6A2wn+qqg75CY0XxNkU
github.com/projectdiscovery/dnsx v1.1.6/go.mod h1:9rkLQzJHxQ26qiD1PhfoJDrhqCVN8lKLsxiAON1uDxM=
github.com/projectdiscovery/fdmax v0.0.4 h1:K9tIl5MUZrEMzjvwn/G4drsHms2aufTn1xUdeVcmhmc=
github.com/projectdiscovery/fdmax v0.0.4/go.mod h1:oZLqbhMuJ5FmcoaalOm31B1P4Vka/CqP50nWjgtSz+I=
github.com/projectdiscovery/goflags v0.1.64 h1:FDfwdt9N97Hi8OuhbkDlKtVttpc/CRMIWQVa08VsHsI=
github.com/projectdiscovery/goflags v0.1.64/go.mod h1:3FyHIVQtnycNOc1LE3O1jj/XR5XuMdF9QfHd0ujhnX4=
github.com/projectdiscovery/gologger v1.1.30 h1:Qc3tYcDwl7nh0IVotctAeXurp12sf+PQ3zu00AmLUds=
github.com/projectdiscovery/gologger v1.1.30/go.mod h1:XCNafm7f4TSOX8pgowmO4RQYNdL3qFFejPN/BBzxXPo=
github.com/projectdiscovery/goflags v0.1.65 h1:rjoj+5lP/FDzgeM0WILUTX9AOOnw0J0LXtl8P1SVeGE=
github.com/projectdiscovery/goflags v0.1.65/go.mod h1:cg6+yrLlaekP1hnefBc/UXbH1YGWa0fuzEW9iS1aG4g=
github.com/projectdiscovery/gologger v1.1.32 h1:j2Y2cxypELi9zbj/7UxDTdv9UWQl7ALJrwc7wV5snuY=
github.com/projectdiscovery/gologger v1.1.32/go.mod h1:w62+CIcwygjSpSnV/3Xh+jj4bgv6lfL7kx2kA/Bl09U=
github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 h1:ZScLodGSezQVwsQDtBSMFp72WDq0nNN+KE/5DHKY5QE=
github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983/go.mod h1:3G3BRKui7nMuDFAZKR/M2hiOLtaOmyukT20g88qRQjI=
github.com/projectdiscovery/ratelimit v0.0.26 h1:sxZCh72lMpQ1YNnJOWrJ+uZE9GFWdVE58LOArOc6c+4=
github.com/projectdiscovery/ratelimit v0.0.26/go.mod h1:2NHqfqqb9xAnqW+Ztd8AzzNi+JP38Kcdhb8cnbfX9sI=
github.com/projectdiscovery/retryabledns v1.0.83 h1:qevo2GBDr/BSSb5W9PxudXhu477uOz1mH1Kk0maGfVY=
github.com/projectdiscovery/retryabledns v1.0.83/go.mod h1:t9DWqUdr3wdc1iIuAozefhDpPfIqDnu0q9BpJO4Mq5M=
github.com/projectdiscovery/utils v0.2.18 h1:uV5JIYKIq8gXdu9wrCeUq3yqPiSCokTrKuLuZwXMSSw=
github.com/projectdiscovery/utils v0.2.18/go.mod h1:gcKxBTK1eNF+K8vzD62sMMVFf1eJoTgEiS81mp7CQjI=
github.com/projectdiscovery/ratelimit v0.0.62 h1:HmZ78oYWTbthHIImirdpgBwAr6r5pKjgoYBvz5c+ZAM=
github.com/projectdiscovery/ratelimit v0.0.62/go.mod h1:qY6G9Rrzov931loxqGRHYWzY/EUvbpNW+FxlIsBfDVA=
github.com/projectdiscovery/retryabledns v1.0.85 h1:9aLPWu0bcmtK8bPm/JJyfts28hgWf74UPsSG0KMXrqo=
github.com/projectdiscovery/retryabledns v1.0.85/go.mod h1:cZe0rydjby+ns2oIY7JmywHvtkwWxPzp3PuQz1rV50E=
github.com/projectdiscovery/utils v0.2.19 h1:vN1RGixpRVsn6VCnOACTLZyntV1o5FYqUElYpQUYND0=
github.com/projectdiscovery/utils v0.2.19/go.mod h1:M/uyy352fsl9juI8YHZY+r79auR2y1U9lSdkQv6lZpc=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
Expand Down Expand Up @@ -294,6 +294,8 @@ golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224=
golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
Expand Down
2 changes: 1 addition & 1 deletion internal/runner/banners.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ const banner = `
const ToolName = `urlfinder`

// Version is the current version of urlfinder
const version = `v0.0.1`
const version = `v0.0.2`

// showBanner is used to show the banner to the user
func showBanner() {
Expand Down
3 changes: 3 additions & 0 deletions internal/runner/enumerate.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ func (r *Runner) EnumerateSingleQueryWithCtx(ctx context.Context, query string,
case source.Error:
gologger.Warning().Msgf("Could not run source %s: %s\n", result.Source, result.Error)
case source.Url:
if result.Value == "" {
continue
}

url := replacer.Replace(result.Value)

Expand Down
4 changes: 2 additions & 2 deletions internal/runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ func ParseOptions() *Options {
)

flagSet.CreateGroup("source", "Source",
flagSet.StringSliceVarP(&options.Sources, "sources", "s", nil, "specific sources to use for discovery (-s censys,dnsrepo). Use -ls to display all available sources.", goflags.NormalizedStringSliceOptions),
flagSet.StringSliceVarP(&options.ExcludeSources, "exclude-sources", "es", nil, "sources to exclude from enumeration (-es censys,dnsrepo)", goflags.NormalizedStringSliceOptions),
flagSet.StringSliceVarP(&options.Sources, "sources", "s", nil, "specific sources to use for discovery (-s alienvault,commoncrawl). Use -ls to display all available sources.", goflags.NormalizedStringSliceOptions),
flagSet.StringSliceVarP(&options.ExcludeSources, "exclude-sources", "es", nil, "sources to exclude from enumeration (-es alienvault,commoncrawl)", goflags.NormalizedStringSliceOptions),
flagSet.BoolVar(&options.All, "all", false, "use all sources for enumeration (slow)"),
)

Expand Down
2 changes: 2 additions & 0 deletions pkg/agent/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"github.com/projectdiscovery/urlfinder/pkg/source/alienvault"
"github.com/projectdiscovery/urlfinder/pkg/source/commoncrawl"
"github.com/projectdiscovery/urlfinder/pkg/source/urlscan"
"github.com/projectdiscovery/urlfinder/pkg/source/virustotal"
"github.com/projectdiscovery/urlfinder/pkg/source/waybackarchive"
mapsutil "github.com/projectdiscovery/utils/maps"
)
Expand All @@ -14,6 +15,7 @@ var AllSources = map[string]source.Source{
"commoncrawl": &commoncrawl.Source{},
"urlscan": &urlscan.Source{},
"waybackarchive": &waybackarchive.Source{},
"virustotal": &virustotal.Source{},
}

var sourceWarnings = mapsutil.NewSyncLockMap[string, string](
Expand Down
5 changes: 3 additions & 2 deletions pkg/extractor/regex_extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ type RegexUrlExtractor struct {
}

// NewRegexUrlExtractor creates a new regular expression to extract urls
func NewRegexUrlExtractor() (*RegexUrlExtractor, error) {
extractor, err := regexp.Compile(`(?:http|https)?://(?:www\.)?[a-zA-Z0-9./?=_%:-]*`)
func NewRegexUrlExtractor(query string) (*RegexUrlExtractor, error) {
query = regexp.QuoteMeta(query)
extractor, err := regexp.Compile(`^(?:http://|https://)?(?:www\.)?(?:[a-zA-Z0-9-]+\.)*` + query + `(?:/.*)?$`)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/session/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func NewSession(query string, proxy string, multiRateLimiter *ratelimit.MultiLim
session.MultiRateLimiter = multiRateLimiter

// Create a new extractor object for the current url
extractor, err := extractor.NewRegexUrlExtractor()
extractor, err := extractor.NewRegexUrlExtractor(query)
session.Extractor = extractor

return session, err
Expand Down
6 changes: 4 additions & 2 deletions pkg/source/alienvault/alienvault.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,10 @@ func (s *Source) Run(ctx context.Context, rootUrl string, sess *session.Session)
resp.Body.Close()

for _, record := range response.URLList {
results <- source.Result{Source: s.Name(), Value: record.URL, Reference: apiURL}
s.results++
for _, extractedURL := range sess.Extractor.Extract(record.URL) {
results <- source.Result{Source: s.Name(), Value: extractedURL, Reference: apiURL}
s.results++
}
}

if !response.HasNext {
Expand Down
6 changes: 4 additions & 2 deletions pkg/source/urlscan/urlscan.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,10 @@ func (s *Source) Run(ctx context.Context, rootUrl string, sess *session.Session)
}

for _, url := range data.Results {
results <- source.Result{Source: s.Name(), Value: url.Page.Url, Reference: apiURL}
s.results++
for _, extractedURL := range sess.Extractor.Extract(url.Page.Url) {
results <- source.Result{Source: s.Name(), Value: extractedURL, Reference: apiURL}
s.results++
}
}
if len(data.Results) > 0 {
lastResult := data.Results[len(data.Results)-1]
Expand Down
113 changes: 113 additions & 0 deletions pkg/source/virustotal/virustotal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package virustotal

import (
"context"
"encoding/json"
"fmt"
"time"

"github.com/projectdiscovery/urlfinder/pkg/session"
"github.com/projectdiscovery/urlfinder/pkg/source"
"github.com/projectdiscovery/urlfinder/pkg/utils"
)

// response models the fields of the VirusTotal domain report JSON that this
// source decodes.
type response struct {
	// DetectedUrls holds the entries of "detected_urls"; only the "url"
	// member of each entry is decoded.
	DetectedUrls []struct {
		URL string `json:"url"`
	} `json:"detected_urls"`

	// Subdomains holds the entries of "subdomains".
	Subdomains []string `json:"subdomains"`

	// UndetectedUrls holds the entries of "undetected_urls". Each entry is
	// a JSON array of mixed types; Run reads the first element as the URL
	// when it is a string.
	UndetectedUrls [][]any `json:"undetected_urls"`
}

// Source is the VirusTotal URL source. It holds the configured API keys and
// the statistics recorded by the most recent call to Run.
type Source struct {
	// apiKeys is the list of keys installed through AddApiKeys.
	apiKeys []string
	// timeTaken is the duration of the last Run, set when its goroutine exits.
	timeTaken time.Duration
	// errors and results count the failures and the emitted URLs of the
	// last Run; both are reset to zero at the start of every Run.
	errors, results int
}

// Run queries the VirusTotal v2 domain report for rootUrl and streams the
// URLs accepted by the session extractor on the returned channel.
//
// The work happens in a goroutine; the channel is unbuffered and is closed
// when that goroutine exits, at which point timeTaken has been recorded.
// Failures (request error, JSON decode error) are delivered as a single
// result of type source.Error and end the run. If no API key is available
// the channel is closed without emitting anything.
//
// Every send honours ctx: once ctx is cancelled the goroutine stops emitting
// and returns instead of blocking forever on a consumer that has gone away.
func (s *Source) Run(ctx context.Context, rootUrl string, sess *session.Session) <-chan source.Result {
	results := make(chan source.Result)
	s.errors = 0
	s.results = 0

	go func() {
		defer func(startTime time.Time) {
			s.timeTaken = time.Since(startTime)
			close(results)
		}(time.Now())

		// send delivers one result, or gives up when ctx is cancelled first.
		// It reports whether the result was actually handed to the consumer.
		send := func(result source.Result) bool {
			select {
			case results <- result:
				return true
			case <-ctx.Done():
				return false
			}
		}

		// fail reports err as a source error and counts it.
		fail := func(err error) {
			send(source.Result{Source: s.Name(), Type: source.Error, Error: err})
			s.errors++
		}

		// emit passes candidate through the session extractor and forwards
		// every extracted URL. It returns false when ctx was cancelled, in
		// which case the caller must stop producing.
		emit := func(candidate string) bool {
			for _, extractedURL := range sess.Extractor.Extract(candidate) {
				if !send(source.Result{Source: s.Name(), Value: extractedURL}) {
					return false
				}
				s.results++
			}
			return true
		}

		// An empty key means there is nothing to authenticate with, so the
		// source stays silent (presumably PickRandom returns "" when no keys
		// are configured; verify against utils.PickRandom).
		randomApiKey := utils.PickRandom(s.apiKeys, s.Name())
		if randomApiKey == "" {
			return
		}

		searchURL := fmt.Sprintf("https://www.virustotal.com/vtapi/v2/domain/report?apikey=%s&domain=%s", randomApiKey, rootUrl)
		resp, err := sess.SimpleGet(ctx, searchURL)
		if err != nil {
			fail(err)
			sess.DiscardHTTPResponse(resp)
			return
		}
		defer resp.Body.Close()

		var data response
		if err := json.NewDecoder(resp.Body).Decode(&data); err != nil {
			fail(err)
			return
		}

		for _, detectedUrl := range data.DetectedUrls {
			if !emit(detectedUrl.URL) {
				return
			}
		}

		for _, subdomain := range data.Subdomains {
			if !emit(subdomain) {
				return
			}
		}

		for _, undetectedUrl := range data.UndetectedUrls {
			// Entries are mixed-type arrays; only a leading string element
			// is treated as a URL, anything else is skipped.
			if len(undetectedUrl) == 0 {
				continue
			}
			urlString, ok := undetectedUrl[0].(string)
			if !ok {
				continue
			}
			if !emit(urlString) {
				return
			}
		}
	}()

	return results
}

// Name returns the identifier of this source. Run stamps it on every result
// it emits and passes it to the API key picker.
func (s *Source) Name() string {
	const sourceName = "virustotal"
	return sourceName
}

// IsDefault always reports true for this source.
// NOTE(review): presumably this marks the source as part of the default
// source set; confirm against the caller of source.Source.IsDefault.
func (s *Source) IsDefault() bool {
	const enabledByDefault = true
	return enabledByDefault
}

// NeedsKey always reports true: Run returns without emitting anything when
// no API key can be picked.
func (s *Source) NeedsKey() bool {
	const keyRequired = true
	return keyRequired
}

// AddApiKeys replaces the API keys that Run picks from. The slice is stored
// as given, not copied.
func (s *Source) AddApiKeys(keys []string) {
	s.apiKeys = keys
}

// Statistics returns the error count, result count and duration recorded by
// the most recent call to Run.
func (s *Source) Statistics() source.Statistics {
	var stats source.Statistics
	stats.Errors = s.errors
	stats.Results = s.results
	stats.TimeTaken = s.timeTaken
	return stats
}
Loading