From 0c25b58465916d2ce1ca1df7d0b46090e39d832a Mon Sep 17 00:00:00 2001 From: g0rbe Date: Fri, 31 Mar 2023 07:33:17 +0200 Subject: [PATCH] update to use direct connection to the DB and add option for verbose logging --- config.go | 12 ++++-------- main.go | 15 +++++++++------ scanner.conf.example | 10 +++++----- workers.go | 13 +++++++++++-- 4 files changed, 29 insertions(+), 21 deletions(-) diff --git a/config.go b/config.go index 7a7a5a9..047ab11 100644 --- a/config.go +++ b/config.go @@ -10,15 +10,15 @@ import ( type Config struct { LogURI string `yaml:"LogURI"` - APIKey string `yaml:"APIKey"` + MongoURI string `yaml:"MongoURI"` UptimeHook string `yaml:"UptimeHook"` NumWorkers int `yaml:"NumWorkers"` BatchSize int `yaml:"BatchSize"` ParallelFetch int `yaml:"ParallelFetch"` StartIndex int `yaml:"StartIndex"` - Server string `yaml:"Server"` BufferSize int `yaml:"BufferSize"` SkipPreCert bool `yaml:"SkipPreCert"` + Verbose bool `yaml:"Verbose"` m *sync.Mutex } @@ -73,8 +73,8 @@ func ParseConfig(path string) error { switch { case Conf.LogURI == "": return fmt.Errorf("LogURI is missing") - case Conf.APIKey == "": - return fmt.Errorf("APIKey is missing") + case Conf.MongoURI == "": + return fmt.Errorf("MongoURI is missing") } if Conf.NumWorkers < 0 { @@ -107,10 +107,6 @@ func ParseConfig(path string) error { Conf.StartIndex = 0 } - if Conf.Server == "" { - Conf.Server = "https://columbus.elmasy.com" - } - if Conf.BufferSize < 0 { return fmt.Errorf("BufferSize is negative") } diff --git a/main.go b/main.go index 3338861..e6f5932 100644 --- a/main.go +++ b/main.go @@ -11,7 +11,7 @@ import ( "syscall" "time" - sdk "github.com/elmasy-com/columbus-sdk" + "github.com/elmasy-com/columbus-sdk/db" "github.com/g0rbe/slitu" ct "github.com/google/certificate-transparency-go" "github.com/google/certificate-transparency-go/client" @@ -76,13 +76,12 @@ func main() { os.Exit(1) } - // Set SDK - sdk.SetURI(Conf.Server) - - if err := sdk.GetDefaultUser(Conf.APIKey); err != nil { - 
fmt.Fprintf(os.Stderr, "Failed to get Columbus user: %s\n", err) + err = db.Connect(Conf.MongoURI) + if err != nil { + fmt.Fprintf(os.Stderr, "Failed to connect to MongoDB: %s\n", err) os.Exit(1) } + defer db.Disconnect() IndexRangeChan = make(chan IndexRange) LeafEntryChan = make(chan LeafEntry, Conf.BufferSize) @@ -116,6 +115,10 @@ infiniteLoop: break } + if Conf.Verbose { + fmt.Printf("Log size: %d\n", size) + } + for Conf.GetIndex() < size { select { diff --git a/scanner.conf.example b/scanner.conf.example index de3d108..aabc035 100644 --- a/scanner.conf.example +++ b/scanner.conf.example @@ -4,8 +4,8 @@ # URI to scan LogURI: -# API key -APIKey: +# MongoDB URI to connect to +MongoURI: # UptimeHook is called every 60 second (if not empty) to indicate, that the scanner is running UptimeHook: @@ -24,11 +24,11 @@ ParallelFetch: 2 # Start index of the logs (default: 0) StartIndex: 0 -# Server to insert to (default "https://columbus.elmasy.com") -Server: https://columbus.elmasy.com - # Buffer size to store fetched certs (default: 5000) BufferSize: 5000 # Skip precertificates (default: false) SkipPreCert: false + +# Verbose logging (default: false) +Verbose: false \ No newline at end of file diff --git a/workers.go b/workers.go index e6c10ae..412d797 100644 --- a/workers.go +++ b/workers.go @@ -9,7 +9,7 @@ import ( "sync" "time" - sdk "github.com/elmasy-com/columbus-sdk" + "github.com/elmasy-com/columbus-sdk/db" "github.com/elmasy-com/columbus-sdk/fault" "github.com/elmasy-com/elnet/domain" "github.com/elmasy-com/slices" @@ -129,12 +129,21 @@ func InsertWorker(id int, wg *sync.WaitGroup) { } } + var d string + // Write only unique and valid domains for i := range domains { if !domain.IsValid(domains[i]) { continue } - if err := sdk.Insert(domains[i]); err != nil { + + d = domain.Clean(domains[i]) + + if Conf.Verbose { + fmt.Printf("Inserting %s ...\n", d) + } + + if err := db.Insert(d); err != nil { if errors.Is(err, fault.ErrPublicSuffix) { continue }