Skip to content

Commit

Permalink
fix: reduce go routines and add debug logging (#394)
Browse files Browse the repository at this point in the history
The number of goroutines could explode the more Publishers there are,
despite our bottleneck being the GitHub API rate limiting.

Try to be more conservative with goroutines.
  • Loading branch information
bfabio authored Jun 3, 2024
1 parent 423556c commit 2c242f5
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 2 deletions.
8 changes: 6 additions & 2 deletions crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ func (c *Crawler) CrawlPublishers(publishers []common.Publisher) error {
// Process every item in publishers.
for _, publisher := range publishers {
c.publishersWg.Add(1)
go c.ScanPublisher(publisher)
c.ScanPublisher(publisher)
}

// Close the repositories channel when all the publisher goroutines are done
Expand All @@ -151,11 +151,15 @@ func (c *Crawler) crawl() error {

// Get cpus number
numCPUs := runtime.NumCPU()
log.Debugf("CPUs #: %d", numCPUs)

// Process the repositories in order to retrieve the files.
for i := 0; i < numCPUs; i++ {
c.repositoriesWg.Add(1)
go c.ProcessRepositories(reposChan)
go func(id int) {
log.Debugf("Starting ProcessRepositories() goroutine (#%d)", id)
c.ProcessRepositories(reposChan)
}(i)
}

for repo := range c.repositories {
Expand Down
4 changes: 4 additions & 0 deletions scanner/bitbucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ func NewBitBucketScanner() Scanner {
func (scanner BitBucketScanner) ScanGroupOfRepos(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("BitBucketScanner.ScanGroupOfRepos(%s)", url.String())

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")

if len(splitted) != 1 {
Expand Down Expand Up @@ -82,6 +84,8 @@ func (scanner BitBucketScanner) ScanGroupOfRepos(
func (scanner BitBucketScanner) ScanRepo(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("BitBucketScanner.ScanRepo(%s)", url.String())

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")
if len(splitted) != 2 {
return fmt.Errorf("bitbucket URL %s doesn't look like a repo", url.String())
Expand Down
4 changes: 4 additions & 0 deletions scanner/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ func NewGitHubScanner() Scanner {
func (scanner GitHubScanner) ScanGroupOfRepos(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitHubScanner.ScanGroupOfRepos(%s)", url.String())

opt := &github.RepositoryListByOrgOptions{}

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")
Expand Down Expand Up @@ -128,6 +130,8 @@ func (scanner GitHubScanner) ScanGroupOfRepos(
func (scanner GitHubScanner) ScanRepo(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitHubScanner.ScanRepo(%s)", url.String())

splitted := strings.Split(strings.Trim(url.Path, "/"), "/")
if len(splitted) != 2 {
return fmt.Errorf("doesn't look like a GitHub repo %s", url.String())
Expand Down
5 changes: 5 additions & 0 deletions scanner/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"

"github.com/italia/publiccode-crawler/v4/common"
log "github.com/sirupsen/logrus"
"github.com/xanzy/go-gitlab"
)

Expand All @@ -21,6 +22,8 @@ func NewGitLabScanner() Scanner {
func (scanner GitLabScanner) ScanGroupOfRepos(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitLabScanner.ScanGroupOfRepos(%s)", url.String())

apiURL, _ := url.Parse("/api/v4")
git, err := gitlab.NewClient(os.Getenv("GITLAB_TOKEN"), gitlab.WithBaseURL(apiURL.String()))
if err != nil {
Expand Down Expand Up @@ -68,6 +71,8 @@ func (scanner GitLabScanner) ScanGroupOfRepos(
func (scanner GitLabScanner) ScanRepo(
url url.URL, publisher common.Publisher, repositories chan common.Repository,
) error {
log.Debugf("GitLabScanner.ScanRepo(%s)", url.String())

apiURL, _ := url.Parse("/api/v4")
git, err := gitlab.NewClient(os.Getenv("GITLAB_TOKEN"), gitlab.WithBaseURL(apiURL.String()))
if err != nil {
Expand Down

0 comments on commit 2c242f5

Please sign in to comment.