diff --git a/backend/app/cmd/server.go b/backend/app/cmd/server.go index afcfbad4ba..5010945d82 100644 --- a/backend/app/cmd/server.go +++ b/backend/app/cmd/server.go @@ -4,6 +4,7 @@ import ( "context" "embed" "fmt" + "net" "net/http" "net/url" "os" @@ -495,6 +496,12 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) { } log.Printf("[DEBUG] image service for url=%s, EditDuration=%v", imageService.ImageAPI, imageService.EditDuration) + // Extract second level domain from s.RemarkURL. It can be and IP like http//127.0.0.1 in which case we need to use whole IP as domain + allowedDomains, err := s.getAllowedDomains() + if err != nil { + return nil, fmt.Errorf("failed to make allowed domains list: %w", err) + } + dataService := &service.DataStore{ Engine: storeEngine, EditDuration: s.EditDuration, @@ -504,7 +511,7 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) { MaxVotes: s.MaxVotes, PositiveScore: s.PositiveScore, ImageService: imageService, - TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}), + TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}, allowedDomains), RestrictedWordsMatcher: service.NewRestrictedWordsMatcher(service.StaticRestrictedWordsLister{Words: s.RestrictedWords}), } dataService.RestrictSameIPVotes.Enabled = s.RestrictVoteIP @@ -633,6 +640,21 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) { }, nil } +func (s *ServerCommand) getAllowedDomains() ([]string, error) { + remark42URL, err := url.Parse(s.RemarkURL) + if err != nil { + return nil, fmt.Errorf("failed to parse REMARK_URL: %w", err) + } + remark42Domain := remark42URL.Hostname() + if net.ParseIP(remark42Domain) == nil && len(strings.Split(remark42Domain, ".")) > 2 { + remark42Domain = strings.Join(strings.Split(remark42Domain, ".")[len(strings.Split(remark42Domain, "."))-2:], ".") + } + + allowedDomains := s.AllowedHosts + allowedDomains = append(allowedDomains, remark42Domain) + return allowedDomains, nil +} + // Run all application objects func (a *serverApp) run(ctx context.Context) error { if a.AdminPasswd != "" { diff --git a/backend/app/rest/api/admin_test.go b/backend/app/rest/api/admin_test.go index d84dc43daf..a4c0283be6 100644 --- a/backend/app/rest/api/admin_test.go +++ b/backend/app/rest/api/admin_test.go @@ -111,7 +111,7 @@ func TestAdmin_Title(t *testing.T) { ts, srv, teardown := startupT(t) defer teardown() - srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}) + srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"127.0.0.1"}) tss := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.String() == "/post1" { _, err := w.Write([]byte("post1 blah 123 2222")) diff --git a/backend/app/rest/api/rest_public_test.go b/backend/app/rest/api/rest_public_test.go index 65234398d3..57cdf29e37 100644 --- a/backend/app/rest/api/rest_public_test.go +++ b/backend/app/rest/api/rest_public_test.go @@ -480,7 +480,7 @@ func TestRest_FindUserComments(t *testing.T) { func TestRest_FindUserComments_CWE_918(t *testing.T) { ts, srv, teardown := startupT(t) - srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}) // required for extracting the title, bad URL test + srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"radio-t.com"}) // required for extracting the title, bad URL test defer srv.DataService.TitleExtractor.Close() defer teardown() @@ -496,7 +496,8 @@ func TestRest_FindUserComments_CWE_918(t *testing.T) { assert.False(t, backendRequestedArbitraryServer) addComment(t, arbitraryURLComment, ts) - assert.True(t, backendRequestedArbitraryServer) + assert.False(t, backendRequestedArbitraryServer, + "no request is expected to the test server as it's not in the list of the allowed domains for the title extractor") res, code := get(t, ts.URL+"/api/v1/comments?site=remark42&user=provider1_dev") assert.Equal(t, http.StatusOK, code) diff --git a/backend/app/store/service/service_test.go b/backend/app/store/service/service_test.go index 74245d9398..e86f765a49 100644 --- a/backend/app/store/service/service_test.go +++ b/backend/app/store/service/service_test.go @@ -131,7 +131,7 @@ func TestService_CreateFromPartialWithTitle(t *testing.T) { eng, teardown := prepStoreEngine(t) defer teardown() b := DataStore{Engine: eng, AdminStore: ks, - TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})} + TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})} defer b.Close() postPath := "/post/42" @@ -195,7 +195,7 @@ func TestService_SetTitle(t *testing.T) { eng, teardown := prepStoreEngine(t) defer teardown() b := DataStore{Engine: eng, AdminStore: ks, - TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})} + TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})} defer b.Close() comment := store.Comment{ Text: "text", @@ -1658,10 +1658,10 @@ func TestService_DoubleClose_Static(t *testing.T) { eng, teardown := prepStoreEngine(t) defer teardown() b := DataStore{Engine: eng, AdminStore: ks, - TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})} - b.Close() + TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})} + assert.NoError(t, b.Close()) // second call should not result in panic or errors - b.Close() + assert.NoError(t, b.Close()) } // makes new boltdb, put two records diff --git a/backend/app/store/service/title.go b/backend/app/store/service/title.go index f303939039..22c19cc27e 100644 --- a/backend/app/store/service/title.go +++ b/backend/app/store/service/title.go @@ -4,6 +4,7 @@ import ( "fmt" "io" "net/http" + "net/url" "strings" "time" @@ -19,14 +20,16 @@ const ( // TitleExtractor gets html title from remote page, cached type TitleExtractor struct { - client http.Client - cache lcw.LoadingCache + client http.Client + cache lcw.LoadingCache + allowedDomains []string } // NewTitleExtractor makes extractor with cache. If memory cache failed, switching to no-cache -func NewTitleExtractor(client http.Client) *TitleExtractor { +func NewTitleExtractor(client http.Client, allowedDomains []string) *TitleExtractor { res := TitleExtractor{ - client: client, + client: client, + allowedDomains: allowedDomains, } var err error res.cache, err = lcw.NewExpirableCache(lcw.TTL(teCacheTTL), lcw.MaxKeySize(teCacheMaxRecs)) @@ -38,13 +41,28 @@ func NewTitleExtractor(client http.Client) *TitleExtractor { } // Get page for url and return title -func (t *TitleExtractor) Get(url string) (string, error) { +func (t *TitleExtractor) Get(pageURL string) (string, error) { + // parse domain of the URL and check if it's in the allowed list + u, err := url.Parse(pageURL) + if err != nil { + return "", fmt.Errorf("failed to parse url %s: %w", pageURL, err) + } + allowed := false + for _, domain := range t.allowedDomains { + if strings.HasSuffix(u.Hostname(), domain) { + allowed = true + break + } + } + if !allowed { + return "", fmt.Errorf("domain %s is not allowed", u.Host) + } client := http.Client{Timeout: t.client.Timeout, Transport: t.client.Transport} defer client.CloseIdleConnections() - b, err := t.cache.Get(url, func() (interface{}, error) { - resp, err := client.Get(url) - if err != nil { - return nil, fmt.Errorf("failed to load page %s: %w", url, err) + b, err := t.cache.Get(pageURL, func() (interface{}, error) { + resp, e := client.Get(pageURL) + if e != nil { + return nil, fmt.Errorf("failed to load page %s: %w", pageURL, e) } defer func() { if err = resp.Body.Close(); err != nil { @@ -52,19 +70,19 @@ func (t *TitleExtractor) Get(url string) (string, error) { } }() if resp.StatusCode != 200 { - return nil, fmt.Errorf("can't load page %s, code %d", url, resp.StatusCode) + return nil, fmt.Errorf("can't load page %s, code %d", pageURL, resp.StatusCode) } title, ok := t.getTitle(resp.Body) if !ok { - return nil, fmt.Errorf("can't get title for %s", url) + return nil, fmt.Errorf("can't get title for %s", pageURL) } return title, nil }) // on error save result (empty string) to cache too and return "" title if err != nil { - _, _ = t.cache.Get(url, func() (interface{}, error) { return "", nil }) + _, _ = t.cache.Get(pageURL, func() (interface{}, error) { return "", nil }) return "", err } diff --git a/backend/app/store/service/title_test.go b/backend/app/store/service/title_test.go index 3e3bd79b39..b57d1235fd 100644 --- a/backend/app/store/service/title_test.go +++ b/backend/app/store/service/title_test.go @@ -28,7 +28,7 @@ func TestTitle_GetTitle(t *testing.T) { {` 2222`, false, ""}, } - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{}) defer ex.Close() for i, tt := range tbl { tt := tt @@ -41,7 +41,7 @@ func TestTitle_GetTitle(t *testing.T) { } func TestTitle_Get(t *testing.T) { - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"}) defer ex.Close() var hits int32 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -75,7 +75,7 @@ func TestTitle_GetConcurrent(t *testing.T) { for n := 0; n < 1000; n++ { body += "something something blah blah\n" } - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"}) defer ex.Close() var hits int32 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -104,7 +104,7 @@ func TestTitle_GetConcurrent(t *testing.T) { } func TestTitle_GetFailed(t *testing.T) { - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"}) defer ex.Close() var hits int32 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -124,9 +124,9 @@ func TestTitle_GetFailed(t *testing.T) { assert.Equal(t, int32(1), atomic.LoadInt32(&hits), "hit once, errors cached") } -func TestTitle_DoubleClosed(*testing.T) { - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) - ex.Close() +func TestTitle_DoubleClosed(t *testing.T) { + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{}) + assert.NoError(t, ex.Close()) // second call should not result in panic - ex.Close() + assert.NoError(t, ex.Close()) }