From 25a8d88f02b0348183ab55b35e05b834826040e7 Mon Sep 17 00:00:00 2001 From: Dmitry Verkhoturov Date: Tue, 10 Oct 2023 22:34:57 +0200 Subject: [PATCH] limit TitleExtractor to allow only Remark42 whitelisted domains Allowed domains consist of `REMARK_URL` second-level domain (or whole IP in case it's IP like 127.0.0.1) and `ALLOWED_HOSTS`. That is needed to prevent Remark42 from asking arbitrary servers and storing the page title as the comment.PostTitle. Previous behaviour allowed the caller of the API to create a comment with an arbitrary URL and learn the title of the page, which might be accessible to the server Remark42 is installed on but not to the user outside that network (CWE-918). --- backend/app/cmd/server.go | 24 ++++++++++++- backend/app/rest/api/admin_test.go | 2 +- backend/app/rest/api/rest_public_test.go | 5 +-- backend/app/store/service/service_test.go | 10 +++--- backend/app/store/service/title.go | 42 ++++++++++++++++------- backend/app/store/service/title_test.go | 16 ++++----- 6 files changed, 70 insertions(+), 29 deletions(-) diff --git a/backend/app/cmd/server.go b/backend/app/cmd/server.go index afcfbad4ba..5010945d82 100644 --- a/backend/app/cmd/server.go +++ b/backend/app/cmd/server.go @@ -4,6 +4,7 @@ import ( "context" "embed" "fmt" + "net" "net/http" "net/url" "os" @@ -495,6 +496,12 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) { } log.Printf("[DEBUG] image service for url=%s, EditDuration=%v", imageService.ImageAPI, imageService.EditDuration) + // Extract second level domain from s.RemarkURL. It can be and IP like http//127.0.0.1 in which case we need to use whole IP as domain + allowedDomains, err := s.getAllowedDomains() + if err != nil { + return nil, fmt.Errorf("failed to make allowed domains list: %w", err) + } + dataService := &service.DataStore{ Engine: storeEngine, EditDuration: s.EditDuration, @@ -504,7 +511,7 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) { MaxVotes: s.MaxVotes, PositiveScore: s.PositiveScore, ImageService: imageService, - TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}), + TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}, allowedDomains), RestrictedWordsMatcher: service.NewRestrictedWordsMatcher(service.StaticRestrictedWordsLister{Words: s.RestrictedWords}), } dataService.RestrictSameIPVotes.Enabled = s.RestrictVoteIP @@ -633,6 +640,21 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) { }, nil } +func (s *ServerCommand) getAllowedDomains() ([]string, error) { + remark42URL, err := url.Parse(s.RemarkURL) + if err != nil { + return nil, fmt.Errorf("failed to parse REMARK_URL: %w", err) + } + remark42Domain := remark42URL.Hostname() + if net.ParseIP(remark42Domain) == nil && len(strings.Split(remark42Domain, ".")) > 2 { + remark42Domain = strings.Join(strings.Split(remark42Domain, ".")[len(strings.Split(remark42Domain, "."))-2:], ".") + } + + allowedDomains := s.AllowedHosts + allowedDomains = append(allowedDomains, remark42Domain) + return allowedDomains, nil +} + // Run all application objects func (a *serverApp) run(ctx context.Context) error { if a.AdminPasswd != "" { diff --git a/backend/app/rest/api/admin_test.go b/backend/app/rest/api/admin_test.go index d84dc43daf..a4c0283be6 100644 --- a/backend/app/rest/api/admin_test.go +++ b/backend/app/rest/api/admin_test.go @@ -111,7 +111,7 @@ func TestAdmin_Title(t *testing.T) { ts, srv, teardown := startupT(t) defer teardown() - srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}) + srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"127.0.0.1"}) tss := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { if r.URL.String() == "/post1" { _, err := w.Write([]byte("post1 blah 123 2222")) diff --git a/backend/app/rest/api/rest_public_test.go b/backend/app/rest/api/rest_public_test.go index 65234398d3..57cdf29e37 100644 --- a/backend/app/rest/api/rest_public_test.go +++ b/backend/app/rest/api/rest_public_test.go @@ -480,7 +480,7 @@ func TestRest_FindUserComments(t *testing.T) { func TestRest_FindUserComments_CWE_918(t *testing.T) { ts, srv, teardown := startupT(t) - srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}) // required for extracting the title, bad URL test + srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"radio-t.com"}) // required for extracting the title, bad URL test defer srv.DataService.TitleExtractor.Close() defer teardown() @@ -496,7 +496,8 @@ func TestRest_FindUserComments_CWE_918(t *testing.T) { assert.False(t, backendRequestedArbitraryServer) addComment(t, arbitraryURLComment, ts) - assert.True(t, backendRequestedArbitraryServer) + assert.False(t, backendRequestedArbitraryServer, + "no request is expected to the test server as it's not in the list of the allowed domains for the title extractor") res, code := get(t, ts.URL+"/api/v1/comments?site=remark42&user=provider1_dev") assert.Equal(t, http.StatusOK, code) diff --git a/backend/app/store/service/service_test.go b/backend/app/store/service/service_test.go index 74245d9398..e86f765a49 100644 --- a/backend/app/store/service/service_test.go +++ b/backend/app/store/service/service_test.go @@ -131,7 +131,7 @@ func TestService_CreateFromPartialWithTitle(t *testing.T) { eng, teardown := prepStoreEngine(t) defer teardown() b := DataStore{Engine: eng, AdminStore: ks, - TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})} + TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})} defer b.Close() postPath := "/post/42" @@ -195,7 +195,7 @@ func TestService_SetTitle(t *testing.T) { eng, teardown := prepStoreEngine(t) defer teardown() b := DataStore{Engine: eng, AdminStore: ks, - TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})} + TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})} defer b.Close() comment := store.Comment{ Text: "text", @@ -1658,10 +1658,10 @@ func TestService_DoubleClose_Static(t *testing.T) { eng, teardown := prepStoreEngine(t) defer teardown() b := DataStore{Engine: eng, AdminStore: ks, - TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})} - b.Close() + TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})} + assert.NoError(t, b.Close()) // second call should not result in panic or errors - b.Close() + assert.NoError(t, b.Close()) } // makes new boltdb, put two records diff --git a/backend/app/store/service/title.go b/backend/app/store/service/title.go index f303939039..22c19cc27e 100644 --- a/backend/app/store/service/title.go +++ b/backend/app/store/service/title.go @@ -4,6 +4,7 @@ import ( "fmt" "io" "net/http" + "net/url" "strings" "time" @@ -19,14 +20,16 @@ const ( // TitleExtractor gets html title from remote page, cached type TitleExtractor struct { - client http.Client - cache lcw.LoadingCache + client http.Client + cache lcw.LoadingCache + allowedDomains []string } // NewTitleExtractor makes extractor with cache. If memory cache failed, switching to no-cache -func NewTitleExtractor(client http.Client) *TitleExtractor { +func NewTitleExtractor(client http.Client, allowedDomains []string) *TitleExtractor { res := TitleExtractor{ - client: client, + client: client, + allowedDomains: allowedDomains, } var err error res.cache, err = lcw.NewExpirableCache(lcw.TTL(teCacheTTL), lcw.MaxKeySize(teCacheMaxRecs)) @@ -38,13 +41,28 @@ func NewTitleExtractor(client http.Client) *TitleExtractor { } // Get page for url and return title -func (t *TitleExtractor) Get(url string) (string, error) { +func (t *TitleExtractor) Get(pageURL string) (string, error) { + // parse domain of the URL and check if it's in the allowed list + u, err := url.Parse(pageURL) + if err != nil { + return "", fmt.Errorf("failed to parse url %s: %w", pageURL, err) + } + allowed := false + for _, domain := range t.allowedDomains { + if strings.HasSuffix(u.Hostname(), domain) { + allowed = true + break + } + } + if !allowed { + return "", fmt.Errorf("domain %s is not allowed", u.Host) + } client := http.Client{Timeout: t.client.Timeout, Transport: t.client.Transport} defer client.CloseIdleConnections() - b, err := t.cache.Get(url, func() (interface{}, error) { - resp, err := client.Get(url) - if err != nil { - return nil, fmt.Errorf("failed to load page %s: %w", url, err) + b, err := t.cache.Get(pageURL, func() (interface{}, error) { + resp, e := client.Get(pageURL) + if e != nil { + return nil, fmt.Errorf("failed to load page %s: %w", pageURL, e) } defer func() { if err = resp.Body.Close(); err != nil { @@ -52,19 +70,19 @@ func (t *TitleExtractor) Get(url string) (string, error) { } }() if resp.StatusCode != 200 { - return nil, fmt.Errorf("can't load page %s, code %d", url, resp.StatusCode) + return nil, fmt.Errorf("can't load page %s, code %d", pageURL, resp.StatusCode) } title, ok := t.getTitle(resp.Body) if !ok { - return nil, fmt.Errorf("can't get title for %s", url) + return nil, fmt.Errorf("can't get title for %s", pageURL) } return title, nil }) // on error save result (empty string) to cache too and return "" title if err != nil { - _, _ = t.cache.Get(url, func() (interface{}, error) { return "", nil }) + _, _ = t.cache.Get(pageURL, func() (interface{}, error) { return "", nil }) return "", err } diff --git a/backend/app/store/service/title_test.go b/backend/app/store/service/title_test.go index 3e3bd79b39..b57d1235fd 100644 --- a/backend/app/store/service/title_test.go +++ b/backend/app/store/service/title_test.go @@ -28,7 +28,7 @@ func TestTitle_GetTitle(t *testing.T) { {` 2222`, false, ""}, } - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{}) defer ex.Close() for i, tt := range tbl { tt := tt @@ -41,7 +41,7 @@ func TestTitle_GetTitle(t *testing.T) { } func TestTitle_Get(t *testing.T) { - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"}) defer ex.Close() var hits int32 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -75,7 +75,7 @@ func TestTitle_GetConcurrent(t *testing.T) { for n := 0; n < 1000; n++ { body += "something something blah blah\n" } - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"}) defer ex.Close() var hits int32 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -104,7 +104,7 @@ func TestTitle_GetConcurrent(t *testing.T) { } func TestTitle_GetFailed(t *testing.T) { - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"}) defer ex.Close() var hits int32 ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -124,9 +124,9 @@ func TestTitle_GetFailed(t *testing.T) { assert.Equal(t, int32(1), atomic.LoadInt32(&hits), "hit once, errors cached") } -func TestTitle_DoubleClosed(*testing.T) { - ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}) - ex.Close() +func TestTitle_DoubleClosed(t *testing.T) { + ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{}) + assert.NoError(t, ex.Close()) // second call should not result in panic - ex.Close() + assert.NoError(t, ex.Close()) }