diff --git a/backend/app/cmd/server.go b/backend/app/cmd/server.go
index afcfbad4ba..5010945d82 100644
--- a/backend/app/cmd/server.go
+++ b/backend/app/cmd/server.go
@@ -4,6 +4,7 @@ import (
"context"
"embed"
"fmt"
+ "net"
"net/http"
"net/url"
"os"
@@ -495,6 +496,12 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
}
log.Printf("[DEBUG] image service for url=%s, EditDuration=%v", imageService.ImageAPI, imageService.EditDuration)
+ // Extract second level domain from s.RemarkURL. It can be and IP like http//127.0.0.1 in which case we need to use whole IP as domain
+ allowedDomains, err := s.getAllowedDomains()
+ if err != nil {
+ return nil, fmt.Errorf("failed to make allowed domains list: %w", err)
+ }
+
dataService := &service.DataStore{
Engine: storeEngine,
EditDuration: s.EditDuration,
@@ -504,7 +511,7 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
MaxVotes: s.MaxVotes,
PositiveScore: s.PositiveScore,
ImageService: imageService,
- TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}),
+ TitleExtractor: service.NewTitleExtractor(http.Client{Timeout: time.Second * 5}, allowedDomains),
RestrictedWordsMatcher: service.NewRestrictedWordsMatcher(service.StaticRestrictedWordsLister{Words: s.RestrictedWords}),
}
dataService.RestrictSameIPVotes.Enabled = s.RestrictVoteIP
@@ -633,6 +640,21 @@ func (s *ServerCommand) newServerApp(ctx context.Context) (*serverApp, error) {
}, nil
}
+func (s *ServerCommand) getAllowedDomains() ([]string, error) {
+ remark42URL, err := url.Parse(s.RemarkURL)
+ if err != nil {
+ return nil, fmt.Errorf("failed to parse REMARK_URL: %w", err)
+ }
+ remark42Domain := remark42URL.Hostname()
+ if net.ParseIP(remark42Domain) == nil && len(strings.Split(remark42Domain, ".")) > 2 {
+ remark42Domain = strings.Join(strings.Split(remark42Domain, ".")[len(strings.Split(remark42Domain, "."))-2:], ".")
+ }
+
+ allowedDomains := s.AllowedHosts
+ allowedDomains = append(allowedDomains, remark42Domain)
+ return allowedDomains, nil
+}
+
// Run all application objects
func (a *serverApp) run(ctx context.Context) error {
if a.AdminPasswd != "" {
diff --git a/backend/app/rest/api/admin_test.go b/backend/app/rest/api/admin_test.go
index d84dc43daf..a4c0283be6 100644
--- a/backend/app/rest/api/admin_test.go
+++ b/backend/app/rest/api/admin_test.go
@@ -111,7 +111,7 @@ func TestAdmin_Title(t *testing.T) {
ts, srv, teardown := startupT(t)
defer teardown()
- srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second})
+ srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"127.0.0.1"})
tss := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.URL.String() == "/post1" {
_, err := w.Write([]byte("
post1 blah 123 2222"))
diff --git a/backend/app/rest/api/rest_public_test.go b/backend/app/rest/api/rest_public_test.go
index 65234398d3..57cdf29e37 100644
--- a/backend/app/rest/api/rest_public_test.go
+++ b/backend/app/rest/api/rest_public_test.go
@@ -480,7 +480,7 @@ func TestRest_FindUserComments(t *testing.T) {
func TestRest_FindUserComments_CWE_918(t *testing.T) {
ts, srv, teardown := startupT(t)
- srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}) // required for extracting the title, bad URL test
+ srv.DataService.TitleExtractor = service.NewTitleExtractor(http.Client{Timeout: time.Second}, []string{"radio-t.com"}) // required for extracting the title, bad URL test
defer srv.DataService.TitleExtractor.Close()
defer teardown()
@@ -496,7 +496,8 @@ func TestRest_FindUserComments_CWE_918(t *testing.T) {
assert.False(t, backendRequestedArbitraryServer)
addComment(t, arbitraryURLComment, ts)
- assert.True(t, backendRequestedArbitraryServer)
+ assert.False(t, backendRequestedArbitraryServer,
+ "no request is expected to the test server as it's not in the list of the allowed domains for the title extractor")
res, code := get(t, ts.URL+"/api/v1/comments?site=remark42&user=provider1_dev")
assert.Equal(t, http.StatusOK, code)
diff --git a/backend/app/store/service/service_test.go b/backend/app/store/service/service_test.go
index 74245d9398..e86f765a49 100644
--- a/backend/app/store/service/service_test.go
+++ b/backend/app/store/service/service_test.go
@@ -131,7 +131,7 @@ func TestService_CreateFromPartialWithTitle(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
- TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
+ TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})}
defer b.Close()
postPath := "/post/42"
@@ -195,7 +195,7 @@ func TestService_SetTitle(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
- TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
+ TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})}
defer b.Close()
comment := store.Comment{
Text: "text",
@@ -1658,10 +1658,10 @@ func TestService_DoubleClose_Static(t *testing.T) {
eng, teardown := prepStoreEngine(t)
defer teardown()
b := DataStore{Engine: eng, AdminStore: ks,
- TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second})}
- b.Close()
+ TitleExtractor: NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})}
+ assert.NoError(t, b.Close())
// second call should not result in panic or errors
- b.Close()
+ assert.NoError(t, b.Close())
}
// makes new boltdb, put two records
diff --git a/backend/app/store/service/title.go b/backend/app/store/service/title.go
index f303939039..22c19cc27e 100644
--- a/backend/app/store/service/title.go
+++ b/backend/app/store/service/title.go
@@ -4,6 +4,7 @@ import (
"fmt"
"io"
"net/http"
+ "net/url"
"strings"
"time"
@@ -19,14 +20,16 @@ const (
// TitleExtractor gets html title from remote page, cached
type TitleExtractor struct {
- client http.Client
- cache lcw.LoadingCache
+ client http.Client
+ cache lcw.LoadingCache
+ allowedDomains []string
}
// NewTitleExtractor makes extractor with cache. If memory cache failed, switching to no-cache
-func NewTitleExtractor(client http.Client) *TitleExtractor {
+func NewTitleExtractor(client http.Client, allowedDomains []string) *TitleExtractor {
res := TitleExtractor{
- client: client,
+ client: client,
+ allowedDomains: allowedDomains,
}
var err error
res.cache, err = lcw.NewExpirableCache(lcw.TTL(teCacheTTL), lcw.MaxKeySize(teCacheMaxRecs))
@@ -38,13 +41,28 @@ func NewTitleExtractor(client http.Client) *TitleExtractor {
}
// Get page for url and return title
-func (t *TitleExtractor) Get(url string) (string, error) {
+func (t *TitleExtractor) Get(pageURL string) (string, error) {
+ // parse domain of the URL and check if it's in the allowed list
+ u, err := url.Parse(pageURL)
+ if err != nil {
+ return "", fmt.Errorf("failed to parse url %s: %w", pageURL, err)
+ }
+ allowed := false
+ for _, domain := range t.allowedDomains {
+ if strings.HasSuffix(u.Hostname(), domain) {
+ allowed = true
+ break
+ }
+ }
+ if !allowed {
+ return "", fmt.Errorf("domain %s is not allowed", u.Host)
+ }
client := http.Client{Timeout: t.client.Timeout, Transport: t.client.Transport}
defer client.CloseIdleConnections()
- b, err := t.cache.Get(url, func() (interface{}, error) {
- resp, err := client.Get(url)
- if err != nil {
- return nil, fmt.Errorf("failed to load page %s: %w", url, err)
+ b, err := t.cache.Get(pageURL, func() (interface{}, error) {
+ resp, e := client.Get(pageURL)
+ if e != nil {
+ return nil, fmt.Errorf("failed to load page %s: %w", pageURL, e)
}
defer func() {
if err = resp.Body.Close(); err != nil {
@@ -52,19 +70,19 @@ func (t *TitleExtractor) Get(url string) (string, error) {
}
}()
if resp.StatusCode != 200 {
- return nil, fmt.Errorf("can't load page %s, code %d", url, resp.StatusCode)
+ return nil, fmt.Errorf("can't load page %s, code %d", pageURL, resp.StatusCode)
}
title, ok := t.getTitle(resp.Body)
if !ok {
- return nil, fmt.Errorf("can't get title for %s", url)
+ return nil, fmt.Errorf("can't get title for %s", pageURL)
}
return title, nil
})
// on error save result (empty string) to cache too and return "" title
if err != nil {
- _, _ = t.cache.Get(url, func() (interface{}, error) { return "", nil })
+ _, _ = t.cache.Get(pageURL, func() (interface{}, error) { return "", nil })
return "", err
}
diff --git a/backend/app/store/service/title_test.go b/backend/app/store/service/title_test.go
index 3e3bd79b39..b57d1235fd 100644
--- a/backend/app/store/service/title_test.go
+++ b/backend/app/store/service/title_test.go
@@ -28,7 +28,7 @@ func TestTitle_GetTitle(t *testing.T) {
{` 2222`, false, ""},
}
- ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
+ ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})
defer ex.Close()
for i, tt := range tbl {
tt := tt
@@ -41,7 +41,7 @@ func TestTitle_GetTitle(t *testing.T) {
}
func TestTitle_Get(t *testing.T) {
- ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
+ ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -75,7 +75,7 @@ func TestTitle_GetConcurrent(t *testing.T) {
for n := 0; n < 1000; n++ {
body += "something something blah blah\n"
}
- ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
+ ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -104,7 +104,7 @@ func TestTitle_GetConcurrent(t *testing.T) {
}
func TestTitle_GetFailed(t *testing.T) {
- ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
+ ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{"127.0.0.1"})
defer ex.Close()
var hits int32
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -124,9 +124,9 @@ func TestTitle_GetFailed(t *testing.T) {
assert.Equal(t, int32(1), atomic.LoadInt32(&hits), "hit once, errors cached")
}
-func TestTitle_DoubleClosed(*testing.T) {
- ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second})
- ex.Close()
+func TestTitle_DoubleClosed(t *testing.T) {
+ ex := NewTitleExtractor(http.Client{Timeout: 5 * time.Second}, []string{})
+ assert.NoError(t, ex.Close())
// second call should not result in panic
- ex.Close()
+ assert.NoError(t, ex.Close())
}