Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Go: fix flow through string concatenation in go/incomplete-hostname-regexp #16307

Merged
merged 6 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 10 additions & 8 deletions go/ql/src/Security/CWE-020/IncompleteHostnameRegexp.ql
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,12 @@ predicate regexpGuardsError(RegexpPattern regexp) {

module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig {
additional predicate isSourceString(DataFlow::Node source, string hostPart) {
exists(Expr e |
e = source.asExpr() and
isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart)
|
e instanceof StringLit
or
e instanceof AddExpr and
not isIncompleteHostNameRegexpPattern(e.(AddExpr).getAnOperand().getStringValue(), _)
exists(Expr e | e = source.asExpr() |
isIncompleteHostNameRegexpPattern(e.getStringValue(), hostPart) and
// Exclude constant names to avoid duplicate results, because the string
// literals which they are initialised with are also considered as
// sources.
not e instanceof ConstantName
)
}

Expand All @@ -101,6 +99,10 @@ module IncompleteHostNameRegexpConfig implements DataFlow::ConfigSig {
) and
not regexpGuardsError(sink)
}

predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) {
StringOps::Concatenation::taintStep(node1, node2)
}
}

module Flow = DataFlow::Global<IncompleteHostNameRegexpConfig>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import (
"regexp"
)

func checkRedirectGood(req *http.Request, via []*http.Request) error {
func checkRedirectGood2(req *http.Request, via []*http.Request) error {
// GOOD: the host of `req.URL` must be `example.com`, `www.example.com` or `beta.example.com`
re := `^((www|beta)\.)?example\.com/`
if matched, _ := regexp.MatchString(re, req.URL.Host); matched {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* The query `go/incomplete-hostname-regexp` now recognizes more sources involving concatenation of string literals and also follows flow through string concatenation. This may lead to more alerts.
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
edges
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | provenance | |
| main.go:49:21:49:45 | `https://www.example.com` | main.go:62:15:62:25 | sourceConst | provenance | |
| main.go:62:15:62:25 | sourceConst | main.go:65:15:65:23 | localVar3 | provenance | |
nodes
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | semmle.label | "^((www\|beta).)?example.com/" |
| IncompleteHostnameRegexp.go:12:38:12:39 | re | semmle.label | re |
| main.go:39:60:39:79 | "^test2.github.com$" | semmle.label | "^test2.github.com$" |
| main.go:44:15:44:39 | `https://www.example.com` | semmle.label | `https://www.example.com` |
| main.go:40:60:40:79 | "^test2.github.com$" | semmle.label | "^test2.github.com$" |
| main.go:45:15:45:39 | `https://www.example.com` | semmle.label | `https://www.example.com` |
| main.go:49:21:49:45 | `https://www.example.com` | semmle.label | `https://www.example.com` |
| main.go:56:15:56:34 | ...+... | semmle.label | ...+... |
| main.go:58:15:58:42 | ...+... | semmle.label | ...+... |
| main.go:62:15:62:25 | sourceConst | semmle.label | sourceConst |
| main.go:65:15:65:23 | localVar3 | semmle.label | localVar3 |
subpaths
#select
| IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:11:8:11:36 | "^((www\|beta).)?example.com/" | IncompleteHostnameRegexp.go:12:38:12:39 | re | This regular expression has an unescaped dot before ')?example.com', so it might match more hosts than expected when $@. | IncompleteHostnameRegexp.go:12:38:12:39 | re | the regular expression is used |
| main.go:39:60:39:79 | "^test2.github.com$" | main.go:39:60:39:79 | "^test2.github.com$" | main.go:39:60:39:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:39:60:39:79 | "^test2.github.com$" | the regular expression is used |
| main.go:44:15:44:39 | `https://www.example.com` | main.go:44:15:44:39 | `https://www.example.com` | main.go:44:15:44:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:44:15:44:39 | `https://www.example.com` | the regular expression is used |
| main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | main.go:40:60:40:79 | "^test2.github.com$" | This regular expression has an unescaped dot before 'github.com', so it might match more hosts than expected when $@. | main.go:40:60:40:79 | "^test2.github.com$" | the regular expression is used |
| main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | main.go:45:15:45:39 | `https://www.example.com` | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:45:15:45:39 | `https://www.example.com` | the regular expression is used |
| main.go:49:21:49:45 | `https://www.example.com` | main.go:49:21:49:45 | `https://www.example.com` | main.go:65:15:65:23 | localVar3 | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:65:15:65:23 | localVar3 | the regular expression is used |
| main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | main.go:56:15:56:34 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:56:15:56:34 | ...+... | the regular expression is used |
| main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | main.go:58:15:58:42 | ...+... | This regular expression has an unescaped dot before 'example.com', so it might match more hosts than expected when $@. | main.go:58:15:58:42 | ...+... | the regular expression is used |
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package main

import (
"errors"
"net/http"
"regexp"
)

func checkRedirectGood2(req *http.Request, via []*http.Request) error {
// GOOD: the host of `req.URL` must be `example.com`, `www.example.com` or `beta.example.com`
re := `^((www|beta)\.)?example\.com/`
if matched, _ := regexp.MatchString(re, req.URL.Host); matched {
return nil
}
return errors.New("Invalid redirect")
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
package main

import (
"github.com/elazarl/goproxy"
"net/http"
"regexp"
"time"

"github.com/elazarl/goproxy"
)

func Match(notARegex string) bool {
Expand Down Expand Up @@ -44,3 +45,22 @@ func main() {
regexp.Match(`https://www.example.com`, []byte("")) // NOT OK
regexp.Match(`https://www\.example\.com`, []byte("")) // OK
}

const sourceConst = `https://www.example.com`
const firstHalfConst = `https://www.example.`

func concatenateStrings() {
firstHalf := `https://www.example.`
regexp.Match(firstHalf+`com`, []byte("")) // MISSING: NOT OK

regexp.Match(firstHalfConst+`com`, []byte("")) // NOT OK

regexp.Match(`https://www.example.`+`com`, []byte("")) // NOT OK
}

func avoidDuplicateResults() {
localVar1 := sourceConst
localVar2 := localVar1
localVar3 := localVar2
regexp.Match(localVar3, []byte("")) // NOT OK
}
Loading