diff --git a/pkg/docker/dockerfile/reverse/reverse.go b/pkg/docker/dockerfile/reverse/reverse.go index 8deb8b55d4..19cdd764b7 100644 --- a/pkg/docker/dockerfile/reverse/reverse.go +++ b/pkg/docker/dockerfile/reverse/reverse.go @@ -264,72 +264,7 @@ func DockerfileFromHistory(apiClient *docker.Client, imageID string) (*Dockerfil inst = instPrefixRun + runData } default: - //TODO: need to refactor - processed := false - //rawInst := rawLine - if strings.HasPrefix(rawInst, runInstArgsPrefix) { - parts := strings.SplitN(rawInst, " ", 2) - if len(parts) == 2 { - withArgs := strings.TrimSpace(parts[1]) - argNumStr := parts[0][1:] - argNum, err := strconv.Atoi(argNumStr) - if err == nil { - if withArgsArray, err := shlex.Split(withArgs); err == nil { - if len(withArgsArray) > argNum { - rawInstParts := withArgsArray[argNum:] - processed = true - if len(rawInstParts) > 2 && - rawInstParts[0] == defaultRunInstShell && - rawInstParts[1] == "-c" { - isExecForm = false - rawInstParts = rawInstParts[2:] - - inst = fmt.Sprintf("RUN %s", strings.Join(rawInstParts, " ")) - inst = strings.TrimSpace(inst) - } else { - isExecForm = true - - var outJson bytes.Buffer - encoder := json.NewEncoder(&outJson) - encoder.SetEscapeHTML(false) - err := encoder.Encode(rawInstParts) - if err == nil { - inst = fmt.Sprintf("RUN %s", outJson.String()) - } - } - } else { - log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - malformed - %v (%v)", rawInst, err) - } - } else { - log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - malformed - %v (%v)", rawInst, err) - } - - } else { - log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - malformed number of ARGs - %v (%v)", rawInst, err) - } - } else { - log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - unexpected number of parts - %v", rawInst) - } - } - //todo: RUN inst with ARGS for buildkit - if hasInstructionPrefix(rawInst) { - inst = rawInst - } else { - if !processed { - //default to RUN instruction in exec form - isExecForm = true - inst = instPrefixRun + rawInst - if outArray, err := shlex.Split(rawInst); err == nil { - var outJson bytes.Buffer - encoder := json.NewEncoder(&outJson) - encoder.SetEscapeHTML(false) - err := encoder.Encode(outArray) - if err == nil { - inst = fmt.Sprintf("RUN %s", outJson.String()) - } - } - } - } + inst, isExecForm = processRawInst(rawInst) } //NOTE: Dockerfile instructions can be any case, but the instructions from history are always uppercase @@ -645,6 +580,135 @@ func DockerfileFromHistory(apiClient *docker.Client, imageID string) (*Dockerfil */ } +// processRawInst handles the processing for non-standard instructions that +// are otherwise uncaught by the main processor. It returns both the processed +// instruction as well as its status as an exec format command. +func processRawInst(rawInst string) (string, bool) { + // Try to process as an uncaught standard instruction + inst, ief, err = processAsUncaughtInst(rawInst) + if err == nil { + return inst, ief + } + + // Failing that, try to process as an args format RUN instruction + inst, ief, err := processAsArgsFormat(rawInst) + if err == nil { + return inst, ief + } + + // Fallback: process as an exec format RUN instruction + return processAsExecRun(rawInst) +} + +// processAsArgsFormat attempts to process the given instruction as an +// args style RUN, resulting in an encoded RUN string, a boolean that +// reflects if the output is in exec format, and an error. +// +// Provided there are no issues along the way, the error will be nil. +// +// If the resulting error is non-nil, the other returned values should +// not be used. +func processAsArgsFormat(rawInst string) (string, bool, error) { + if !strings.HasPrefix(rawInst, runInstArgsPrefix) { + return "", false, errors.New("not inst args format") + } + + var inst string + var isExecForm bool + + parts := strings.SplitN(rawInst, " ", 2) + + if len(parts) != 2 { + log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - unexpected number of parts - %v", rawInst) + return "", false, errors.New("inst args format - unexpected number of parts") + } + + withArgs := strings.TrimSpace(parts[1]) + argNumStr := parts[0][1:] + argNum, err := strconv.Atoi(argNumStr) + if err != nil { + log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - malformed number of ARGs - %v (%v)", rawInst, err) + return "", false, err + } + + withArgsArray, err := shlex.Split(withArgs) + if err != nil { + log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - malformed - %v (%v)", rawInst, err) + return "", false, err + } + + if len(withArgsArray) <= argNum { + log.Infof("ReverseDockerfileFromHistory - RUN with ARGs - malformed - %v (%v)", rawInst, err) + return "", false, errors.New("inst args format - not enough args") + } + + rawInstParts := withArgsArray[argNum:] + + if detectRawShellForm(rawInstParts) { + isExecForm = false + rawInstParts = rawInstParts[2:] + + inst = fmt.Sprintf("RUN %s", strings.Join(rawInstParts, " ")) + inst = strings.TrimSpace(inst) + } else { + isExecForm = true + + exec, err := execify(rawInstParts) + if err == nil { + inst = exec + } + } + + return inst, isExecForm, nil +} + +// processAsUncaughtInst attempts to process the given instruction as an +// otherwise uncaught standard instruction. If the instruction is determined +// not to be an uncaught instruction, an error is returned. +func processAsUncaughtInst(rawInst string) (string, bool, error) { + if hasInstructionPrefix(rawInst) { + return rawInst, false, nil + } + + return "", false, errors.New("not an uncaught instruction") +} + +// processAsExecRun recieves a raw instruction, implicitly encodes it as +// a RUN instruction, and returns that encoded instruction in exec format +func processAsExecRun(rawInst string) (string, bool) { + inst := instPrefixRun + rawInst + + if outArray, err := shlex.Split(rawInst); err == nil { + if exec, eErr := execify(outArray); eErr == nil { + inst = exec + } + } + + return inst, true +} + +// execify encodes an array of strings as an exec format RUN instruction +func execify(parts []string) (string, error) { + var outJson bytes.Buffer + + encoder := json.NewEncoder(&outJson) + encoder.SetEscapeHTML(false) + err := encoder.Encode(parts) + if err != nil { + return "", err + } + + return fmt.Sprintf("RUN %s", outJson.String()), nil +} + +// detectRawShellForm determines if an array of strings can be treated as +// the arguments for a shell format RUN instruction +func detectRawShellForm(parts []string) bool { + return len(parts) > 2 && + parts[0] == defaultRunInstShell && + parts[1] == "-c" +} + // SaveDockerfileData saves the Dockerfile information to a file func SaveDockerfileData(fatImageDockerfileLocation string, fatImageDockerfileLines []string) error { var data bytes.Buffer diff --git a/pkg/docker/dockerfile/reverse/reverse_test.go b/pkg/docker/dockerfile/reverse/reverse_test.go index 0f5922a53a..36951d6688 100644 --- a/pkg/docker/dockerfile/reverse/reverse_test.go +++ b/pkg/docker/dockerfile/reverse/reverse_test.go @@ -2,10 +2,13 @@ package reverse import ( "fmt" + "os" + "strings" "testing" "time" dockerclient "github.com/fsouza/go-dockerclient" + log "github.com/sirupsen/logrus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -108,3 +111,209 @@ func TestHealthCheck(t *testing.T) { assert.Equal(t, testData.reconstructedHealthcheck, res) } } + +func TestProcessRawInst(t *testing.T) { + t.Run("when the instruction is in args format", func(t *testing.T) { + t.Run("but does not have arguments", func(t *testing.T) { + raw := "|/bin/sh" + + t.Run("it treats the instruction as a pure RUN", func(t *testing.T) { + expected := "RUN [\"|/bin/sh\"]\n" + result, _ := processRawInst(raw) + + assert.Equal(t, expected, result) + }) + + t.Run("it returns true as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.True(t, ief) + }) + + t.Run("it logs the bad part count", func(t *testing.T) { + logged := withFakeLogger(func() { + processRawInst(raw) + }) + + assert.True(t, logged.Match("unexpected number of parts")) + }) + }) + + t.Run("but contains bad shell syntax", func(t *testing.T) { + raw := "|1 /bin/sh -c echo '" + + t.Run("it treats the instruction as a pure RUN", func(t *testing.T) { + expected := "RUN |1 /bin/sh -c echo '" + result, _ := processRawInst(raw) + + assert.Equal(t, expected, result) + }) + + t.Run("it returns true as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.True(t, ief) + }) + + t.Run("it logs the bad command", func(t *testing.T) { + logged := withFakeLogger(func() { + processRawInst(raw) + }) + + assert.True(t, logged.Match("malformed - ")) + }) + }) + + t.Run("but contains too few args", func(t *testing.T) { + raw := "|4 /bin/sh -c echo foo" + + t.Run("it treats the instruction as a pure RUN", func(t *testing.T) { + expected := "RUN [\"|4\",\"/bin/sh\",\"-c\",\"echo\",\"foo\"]\n" + result, _ := processRawInst(raw) + + assert.Equal(t, expected, result) + }) + + t.Run("it returns true as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.True(t, ief) + }) + + t.Run("it logs the bad command", func(t *testing.T) { + logged := withFakeLogger(func() { + processRawInst(raw) + }) + + assert.True(t, logged.Match("malformed - ")) + }) + }) + + t.Run("but does not have an argnumment number", func(t *testing.T) { + raw := "|/bin/sh -c echo 'blah'" + + t.Run("it treats the instruction as a pure RUN", func(t *testing.T) { + expected := "RUN [\"|/bin/sh\",\"-c\",\"echo\",\"blah\"]\n" + result, _ := processRawInst(raw) + + assert.Equal(t, expected, result) + }) + + t.Run("it returns true as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.True(t, ief) + }) + + t.Run("it logs the bad arg count", func(t *testing.T) { + logged := withFakeLogger(func() { + processRawInst(raw) + }) + + assert.True(t, logged.Match("malformed number of ARGs")) + }) + }) + + t.Run("and is a well-formed shell format instruction", func(t *testing.T) { + raw := "|0 /bin/sh -c echo 'blah'" + + t.Run("it processes the instruction as a shell command", func(t *testing.T) { + expected := "RUN echo blah" + result, _ := processRawInst(raw) + + assert.Equal(t, expected, result) + }) + + t.Run("it returns false as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.False(t, ief) + }) + }) + + t.Run("and is a well-formed raw instruction", func(t *testing.T) { + raw := "|0 echo 'blah'" + + t.Run("it processes the instruction as an exec command", func(t *testing.T) { + expected := "RUN [\"echo\",\"blah\"]\n" + result, _ := processRawInst(raw) + + assert.Equal(t, expected, result) + }) + + t.Run("it returns true as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.True(t, ief) + }) + }) + }) + + t.Run("when the instruction has an uncaught instruction prefix", func(t *testing.T) { + raw := "EXPOSE 8675" + + t.Run("it passes the instruction back unchanged", func(t *testing.T) { + result, _ := processRawInst(raw) + + assert.Equal(t, raw, result) + }) + + t.Run("it returns false as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.False(t, ief) + }) + }) + + t.Run("when the instruction is raw text", func(t *testing.T) { + raw := "foo bar" + + t.Run("it is treated as an exec format RUN", func(t *testing.T) { + expected := "RUN [\"foo\",\"bar\"]\n" + result, _ := processRawInst(raw) + + assert.Equal(t, expected, result) + }) + + t.Run("it returns true as the exec form", func(t *testing.T) { + _, ief := processRawInst(raw) + + assert.True(t, ief) + }) + }) + +} + +type fakeLogger struct { + Lines []string +} + +func (l *fakeLogger) Write(msg []byte) (int, error) { + if l.Lines == nil { + l.Lines = []string{} + } + + l.Lines = append(l.Lines, string(msg)) + + return len(msg), nil +} + +func (l *fakeLogger) Match(substr string) bool { + for _, line := range l.Lines { + if strings.Contains(line, substr) { + return true + } + } + + return false +} + +func withFakeLogger(proc func()) *fakeLogger { + logger := &fakeLogger{} + log.SetOutput(logger) + + proc() + + log.SetOutput(os.Stderr) + return logger +}