diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2fae92dcbfa..6cbaa008500 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,13 +11,25 @@ on: env: GO_VERSION: 1.23.x + SHORT_TIMEOUT: 5 jobs: - lint: - runs-on: ubuntu-24.04 - timeout-minutes: 20 + lint-go: + timeout-minutes: "${{ env.SHORT_TIMEOUT }}" + runs-on: "${{ matrix.os }}" + strategy: + matrix: + include: + - os: ubuntu-24.04 + goos: linux + - os: ubuntu-24.04 + goos: freebsd + - os: windows-2022 + goos: windows + env: + GOOS: "${{ matrix.goos }}" steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - uses: actions/setup-go@v5 @@ -26,13 +38,25 @@ jobs: check-latest: true cache: true - name: golangci-lint - uses: golangci/golangci-lint-action@v6.1.1 + uses: golangci/golangci-lint-action@v6 with: - version: v1.60.1 args: --verbose - - name: yamllint-lint + + lint-other: + timeout-minutes: "${{ env.SHORT_TIMEOUT }}" + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + - uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + check-latest: true + cache: true + - name: yaml run: make lint-yaml - - name: shellcheck + - name: shell run: make lint-shell - name: go imports ordering run: | @@ -40,10 +64,18 @@ jobs: make lint-imports test-unit: - runs-on: ubuntu-24.04 - timeout-minutes: 20 + timeout-minutes: "${{ env.SHORT_TIMEOUT }}" + runs-on: "${{ matrix.os }}" + strategy: + matrix: + include: + - os: ubuntu-24.04 + goos: linux + # FIXME: currently disabled as a lot more work is required to make these tests pass on windows + #- os: windows-2022 + # goos: windows steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - uses: actions/setup-go@v5 @@ -52,7 +84,7 @@ jobs: check-latest: true cache: true - name: "Run unit tests" - run: go test -v ./pkg/... + run: make test-unit test-integration: runs-on: "${{ matrix.runner }}" @@ -78,7 +110,7 @@ jobs: UBUNTU_VERSION: "${{ matrix.ubuntu }}" CONTAINERD_VERSION: "${{ matrix.containerd }}" steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - name: "Prepare integration test environment" @@ -99,12 +131,11 @@ jobs: docker run --privileged --rm tonistiigi/binfmt --install linux/arm64 docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7 - name: "Run integration tests" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - command: docker run -t --rm --privileged test-integration + run: | + docker run -t --rm --privileged test-integration ./test-integration.sh + - name: "Run integration tests (flaky)" + run: | + docker run -t --rm --privileged test-integration ./test-integration.sh -test.only-flaky test-integration-ipv6: runs-on: "ubuntu-${{ matrix.ubuntu }}" @@ -120,7 +151,7 @@ jobs: UBUNTU_VERSION: "${{ matrix.ubuntu }}" CONTAINERD_VERSION: "${{ matrix.containerd }}" steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - name: Enable ipv4 and ipv6 forwarding @@ -133,7 +164,7 @@ jobs: echo '{"ipv6": true, "fixed-cidr-v6": "2001:db8:1::/64", "experimental": true, "ip6tables": true}' | sudo tee /etc/docker/daemon.json sudo systemctl restart docker - name: "Prepare integration test environment" - run: docker build -t test-integration-ipv6 --target test-integration-ipv6 --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} . + run: docker build -t test-integration --target test-integration --build-arg UBUNTU_VERSION=${UBUNTU_VERSION} --build-arg CONTAINERD_VERSION=${CONTAINERD_VERSION} . - name: "Remove snap loopback devices (conflicts with our loopback devices in TestRunDevice)" run: | sudo systemctl disable --now snapd.service snapd.socket @@ -151,16 +182,11 @@ jobs: docker run --privileged --rm tonistiigi/binfmt --install linux/arm/v7 - name: "Run integration tests" # The nested IPv6 network inside docker and qemu is complex and needs a bunch of sysctl config. - # Therefore it's hard to debug why the IPv6 tests fail in such an isolation layer. + # Therefore, it's hard to debug why the IPv6 tests fail in such an isolation layer. # On the other side, using the host network is easier at configuration. # Besides, each job is running on a different instance, which means using host network here # is safe and has no side effects on others. - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - command: docker run --network host -t --rm --privileged test-integration-ipv6 + run: docker run --network host -t --rm --privileged test-integration ./test-integration.sh -test.only-ipv6 test-integration-rootless: runs-on: "ubuntu-${{ matrix.ubuntu }}" @@ -207,7 +233,7 @@ jobs: } EOT sudo systemctl restart apparmor.service - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - name: "Register QEMU (tonistiigi/binfmt)" @@ -230,12 +256,9 @@ jobs: fi echo "WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622}" >> "$GITHUB_ENV" - name: "Test (network driver=slirp4netns, port driver=builtin)" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - command: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} + run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./test-integration.sh + - name: "Test (network driver=slirp4netns, port driver=builtin) (flaky)" + run: docker run -t --rm --privileged -e WORKAROUND_ISSUE_622=${WORKAROUND_ISSUE_622} ${TEST_TARGET} /test-integration-rootless.sh ./test-integration.sh -test.only-flaky cross: runs-on: ubuntu-24.04 @@ -244,7 +267,7 @@ jobs: matrix: go-version: ["1.22.x", "1.23.x"] steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - uses: actions/setup-go@v5 @@ -259,7 +282,7 @@ jobs: runs-on: ubuntu-24.04 timeout-minutes: 45 steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - uses: actions/setup-go@v5 @@ -284,22 +307,13 @@ jobs: - name: "Prepare integration test environment" run: | sudo apt-get install -y expect + go install -v gotest.tools/gotestsum@v1 - name: "Ensure that the integration test suite is compatible with Docker" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon + run: ./test-integration.sh -test.target=docker - name: "Ensure that the IPv6 integration test suite is compatible with Docker" - uses: nick-fields/retry@v3 - with: - timeout_minutes: 30 - max_attempts: 2 - retry_on: error - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - command: go test -p 1 -timeout 20m -v -exec sudo ./cmd/nerdctl/... -args -test.target=docker -test.allow-kill-daemon -test.only-ipv6 + run: ./test-integration.sh -test.target=docker -test.only-ipv6 + - name: "Ensure that the integration test suite is compatible with Docker (flaky only)" + run: ./test-integration.sh -test.target=docker -test.only-flaky test-integration-windows: runs-on: windows-2022 @@ -308,7 +322,7 @@ jobs: run: shell: bash steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 with: fetch-depth: 1 - uses: actions/setup-go@v5 @@ -317,7 +331,8 @@ jobs: cache: true check-latest: true - run: go install ./cmd/nerdctl - - uses: actions/checkout@v4.2.1 + - run: go install -v gotest.tools/gotestsum@v1 + - uses: actions/checkout@v4 with: repository: containerd/containerd ref: v1.7.22 @@ -330,10 +345,12 @@ jobs: env: ctrdVersion: 1.7.22 run: powershell hack/configure-windows-ci.ps1 - # TODO: Run unit tests - name: "Run integration tests" - # See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization - run: go test -p 1 -v ./cmd/nerdctl/... + run: | + ./test-integration.sh + - name: "Run integration tests (flaky)" + run: | + ./test-integration.sh -test.only-flaky test-integration-freebsd: name: FreeBSD @@ -342,7 +359,7 @@ jobs: timeout-minutes: 20 steps: - - uses: actions/checkout@v4.2.1 + - uses: actions/checkout@v4 - uses: actions/cache@v4 with: path: /root/.vagrant.d diff --git a/Dockerfile b/Dockerfile index 7e7d8b8554c..d986978c635 100644 --- a/Dockerfile +++ b/Dockerfile @@ -281,7 +281,8 @@ ARG DEBIAN_FRONTEND=noninteractive # `expect` package contains `unbuffer(1)`, which is used for emulating TTY for testing RUN apt-get update -qq && apt-get install -qq --no-install-recommends \ expect \ - git + git \ + make COPY --from=goversion /GOVERSION /GOVERSION ARG TARGETARCH RUN curl -fsSL --proto '=https' --tlsv1.2 https://golang.org/dl/$(cat /GOVERSION).linux-${TARGETARCH:-amd64}.tar.gz | tar xzvC /usr/local @@ -318,8 +319,6 @@ RUN curl -o nydus-static.tgz -fsSL --proto '=https' --tlsv1.2 "https://github.co tar xzf nydus-static.tgz && \ mv nydus-static/nydus-image nydus-static/nydusd nydus-static/nydusify /usr/bin/ && \ rm nydus-static.tgz -CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"] FROM test-integration AS test-integration-rootless # Install SSH for creating systemd user session. @@ -342,17 +341,10 @@ RUN systemctl disable test-integration-ipfs-offline VOLUME /home/rootless/.local/share COPY ./Dockerfile.d/test-integration-rootless.sh / RUN chmod a+rx /test-integration-rootless.sh -CMD ["/test-integration-rootless.sh", \ - "gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon"] # test for CONTAINERD_ROOTLESS_ROOTLESSKIT_PORT_DRIVER=slirp4netns FROM test-integration-rootless AS test-integration-rootless-port-slirp4netns COPY ./Dockerfile.d/home_rootless_.config_systemd_user_containerd.service.d_port-slirp4netns.conf /home/rootless/.config/systemd/user/containerd.service.d/port-slirp4netns.conf RUN chown -R rootless:rootless /home/rootless/.config -FROM test-integration AS test-integration-ipv6 -CMD ["gotestsum", "--format=testname", "--rerun-fails=2", "--packages=./cmd/nerdctl/...", \ - "--", "-timeout=60m", "-p", "1", "-args", "-test.allow-kill-daemon", "-test.only-ipv6"] - FROM base AS demo diff --git a/Makefile b/Makefile index 0831c640047..ae4e18c94f3 100644 --- a/Makefile +++ b/Makefile @@ -86,6 +86,9 @@ lint-yaml: lint-shell: $(call recursive_wildcard,$(MAKEFILE_DIR)/,*.sh) shellcheck -a -x $^ +test-unit: + go test -v $(MAKEFILE_DIR)/pkg/... + binaries: nerdctl install: diff --git a/cmd/nerdctl/container/container_create_linux_test.go b/cmd/nerdctl/container/container_create_linux_test.go index 56b71ce854a..e0823ec3af0 100644 --- a/cmd/nerdctl/container/container_create_linux_test.go +++ b/cmd/nerdctl/container/container_create_linux_test.go @@ -187,7 +187,11 @@ func TestCreateWithTty(t *testing.T) { func TestIssue2993(t *testing.T) { testCase := nerdtest.Setup() - testCase.Require = test.Not(nerdtest.Docker) + testCase.Require = test.Require( + test.Not(nerdtest.Docker), + // Maybe the use of a custom data root has an impact? + nerdtest.IsFlaky("https://github.com/containerd/nerdctl/issues/3518"), + ) const ( containersPathKey = "containersPath" diff --git a/cmd/nerdctl/image/image_history_test.go b/cmd/nerdctl/image/image_history_test.go index 1d2e07e97fc..5086b3edb9f 100644 --- a/cmd/nerdctl/image/image_history_test.go +++ b/cmd/nerdctl/image/image_history_test.go @@ -76,6 +76,9 @@ func TestImageHistory(t *testing.T) { test.Not(test.Windows), // XXX Currently, history does not work on non-native platform, so, we cannot test reliably on other platforms test.Arm64, + // XXX this here is very likely breaking other tests because of one of the variants of + // https://github.com/containerd/nerdctl/issues/3513 so, making it private to try avoid that + nerdtest.Private, ), Setup: func(data test.Data, helpers test.Helpers) { helpers.Ensure("pull", "--platform", "linux/arm64", testutil.CommonImage) diff --git a/cmd/nerdctl/image/image_inspect_test.go b/cmd/nerdctl/image/image_inspect_test.go index 93278de9294..68ee93b0534 100644 --- a/cmd/nerdctl/image/image_inspect_test.go +++ b/cmd/nerdctl/image/image_inspect_test.go @@ -35,7 +35,7 @@ func TestImageInspectSimpleCases(t *testing.T) { testCase := &test.Case{ Description: "TestImageInspect", Setup: func(data test.Data, helpers test.Helpers) { - helpers.Ensure("pull", testutil.CommonImage) + helpers.Ensure("pull", "--quiet", testutil.CommonImage) }, SubTests: []*test.Case{ { diff --git a/pkg/cmd/builder/build_test.go b/pkg/cmd/builder/build_test.go index 954738cdbc6..97c89cd8aeb 100644 --- a/pkg/cmd/builder/build_test.go +++ b/pkg/cmd/builder/build_test.go @@ -18,6 +18,7 @@ package builder import ( "reflect" + "runtime" "testing" specs "github.com/opencontainers/image-spec/specs-go/v1" @@ -213,6 +214,10 @@ func TestParseBuildctlArgsForOCILayout(t *testing.T) { }, } + if runtime.GOOS == "windows" { + tests[1].expectedErr = "The system cannot find the path specified." + } + for _, test := range tests { t.Run(test.name, func(t *testing.T) { args, err := parseBuildContextFromOCILayout(test.ociLayoutName, test.ociLayoutPath) diff --git a/pkg/mountutil/mountutil_test.go b/pkg/mountutil/mountutil_test.go index 619f4269c66..85f5ee3bff9 100644 --- a/pkg/mountutil/mountutil_test.go +++ b/pkg/mountutil/mountutil_test.go @@ -34,4 +34,5 @@ func (mv *MockVolumeStore) CreateWithoutLock(name string, labels []string) (*nat return &native.Volume{Name: "test_volume", Mountpoint: "/test/volume"}, nil } +//nolint:unused var mockVolumeStore = &MockVolumeStore{} diff --git a/pkg/netutil/netutil.go b/pkg/netutil/netutil.go index 81badc8edf1..1c71e512f6c 100644 --- a/pkg/netutil/netutil.go +++ b/pkg/netutil/netutil.go @@ -619,6 +619,7 @@ func structToMap(in interface{}) (map[string]interface{}, error) { } // ParseMTU parses the mtu option +// nolint:unused func parseMTU(mtu string) (int, error) { if mtu == "" { return 0, nil // default diff --git a/pkg/ocihook/ocihook.go b/pkg/ocihook/ocihook.go index ffa98041611..2ae963c997a 100644 --- a/pkg/ocihook/ocihook.go +++ b/pkg/ocihook/ocihook.go @@ -39,6 +39,7 @@ import ( "github.com/containerd/nerdctl/v2/pkg/bypass4netnsutil" "github.com/containerd/nerdctl/v2/pkg/dnsutil/hostsstore" "github.com/containerd/nerdctl/v2/pkg/labels" + "github.com/containerd/nerdctl/v2/pkg/lockutil" "github.com/containerd/nerdctl/v2/pkg/namestore" "github.com/containerd/nerdctl/v2/pkg/netutil" "github.com/containerd/nerdctl/v2/pkg/netutil/nettype" @@ -92,6 +93,27 @@ func Run(stdin io.Reader, stderr io.Writer, event, dataStore, cniPath, cniNetcon } }() + // FIXME: CNI plugins are not safe to use concurrently + // See + // https://github.com/containerd/nerdctl/issues/3518 + // https://github.com/containerd/nerdctl/issues/2908 + // and likely others + // Fixing these issues would require a lot of work, possibly even stopping using individual cni binaries altogether + // or at least being very mindful in what operation we call inside CNIEnv at what point, with filesystem locking. + // This below is a stopgap solution that just enforces a global lock + // Note this here is probably not enough, as concurrent CNI operations may happen outside of the scope of ocihooks + // through explicit calls to Remove, etc. + lockDir := filepath.Join(dataStore, "cnilock") + err = os.MkdirAll(lockDir, 0o777) + if err != nil { + return err + } + lock, err := lockutil.Lock(lockDir) + if err != nil { + return err + } + defer lockutil.Unlock(lock) + opts, err := newHandlerOpts(&state, dataStore, cniPath, cniNetconfPath) if err != nil { return err diff --git a/pkg/store/filestore_test.go b/pkg/store/filestore_test.go index 2496b96cbb1..c39fa749b44 100644 --- a/pkg/store/filestore_test.go +++ b/pkg/store/filestore_test.go @@ -169,10 +169,11 @@ func TestFileStoreConcurrent(t *testing.T) { go func() { lErr := tempStore.WithLock(func() error { - err := tempStore.Set([]byte("routine 1"), "concurrentkey") + // Windows does not allow files starting with con + err := tempStore.Set([]byte("routine 1"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "routine 1") return nil @@ -183,10 +184,10 @@ func TestFileStoreConcurrent(t *testing.T) { go func() { time.Sleep(500 * time.Millisecond) lErr := tempStore.WithLock(func() error { - err := tempStore.Set([]byte("routine 2"), "concurrentkey") + err := tempStore.Set([]byte("routine 2"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "routine 2") return nil @@ -195,10 +196,10 @@ func TestFileStoreConcurrent(t *testing.T) { }() lErr := tempStore.WithLock(func() error { - err := tempStore.Set([]byte("main routine 1"), "concurrentkey") + err := tempStore.Set([]byte("main routine 1"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "main routine 1") return nil @@ -208,10 +209,10 @@ func TestFileStoreConcurrent(t *testing.T) { time.Sleep(750 * time.Millisecond) lErr = tempStore.WithLock(func() error { - err := tempStore.Set([]byte("main routine 2"), "concurrentkey") + err := tempStore.Set([]byte("main routine 2"), "c0ncurrentkey") assert.NilError(t, err, "writing should not error") time.Sleep(1 * time.Second) - result, err := tempStore.Get("concurrentkey") + result, err := tempStore.Get("c0ncurrentkey") assert.NilError(t, err, "reading should not error") assert.Assert(t, string(result) == "main routine 2") return nil diff --git a/pkg/testutil/testutil.go b/pkg/testutil/testutil.go index 4287db9b305..46f4459934b 100644 --- a/pkg/testutil/testutil.go +++ b/pkg/testutil/testutil.go @@ -800,6 +800,9 @@ func newBase(t *testing.T, ns string, ipv6Compatible bool, kubernetesCompatible } else if !base.EnableKubernetes && base.KubernetesCompatible { t.Skip("runner skips Kubernetes compatible tests in the non-Kubernetes environment") } + if !GetFlakyEnvironment() && !GetEnableKubernetes() && !GetEnableIPv6() { + t.Skip("legacy tests are considered flaky by default and are skipped unless in the flaky environment") + } var err error switch base.Target { case Nerdctl: diff --git a/pkg/testutil/testutil_freebsd.go b/pkg/testutil/testutil_freebsd.go index 9cbded4e258..5c0fb9ba293 100644 --- a/pkg/testutil/testutil_freebsd.go +++ b/pkg/testutil/testutil_freebsd.go @@ -32,6 +32,7 @@ const ( var ( AlpineImage = mirrorOf("alpine:3.13") NginxAlpineImage = mirrorOf("nginx:1.19-alpine") + GolangImage = mirrorOf("golang:1.18") ) func mirrorOf(s string) string { diff --git a/test-integration.sh b/test-integration.sh new file mode 100755 index 00000000000..cc319f22f59 --- /dev/null +++ b/test-integration.sh @@ -0,0 +1,36 @@ +#!/usr/bin/env bash + +# Copyright The containerd Authors. + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# shellcheck disable=SC2034,SC2015 +set -o errexit -o errtrace -o functrace -o nounset -o pipefail +root="$(cd "$(dirname "${BASH_SOURCE[0]:-$PWD}")" 2>/dev/null 1>&2 && pwd)" +readonly root +readonly timeout="60m" + +# See https://github.com/containerd/nerdctl/blob/main/docs/testing/README.md#about-parallelization +args=(--format=testname --jsonfile /tmp/test-integration.log --packages="$root"/cmd/nerdctl/...) + +for arg in "$@"; do + if [ "$arg" == "-test.only-flaky" ]; then + args+=("--rerun-fails=2") + break + fi +done + +gotestsum "${args[@]}" -- -timeout="$timeout" -p 1 -args -test.allow-kill-daemon "$@" + +echo "These are the tests that took more than 10 seconds:" +gotestsum tool slowest --threshold 10s --jsonfile /tmp/test-integration.log