diff --git a/.bazelrc b/.bazelrc index 29b722badc4e..c25d7bdd5e28 100644 --- a/.bazelrc +++ b/.bazelrc @@ -45,7 +45,7 @@ query --ui_event_filters=-DEBUG clean --ui_event_filters=-WARNING info --ui_event_filters=-WARNING -build:race --@io_bazel_rules_go//go/config:race "--test_env=GORACE=halt_on_error=1 log_path=stdout" --test_sharding_strategy=disabled +build:race --@io_bazel_rules_go//go/config:race "--test_env=GORACE=halt_on_error=1 log_path=stdout" test:test --test_env=TZ= # Note: these timeout values are used indirectly in `build/teamcity/cockroach/ci/tests/testrace_impl.sh`. # If those values are updated, the script should be updated accordingly. @@ -127,6 +127,7 @@ build:engflowbase --extra_execution_platforms=//build/toolchains:cross_linux build:engflowbase --remote_upload_local_results=false build:engflowbase --remote_download_toplevel test:engflowbase --test_env=REMOTE_EXEC=1 +test:engflowbase --test_env=GOTRACEBACK=all build:engflow --config=engflowbase build:engflow --remote_cache=grpcs://tanzanite.cluster.engflow.com build:engflow --remote_executor=grpcs://tanzanite.cluster.engflow.com diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6c2c05836cd0..e714d3b78898 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -530,6 +530,7 @@ /pkg/scheduledjobs/ @cockroachdb/jobs-prs @cockroachdb/disaster-recovery /pkg/security/ @cockroachdb/prodsec @cockroachdb/server-prs /pkg/security/clientsecopts/ @cockroachdb/sql-foundations @cockroachdb/prodsec +/pkg/ccl/securityccl/ @cockroachdb/prodsec #!/pkg/settings/ @cockroachdb/unowned /pkg/spanconfig/ @cockroachdb/kv-prs @cockroachdb/sql-foundations /pkg/spanconfig/spanconfigbounds/ @cockroachdb/sql-foundations diff --git a/WORKSPACE b/WORKSPACE index b4ea4c1f3d70..ba448a30e701 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -645,4 +645,14 @@ go_download_sdk( }, urls = ["https://storage.googleapis.com/public-bazel-artifacts/go/20231019-214851/{}"], version = "1.21.3fips", + # In the golang-fips toolchain, 
FIPS-ready crypto packages are used by default, regardless of build tags. + # The boringcrypto experiment does almost nothing in this toolchain, but it does enable the use of the + # crypto/boring.Enabled() method which is the only application-visible way to inspect whether FIPS mode + # is working correctly. + # + # The golang-fips toolchain also supports an experiment `strictfipsruntime` which causes a panic at startup + # if the kernel is in FIPS mode but OpenSSL cannot be loaded. We do not currently use this experiment + # because A) we also want to detect the case when the kernel is not in FIPS mode and B) we want to be + # able to provide additional diagnostic information such as the expected version of OpenSSL. + experiments = ["boringcrypto"], ) diff --git a/build/teamcity/cockroach/ci/tests/testrace_impl.sh b/build/teamcity/cockroach/ci/tests/testrace_impl.sh index 0b8f1346646d..cc23da9ceeab 100755 --- a/build/teamcity/cockroach/ci/tests/testrace_impl.sh +++ b/build/teamcity/cockroach/ci/tests/testrace_impl.sh @@ -32,6 +32,7 @@ do $(bazel info bazel-bin --config=ci)/pkg/cmd/bazci/bazci_/bazci -- test --config=ci --config=race "$test" \ --test_env=COCKROACH_LOGIC_TESTS_SKIP=true \ --test_timeout $timeout \ + --test_sharding_strategy=disabled \ --test_env=GOMAXPROCS=8 done done diff --git a/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh b/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh index 567fcedf4160..cced5d559bcb 100755 --- a/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh +++ b/build/teamcity/cockroach/nightlies/pebble_nightly_metamorphic_race_impl.sh @@ -20,6 +20,7 @@ $BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci --formatter=pebble-metamorphic -- test --c @com_github_cockroachdb_pebble//internal/metamorphic:metamorphic_test \ --test_env TC_SERVER_URL=$TC_SERVER_URL \ --test_timeout=14400 '--test_filter=TestMeta$' \ + --test_sharding_strategy=disabled \ --define 
gotags=bazel,invariants \ --run_under "@com_github_cockroachdb_stress//:stress -bazel -shardable-artifacts 'XML_OUTPUT_FILE=$BAZEL_BIN/pkg/cmd/bazci/bazci_/bazci merge-test-xmls' -maxtime 3h -maxfails 1 -timeout 30m -stderr -p 1" \ --test_arg -dir --test_arg $ARTIFACTS_DIR \ diff --git a/build/teamcity/cockroach/nightlies/stress_engflow.sh b/build/teamcity/cockroach/nightlies/stress_engflow.sh index 339be14c5ee1..1d9605236941 100755 --- a/build/teamcity/cockroach/nightlies/stress_engflow.sh +++ b/build/teamcity/cockroach/nightlies/stress_engflow.sh @@ -6,4 +6,6 @@ export EXTRA_TEST_ARGS="--config use_ci_timeouts" THIS_DIR=$(cd "$(dirname "$0")" && pwd) +unset GITHUB_API_TOKEN + $THIS_DIR/stress_engflow_impl.sh diff --git a/build/teamcity/cockroach/nightlies/stress_engflow_deadlock.sh b/build/teamcity/cockroach/nightlies/stress_engflow_deadlock.sh index 640d7883539b..f7e77915ca29 100755 --- a/build/teamcity/cockroach/nightlies/stress_engflow_deadlock.sh +++ b/build/teamcity/cockroach/nightlies/stress_engflow_deadlock.sh @@ -2,8 +2,11 @@ set -euo pipefail -export EXTRA_TEST_ARGS="--define gotags=bazel,gss,deadlock" +export RUNS_PER_TEST=3 +export EXTRA_TEST_ARGS="--define gotags=bazel,gss,deadlock --test_timeout=1800,3600,5395,5395" THIS_DIR=$(cd "$(dirname "$0")" && pwd) +unset GITHUB_API_TOKEN + $THIS_DIR/stress_engflow_impl.sh diff --git a/build/teamcity/cockroach/nightlies/stress_engflow_race.sh b/build/teamcity/cockroach/nightlies/stress_engflow_race.sh index 5956fe48fcf8..237ea9a3648f 100755 --- a/build/teamcity/cockroach/nightlies/stress_engflow_race.sh +++ b/build/teamcity/cockroach/nightlies/stress_engflow_race.sh @@ -2,9 +2,11 @@ set -euo pipefail -export RUNS_PER_TEST=5 -export EXTRA_TEST_ARGS="--@io_bazel_rules_go//go/config:race --test_env=GORACE=halt_on_error=1" +export RUNS_PER_TEST=3 +export EXTRA_TEST_ARGS="--config=race --test_timeout=1800,3600,5395,5395" THIS_DIR=$(cd "$(dirname "$0")" && pwd) +unset GITHUB_API_TOKEN + 
$THIS_DIR/stress_engflow_impl.sh diff --git a/dev b/dev index 26b16dd984a3..0c6e90239821 100755 --- a/dev +++ b/dev @@ -8,7 +8,7 @@ fi set -euo pipefail # Bump this counter to force rebuilding `dev` on all machines. -DEV_VERSION=89 +DEV_VERSION=90 THIS_DIR=$(cd "$(dirname "$0")" && pwd) BINARY_DIR=$THIS_DIR/bin/dev-versions diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 2ba242228746..1294efd8a28b 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -895,6 +895,7 @@ GO_TARGETS = [ "//pkg/ccl/pgcryptoccl:pgcryptoccl_test", "//pkg/ccl/schemachangerccl:schemachangerccl", "//pkg/ccl/schemachangerccl:schemachangerccl_test", + "//pkg/ccl/securityccl/fipsccl:fipsccl", "//pkg/ccl/serverccl/adminccl:adminccl_test", "//pkg/ccl/serverccl/diagnosticsccl:diagnosticsccl_test", "//pkg/ccl/serverccl/statusccl:statusccl_test", diff --git a/pkg/ccl/BUILD.bazel b/pkg/ccl/BUILD.bazel index b86474e903d9..a324ae2bb115 100644 --- a/pkg/ccl/BUILD.bazel +++ b/pkg/ccl/BUILD.bazel @@ -21,6 +21,7 @@ go_library( "//pkg/ccl/oidcccl", "//pkg/ccl/partitionccl", "//pkg/ccl/pgcryptoccl", + "//pkg/ccl/securityccl/fipsccl", "//pkg/ccl/storageccl", "//pkg/ccl/storageccl/engineccl", "//pkg/ccl/streamingccl/streamingest", diff --git a/pkg/ccl/ccl_init.go b/pkg/ccl/ccl_init.go index 4a224d5cd10c..ca925de44dd5 100644 --- a/pkg/ccl/ccl_init.go +++ b/pkg/ccl/ccl_init.go @@ -27,6 +27,7 @@ import ( _ "github.com/cockroachdb/cockroach/pkg/ccl/oidcccl" _ "github.com/cockroachdb/cockroach/pkg/ccl/partitionccl" _ "github.com/cockroachdb/cockroach/pkg/ccl/pgcryptoccl" + _ "github.com/cockroachdb/cockroach/pkg/ccl/securityccl/fipsccl" _ "github.com/cockroachdb/cockroach/pkg/ccl/storageccl" _ "github.com/cockroachdb/cockroach/pkg/ccl/storageccl/engineccl" _ "github.com/cockroachdb/cockroach/pkg/ccl/streamingccl/streamingest" diff --git a/pkg/ccl/cliccl/BUILD.bazel b/pkg/ccl/cliccl/BUILD.bazel index f4a428830232..71116c967f9d 100644 --- a/pkg/ccl/cliccl/BUILD.bazel +++ b/pkg/ccl/cliccl/BUILD.bazel @@ -20,16 
+20,19 @@ go_library( "//pkg/base", "//pkg/ccl/baseccl", "//pkg/ccl/cliccl/cliflagsccl", + "//pkg/ccl/securityccl/fipsccl", "//pkg/ccl/sqlproxyccl", "//pkg/ccl/sqlproxyccl/tenantdirsvr", "//pkg/ccl/storageccl/engineccl/enginepbccl", "//pkg/ccl/utilccl", "//pkg/ccl/workloadccl/cliccl", "//pkg/cli", + "//pkg/cli/clierror", "//pkg/cli/clierrorplus", "//pkg/cli/cliflagcfg", "//pkg/cli/cliflags", "//pkg/cli/democluster", + "//pkg/cli/exit", "//pkg/storage", "//pkg/storage/enginepb", "//pkg/util/log", @@ -41,7 +44,9 @@ go_library( "@com_github_cockroachdb_errors//oserror", "@com_github_cockroachdb_pebble//vfs", "@com_github_cockroachdb_redact//:redact", + "@com_github_olekukonko_tablewriter//:tablewriter", "@com_github_spf13_cobra//:cobra", + "@com_github_spf13_pflag//:pflag", ], ) diff --git a/pkg/ccl/cliccl/debug.go b/pkg/ccl/cliccl/debug.go index 3fa0464cef8e..22be71676a43 100644 --- a/pkg/ccl/cliccl/debug.go +++ b/pkg/ccl/cliccl/debug.go @@ -14,12 +14,14 @@ import ( "fmt" "os" "path/filepath" + "runtime" "sort" "time" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/ccl/baseccl" "github.com/cockroachdb/cockroach/pkg/ccl/cliccl/cliflagsccl" + "github.com/cockroachdb/cockroach/pkg/ccl/securityccl/fipsccl" "github.com/cockroachdb/cockroach/pkg/ccl/storageccl/engineccl/enginepbccl" "github.com/cockroachdb/cockroach/pkg/cli" "github.com/cockroachdb/cockroach/pkg/cli/clierrorplus" @@ -31,6 +33,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" "github.com/cockroachdb/errors/oserror" + "github.com/olekukonko/tablewriter" "github.com/spf13/cobra" ) @@ -102,12 +105,24 @@ with their env type and encryption settings (if applicable). 
RunE: clierrorplus.MaybeDecorateError(runList), } + checkFipsCmd := &cobra.Command{ + Use: "enterprise-check-fips", + Short: "print diagnostics for FIPS-ready configuration", + Long: ` +Performs various tests of this binary's ability to operate in FIPS-ready +mode in the current environment. +`, + + RunE: clierrorplus.MaybeDecorateError(runCheckFips), + } + // Add commands to the root debug command. // We can't add them to the lists of commands (eg: DebugCmdsForPebble) as cli init() is called before us. cli.DebugCmd.AddCommand(encryptionStatusCmd) cli.DebugCmd.AddCommand(encryptionActiveKeyCmd) cli.DebugCmd.AddCommand(encryptionDecryptCmd) cli.DebugCmd.AddCommand(encryptionRegistryList) + cli.DebugCmd.AddCommand(checkFipsCmd) // Add the encryption flag to commands that need it. // For the encryption-status command. @@ -376,3 +391,44 @@ func getActiveEncryptionkey(dir string) (string, string, error) { return setting.EncryptionType.String(), setting.KeyId, nil } + +func runCheckFips(cmd *cobra.Command, args []string) error { + if runtime.GOOS != "linux" { + return errors.New("FIPS-ready mode is only supported on linux") + } + // Our FIPS-ready deployments have three major requirements: + // 1. This binary is built with the golang-fips toolchain and running on linux + // 2. FIPS mode is enabled in the kernel. + // 3. We can dynamically load the OpenSSL library (which must be the same major version that was present at + // build time). Verifying that the OpenSSL library is FIPS-compliant is outside the scope of this command. 
+ table := tablewriter.NewWriter(os.Stdout) + table.SetBorder(false) + table.SetAlignment(tablewriter.ALIGN_LEFT) + emit := func(label string, status bool, detail string) { + statusSymbol := "❌" + if status { + statusSymbol = "✅" + } + table.Append([]string{label, statusSymbol, detail}) + } + + emit("FIPS-ready build", fipsccl.IsCompileTimeFIPSReady(), "") + buildOpenSSLVersion, soname, err := fipsccl.BuildOpenSSLVersion() + if err == nil { + table.Append([]string{"Build-time OpenSSL Version", "", buildOpenSSLVersion}) + table.Append([]string{"OpenSSL library filename", "", soname}) + } + + isKernelEnabled, err := fipsccl.IsKernelEnabled() + detail := "" + if err != nil { + detail = err.Error() + } + emit("Kernel FIPS mode enabled", isKernelEnabled, detail) + + emit("OpenSSL loaded", fipsccl.IsOpenSSLLoaded(), "") + emit("FIPS ready", fipsccl.IsFIPSReady(), "") + + table.Render() + return nil +} diff --git a/pkg/ccl/cliccl/flags.go b/pkg/ccl/cliccl/flags.go index 58bd3e8d99ba..0c19bd81e0b5 100644 --- a/pkg/ccl/cliccl/flags.go +++ b/pkg/ccl/cliccl/flags.go @@ -9,11 +9,59 @@ package cliccl import ( + "os" + "strconv" + + "github.com/cockroachdb/cockroach/pkg/ccl/securityccl/fipsccl" "github.com/cockroachdb/cockroach/pkg/cli" + "github.com/cockroachdb/cockroach/pkg/cli/clierror" "github.com/cockroachdb/cockroach/pkg/cli/cliflagcfg" "github.com/cockroachdb/cockroach/pkg/cli/cliflags" + "github.com/cockroachdb/cockroach/pkg/cli/exit" + "github.com/cockroachdb/errors" + "github.com/spf13/pflag" ) +type requireFipsFlag bool + +// Type implements the pflag.Value interface. +func (f *requireFipsFlag) Type() string { + return "bool" +} + +// String implements the pflag.Value interface. +func (f *requireFipsFlag) String() string { + return strconv.FormatBool(bool(*f)) +} + +// Set implements the pflag.Value interface. 
+func (f *requireFipsFlag) Set(s string) error { + v, err := strconv.ParseBool(s) + if err != nil { + return err + } + // We implement the logic of this check in the flag setter itself because it + // applies to all commands and we do not have another good way to inject + // this behavior globally (PersistentPreRun functions don't help because + // they are inherited across different levels of the command hierarchy only + // if that level does not have its own hook). + if v && !fipsccl.IsFIPSReady() { + err := errors.WithHint(errors.New("FIPS readiness checks failed"), "Run `cockroach debug enterprise-check-fips` for details") + clierror.OutputError(os.Stderr, err, true, false) + exit.WithCode(exit.UnspecifiedError()) + } + *f = requireFipsFlag(v) + return nil +} + +var _ pflag.Value = (*requireFipsFlag)(nil) + +// IsBoolFlag implements a non-public pflag interface to indicate that this +// flag is used without an explicit value. +func (*requireFipsFlag) IsBoolFlag() bool { + return true +} + func init() { // Multi-tenancy proxy command flags. { @@ -44,4 +92,12 @@ func init() { // Use StringFlagDepth to avoid conflicting with the already registered KVAddrs env var. cliflagcfg.StringFlagDepth(1, f, &testDirectorySvrContext.kvAddrs, cliflags.KVAddrs) }) + + // FIPS verification flags. 
+ cli.RegisterFlags(func() { + cmd := cli.CockroachCmd() + var requireFips = requireFipsFlag(false) + flag := cmd.PersistentFlags().VarPF(&requireFips, "enterprise-require-fips-ready", "", "abort if FIPS readiness checks fail") + flag.NoOptDefVal = "true" + }) } diff --git a/pkg/ccl/logictestccl/testdata/logic_test/fips_ready b/pkg/ccl/logictestccl/testdata/logic_test/fips_ready new file mode 100644 index 000000000000..be6f4294bfa9 --- /dev/null +++ b/pkg/ccl/logictestccl/testdata/logic_test/fips_ready @@ -0,0 +1,14 @@ +subtest fips_ready + +# We do not have the plumbing that would let test cases know whether they are +# running in a fips environment or not so this is just a very basic test to +# make sure that all the registration, oids, etc work properly. +query _ +SELECT crdb_internal.fips_ready() +---- +_ + +user testuser + +statement error pq: crdb_internal\.fips_ready\(\): user testuser does not have VIEWCLUSTERSETTING system privilege +SELECT crdb_internal.fips_ready() diff --git a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go index dba1b80d43e3..0a799ad3904d 100644 --- a/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go +++ b/pkg/ccl/logictestccl/tests/3node-tenant/generated_test.go @@ -2606,6 +2606,13 @@ func TestTenantLogicCCL_crdb_internal_tenant( runCCLLogicTest(t, "crdb_internal_tenant") } +func TestTenantLogicCCL_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestTenantLogicCCL_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/fakedist-disk/BUILD.bazel b/pkg/ccl/logictestccl/tests/fakedist-disk/BUILD.bazel index 2fef49525e31..00ba9ff29601 100644 --- a/pkg/ccl/logictestccl/tests/fakedist-disk/BUILD.bazel +++ b/pkg/ccl/logictestccl/tests/fakedist-disk/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 
6, + shard_count = 7, tags = [ "ccl_test", "cpu:2", diff --git a/pkg/ccl/logictestccl/tests/fakedist-disk/generated_test.go b/pkg/ccl/logictestccl/tests/fakedist-disk/generated_test.go index ed15a067a5b3..c901c45d24e8 100644 --- a/pkg/ccl/logictestccl/tests/fakedist-disk/generated_test.go +++ b/pkg/ccl/logictestccl/tests/fakedist-disk/generated_test.go @@ -78,6 +78,13 @@ func TestLogic_tmp(t *testing.T) { logictest.RunLogicTests(t, logictest.TestServerArgs{}, configIdx, glob) } +func TestCCLLogic_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/fakedist-vec-off/BUILD.bazel b/pkg/ccl/logictestccl/tests/fakedist-vec-off/BUILD.bazel index 731a6dba7e27..92498312b1e4 100644 --- a/pkg/ccl/logictestccl/tests/fakedist-vec-off/BUILD.bazel +++ b/pkg/ccl/logictestccl/tests/fakedist-vec-off/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 6, + shard_count = 7, tags = [ "ccl_test", "cpu:2", diff --git a/pkg/ccl/logictestccl/tests/fakedist-vec-off/generated_test.go b/pkg/ccl/logictestccl/tests/fakedist-vec-off/generated_test.go index 662396114099..eeda3c15e837 100644 --- a/pkg/ccl/logictestccl/tests/fakedist-vec-off/generated_test.go +++ b/pkg/ccl/logictestccl/tests/fakedist-vec-off/generated_test.go @@ -78,6 +78,13 @@ func TestLogic_tmp(t *testing.T) { logictest.RunLogicTests(t, logictest.TestServerArgs{}, configIdx, glob) } +func TestCCLLogic_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/fakedist/BUILD.bazel b/pkg/ccl/logictestccl/tests/fakedist/BUILD.bazel index 6240d6f37d3b..63b47e74f0f6 100644 --- a/pkg/ccl/logictestccl/tests/fakedist/BUILD.bazel +++ 
b/pkg/ccl/logictestccl/tests/fakedist/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 7, + shard_count = 8, tags = [ "ccl_test", "cpu:2", diff --git a/pkg/ccl/logictestccl/tests/fakedist/generated_test.go b/pkg/ccl/logictestccl/tests/fakedist/generated_test.go index 4781d36ae881..740b8da2992f 100644 --- a/pkg/ccl/logictestccl/tests/fakedist/generated_test.go +++ b/pkg/ccl/logictestccl/tests/fakedist/generated_test.go @@ -78,6 +78,13 @@ func TestLogic_tmp(t *testing.T) { logictest.RunLogicTests(t, logictest.TestServerArgs{}, configIdx, glob) } +func TestCCLLogic_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/BUILD.bazel b/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/BUILD.bazel index 779fa2337028..902f07d858a8 100644 --- a/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/BUILD.bazel +++ b/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 6, + shard_count = 7, tags = [ "ccl_test", "cpu:1", diff --git a/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/generated_test.go b/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/generated_test.go index 74fb8bc7ccb8..78c74e88e1d2 100644 --- a/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/generated_test.go +++ b/pkg/ccl/logictestccl/tests/local-legacy-schema-changer/generated_test.go @@ -78,6 +78,13 @@ func TestLogic_tmp(t *testing.T) { logictest.RunLogicTests(t, logictest.TestServerArgs{}, configIdx, glob) } +func TestCCLLogic_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff 
--git a/pkg/ccl/logictestccl/tests/local-mixed-23.1/BUILD.bazel b/pkg/ccl/logictestccl/tests/local-mixed-23.1/BUILD.bazel index 322c935e4c17..76267f75c810 100644 --- a/pkg/ccl/logictestccl/tests/local-mixed-23.1/BUILD.bazel +++ b/pkg/ccl/logictestccl/tests/local-mixed-23.1/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 7, + shard_count = 8, tags = [ "ccl_test", "cpu:1", diff --git a/pkg/ccl/logictestccl/tests/local-mixed-23.1/generated_test.go b/pkg/ccl/logictestccl/tests/local-mixed-23.1/generated_test.go index d6082e807060..5edb77cdcb07 100644 --- a/pkg/ccl/logictestccl/tests/local-mixed-23.1/generated_test.go +++ b/pkg/ccl/logictestccl/tests/local-mixed-23.1/generated_test.go @@ -85,6 +85,13 @@ func TestCCLLogic_changefeed( runCCLLogicTest(t, "changefeed") } +func TestCCLLogic_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/local-mixed-23.2/BUILD.bazel b/pkg/ccl/logictestccl/tests/local-mixed-23.2/BUILD.bazel index f962607f0235..a8c7ebea2699 100644 --- a/pkg/ccl/logictestccl/tests/local-mixed-23.2/BUILD.bazel +++ b/pkg/ccl/logictestccl/tests/local-mixed-23.2/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 6, + shard_count = 7, tags = [ "ccl_test", "cpu:1", diff --git a/pkg/ccl/logictestccl/tests/local-mixed-23.2/generated_test.go b/pkg/ccl/logictestccl/tests/local-mixed-23.2/generated_test.go index 83d04f190498..571b77d39762 100644 --- a/pkg/ccl/logictestccl/tests/local-mixed-23.2/generated_test.go +++ b/pkg/ccl/logictestccl/tests/local-mixed-23.2/generated_test.go @@ -78,6 +78,13 @@ func TestLogic_tmp(t *testing.T) { logictest.RunLogicTests(t, logictest.TestServerArgs{}, configIdx, glob) } +func TestCCLLogic_fips_ready( + t *testing.T, +) 
{ + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/local-vec-off/BUILD.bazel b/pkg/ccl/logictestccl/tests/local-vec-off/BUILD.bazel index 97bcb27684b8..62f452f8d75d 100644 --- a/pkg/ccl/logictestccl/tests/local-vec-off/BUILD.bazel +++ b/pkg/ccl/logictestccl/tests/local-vec-off/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 6, + shard_count = 7, tags = [ "ccl_test", "cpu:1", diff --git a/pkg/ccl/logictestccl/tests/local-vec-off/generated_test.go b/pkg/ccl/logictestccl/tests/local-vec-off/generated_test.go index eae4fccbd6fb..d4243389a683 100644 --- a/pkg/ccl/logictestccl/tests/local-vec-off/generated_test.go +++ b/pkg/ccl/logictestccl/tests/local-vec-off/generated_test.go @@ -78,6 +78,13 @@ func TestLogic_tmp(t *testing.T) { logictest.RunLogicTests(t, logictest.TestServerArgs{}, configIdx, glob) } +func TestCCLLogic_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/logictestccl/tests/local/BUILD.bazel b/pkg/ccl/logictestccl/tests/local/BUILD.bazel index 70300c42c6f5..4956870d306f 100644 --- a/pkg/ccl/logictestccl/tests/local/BUILD.bazel +++ b/pkg/ccl/logictestccl/tests/local/BUILD.bazel @@ -9,7 +9,7 @@ go_test( "//pkg/ccl/logictestccl:testdata", # keep ], exec_properties = {"Pool": "large"}, - shard_count = 20, + shard_count = 21, tags = [ "ccl_test", "cpu:1", diff --git a/pkg/ccl/logictestccl/tests/local/generated_test.go b/pkg/ccl/logictestccl/tests/local/generated_test.go index dd66a1be3062..096c7c496613 100644 --- a/pkg/ccl/logictestccl/tests/local/generated_test.go +++ b/pkg/ccl/logictestccl/tests/local/generated_test.go @@ -120,6 +120,13 @@ func TestCCLLogic_explain_redact( runCCLLogicTest(t, "explain_redact") } +func 
TestCCLLogic_fips_ready( + t *testing.T, +) { + defer leaktest.AfterTest(t)() + runCCLLogicTest(t, "fips_ready") +} + func TestCCLLogic_new_schema_changer( t *testing.T, ) { diff --git a/pkg/ccl/securityccl/fipsccl/BUILD.bazel b/pkg/ccl/securityccl/fipsccl/BUILD.bazel new file mode 100644 index 000000000000..c71c275b068e --- /dev/null +++ b/pkg/ccl/securityccl/fipsccl/BUILD.bazel @@ -0,0 +1,26 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "fipsccl", + srcs = [ + "build_boring.go", # keep + "build_noboring.go", + "fips_linux.go", + "fips_nolinux.go", + "sql.go", + ], + cgo = True, + importpath = "github.com/cockroachdb/cockroach/pkg/ccl/securityccl/fipsccl", + visibility = ["//visibility:public"], + deps = [ + "//pkg/ccl/utilccl", + "//pkg/sql/privilege", + "//pkg/sql/roleoption", + "//pkg/sql/sem/eval", + "//pkg/sql/sem/tree", + "//pkg/sql/sem/volatility", + "//pkg/sql/syntheticprivilege", + "//pkg/sql/types", + "@com_github_cockroachdb_errors//:errors", + ], +) diff --git a/pkg/ccl/securityccl/fipsccl/build_boring.go b/pkg/ccl/securityccl/fipsccl/build_boring.go new file mode 100644 index 000000000000..011928745400 --- /dev/null +++ b/pkg/ccl/securityccl/fipsccl/build_boring.go @@ -0,0 +1,72 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt +// +//go:build boringcrypto + +package fipsccl + +/* +#include <openssl/opensslv.h> + +static unsigned long _fipsccl_openssl_version_number() { + return OPENSSL_VERSION_NUMBER; +} +*/ +import "C" + +import ( + "crypto/boring" + "fmt" +) + +// IsCompileTimeFIPSReady returns true if this binary was built with the correct +// toolchain and options, which is a prerequisite for FIPS-ready mode. 
+// Note that we only support the golang-fips toolchain even though the +// build tag we test for is "boringcrypto". The two are not actually +// compatible because crypto/boring.Enabled is a bool in one and a function +// in the other. +func IsCompileTimeFIPSReady() bool { + return true +} + +// IsOpenSSLLoaded returns true if the OpenSSL library has been found and +// loaded. +func IsOpenSSLLoaded() bool { + return boring.Enabled() +} + +// IsFIPSReady returns true if all of our FIPS readiness checks succeed. +func IsFIPSReady() bool { + // The golang-fips toolchain only attempts to load OpenSSL if the kernel + // fips mode is enabled. Therefore we only need this single check for our + // overall fips-readiness status. We could redundantly call IsBoringBuild + // and IsKernelEnabled, but doing so would risk some divergence between our + // implementation and the toolchain itself so it's better at this time to + // use the single check. + return IsOpenSSLLoaded() +} + +// BuildOpenSSLVersion returns the version number of OpenSSL that was used at +// build time. The first return value is the hex value of the +// OPENSSL_VERSION_NUMBER constant (for example, 10100000 for OpenSSL 1.1 and +// 30000000 for OpenSSL 3.0), and the second is the versioned name of the +// libcrypto.so file. 
+func BuildOpenSSLVersion() (string, string, error) { + buildVersion := uint64(C._fipsccl_openssl_version_number()) + var soname string + // Reference: + // https://github.com/golang-fips/go/blob/7f64529ab80e5d394bb2496e982d6f6e11023902/patches/001-initial-openssl-for-fips.patch#L3476-L3482 + if buildVersion < 0x10100000 { + soname = "libcrypto.so.10" + } else if buildVersion < 0x30000000 { + soname = "libcrypto.so.1.1" + } else { + soname = "libcrypto.so.3" + } + return fmt.Sprintf("%x", buildVersion), soname, nil +} diff --git a/pkg/ccl/securityccl/fipsccl/build_noboring.go b/pkg/ccl/securityccl/fipsccl/build_noboring.go new file mode 100644 index 000000000000..e80044530ff2 --- /dev/null +++ b/pkg/ccl/securityccl/fipsccl/build_noboring.go @@ -0,0 +1,29 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt +// +//go:build !boringcrypto + +package fipsccl + +import "github.com/cockroachdb/errors" + +func IsCompileTimeFIPSReady() bool { + return false +} + +func IsOpenSSLLoaded() bool { + return false +} + +func IsFIPSReady() bool { + return false +} + +func BuildOpenSSLVersion() (string, string, error) { + return "", "", errors.New("openssl support not present") +} diff --git a/pkg/ccl/securityccl/fipsccl/fips_linux.go b/pkg/ccl/securityccl/fipsccl/fips_linux.go new file mode 100644 index 000000000000..6d841573bcbd --- /dev/null +++ b/pkg/ccl/securityccl/fipsccl/fips_linux.go @@ -0,0 +1,34 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt + +package fipsccl + +import ( + "fmt" + "os" + + "github.com/cockroachdb/errors" +) + +const fipsSysctlFilename = "/proc/sys/crypto/fips_enabled" + +// IsKernelEnabled returns true if FIPS mode is enabled in the kernel +// (by reading the crypto.fips_enabled sysctl). +func IsKernelEnabled() (bool, error) { + data, err := os.ReadFile(fipsSysctlFilename) + if err != nil { + return false, err + } + if len(data) == 0 { + return false, errors.New("sysctl file empty") + } + if data[0] == '1' { + return true, nil + } + return false, fmt.Errorf("sysctl value: %q", data) +} diff --git a/pkg/ccl/securityccl/fipsccl/fips_nolinux.go b/pkg/ccl/securityccl/fipsccl/fips_nolinux.go new file mode 100644 index 000000000000..247548b03b25 --- /dev/null +++ b/pkg/ccl/securityccl/fipsccl/fips_nolinux.go @@ -0,0 +1,17 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt +// +//go:build !linux + +package fipsccl + +import "github.com/cockroachdb/errors" + +func IsKernelEnabled() (bool, error) { + return false, errors.New("only supported on linux") +} diff --git a/pkg/ccl/securityccl/fipsccl/sql.go b/pkg/ccl/securityccl/fipsccl/sql.go new file mode 100644 index 000000000000..5b76ca4c53ad --- /dev/null +++ b/pkg/ccl/securityccl/fipsccl/sql.go @@ -0,0 +1,64 @@ +// Copyright 2023 The Cockroach Authors. +// +// Licensed as a CockroachDB Enterprise file under the Cockroach Community +// License (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt + +package fipsccl + +import ( + "context" + + "github.com/cockroachdb/cockroach/pkg/ccl/utilccl" + "github.com/cockroachdb/cockroach/pkg/sql/privilege" + "github.com/cockroachdb/cockroach/pkg/sql/roleoption" + "github.com/cockroachdb/cockroach/pkg/sql/sem/eval" + "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" + "github.com/cockroachdb/cockroach/pkg/sql/sem/volatility" + "github.com/cockroachdb/cockroach/pkg/sql/syntheticprivilege" + "github.com/cockroachdb/cockroach/pkg/sql/types" +) + +func init() { + overload := tree.Overload{ + Types: tree.ParamTypes{}, + ReturnType: tree.FixedReturnType(types.Bool), + Fn: func(ctx context.Context, evalCtx *eval.Context, args tree.Datums) (tree.Datum, error) { + if err := utilccl.CheckEnterpriseEnabled( + evalCtx.Settings, evalCtx.ClusterID, "fips_ready", + ); err != nil { + return nil, err + } + // It's debatable whether we need a permission check here at all. + // It's not very sensitive and is (currently) a very cheap function + // call. However, it's something that regular users should have no + // reason to look at so in the interest of least privilege we put it + // behind the VIEWCLUSTERSETTING privilege. 
+ session := evalCtx.SessionAccessor + isAdmin, err := session.HasAdminRole(ctx) + if err != nil { + return nil, err + } + if !isAdmin { + hasView, err := session.HasRoleOption(ctx, roleoption.VIEWCLUSTERSETTING) + if err != nil { + return nil, err + } + if !hasView { + if err := session.CheckPrivilege(ctx, syntheticprivilege.GlobalPrivilegeObject, privilege.VIEWCLUSTERSETTING); err != nil { + return nil, err + } + } + } + return tree.MakeDBool(tree.DBool(IsFIPSReady())), nil + }, + Class: tree.NormalClass, + Volatility: volatility.Stable, + } + + utilccl.RegisterCCLBuiltin("crdb_internal.fips_ready", + `Returns true if all FIPS readiness checks pass.`, + overload) +} diff --git a/pkg/cli/cli.go b/pkg/cli/cli.go index 7beb18e35f4a..ec1ff3f14f79 100644 --- a/pkg/cli/cli.go +++ b/pkg/cli/cli.go @@ -228,6 +228,11 @@ var cockroachCmd = &cobra.Command{ }, } +// CockroachCmd returns the root cockroach Command object. +func CockroachCmd() *cobra.Command { + return cockroachCmd +} + var workloadCmd = workloadcli.WorkloadCmd(true /* userFacing */) func init() { diff --git a/pkg/cmd/dev/test.go b/pkg/cmd/dev/test.go index a58c739b0bd4..32213de5ff57 100644 --- a/pkg/cmd/dev/test.go +++ b/pkg/cmd/dev/test.go @@ -233,7 +233,7 @@ func (d *dev) test(cmd *cobra.Command, commandLine []string) error { args = append(args, fmt.Sprintf("--local_cpu_resources=%d", numCPUs)) } if race { - args = append(args, "--config=race") + args = append(args, "--config=race", "--test_sharding_strategy=disabled") } if deadlock { goTags = append(goTags, "deadlock") diff --git a/pkg/cmd/dev/testdata/recorderdriven/test b/pkg/cmd/dev/testdata/recorderdriven/test index 4890494d6456..d9e52bc40b56 100644 --- a/pkg/cmd/dev/testdata/recorderdriven/test +++ b/pkg/cmd/dev/testdata/recorderdriven/test @@ -47,7 +47,7 @@ bazel test //pkg/testutils:testutils_test //pkg/util/limit:limit_test //pkg/util dev test pkg/spanconfig --count 5 --race ---- bazel query 'kind(.*_test, pkg/spanconfig:all)' -bazel test 
--config=race //pkg/spanconfig:spanconfig_test --test_env=GOTRACEBACK=all --runs_per_test=5 '--runs_per_test=.*disallowed_imports_test@1' --test_output errors --build_event_binary_file=/tmp/path +bazel test --config=race --test_sharding_strategy=disabled //pkg/spanconfig:spanconfig_test --test_env=GOTRACEBACK=all --runs_per_test=5 '--runs_per_test=.*disallowed_imports_test@1' --test_output errors --build_event_binary_file=/tmp/path dev test pkg/cmd/dev -f TestDataDriven/test --rewrite -v ---- diff --git a/pkg/cmd/dev/testdata/recorderdriven/test.rec b/pkg/cmd/dev/testdata/recorderdriven/test.rec index 9913149d613d..98fc6e62e773 100644 --- a/pkg/cmd/dev/testdata/recorderdriven/test.rec +++ b/pkg/cmd/dev/testdata/recorderdriven/test.rec @@ -79,7 +79,7 @@ bazel query 'kind(.*_test, pkg/spanconfig:all)' ---- //pkg/spanconfig:spanconfig_test -bazel test --config=race //pkg/spanconfig:spanconfig_test --test_env=GOTRACEBACK=all --runs_per_test=5 '--runs_per_test=.*disallowed_imports_test@1' --test_output errors --build_event_binary_file=/tmp/path +bazel test --config=race --test_sharding_strategy=disabled //pkg/spanconfig:spanconfig_test --test_env=GOTRACEBACK=all --runs_per_test=5 '--runs_per_test=.*disallowed_imports_test@1' --test_output errors --build_event_binary_file=/tmp/path ---- bazel query 'kind(.*_test, pkg/cmd/dev:all)' diff --git a/pkg/cmd/github-pull-request-make/main.go b/pkg/cmd/github-pull-request-make/main.go index 7f0800e801a0..27ec703788a3 100644 --- a/pkg/cmd/github-pull-request-make/main.go +++ b/pkg/cmd/github-pull-request-make/main.go @@ -345,9 +345,8 @@ func main() { args = append(args, "--") if target == "stressrace" { args = append(args, "--config=race") - } else { - args = append(args, "--test_sharding_strategy=disabled") } + args = append(args, "--test_sharding_strategy=disabled") var filters []string for test := range pkg.tests { filters = append(filters, "^"+test+"$") diff --git a/pkg/kv/kvclient/kvcoord/BUILD.bazel 
b/pkg/kv/kvclient/kvcoord/BUILD.bazel index b6e008b26605..be3f610669ea 100644 --- a/pkg/kv/kvclient/kvcoord/BUILD.bazel +++ b/pkg/kv/kvclient/kvcoord/BUILD.bazel @@ -143,6 +143,7 @@ go_test( "txn_correctness_test.go", "txn_interceptor_committer_test.go", "txn_interceptor_heartbeater_test.go", + "txn_interceptor_metric_recorder_test.go", "txn_interceptor_pipeliner_client_test.go", "txn_interceptor_pipeliner_test.go", "txn_interceptor_seq_num_allocator_test.go", diff --git a/pkg/kv/kvclient/kvcoord/txn_coord_sender.go b/pkg/kv/kvclient/kvcoord/txn_coord_sender.go index ad17a52ee111..3099c812a703 100644 --- a/pkg/kv/kvclient/kvcoord/txn_coord_sender.go +++ b/pkg/kv/kvclient/kvcoord/txn_coord_sender.go @@ -25,6 +25,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/cockroachdb/logtags" @@ -258,9 +259,9 @@ func newRootTxnCoordSender( mu: &tcs.mu.Mutex, } tcs.interceptorAlloc.txnMetricRecorder = txnMetricRecorder{ - metrics: &tcs.metrics, - clock: tcs.clock, - txn: &tcs.mu.txn, + metrics: &tcs.metrics, + timeSource: timeutil.DefaultTimeSource{}, + txn: &tcs.mu.txn, } tcs.initCommonInterceptors(tcf, txn, kv.RootTxn) diff --git a/pkg/kv/kvclient/kvcoord/txn_coord_sender_savepoints.go b/pkg/kv/kvclient/kvcoord/txn_coord_sender_savepoints.go index 9fab3b349f81..af84ff920636 100644 --- a/pkg/kv/kvclient/kvcoord/txn_coord_sender_savepoints.go +++ b/pkg/kv/kvclient/kvcoord/txn_coord_sender_savepoints.go @@ -43,6 +43,9 @@ type savepoint struct { seqNum enginepb.TxnSeq // txnSpanRefresher fields. + // TODO(mira): after we remove + // kv.transaction.keep_refresh_spans_on_savepoint_rollback.enabled, we won't + // need these two fields anymore. 
refreshSpans []roachpb.Span refreshInvalid bool } diff --git a/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go b/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go index 86c0d8aed0ad..f1c387cc0a65 100644 --- a/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go +++ b/pkg/kv/kvclient/kvcoord/txn_coord_sender_test.go @@ -2997,3 +2997,63 @@ func TestTxnSetIsoLevel(t *testing.T) { require.Equal(t, prev, txn.IsoLevel()) } } + +// TestRefreshWithSavepoint is an integration test that ensures the correct +// behavior of refreshes under savepoint rollback. The test sets up a write-skew +// example where txn1 reads keyA and writes to keyB, while concurrently txn2 +// reads keyB and writes to keyA. The two txns can't be serialized so one is +// expected to get a serialization error upon commit. +// +// However, with the old behavior of discarding refresh spans upon savepoint +// rollback, the read corresponding to the discarded refresh span is not +// refreshed, so the conflict goes unnoticed and both txns commit successfully. +// See #111228 for more details. 
+func TestRefreshWithSavepoint(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + testutils.RunTrueAndFalse(t, "keep-refresh-spans", func(t *testing.T, keepRefreshSpans bool) { + s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{}) + ctx := context.Background() + defer s.Stopper().Stop(context.Background()) + + if keepRefreshSpans { + kvcoord.KeepRefreshSpansOnSavepointRollback.Override(ctx, &s.ClusterSettings().SV, true) + } else { + kvcoord.KeepRefreshSpansOnSavepointRollback.Override(ctx, &s.ClusterSettings().SV, false) + } + + keyA := roachpb.Key("a") + keyB := roachpb.Key("b") + txn1 := kvDB.NewTxn(ctx, "txn1") + txn2 := kvDB.NewTxn(ctx, "txn2") + + spt1, err := txn1.CreateSavepoint(ctx) + require.NoError(t, err) + + _, err = txn1.Get(ctx, keyA) + require.NoError(t, err) + + err = txn1.RollbackToSavepoint(ctx, spt1) + require.NoError(t, err) + + _, err = txn2.Get(ctx, keyB) + require.NoError(t, err) + + err = txn1.Put(ctx, keyB, "bb") + require.NoError(t, err) + + err = txn2.Put(ctx, keyA, "aa") + require.NoError(t, err) + + err = txn1.Commit(ctx) + if keepRefreshSpans { + require.Regexp(t, ".*RETRY_SERIALIZABLE - failed preemptive refresh due to conflicting locks on \"a\"*", err) + } else { + require.NoError(t, err) + } + + err = txn2.Commit(ctx) + require.NoError(t, err) + }) +} diff --git a/pkg/kv/kvclient/kvcoord/txn_interceptor_metric_recorder.go b/pkg/kv/kvclient/kvcoord/txn_interceptor_metric_recorder.go index 3fd2b42bcee4..ac957ea1bce2 100644 --- a/pkg/kv/kvclient/kvcoord/txn_interceptor_metric_recorder.go +++ b/pkg/kv/kvclient/kvcoord/txn_interceptor_metric_recorder.go @@ -12,10 +12,10 @@ package kvcoord import ( "context" + "time" "github.com/cockroachdb/cockroach/pkg/kv/kvpb" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/timeutil" ) @@ -23,15 +23,13 @@ import ( // the behavior and outcome of a transaction. 
It records information about the // requests that a transaction sends and updates counters and histograms when // the transaction completes. -// -// TODO(nvanbenschoten): Unit test this file. type txnMetricRecorder struct { - wrapped lockedSender - metrics *TxnMetrics - clock *hlc.Clock + wrapped lockedSender + metrics *TxnMetrics + timeSource timeutil.TimeSource txn *roachpb.Transaction - txnStartNanos int64 + txnStart time.Time onePCCommit bool parallelCommit bool } @@ -40,8 +38,8 @@ type txnMetricRecorder struct { func (m *txnMetricRecorder) SendLocked( ctx context.Context, ba *kvpb.BatchRequest, ) (*kvpb.BatchResponse, *kvpb.Error) { - if m.txnStartNanos == 0 { - m.txnStartNanos = timeutil.Now().UnixNano() + if m.txnStart.IsZero() { + m.txnStart = m.timeSource.Now() } br, pErr := m.wrapped.SendLocked(ctx, ba) @@ -93,10 +91,10 @@ func (m *txnMetricRecorder) closeLocked() { m.metrics.ParallelCommits.Inc(1) } - if m.txnStartNanos != 0 { - duration := timeutil.Now().UnixNano() - m.txnStartNanos - if duration >= 0 { - m.metrics.Durations.RecordValue(duration) + if !m.txnStart.IsZero() { + dur := m.timeSource.Since(m.txnStart) + if dur >= 0 { + m.metrics.Durations.RecordValue(dur.Nanoseconds()) } } restarts := int64(m.txn.Epoch) diff --git a/pkg/kv/kvclient/kvcoord/txn_interceptor_metric_recorder_test.go b/pkg/kv/kvclient/kvcoord/txn_interceptor_metric_recorder_test.go new file mode 100644 index 000000000000..fda5d049f337 --- /dev/null +++ b/pkg/kv/kvclient/kvcoord/txn_interceptor_metric_recorder_test.go @@ -0,0 +1,223 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package kvcoord + +import ( + "context" + "testing" + + "github.com/cockroachdb/cockroach/pkg/kv/kvpb" + "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/storage/enginepb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/leaktest" + "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/metric" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func makeMockTxnMetricRecorder( + txn *roachpb.Transaction, +) (txnMetricRecorder, *mockLockedSender, *timeutil.ManualTime) { + mockSender := &mockLockedSender{} + metrics := MakeTxnMetrics(metric.TestSampleInterval) + timeSource := timeutil.NewManualTime(timeutil.Unix(0, 123)) + return txnMetricRecorder{ + wrapped: mockSender, + metrics: &metrics, + timeSource: timeSource, + txn: txn, + }, mockSender, timeSource +} + +func TestTxnMetricRecorder(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + type metrics struct { + aborts, commits, commits1PC, parallelCommits, rollbacksFailed, duration, restarts int + } + check := func(t *testing.T, tm *txnMetricRecorder, m metrics) { + t.Helper() + assert.Equal(t, int64(m.aborts), tm.metrics.Aborts.Count(), "TxnMetrics.Aborts") + assert.Equal(t, int64(m.commits), tm.metrics.Commits.Count(), "TxnMetrics.Commits") + assert.Equal(t, int64(m.commits1PC), tm.metrics.Commits1PC.Count(), "TxnMetrics.Commits1PC") + assert.Equal(t, int64(m.parallelCommits), tm.metrics.ParallelCommits.Count(), "TxnMetrics.ParallelCommits") + assert.Equal(t, int64(m.rollbacksFailed), tm.metrics.RollbacksFailed.Count(), "TxnMetrics.RollbacksFailed") + // NOTE: histograms don't retain full precision, so we don't check the exact + // value. We just check whether the value is non-zero. 
+ _, sum := tm.metrics.Durations.Total() + assert.Equal(t, m.duration != 0, sum != 0, "TxnMetrics.Durations") + _, sum = tm.metrics.Restarts.Total() + assert.Equal(t, m.restarts != 0, sum != 0, "TxnMetrics.Restarts") + } + + t.Run("no-op", func(t *testing.T) { + txn := makeTxnProto() + tm, _, _ := makeMockTxnMetricRecorder(&txn) + tm.closeLocked() + + check(t, &tm, metrics{aborts: 1, rollbacksFailed: 1}) + }) + + t.Run("commit (1pc)", func(t *testing.T) { + txn := makeTxnProto() + tm, mockSender, timeSource := makeMockTxnMetricRecorder(&txn) + + ba := &kvpb.BatchRequest{} + ba.Header = kvpb.Header{Txn: txn.Clone()} + ba.Add(&kvpb.EndTxnRequest{Commit: true}) + + mockSender.MockSend(func(ba *kvpb.BatchRequest) (*kvpb.BatchResponse, *kvpb.Error) { + require.Len(t, ba.Requests, 1) + require.IsType(t, &kvpb.EndTxnRequest{}, ba.Requests[0].GetInner()) + + // Simulate delay. + timeSource.Advance(234) + + br := ba.CreateReply() + br.Txn = ba.Txn + br.Txn.Status = roachpb.COMMITTED + br.Responses[0].GetEndTxn().OnePhaseCommit = true + return br, nil + }) + br, pErr := tm.SendLocked(ctx, ba) + require.Nil(t, pErr) + require.NotNil(t, br) + + // Acting as TxnCoordSender. + txn.Update(br.Txn) + tm.closeLocked() + + check(t, &tm, metrics{commits: 1, commits1PC: 1, duration: 234}) + }) + + t.Run("commit (parallel)", func(t *testing.T) { + txn := makeTxnProto() + tm, mockSender, timeSource := makeMockTxnMetricRecorder(&txn) + + ba := &kvpb.BatchRequest{} + ba.Header = kvpb.Header{Txn: txn.Clone()} + ba.Add(&kvpb.EndTxnRequest{Commit: true}) + + mockSender.MockSend(func(ba *kvpb.BatchRequest) (*kvpb.BatchResponse, *kvpb.Error) { + require.Len(t, ba.Requests, 1) + require.IsType(t, &kvpb.EndTxnRequest{}, ba.Requests[0].GetInner()) + + // Simulate delay. 
+ timeSource.Advance(234) + + br := ba.CreateReply() + br.Txn = ba.Txn + br.Txn.Status = roachpb.COMMITTED + br.Responses[0].GetEndTxn().StagingTimestamp = br.Txn.WriteTimestamp + return br, nil + }) + br, pErr := tm.SendLocked(ctx, ba) + require.Nil(t, pErr) + require.NotNil(t, br) + + // Acting as TxnCoordSender. + txn.Update(br.Txn) + tm.closeLocked() + + check(t, &tm, metrics{commits: 1, parallelCommits: 1, duration: 234}) + }) + + t.Run("abort", func(t *testing.T) { + txn := makeTxnProto() + tm, mockSender, timeSource := makeMockTxnMetricRecorder(&txn) + + ba := &kvpb.BatchRequest{} + ba.Header = kvpb.Header{Txn: txn.Clone()} + ba.Add(&kvpb.EndTxnRequest{Commit: true}) + + mockSender.MockSend(func(ba *kvpb.BatchRequest) (*kvpb.BatchResponse, *kvpb.Error) { + require.Len(t, ba.Requests, 1) + require.IsType(t, &kvpb.EndTxnRequest{}, ba.Requests[0].GetInner()) + + // Simulate delay. + timeSource.Advance(234) + + br := ba.CreateReply() + br.Txn = ba.Txn + br.Txn.Status = roachpb.ABORTED + return br, nil + }) + br, pErr := tm.SendLocked(ctx, ba) + require.Nil(t, pErr) + require.NotNil(t, br) + + // Acting as TxnCoordSender. + txn.Update(br.Txn) + tm.closeLocked() + + check(t, &tm, metrics{aborts: 1, duration: 234}) + }) + + t.Run("restart", func(t *testing.T) { + txn := makeTxnProto() + tm, mockSender, timeSource := makeMockTxnMetricRecorder(&txn) + + ba := &kvpb.BatchRequest{} + ba.Header = kvpb.Header{Txn: txn.Clone()} + ba.Add(&kvpb.EndTxnRequest{Commit: true}) + + mockSender.MockSend(func(ba *kvpb.BatchRequest) (*kvpb.BatchResponse, *kvpb.Error) { + require.Equal(t, enginepb.TxnEpoch(0), ba.Txn.Epoch) + require.Len(t, ba.Requests, 1) + require.IsType(t, &kvpb.EndTxnRequest{}, ba.Requests[0].GetInner()) + + // Simulate delay. 
+ timeSource.Advance(234) + + wtoErr := &kvpb.WriteTooOldError{ActualTimestamp: txn.WriteTimestamp.Add(0, 10)} + return nil, kvpb.NewErrorWithTxn(wtoErr, ba.Txn) + }) + br, pErr := tm.SendLocked(ctx, ba) + require.Nil(t, br) + require.NotNil(t, pErr) + require.NotNil(t, pErr.GetTxn()) + + // Acting as TxnCoordSender. + txn.Update(pErr.GetTxn()) + txn.Restart(0, 0, hlc.Timestamp{}) + + // Resend the batch at the new epoch. + ba.Header = kvpb.Header{Txn: txn.Clone()} + + mockSender.MockSend(func(ba *kvpb.BatchRequest) (*kvpb.BatchResponse, *kvpb.Error) { + require.Equal(t, enginepb.TxnEpoch(1), ba.Txn.Epoch) + require.Len(t, ba.Requests, 1) + require.IsType(t, &kvpb.EndTxnRequest{}, ba.Requests[0].GetInner()) + + // Simulate delay. + timeSource.Advance(234) + + br = ba.CreateReply() + br.Txn = ba.Txn + br.Txn.Status = roachpb.COMMITTED + return br, nil + }) + br, pErr = tm.SendLocked(ctx, ba) + require.Nil(t, pErr) + require.NotNil(t, br) + + // Acting as TxnCoordSender. + txn.Update(br.Txn) + tm.closeLocked() + + check(t, &tm, metrics{commits: 1, duration: 468, restarts: 1}) + }) +} diff --git a/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher.go b/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher.go index 5bbba69a2595..99a5cd70b035 100644 --- a/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher.go +++ b/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher.go @@ -44,6 +44,23 @@ var MaxTxnRefreshSpansBytes = settings.RegisterIntSetting( 1<<22, /* 4 MB */ settings.WithPublic) +// KeepRefreshSpansOnSavepointRollback is a boolean flag that, when enabled, +// ensures that all refresh spans accumulated since a savepoint was created are +// kept even after the savepoint is rolled back. This ensures that the reads +// corresponding to the refresh spans are serialized correctly, even though they +// were rolled back. See #111228 for more details. 
+// When set to true, this setting corresponds to the correct new behavior, +// which also matches the Postgres behavior. We don't expect this new behavior +// to impact customers because they should already be able to handle +// serialization errors; in case any unforeseen customer issues arise, the +// setting here allows us to revert to the old behavior. +// TODO(mira): set the default to true after #113765. +var KeepRefreshSpansOnSavepointRollback = settings.RegisterBoolSetting( + settings.SystemVisible, + "kv.transaction.keep_refresh_spans_on_savepoint_rollback.enabled", + "if enabled, all refresh spans accumulated since a savepoint was created are kept after the savepoint is rolled back", + false) + // txnSpanRefresher is a txnInterceptor that collects the read spans of a // serializable transaction in the event it gets a serializable retry error. It // can then use the set of read spans to avoid retrying the transaction if all @@ -794,6 +811,9 @@ func (sr *txnSpanRefresher) createSavepointLocked(ctx context.Context, s *savepo // TODO(nvanbenschoten): make sure this works correctly with ReadCommitted. // The refresh spans should either be empty when captured into a savepoint or // should be cleared when the savepoint is rolled back to. + // TODO(mira): after we remove + // kv.transaction.keep_refresh_spans_on_savepoint_rollback.enabled, we won't + // need to keep refresh spans in the savepoint anymore. s.refreshSpans = make([]roachpb.Span, len(sr.refreshFootprint.asSlice())) copy(s.refreshSpans, sr.refreshFootprint.asSlice()) s.refreshInvalid = sr.refreshInvalid @@ -801,9 +821,11 @@ func (sr *txnSpanRefresher) createSavepointLocked(ctx context.Context, s *savepo // rollbackToSavepointLocked is part of the txnInterceptor interface. func (sr *txnSpanRefresher) rollbackToSavepointLocked(ctx context.Context, s savepoint) { - sr.refreshFootprint.clear() - sr.refreshFootprint.insert(s.refreshSpans...) 
- sr.refreshInvalid = s.refreshInvalid + if !KeepRefreshSpansOnSavepointRollback.Get(&sr.st.SV) { + sr.refreshFootprint.clear() + sr.refreshFootprint.insert(s.refreshSpans...) + sr.refreshInvalid = s.refreshInvalid + } } // closeLocked implements the txnInterceptor interface. diff --git a/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher_test.go b/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher_test.go index 849d39d15035..18fd957f9c7a 100644 --- a/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher_test.go +++ b/pkg/kv/kvclient/kvcoord/txn_interceptor_span_refresher_test.go @@ -1436,57 +1436,77 @@ func TestTxnSpanRefresherEpochIncrement(t *testing.T) { func TestTxnSpanRefresherSavepoint(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - ctx := context.Background() - tsr, mockSender := makeMockTxnSpanRefresher() - keyA, keyB := roachpb.Key("a"), roachpb.Key("b") - txn := makeTxnProto() + testutils.RunTrueAndFalse(t, "keep-refresh-spans", func(t *testing.T, keepRefreshSpans bool) { + ctx := context.Background() + tsr, mockSender := makeMockTxnSpanRefresher() - read := func(key roachpb.Key) { - ba := &kvpb.BatchRequest{} - ba.Header = kvpb.Header{Txn: &txn} - getArgs := kvpb.GetRequest{RequestHeader: kvpb.RequestHeader{Key: key}} - ba.Add(&getArgs) - mockSender.MockSend(func(ba *kvpb.BatchRequest) (*kvpb.BatchResponse, *kvpb.Error) { - require.Len(t, ba.Requests, 1) - require.IsType(t, &kvpb.GetRequest{}, ba.Requests[0].GetInner()) - - br := ba.CreateReply() - br.Txn = ba.Txn - return br, nil - }) - br, pErr := tsr.SendLocked(ctx, ba) - require.Nil(t, pErr) - require.NotNil(t, br) - } - read(keyA) - require.Equal(t, []roachpb.Span{{Key: keyA}}, tsr.refreshFootprint.asSlice()) + if keepRefreshSpans { + KeepRefreshSpansOnSavepointRollback.Override(ctx, &tsr.st.SV, true) + } else { + KeepRefreshSpansOnSavepointRollback.Override(ctx, &tsr.st.SV, false) + } - s := savepoint{} - tsr.createSavepointLocked(ctx, &s) + keyA, keyB := 
roachpb.Key("a"), roachpb.Key("b") + txn := makeTxnProto() - // Another read after the savepoint was created. - read(keyB) - require.Equal(t, []roachpb.Span{{Key: keyA}, {Key: keyB}}, tsr.refreshFootprint.asSlice()) + read := func(key roachpb.Key) { + ba := &kvpb.BatchRequest{} + ba.Header = kvpb.Header{Txn: &txn} + getArgs := kvpb.GetRequest{RequestHeader: kvpb.RequestHeader{Key: key}} + ba.Add(&getArgs) + mockSender.MockSend(func(ba *kvpb.BatchRequest) (*kvpb.BatchResponse, *kvpb.Error) { + require.Len(t, ba.Requests, 1) + require.IsType(t, &kvpb.GetRequest{}, ba.Requests[0].GetInner()) - require.Equal(t, []roachpb.Span{{Key: keyA}}, s.refreshSpans) - require.False(t, s.refreshInvalid) + br := ba.CreateReply() + br.Txn = ba.Txn + return br, nil + }) + br, pErr := tsr.SendLocked(ctx, ba) + require.Nil(t, pErr) + require.NotNil(t, br) + } + read(keyA) + require.Equal(t, []roachpb.Span{{Key: keyA}}, tsr.refreshFootprint.asSlice()) - // Rollback the savepoint and check that refresh spans were overwritten. - tsr.rollbackToSavepointLocked(ctx, s) - require.Equal(t, []roachpb.Span{{Key: keyA}}, tsr.refreshFootprint.asSlice()) + s := savepoint{} + tsr.createSavepointLocked(ctx, &s) - // Check that rolling back to the savepoint resets refreshInvalid. - tsr.refreshInvalid = true - tsr.rollbackToSavepointLocked(ctx, s) - require.False(t, tsr.refreshInvalid) + // Another read after the savepoint was created. + read(keyB) + require.Equal(t, []roachpb.Span{{Key: keyA}, {Key: keyB}}, tsr.refreshFootprint.asSlice()) - // Set refreshInvalid and then create a savepoint. - tsr.refreshInvalid = true - s = savepoint{} - tsr.createSavepointLocked(ctx, &s) - require.True(t, s.refreshInvalid) - // Rollback to the savepoint check that refreshes are still invalid. - tsr.rollbackToSavepointLocked(ctx, s) - require.True(t, tsr.refreshInvalid) + require.Equal(t, []roachpb.Span{{Key: keyA}}, s.refreshSpans) + require.False(t, s.refreshInvalid) + + // Rollback the savepoint. 
+ tsr.rollbackToSavepointLocked(ctx, s) + if keepRefreshSpans { + // Check that refresh spans were kept as such. + require.Equal(t, []roachpb.Span{{Key: keyA}, {Key: keyB}}, tsr.refreshFootprint.asSlice()) + } else { + // Check that refresh spans were overwritten. + require.Equal(t, []roachpb.Span{{Key: keyA}}, tsr.refreshFootprint.asSlice()) + } + + tsr.refreshInvalid = true + tsr.rollbackToSavepointLocked(ctx, s) + if keepRefreshSpans { + // Check that rolling back to the savepoint keeps refreshInvalid as such. + require.True(t, tsr.refreshInvalid) + } else { + // Check that rolling back to the savepoint resets refreshInvalid. + require.False(t, tsr.refreshInvalid) + } + + // Set refreshInvalid and then create a savepoint. + tsr.refreshInvalid = true + s = savepoint{} + tsr.createSavepointLocked(ctx, &s) + require.True(t, s.refreshInvalid) + // Rollback to the savepoint check that refreshes are still invalid. + tsr.rollbackToSavepointLocked(ctx, s) + require.True(t, tsr.refreshInvalid) + }) } diff --git a/pkg/kv/kvpb/errors.go b/pkg/kv/kvpb/errors.go index 412d5a832ee8..2041456d0466 100644 --- a/pkg/kv/kvpb/errors.go +++ b/pkg/kv/kvpb/errors.go @@ -866,7 +866,7 @@ func (e *TransactionRetryError) SafeFormatError(p errors.Printer) (next error) { msg = redact.Sprintf(" - %s", e.ExtraMsg) } if e.ConflictingTxn != nil { - msg = redact.Sprintf(" %s - conflicting txn: meta={%s}", msg, e.ConflictingTxn.String()) + msg = redact.Sprintf("%s - conflicting txn: meta={%s}", msg, e.ConflictingTxn.String()) } p.Printf("TransactionRetryError: retry txn (%s%s)", redact.SafeString(TransactionRetryReason_name[int32(e.Reason)]), msg) return nil diff --git a/pkg/sql/sem/builtins/fixed_oids.go b/pkg/sql/sem/builtins/fixed_oids.go index a9cb1ae51e4e..a416bbec92c8 100644 --- a/pkg/sql/sem/builtins/fixed_oids.go +++ b/pkg/sql/sem/builtins/fixed_oids.go @@ -2509,6 +2509,7 @@ var builtinOidsArray = []string{ 2540: `information_schema._pg_datetime_precision(typid: oid, typmod: int4) -> 
int`, 2541: `information_schema._pg_interval_type(typid: oid, typmod: int4) -> string`, 2542: `crdb_internal.release_series(version: string) -> string`, + 2543: `crdb_internal.fips_ready() -> bool`, } var builtinOidsBySignature map[string]oid.Oid