Skip to content

Commit

Permalink
Add DHCP IP Retries in PrepareHNSNetwork (#5819)
Browse files Browse the repository at this point in the history
To address the potential race condition issue where acquiring a DHCP IP address may fail after CreateHNSNetwork,
we added a retry mechanism to wait for an available IP. If the DHCP IP cannot be acquired within six seconds,
an error will be logged.

Signed-off-by: Shuyang Xin <[email protected]>
  • Loading branch information
XinShuYang authored Jan 2, 2024
1 parent 48c1646 commit 923b429
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 60 deletions.
21 changes: 21 additions & 0 deletions ci/jenkins/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ IP_MODE=""
K8S_VERSION="1.28.2-00"
WINDOWS_YAML_SUFFIX="windows"
WIN_IMAGE_NODE=""
echo "" > WIN_DHCP
GOLANG_RELEASE_DIR=${WORKDIR}/golang-releases

WINDOWS_CONFORMANCE_FOCUS="\[sig-network\].+\[Conformance\]|\[sig-windows\]"
Expand Down Expand Up @@ -256,6 +257,19 @@ function collect_windows_network_info_and_logs {
tar zcf debug_logs.tar.gz "${DEBUG_LOG_PATH}"
}

function check_dhcp_status {
echo "===== Check Interface DHCP Status ====="
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_BRINT_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias br-int -AddressFamily IPv4).Dhcp"')
WIN_DHCP=$(<WIN_DHCP)
if [[ ${WIN_DHCP} == ${WIN_BRINT_DHCP} ]]; then
echo "Newly created uplink DHCP status is consistent with the original adapter."
else
echo "Newly created uplink DHCP status is different from the original adapter. Original DHCP: $WIN_DHCP, br-int DHCP: $WIN_BRINT_DHCP"
exit 1
fi
}

function wait_for_antrea_windows_pods_ready {
kubectl apply -f "${WORKDIR}/antrea.yml"
if [[ "${PROXY_ALL}" == false && ${TESTCASE} =~ "windows-e2e" ]]; then
Expand All @@ -277,6 +291,7 @@ function wait_for_antrea_windows_pods_ready {
done
sleep 10
done
check_dhcp_status
}

function wait_for_antrea_windows_processes_ready {
Expand All @@ -295,6 +310,7 @@ function wait_for_antrea_windows_processes_ready {
done
sleep 10
done
check_dhcp_status
}

function clean_ns {
Expand Down Expand Up @@ -652,6 +668,11 @@ function deliver_antrea_windows_containerd {
ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "gzip -d antrea-windows.tar.gz && ctr -n k8s.io images import antrea-windows.tar"
fi
done
# Add Windows interface DHCP check using CI script to obtain the original interface status.
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias \"Ethernet0 2\" -AddressFamily IPv4).Dhcp"')
echo "Original adapter DHCP status: $WIN_DHCP"
echo $WIN_DHCP > WIN_DHCP
rm -f antrea-windows.tar
echo "==== Finish building and delivering Windows containerd images ===="
}
Expand Down
41 changes: 38 additions & 3 deletions pkg/agent/util/net_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ const (
RT_FILTER_METRIC
RT_FILTER_DST
RT_FILTER_GW

// IP_ADAPTER_DHCP_ENABLED is defined in the Win32 API document.
// https://learn.microsoft.com/en-us/windows/win32/api/iptypes/ns-iptypes-ip_adapter_addresses_lh
IP_ADAPTER_DHCP_ENABLED = 0x00000004
)

var (
Expand Down Expand Up @@ -460,10 +464,29 @@ func PrepareHNSNetwork(subnetCIDR *net.IPNet, nodeIPNet *net.IPNet, uplinkAdapte
hnsNetworkDelete(hnsNet)
}
}()

adapter, ipFound, err := adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
var adapter *net.Interface
var ipFound bool
// On the current Windows testbed, it takes a maximum of 1.8 seconds to obtain a valid IP.
// Therefore, we set the timeout limit to triple of that value, allowing a maximum wait of 6 seconds here.
err = wait.PollImmediate(1*time.Second, 6*time.Second, func() (bool, error) {
var checkErr error
adapter, ipFound, checkErr = adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
if checkErr != nil {
return false, checkErr
}
return ipFound, nil
})
if err != nil {
return err
if err == wait.ErrWaitTimeout {
dhcpStatus, err := InterfaceIPv4DhcpEnabled(uplinkAdapter.Name)
if err != nil {
klog.ErrorS(err, "Failed to get IPv4 DHCP status on the network adapter", "adapter", uplinkAdapter.Name)
} else {
klog.ErrorS(err, "Timeout acquiring IP for the adapter", "dhcpStatus", dhcpStatus)
}
} else {
return err
}
}
vNicName, index := adapter.Name, adapter.Index
// By default, "ipFound" should be true after Windows creates the HNSNetwork. The following check is for some corner
Expand Down Expand Up @@ -647,6 +670,16 @@ func HostInterfaceExists(ifaceName string) bool {
return true
}

// InterfaceIPv4DhcpEnabled returns the IPv4 DHCP status on the specified interface.
func InterfaceIPv4DhcpEnabled(ifaceName string) (bool, error) {
adapter, err := getAdapterInAllCompartmentsByName(ifaceName)
if err != nil {
return false, err
}
ipv4Dhcp := (adapter.flags&IP_ADAPTER_DHCP_ENABLED != 0)
return ipv4Dhcp, nil
}

// SetInterfaceMTU configures interface MTU on host for Pods. MTU change cannot be realized with HNSEndpoint because
// there's no MTU field in HNSEndpoint:
// https://github.com/Microsoft/hcsshim/blob/4a468a6f7ae547974bc32911395c51fb1862b7df/internal/hns/hnsendpoint.go#L12
Expand Down Expand Up @@ -1085,6 +1118,7 @@ type updateIPInterfaceFunc func(entry *antreasyscall.MibIPInterfaceRow) *antreas
type adapter struct {
net.Interface
compartmentID uint32
flags uint32
}

func (a *adapter) setMTU(mtu int, family uint16) error {
Expand Down Expand Up @@ -1250,6 +1284,7 @@ func getAdaptersByName(name string) ([]adapter, error) {
adapter := adapter{
Interface: ifi,
compartmentID: aa.CompartmentId,
flags: aa.Flags,
}
adapters = append(adapters, adapter)
}
Expand Down
129 changes: 72 additions & 57 deletions pkg/agent/util/net_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
setVMCmd := fmt.Sprintf("Set-VMSwitch -ComputerName $(hostname) -Name %s -EnableSoftwareRsc $True", LocalHNSNetwork)
tests := []struct {
name string
testAdapterAddresses *windows.IpAdapterAddresses
nodeIPNet *net.IPNet
dnsServers string
newName string
Expand All @@ -320,89 +321,99 @@ func TestPrepareHNSNetwork(t *testing.T) {
wantErr error
}{
{
name: "Prepare Success",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
name: "Prepare Success",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
wantCmds: []string{getAdapterCmd, renameAdapterCmd, renameNetCmd,
getVMCmd, setVMCmd},
},
{
name: "Create Error",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
name: "Create Error",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
},
{
name: "adapter Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
name: "adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Rename Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
name: "Rename Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
},
{
name: "Enable HNS Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkRequestError: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Enable RSC Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
name: "Enable RSC Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
name: "IP Not Found",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
},
{
name: "IP Not Found Configure Default Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
name: "IP Not Found Configure Default Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found Set adapter Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
name: "IP Not Found Set adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
},
{
name: "IP Not Found New Net Route Err",
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
name: "IP Not Found New Net Route Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
},
}

Expand All @@ -415,6 +426,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
defer mockHNSNetworkCreate(tc.hnsNetworkCreateErr)()
defer mockHNSNetworkDelete(nil)()
defer mockAntreaNetIO(&antreasyscalltest.MockNetIO{CreateIPForwardEntryErr: tc.createRowErr})()
defer mockGetAdaptersAddresses(tc.testAdapterAddresses, nil)()
gotErr := PrepareHNSNetwork(testSubnetCIDR, tc.nodeIPNet, testUplinkAdapter, "testGateway", tc.dnsServers, testRoutes, tc.newName)
assert.Equal(t, tc.wantErr, gotErr)
})
Expand Down Expand Up @@ -1076,6 +1088,7 @@ func TestGetAdapterInAllCompartmentsByName(t *testing.T) {
HardwareAddr: testMACAddr,
},
compartmentID: 1,
flags: IP_ADAPTER_DHCP_ENABLED,
}
tests := []struct {
name string
Expand Down Expand Up @@ -1139,6 +1152,7 @@ func createTestAdapterAddresses(name string) *windows.IpAdapterAddresses {
PhysicalAddressLength: 6,
PhysicalAddress: testPhysicalAddress,
CompartmentId: 1,
Flags: IP_ADAPTER_DHCP_ENABLED,
}
}

Expand Down Expand Up @@ -1171,6 +1185,7 @@ func mockGetAdaptersAddresses(testAdaptersAddresses *windows.IpAdapterAddresses,
adapterAddresses.PhysicalAddressLength = testAdaptersAddresses.PhysicalAddressLength
adapterAddresses.PhysicalAddress = testAdaptersAddresses.PhysicalAddress
adapterAddresses.CompartmentId = testAdaptersAddresses.CompartmentId
adapterAddresses.Flags = testAdaptersAddresses.Flags
}
return err
}
Expand Down

0 comments on commit 923b429

Please sign in to comment.