Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fqdnCacheMinTTL configuration parameter #6808

Merged
merged 18 commits into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions build/charts/antrea/conf/antrea-agent.conf
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,12 @@ kubeAPIServerOverride: {{ .Values.kubeAPIServerOverride | quote }}
# 10.96.0.10:53, [fd00:10:96::a]:53).
dnsServerOverride: {{ .Values.dnsServerOverride | quote }}

# fqdnCacheMinTTL helps address the issue of applications caching DNS response IPs beyond the TTL value
# for the DNS record. It is used to enforce FQDN policy rules, ensuring that resolved IPs are included
# in datapath rules for as long as the application caches them. Ideally, this value should be set to
# the maximum caching duration across all applications.
fqdnCacheMinTTL: {{ .Values.fqdnCacheMinTTL }}

# Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used.
# https://golang.org/pkg/crypto/tls/#pkg-constants
# Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always
Expand Down
5 changes: 5 additions & 0 deletions build/charts/antrea/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,11 @@ kubeAPIServerOverride: ""
# -- Address of DNS server, to override the kube-dns Service. It's used to
# resolve hostnames in a FQDN policy.
dnsServerOverride: ""
# -- fqdnCacheMinTTL helps address the issue of applications caching DNS response IPs beyond the TTL value
# for the DNS record. It is used to enforce FQDN policy rules, ensuring that resolved IPs are included
# in datapath rules for as long as the application caches them. Ideally, this value should be set to
# the maximum caching duration across all applications.
fqdnCacheMinTTL: 0
# -- IPv4 CIDR range used for Services. Required when AntreaProxy is disabled.
serviceCIDR: ""
# -- IPv6 CIDR range used for Services. Required when AntreaProxy is disabled.
Expand Down
10 changes: 8 additions & 2 deletions build/yamls/antrea-aks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4235,6 +4235,12 @@ data:
# 10.96.0.10:53, [fd00:10:96::a]:53).
dnsServerOverride: ""

# fqdnCacheMinTTL helps address the issue of applications caching DNS response IPs beyond the TTL value
# for the DNS record. It is used to enforce FQDN policy rules, ensuring that resolved IPs are included
# in datapath rules for as long as the application caches them. Ideally, this value should be set to
# the maximum caching duration across all applications.
fqdnCacheMinTTL: 0

# Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used.
# https://golang.org/pkg/crypto/tls/#pkg-constants
# Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always
Expand Down Expand Up @@ -5394,7 +5400,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 5dd823245aab41ce7ca74d05693aa96e1537615f6966b6b78879cde5d3a0b215
checksum/config: f09a1678d56ee4b710a2be5e76c76caa97d2014197f8ab3238d92e164a351809
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5632,7 +5638,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 5dd823245aab41ce7ca74d05693aa96e1537615f6966b6b78879cde5d3a0b215
checksum/config: f09a1678d56ee4b710a2be5e76c76caa97d2014197f8ab3238d92e164a351809
labels:
app: antrea
component: antrea-controller
Expand Down
10 changes: 8 additions & 2 deletions build/yamls/antrea-eks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4235,6 +4235,12 @@ data:
# 10.96.0.10:53, [fd00:10:96::a]:53).
dnsServerOverride: ""

# fqdnCacheMinTTL helps address the issue of applications caching DNS response IPs beyond the TTL value
# for the DNS record. It is used to enforce FQDN policy rules, ensuring that resolved IPs are included
# in datapath rules for as long as the application caches them. Ideally, this value should be set to
# the maximum caching duration across all applications.
fqdnCacheMinTTL: 0

# Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used.
# https://golang.org/pkg/crypto/tls/#pkg-constants
# Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always
Expand Down Expand Up @@ -5394,7 +5400,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 5dd823245aab41ce7ca74d05693aa96e1537615f6966b6b78879cde5d3a0b215
checksum/config: f09a1678d56ee4b710a2be5e76c76caa97d2014197f8ab3238d92e164a351809
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5633,7 +5639,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 5dd823245aab41ce7ca74d05693aa96e1537615f6966b6b78879cde5d3a0b215
checksum/config: f09a1678d56ee4b710a2be5e76c76caa97d2014197f8ab3238d92e164a351809
labels:
app: antrea
component: antrea-controller
Expand Down
10 changes: 8 additions & 2 deletions build/yamls/antrea-gke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4235,6 +4235,12 @@ data:
# 10.96.0.10:53, [fd00:10:96::a]:53).
dnsServerOverride: ""

# fqdnCacheMinTTL helps address the issue of applications caching DNS response IPs beyond the TTL value
# for the DNS record. It is used to enforce FQDN policy rules, ensuring that resolved IPs are included
# in datapath rules for as long as the application caches them. Ideally, this value should be set to
# the maximum caching duration across all applications.
fqdnCacheMinTTL: 0

# Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used.
# https://golang.org/pkg/crypto/tls/#pkg-constants
# Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always
Expand Down Expand Up @@ -5394,7 +5400,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: e9ea48fa57cb11513f69a1fb2b44dd3c6cb96aa739598c1db5091ea91f097f4b
checksum/config: 20bce26ad357c27b2990ebe7034cf6da9f84116c8648162d183cdfc5a1b4f4c5
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5630,7 +5636,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: e9ea48fa57cb11513f69a1fb2b44dd3c6cb96aa739598c1db5091ea91f097f4b
checksum/config: 20bce26ad357c27b2990ebe7034cf6da9f84116c8648162d183cdfc5a1b4f4c5
labels:
app: antrea
component: antrea-controller
Expand Down
10 changes: 8 additions & 2 deletions build/yamls/antrea-ipsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4248,6 +4248,12 @@ data:
# 10.96.0.10:53, [fd00:10:96::a]:53).
dnsServerOverride: ""

# fqdnCacheMinTTL helps address the issue of applications caching DNS response IPs beyond the TTL value
# for the DNS record. It is used to enforce FQDN policy rules, ensuring that resolved IPs are included
# in datapath rules for as long as the application caches them. Ideally, this value should be set to
# the maximum caching duration across all applications.
fqdnCacheMinTTL: 0

# Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used.
# https://golang.org/pkg/crypto/tls/#pkg-constants
# Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always
Expand Down Expand Up @@ -5407,7 +5413,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 38e19ea8db3838e3f5cff4aaa2684db1586fb457d095ac3ea49e8bf405a04e41
checksum/config: d58ebcd2fbe31dea9bad21582401147bb4dd0ca1e4bc41c3c5ea4c50650b31e3
checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4
labels:
app: antrea
Expand Down Expand Up @@ -5689,7 +5695,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 38e19ea8db3838e3f5cff4aaa2684db1586fb457d095ac3ea49e8bf405a04e41
checksum/config: d58ebcd2fbe31dea9bad21582401147bb4dd0ca1e4bc41c3c5ea4c50650b31e3
labels:
app: antrea
component: antrea-controller
Expand Down
10 changes: 8 additions & 2 deletions build/yamls/antrea.yml
antoninbas marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -4235,6 +4235,12 @@ data:
# 10.96.0.10:53, [fd00:10:96::a]:53).
dnsServerOverride: ""

# fqdnCacheMinTTL helps address the issue of applications caching DNS response IPs beyond the TTL value
# for the DNS record. It is used to enforce FQDN policy rules, ensuring that resolved IPs are included
# in datapath rules for as long as the application caches them. Ideally, this value should be set to
# the maximum caching duration across all applications.
fqdnCacheMinTTL: 0

# Comma-separated list of Cipher Suites. If omitted, the default Go Cipher Suites will be used.
# https://golang.org/pkg/crypto/tls/#pkg-constants
# Note that TLS1.3 Cipher Suites cannot be added to the list. But the apiserver will always
Expand Down Expand Up @@ -5394,7 +5400,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 59fb1ea496577015058d4e99e4f64136aa68d5340db13c00ced565da750a22fc
checksum/config: 67235f0d83b25919f71704adf4064e95347bd8ebbece8fc78d3403294fd26062
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -5630,7 +5636,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 59fb1ea496577015058d4e99e4f64136aa68d5340db13c00ced565da750a22fc
checksum/config: 67235f0d83b25919f71704adf4064e95347bd8ebbece8fc78d3403294fd26062
labels:
app: antrea
component: antrea-controller
Expand Down
1 change: 1 addition & 0 deletions cmd/antrea-agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,7 @@ func run(o *Options) error {
nodeConfig,
podNetworkWait,
l7Reconciler,
uint32(o.config.FQDNCacheMinTTL),
)
if err != nil {
return fmt.Errorf("error creating new NetworkPolicy controller: %v", err)
Expand Down
4 changes: 4 additions & 0 deletions cmd/antrea-agent/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@ func (o *Options) validate(args []string) error {
return fmt.Errorf("nodeType %s requires feature gate ExternalNode to be enabled", o.config.NodeType)
}

if o.config.FQDNCacheMinTTL < 0 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: not necessarily need to be part of this PR, but I just want to note @salv-orlando's comment in the community meeting: we should consider setting a sensible max value for this field, so that Antrea will not just cache each FQDN ip forever

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking on it (i am inexperienced in a real world scenario for this though) and thought that IPs mapped to FQDN's wont probably update/change at a rate that it might make us ending up with too many datapath rules for each IP ? and maybe can this be left to the cluster admin to determine and set it ? What if the admin wishes to place a large value , for his own use case, which surpasses the 'max' that we set ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know about a max value for FQDNCacheMinTTL, but we should have a upper bound on the size of the cache (which would also be a upper bound on the number of datapath rules). This is independent of this feature IMO, because I could also have a DNS server returning records with a very high TTL, and it seems that the issue would be the same.

return fmt.Errorf("fqdnCacheMinTTL must be greater than or equal to 0")
}

if o.config.NodeType == config.ExternalNode.String() {
o.nodeType = config.ExternalNode
return o.validateExternalNodeOptions()
Expand Down
8 changes: 5 additions & 3 deletions pkg/agent/controller/networkpolicy/fqdn.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ type fqdnController struct {
ofClient openflow.Client
// dnsServerAddr stores the coreDNS server address, or the user provided DNS server address.
dnsServerAddr string
minTTL uint32

// dirtyRuleHandler is a callback that is run upon finding a rule out-of-sync.
dirtyRuleHandler func(string)
Expand Down Expand Up @@ -160,7 +161,7 @@ type fqdnController struct {
clock clock.Clock
}

func newFQDNController(client openflow.Client, allocator *idAllocator, dnsServerOverride string, dirtyRuleHandler func(string), v4Enabled, v6Enabled bool, gwPort uint32, clock clock.WithTicker) (*fqdnController, error) {
func newFQDNController(client openflow.Client, allocator *idAllocator, dnsServerOverride string, dirtyRuleHandler func(string), v4Enabled, v6Enabled bool, gwPort uint32, clock clock.WithTicker, fqdnCacheMinTTL uint32) (*fqdnController, error) {
controller := &fqdnController{
ofClient: client,
dirtyRuleHandler: dirtyRuleHandler,
Expand All @@ -182,6 +183,7 @@ func newFQDNController(client openflow.Client, allocator *idAllocator, dnsServer
ipv6Enabled: v6Enabled,
gwPort: gwPort,
clock: clock,
minTTL: fqdnCacheMinTTL,
}
if controller.ofClient != nil {
if err := controller.ofClient.NewDNSPacketInConjunction(dnsInterceptRuleID); err != nil {
Expand Down Expand Up @@ -643,15 +645,15 @@ func (f *fqdnController) parseDNSResponse(msg *dns.Msg) (string, map[string]ipWi
if f.ipv4Enabled {
responseIPs[r.A.String()] = ipWithExpiration{
ip: r.A,
expirationTime: currentTime.Add(time.Duration(r.Header().Ttl) * time.Second),
expirationTime: currentTime.Add(time.Duration(max(f.minTTL, r.Header().Ttl)) * time.Second),
}

}
case *dns.AAAA:
if f.ipv6Enabled {
responseIPs[r.AAAA.String()] = ipWithExpiration{
ip: r.AAAA,
expirationTime: currentTime.Add(time.Duration(r.Header().Ttl) * time.Second),
expirationTime: currentTime.Add(time.Duration(max(f.minTTL, r.Header().Ttl)) * time.Second),
}
}
}
Expand Down
Loading
Loading