From ef434fae72287a9cd0f7b85ad1701b998cce800b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:40 +0300 Subject: [PATCH 01/12] bpf: Unmask upper DSCP bits in bpf_fib_lookup() helper The helper performs a FIB lookup according to the parameters in the 'params' argument, one of which is 'tos'. According to the test in test_tc_neigh_fib.c, it seems that BPF programs are expected to initialize the 'tos' field to the full 8 bit DS field from the IPv4 header. Unmask the upper DSCP bits before invoking the IPv4 FIB lookup APIs so that in the future the lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-2-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/filter.c b/net/core/filter.c index f3c72cf860997..89f56fac48fbb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -84,6 +84,7 @@ #include #include #include +#include #include "dev.h" @@ -5899,7 +5900,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl4.flowi4_iif = params->ifindex; fl4.flowi4_oif = 0; } - fl4.flowi4_tos = params->tos & IPTOS_RT_MASK; + fl4.flowi4_tos = params->tos & INET_DSCP_MASK; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; From bc52a4eecefdd686065f8f68ba9e1c83d4ce6f14 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:41 +0300 Subject: [PATCH 02/12] ipv4: Unmask upper DSCP bits in NETLINK_FIB_LOOKUP family The NETLINK_FIB_LOOKUP netlink family can be used to perform a FIB lookup according to user provided parameters and communicate the result back to user space. Unmask the upper DSCP bits of the user-provided DS field before invoking the IPv4 FIB lookup API so that in the future the lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index da540ddb7af65..8b740f575da1a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1343,7 +1343,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) struct flowi4 fl4 = { .flowi4_mark = frn->fl_mark, .daddr = frn->fl_addr, - .flowi4_tos = frn->fl_tos & IPTOS_RT_MASK, + .flowi4_tos = frn->fl_tos & INET_DSCP_MASK, .flowi4_scope = frn->fl_scope, }; struct fib_table *tb; From be2e9089cb34dfc958d8383a755bde3681a817b2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:42 +0300 Subject: [PATCH 03/12] ipv4: Unmask upper DSCP bits when constructing the Record Route option The Record Route IP option records the addresses of the routers that routed the packet. In the case of forwarded packets, the kernel performs a route lookup via fib_lookup() and fills in the preferred source address of the matched route. Unmask the upper DSCP bits when performing the lookup so that in the future the lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-4-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 13c0f1d455f33..9b6528b7b5629 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1263,7 +1263,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = iph->tos & IPTOS_RT_MASK, + .flowi4_tos = iph->tos & INET_DSCP_MASK, .flowi4_oif = rt->dst.dev->ifindex, .flowi4_iif = skb->dev->ifindex, .flowi4_mark = skb->mark, From c1ae5ca69b691a7403e85047382fc4fd6a69ee9f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:43 +0300 Subject: [PATCH 04/12] netfilter: rpfilter: Unmask upper DSCP bits The rpfilter match performs a reverse path filter test on a packet by performing a FIB lookup with the source and destination addresses swapped. Unmask the upper DSCP bits of the DS field of the tested packet so that in the future the FIB lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-5-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/netfilter/ipt_rpfilter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index ded5bef02f771..1ce7a1655b974 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -75,7 +76,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; - flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; + flow.flowi4_tos = iph->tos & INET_DSCP_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); flow.flowi4_uid = sock_net_uid(xt_net(par), NULL); From 338385e059c5d299556fa341d10601ae72c6e932 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:44 +0300 Subject: [PATCH 05/12] netfilter: nft_fib: Unmask upper DSCP bits In a similar fashion to the iptables rpfilter match, unmask the upper DSCP bits of the DS field of the currently tested packet so that in the future the FIB lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-6-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/netfilter/nft_fib_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index df94bc28c3d79..00da1332bbf1a 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -108,7 +109,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, if (priv->flags & NFTA_FIB_F_MARK) fl4.flowi4_mark = pkt->skb->mark; - fl4.flowi4_tos = iph->tos & IPTOS_RT_MASK; + fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; if (priv->flags & NFTA_FIB_F_DADDR) { fl4.daddr = iph->daddr; From 2bc9778b6696337f1e0b38f07522319296b39d83 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:45 +0300 Subject: [PATCH 06/12] ipv4: ipmr: Unmask upper DSCP bits in ipmr_rt_fib_lookup() Unmask the upper DSCP bits when calling ipmr_fib_lookup() so that in the future it could perform the FIB lookup according to the full DSCP value. Note that ipmr_fib_lookup() performs a FIB rule lookup (returning the relevant routing table) and that IPv4 multicast FIB rules do not support matching on TOS / DSCP. However, it is still worth unmasking the upper DSCP bits in case support for DSCP matching is ever added. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-7-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/ipmr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6c750bd13dd8d..d5295b69bc0af 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -62,6 +62,7 @@ #include #include #include +#include #include @@ -2080,7 +2081,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, - .flowi4_tos = RT_TOS(iph->tos), + .flowi4_tos = iph->tos & INET_DSCP_MASK, .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? From 39d3628f7ceabd22385dcd6811ed4c353f7c859b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:46 +0300 Subject: [PATCH 07/12] ipv4: Unmask upper DSCP bits in fib_compute_spec_dst() As explained in commit 35ebf65e851c ("ipv4: Create and use fib_compute_spec_dst() helper."), the function is used - for example - to determine the source address for an ICMP reply. If we are responding to a multicast or broadcast packet, the source address is set to the source address that we would use if we were to send a packet to the unicast source of the original packet. This address is determined by performing a FIB lookup and using the preferred source address of the resulting route. Unmask the upper DSCP bits of the DS field of the packet that triggered the reply so that in the future the FIB lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-8-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 8b740f575da1a..793e6781399a4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -293,7 +293,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), .daddr = ip_hdr(skb)->saddr, - .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, + .flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK, .flowi4_scope = scope, .flowi4_mark = vmark ? skb->mark : 0, }; From df9131c7fafdc0d4c85686f5c7711a4dd4f3ae60 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:47 +0300 Subject: [PATCH 08/12] ipv4: Unmask upper DSCP bits in input route lookup Unmask the upper DSCP bits in input route lookup so that in the future the lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-9-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9b6528b7b5629..73bb61162445f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2470,7 +2470,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, struct fib_result res; int err; - tos &= IPTOS_RT_MASK; + tos &= INET_DSCP_MASK; rcu_read_lock(); err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); rcu_read_unlock(); From b1251a6f1a9b475725a097e75196ee105076cbd1 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:48 +0300 Subject: [PATCH 09/12] ipv4: Unmask upper DSCP bits in RTM_GETROUTE input route lookup Unmask the upper DSCP bits when looking up an input route via the RTM_GETROUTE netlink message so that in the future the lookup could be performed according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-10-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 73bb61162445f..524b70ab77a0e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3286,7 +3286,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, skb->dev = dev; skb->mark = mark; err = ip_route_input_rcu(skb, dst, src, - rtm->rtm_tos & IPTOS_RT_MASK, dev, + rtm->rtm_tos & INET_DSCP_MASK, dev, &res); rt = skb_rtable(skb); From 1c6f50b37f711b831d78973dad0df1da99ad0014 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:49 +0300 Subject: [PATCH 10/12] ipv4: icmp: Pass full DS field to ip_route_input() Align the ICMP code to other callers of ip_route_input() and pass the full DS field. In the future this will allow us to perform a route lookup according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-11-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index ab6d0d98dbc34..b8f56d03fcbb6 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -545,7 +545,7 @@ static struct rtable *icmp_route_lookup(struct net *net, orefdst = skb_in->_skb_refdst; /* save old refdst */ skb_dst_set(skb_in, NULL); err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, - RT_TOS(tos), rt2->dst.dev); + tos, rt2->dst.dev); dst_release(&rt2->dst); rt2 = skb_rtable(skb_in); From b6791ac5ea49aec7f9ef123ebc728fa6a5f9090a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:50 +0300 Subject: [PATCH 11/12] ipv4: udp: Unmask upper DSCP bits during early demux Unmask the upper DSCP bits when performing source validation for multicast packets during early demux. In the future, this will allow us to perform the FIB lookup which is performed as part of source validation according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-12-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/udp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ddb86baaea6c8..8accbf4cb2956 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -115,6 +115,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -2618,7 +2619,7 @@ int udp_v4_early_demux(struct sk_buff *skb) if (!inet_sk(sk)->inet_daddr && in_dev) return ip_mc_validate_source(skb, iph->daddr, iph->saddr, - iph->tos & IPTOS_RT_MASK, + iph->tos & INET_DSCP_MASK, skb->dev, in_dev, &itag); } return 0; From be8b8ded77996cfcd1bb2f1d97b91106d5882877 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 21 Aug 2024 15:52:51 +0300 Subject: [PATCH 12/12] ipv4: Unmask upper DSCP bits when using hints Unmask the upper DSCP bits when performing source validation and routing a packet using the same route from a previously processed packet (hint). In the future, this will allow us to perform the FIB lookup that is performed as part of source validation according to the full DSCP value. No functional changes intended since the upper DSCP bits are masked when comparing against the TOS selectors in FIB rules and routes. Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Acked-by: Florian Westphal Reviewed-by: David Ahern Link: https://patch.msgid.link/20240821125251.1571445-13-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 524b70ab77a0e..f6972b24664a0 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2160,7 +2160,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; - tos &= IPTOS_RT_MASK; + tos &= INET_DSCP_MASK; err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); if (err < 0) goto martian_source;