-
Notifications
You must be signed in to change notification settings - Fork 1
/
rfc2988bis.patch
286 lines (259 loc) · 10.4 KB
/
rfc2988bis.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
From e9aec3f857c215bc7393d6bd598e24d7f6a757e1 Mon Sep 17 00:00:00 2001
From: Ali Abbas <[email protected]>
Date: Mon, 20 Feb 2012 11:55:40 +0100
Subject: [PATCH] rfc2988bis
---
include/linux/tcp.h | 1 +
include/net/tcp.h | 11 +++++++-
net/ipv4/syncookies.c | 1 +
net/ipv4/tcp_input.c | 57 ++++++++++++++++++++++-----------------------
net/ipv4/tcp_ipv4.c | 12 ++++++---
net/ipv4/tcp_minisocks.c | 7 +++++-
net/ipv6/syncookies.c | 1 +
net/ipv6/tcp_ipv6.c | 6 ++++-
8 files changed, 59 insertions(+), 37 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e574001..c61ef82 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,6 +239,7 @@ struct tcp_request_sock {
#endif
u32 rcv_isn;
u32 snt_isn;
+ u32 snt_synack; /* synack sent time */
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7fddec3..f8477e3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -130,7 +130,13 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#endif
#define TCP_RTO_MAX ((unsigned)(120*HZ))
#define TCP_RTO_MIN ((unsigned)(HZ/5))
-#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */
+#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC2988bis initial RTO value */
+#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
+ * used as a fallback RTO for the
+ * initial data transmission if no
+ * valid RTT sample has been acquired,
+ * most likely due to retrans in 3WHS.
+ */
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
* for local resources.
@@ -287,7 +293,7 @@ static inline void tcp_synq_overflow(struct sock *sk)
static inline int tcp_synq_no_recent_overflow(const struct sock *sk)
{
unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- return time_after(jiffies, last_overflow + TCP_TIMEOUT_INIT);
+ return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
}
extern struct proto tcp_prot;
@@ -532,6 +538,7 @@ extern void tcp_initialize_rcv_mss(struct sock *sk);
extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);
+extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
static inline void tcp_bound_rto(const struct sock *sk)
{
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e07..ed901c4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -304,6 +304,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e6bec56..2f2a81d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -868,17 +868,18 @@ static void tcp_init_metrics(struct sock *sk)
tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
tp->snd_ssthresh = tp->snd_cwnd_clamp;
+ } else {
+ /* ssthresh may have been reduced unnecessarily during.
+ * 3WHS. Restore it back to its initial default.
+ */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
}
if (dst_metric(dst, RTAX_REORDERING) &&
tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
tcp_disable_fack(tp);
tp->reordering = dst_metric(dst, RTAX_REORDERING);
}
-
- if (dst_metric(dst, RTAX_RTT) == 0)
- goto reset;
-
- if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+ if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
goto reset;
/* Initial rtt is determined from SYN,SYN-ACK.
@@ -904,25 +905,28 @@ static void tcp_init_metrics(struct sock *sk)
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
}
tcp_set_rto(sk);
- if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
- goto reset;
-
-cwnd:
- tp->snd_cwnd = tcp_init_cwnd(tp, dst);
- tp->snd_cwnd_stamp = tcp_time_stamp;
- return;
reset:
- /* Play conservative. If timestamps are not
- * supported, TCP will fail to recalculate correct
- * rtt, if initial rto is too small. FORGET ALL AND RESET!
- */
- if (!tp->rx_opt.saw_tstamp && tp->srtt) {
- tp->srtt = 0;
- tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
- inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
- }
- goto cwnd;
+ if (tp->srtt == 0) {
+ /* RFC2988bis: We've failed to get a valid RTT sample from
+ * 3WHS. This is most likely due to retransmission,
+ * including spurious one. Reset the RTO back to 3secs
+ * from the more aggressive 1sec to avoid more spurious
+ * retransmission.
+ */
+ tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
+ inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
+ }
+ /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
+ * retransmitted. In light of RFC2988bis' more aggressive 1sec
+ * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
+ * retransmission has occurred.
+ */
+ if (tp->total_retrans > 1)
+ tp->snd_cwnd = 1;
+ else
+ tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+ tp->snd_cwnd_stamp = tcp_time_stamp;
}
static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -3064,12 +3068,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
tcp_xmit_retransmit_queue(sk);
}
-static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
{
tcp_rtt_estimator(sk, seq_rtt);
tcp_set_rto(sk);
inet_csk(sk)->icsk_backoff = 0;
}
+EXPORT_SYMBOL(tcp_valid_rtt_meas);
/* Read draft-ietf-tcplw-high-performance before mucking
* with this code. (Supersedes RFC1323)
@@ -5696,12 +5701,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- /* tcp_ack considers this ACK as duplicate
- * and does not calculate rtt.
- * Force it here.
- */
- tcp_ack_update_rtt(sk, 0, 0);
-
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 363136b..0f25256 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -407,8 +407,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
break;
icsk->icsk_backoff--;
- inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
- icsk->icsk_backoff;
+ inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+ TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
tcp_bound_rto(sk);
skb = tcp_write_queue_head(sk);
@@ -1333,6 +1333,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
isn = tcp_v4_init_sequence(skb);
}
tcp_rsk(req)->snt_isn = isn;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
goto drop_and_free;
@@ -1401,6 +1402,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
+ if (tcp_rsk(req)->snt_synack)
+ tcp_valid_rtt_meas(newsk,
+ tcp_time_stamp - tcp_rsk(req)->snt_synack);
+ newtp->total_retrans = req->retrans;
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
@@ -1812,8 +1817,7 @@ static int tcp_v4_init_sock(struct sock *sk)
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- tp->snd_cwnd = 2;
-
+ tp->snd_cwnd = TCP_INIT_CWND;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
*/
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ab7ac46..90da62a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -417,7 +417,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- newtp->snd_cwnd = 2;
+ newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
newtp->bytes_acked = 0;
@@ -647,6 +647,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
inet_rsk(req)->acked = 1;
return NULL;
}
+ if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
+ tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
+ else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
+ tcp_rsk(req)->snt_synack = 0;
+
/* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae91..9333737 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -230,6 +230,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8babc93..482dd69 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1239,7 +1239,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
}
tcp_rsk(req)->snt_isn = isn;
-
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
security_inet_conn_request(sk, skb, req);
if (tcp_v6_send_synack(sk, req))
@@ -1431,6 +1431,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(newsk);
+ if (tcp_rsk(req)->snt_synack)
+ tcp_valid_rtt_meas(newsk,
+ tcp_time_stamp - tcp_rsk(req)->snt_synack);
+ newtp->total_retrans = req->retrans;
newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
--
1.7.5.4