All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs
@ 2015-07-28 23:02 Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-team

This patch set implements a common function to simply set sk_txhash to
a random number instead of going through the trouble to call flow
dissector. From dst_negative_advice we now reset the sk_txhash in hopes
of finding a better ECMP path through the network. Changing sk_txhash
affects:
  - IPv6 flow label and UDP source port which affect ECMP in the network
  - Local EMCP route selection (pending changes to use sk_txhash)

Tom Herbert (2):
  net: Set sk_txhash from a random number
  net: Recompute sk_txhash on negative routing advice

 include/net/ip.h    | 16 ----------------
 include/net/ipv6.h  | 19 -------------------
 include/net/sock.h  | 16 ++++++++++++++++
 net/ipv4/datagram.c |  2 +-
 net/ipv4/tcp_ipv4.c |  4 ++--
 net/ipv6/datagram.c |  2 +-
 net/ipv6/tcp_ipv6.c |  4 ++--
 7 files changed, 22 insertions(+), 41 deletions(-)

-- 
1.8.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
@ 2015-07-28 23:02 ` Tom Herbert
  2015-07-29  9:13   ` Thomas Graf
  2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
  2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
  2015-07-30  5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller
  2 siblings, 2 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-team

This patch creates sk_set_txhash and eliminates protocol specific
inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
random number instead of performing flow dissection. sk_set_txash
is also allowed to be called multiple times for the same socket,
we'll need this when redoing the hash for negative routing advice.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/net/ip.h    | 16 ----------------
 include/net/ipv6.h  | 19 -------------------
 include/net/sock.h  |  8 ++++++++
 net/ipv4/datagram.c |  2 +-
 net/ipv4/tcp_ipv4.c |  4 ++--
 net/ipv6/datagram.c |  2 +-
 net/ipv6/tcp_ipv6.c |  4 ++--
 7 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index d5fe9f2..bee5f35 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -370,22 +370,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
 	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 }
 
-static inline void inet_set_txhash(struct sock *sk)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	keys.addrs.v4addrs.src = inet->inet_saddr;
-	keys.addrs.v4addrs.dst = inet->inet_daddr;
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-	keys.ports.src = inet->inet_sport;
-	keys.ports.dst = inet->inet_dport;
-
-	sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
 static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
 {
 	const struct iphdr *iph = skb_gro_network_header(skb);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 82dbdb0..7c79798 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -707,25 +707,6 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static inline void ip6_set_txhash(struct sock *sk)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	memcpy(&keys.addrs.v6addrs.src, &np->saddr,
-	       sizeof(keys.addrs.v6addrs.src));
-	memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
-	       sizeof(keys.addrs.v6addrs.dst));
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	keys.ports.src = inet->inet_sport;
-	keys.ports.dst = inet->inet_dport;
-
-	sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 					__be32 flowlabel, bool autolabel)
 {
diff --git a/include/net/sock.h b/include/net/sock.h
index 4353ef7..fe735c4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1687,6 +1687,14 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 kuid_t sock_i_uid(struct sock *sk);
 unsigned long sock_i_ino(struct sock *sk);
 
+static inline void sk_set_txhash(struct sock *sk)
+{
+	sk->sk_txhash = prandom_u32();
+
+	if (unlikely(!sk->sk_txhash))
+		sk->sk_txhash = 1;
+}
+
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 574fad9..f915abf 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
-	inet_set_txhash(sk);
+	sk_set_txhash(sk);
 	inet->inet_id = jiffies;
 
 	sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 486ba96..d27eb54 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		goto failure;
 
-	inet_set_txhash(sk);
+	sk_set_txhash(sk);
 
 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 			       inet->inet_sport, inet->inet_dport, sk);
@@ -1277,7 +1277,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	newinet->rcv_tos      = ip_hdr(skb)->tos;
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	inet_set_txhash(newsk);
+	sk_set_txhash(newsk);
 	if (inet_opt)
 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 	newinet->inet_id = newtp->write_seq ^ jiffies;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2572a32..9aadd57 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -199,7 +199,7 @@ ipv4_connected:
 		      NULL);
 
 	sk->sk_state = TCP_ESTABLISHED;
-	ip6_set_txhash(sk);
+	sk_set_txhash(sk);
 out:
 	fl6_sock_release(flowlabel);
 	return err;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d540846..52dd0d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (err)
 		goto late_failure;
 
-	ip6_set_txhash(sk);
+	sk_set_txhash(sk);
 
 	if (!tp->write_seq && likely(!tp->repair))
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
@@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
 	newsk->sk_bound_dev_if = ireq->ir_iif;
 
-	ip6_set_txhash(newsk);
+	sk_set_txhash(newsk);
 
 	/* Now IPv6 options...
 
-- 
1.8.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice
  2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
@ 2015-07-28 23:02 ` Tom Herbert
  2015-07-30  5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller
  2 siblings, 0 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-team

When a connection is failing a transport protocol calls
dst_negative_advice to try to get a better route. This patch includes
changing the sk_txhash in that function. This provides a rudimentary
method to try to find a different path in the network since sk_txhash
affects ECMP on the local host and through the network (via flow labels
or UDP source port in encapsulation).

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/net/sock.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index fe735c4..24aa75c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1695,6 +1695,12 @@ static inline void sk_set_txhash(struct sock *sk)
 		sk->sk_txhash = 1;
 }
 
+static inline void sk_rethink_txhash(struct sock *sk)
+{
+	if (sk->sk_txhash)
+		sk_set_txhash(sk);
+}
+
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
@@ -1719,6 +1725,8 @@ static inline void dst_negative_advice(struct sock *sk)
 {
 	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
 
+	sk_rethink_txhash(sk);
+
 	if (dst && dst->ops->negative_advice) {
 		ndst = dst->ops->negative_advice(dst);
 
-- 
1.8.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
@ 2015-07-29  9:13   ` Thomas Graf
  2015-07-29  9:29     ` Eric Dumazet
  2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
  1 sibling, 1 reply; 15+ messages in thread
From: Thomas Graf @ 2015-07-29  9:13 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, kernel-team

On 07/28/15 at 04:02pm, Tom Herbert wrote:
> This patch creates sk_set_txhash and eliminates protocol specific
> inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> random number instead of performing flow dissection. sk_set_txash
> is also allowed to be called multiple times for the same socket,
> we'll need this when redoing the hash for negative routing advice.
> 
> Signed-off-by: Tom Herbert <tom@herbertland.com>

Doesn't this break TX hashing with SO_REUSEPORT?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:13   ` Thomas Graf
@ 2015-07-29  9:29     ` Eric Dumazet
  2015-07-29  9:54       ` Thomas Graf
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-07-29  9:29 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Tom Herbert, davem, netdev, kernel-team

On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
> On 07/28/15 at 04:02pm, Tom Herbert wrote:
> > This patch creates sk_set_txhash and eliminates protocol specific
> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > random number instead of performing flow dissection. sk_set_txash
> > is also allowed to be called multiple times for the same socket,
> > we'll need this when redoing the hash for negative routing advice.
> > 
> > Signed-off-by: Tom Herbert <tom@herbertland.com>
> 
> Doesn't this break TX hashing with SO_REUSEPORT?


AFAIK nothing uses sk_txhash yet.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:29     ` Eric Dumazet
@ 2015-07-29  9:54       ` Thomas Graf
  2015-07-29 10:06         ` Eric Dumazet
  2015-07-29 15:58         ` Tom Herbert
  0 siblings, 2 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29  9:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, kernel-team

On 07/29/15 at 11:29am, Eric Dumazet wrote:
> On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
> > On 07/28/15 at 04:02pm, Tom Herbert wrote:
> > > This patch creates sk_set_txhash and eliminates protocol specific
> > > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > > random number instead of performing flow dissection. sk_set_txash
> > > is also allowed to be called multiple times for the same socket,
> > > we'll need this when redoing the hash for negative routing advice.
> > > 
> > > Signed-off-by: Tom Herbert <tom@herbertland.com>
> > 
> > Doesn't this break TX hashing with SO_REUSEPORT?
> 
> 
> AFAIK nothing uses sk_txhash yet.

skb_set_hash_from_sk()
skb_get_hash()

Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
to assume that different sockets may go to different queues even if
the L4 tuple is identical.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:54       ` Thomas Graf
@ 2015-07-29 10:06         ` Eric Dumazet
  2015-07-29 10:47           ` Thomas Graf
  2015-07-29 15:58         ` Tom Herbert
  1 sibling, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-07-29 10:06 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Tom Herbert, davem, netdev, kernel-team

On Wed, 2015-07-29 at 11:54 +0200, Thomas Graf wrote:

> skb_set_hash_from_sk()
> skb_get_hash()
> 
> Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> to assume that different sockets may go to different queues even if
> the L4 tuple is identical.

skb_set_hash_from_sk() sets skb->hash in output path. Nothing uses it
then later. bonding uses its own hash functions.

SO_REUSEPORT is on input path, and uses its own hash anyway.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29 10:06         ` Eric Dumazet
@ 2015-07-29 10:47           ` Thomas Graf
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 10:47 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, kernel-team

On 07/29/15 at 12:06pm, Eric Dumazet wrote:
> On Wed, 2015-07-29 at 11:54 +0200, Thomas Graf wrote:
> 
> > skb_set_hash_from_sk()
> > skb_get_hash()
> > 
> > Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> > to assume that different sockets may go to different queues even if
> > the L4 tuple is identical.
> 
> skb_set_hash_from_sk() sets skb->hash in output path. Nothing uses it
> then later. bonding uses its own hash functions.
> 
> SO_REUSEPORT is on input path, and uses its own hash anyway.

Yes, I'm talking about the output path only but after further reading,
the only case that could be a problem is if two SO_REUSEPORT sockets
connect to the same destination address and port which will be prevented
by connect anyway.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:54       ` Thomas Graf
  2015-07-29 10:06         ` Eric Dumazet
@ 2015-07-29 15:58         ` Tom Herbert
  2015-07-29 20:02           ` Thomas Graf
  1 sibling, 1 reply; 15+ messages in thread
From: Tom Herbert @ 2015-07-29 15:58 UTC (permalink / raw)
  To: Thomas Graf
  Cc: Eric Dumazet, David S. Miller, Linux Kernel Network Developers,
	Kernel Team

On Wed, Jul 29, 2015 at 2:54 AM, Thomas Graf <tgraf@suug.ch> wrote:
> On 07/29/15 at 11:29am, Eric Dumazet wrote:
>> On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
>> > On 07/28/15 at 04:02pm, Tom Herbert wrote:
>> > > This patch creates sk_set_txhash and eliminates protocol specific
>> > > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
>> > > random number instead of performing flow dissection. sk_set_txash
>> > > is also allowed to be called multiple times for the same socket,
>> > > we'll need this when redoing the hash for negative routing advice.
>> > >
>> > > Signed-off-by: Tom Herbert <tom@herbertland.com>
>> >
>> > Doesn't this break TX hashing with SO_REUSEPORT?
>>
>>
>> AFAIK nothing uses sk_txhash yet.
>
> skb_set_hash_from_sk()
> skb_get_hash()
>
> Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> to assume that different sockets may go to different queues even if
> the L4 tuple is identical.

Hi Thomas,

The salient property of both sk_txhash and skb->hash is that they
provide a uniform distribution over flows. It is incorrect to assume
that either of these immutable during the lifetime of a flow, so yes
this means that packets of a flow may go to different receive queues
when hashes change. SO_REUSEPORT is a process in the receive path but
uses ehashfn over the ports. But even with SO_REUSEPORT we provide no
guarantee that packets of a "flow" will always hit the same socket,
the hashing is not consistent when new reuseport sockets are added or
removed-- this is actually a long standing issue with SO_REUSEPORT in
the TCP case since it is possible to orphan connections in SYN-RECV. I
believe Eric was working toward fixing that, so maybe in the future we
can use skb->hash if it is a savings.

Thanks,
Tom

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29 15:58         ` Tom Herbert
@ 2015-07-29 20:02           ` Thomas Graf
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 20:02 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Eric Dumazet, David S. Miller, Linux Kernel Network Developers,
	Kernel Team

On 07/29/15 at 08:58am, Tom Herbert wrote:
> The salient property of both sk_txhash and skb->hash is that they
> provide a uniform distribution over flows. It is incorrect to assume
> that either of these immutable during the lifetime of a flow, so yes
> this means that packets of a flow may go to different receive queues
> when hashes change. SO_REUSEPORT is a process in the receive path but
> uses ehashfn over the ports. But even with SO_REUSEPORT we provide no
> guarantee that packets of a "flow" will always hit the same socket,
> the hashing is not consistent when new reuseport sockets are added or
> removed-- this is actually a long standing issue with SO_REUSEPORT in
> the TCP case since it is possible to orphan connections in SYN-RECV. I
> believe Eric was working toward fixing that, so maybe in the future we
> can use skb->hash if it is a savings.

Thanks for the explanation. I have no objections to the changes then.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs
  2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
@ 2015-07-30  5:44 ` David Miller
  2 siblings, 0 replies; 15+ messages in thread
From: David Miller @ 2015-07-30  5:44 UTC (permalink / raw)
  To: tom; +Cc: netdev, kernel-team

From: Tom Herbert <tom@herbertland.com>
Date: Tue, 28 Jul 2015 16:02:04 -0700

> This patch set implements a common function to simply set sk_txhash to
> a random number instead of going through the trouble to call flow
> dissector. From dst_negative_advice we now reset the sk_txhash in hopes
> of finding a better ECMP path through the network. Changing sk_txhash
> affects:
>   - IPv6 flow label and UDP source port which affect ECMP in the network
>   - Local EMCP route selection (pending changes to use sk_txhash)

This looks fine, series applied, thanks Tom.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
  2015-07-29  9:13   ` Thomas Graf
@ 2015-12-08  8:33   ` Alexander Drozdov
  2015-12-08 13:15     ` Eric Dumazet
  1 sibling, 1 reply; 15+ messages in thread
From: Alexander Drozdov @ 2015-12-08  8:33 UTC (permalink / raw)
  To: Tom Herbert, davem, netdev; +Cc: kernel-team


29.07.2015 02:02, Tom Herbert wrote:
> This patch creates sk_set_txhash and eliminates protocol specific
> inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> random number instead of performing flow dissection. sk_set_txash
> is also allowed to be called multiple times for the same socket,
> we'll need this when redoing the hash for negative routing advice.
It seems that this patch and some previous txhash-related
ones break af_packet hash features for outgoing packets:
- PACKET_FANOUT_HASH
- TP_FT_REQ_FILL_RXHASH

af_packet now thinks that hashes for for incoming and outgoing
packets of the same TCP stream differ. That is true for TCP
sessions initiated by the host.

>
> Signed-off-by: Tom Herbert <tom@herbertland.com>
> ---
>   include/net/ip.h    | 16 ----------------
>   include/net/ipv6.h  | 19 -------------------
>   include/net/sock.h  |  8 ++++++++
>   net/ipv4/datagram.c |  2 +-
>   net/ipv4/tcp_ipv4.c |  4 ++--
>   net/ipv6/datagram.c |  2 +-
>   net/ipv6/tcp_ipv6.c |  4 ++--
>   7 files changed, 14 insertions(+), 41 deletions(-)
>
> diff --git a/include/net/ip.h b/include/net/ip.h
> index d5fe9f2..bee5f35 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -370,22 +370,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
>   	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
>   }
>   
> -static inline void inet_set_txhash(struct sock *sk)
> -{
> -	struct inet_sock *inet = inet_sk(sk);
> -	struct flow_keys keys;
> -
> -	memset(&keys, 0, sizeof(keys));
> -
> -	keys.addrs.v4addrs.src = inet->inet_saddr;
> -	keys.addrs.v4addrs.dst = inet->inet_daddr;
> -	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> -	keys.ports.src = inet->inet_sport;
> -	keys.ports.dst = inet->inet_dport;
> -
> -	sk->sk_txhash = flow_hash_from_keys(&keys);
> -}
> -
>   static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
>   {
>   	const struct iphdr *iph = skb_gro_network_header(skb);
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index 82dbdb0..7c79798 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -707,25 +707,6 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
>   }
>   
>   #if IS_ENABLED(CONFIG_IPV6)
> -static inline void ip6_set_txhash(struct sock *sk)
> -{
> -	struct inet_sock *inet = inet_sk(sk);
> -	struct ipv6_pinfo *np = inet6_sk(sk);
> -	struct flow_keys keys;
> -
> -	memset(&keys, 0, sizeof(keys));
> -
> -	memcpy(&keys.addrs.v6addrs.src, &np->saddr,
> -	       sizeof(keys.addrs.v6addrs.src));
> -	memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
> -	       sizeof(keys.addrs.v6addrs.dst));
> -	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> -	keys.ports.src = inet->inet_sport;
> -	keys.ports.dst = inet->inet_dport;
> -
> -	sk->sk_txhash = flow_hash_from_keys(&keys);
> -}
> -
>   static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
>   					__be32 flowlabel, bool autolabel)
>   {
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 4353ef7..fe735c4 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1687,6 +1687,14 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
>   kuid_t sock_i_uid(struct sock *sk);
>   unsigned long sock_i_ino(struct sock *sk);
>   
> +static inline void sk_set_txhash(struct sock *sk)
> +{
> +	sk->sk_txhash = prandom_u32();
> +
> +	if (unlikely(!sk->sk_txhash))
> +		sk->sk_txhash = 1;
> +}
> +
>   static inline struct dst_entry *
>   __sk_dst_get(struct sock *sk)
>   {
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index 574fad9..f915abf 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
>   	inet->inet_daddr = fl4->daddr;
>   	inet->inet_dport = usin->sin_port;
>   	sk->sk_state = TCP_ESTABLISHED;
> -	inet_set_txhash(sk);
> +	sk_set_txhash(sk);
>   	inet->inet_id = jiffies;
>   
>   	sk_dst_set(sk, &rt->dst);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 486ba96..d27eb54 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>   	if (err)
>   		goto failure;
>   
> -	inet_set_txhash(sk);
> +	sk_set_txhash(sk);
>   
>   	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
>   			       inet->inet_sport, inet->inet_dport, sk);
> @@ -1277,7 +1277,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>   	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
>   	newinet->rcv_tos      = ip_hdr(skb)->tos;
>   	inet_csk(newsk)->icsk_ext_hdr_len = 0;
> -	inet_set_txhash(newsk);
> +	sk_set_txhash(newsk);
>   	if (inet_opt)
>   		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
>   	newinet->inet_id = newtp->write_seq ^ jiffies;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index 2572a32..9aadd57 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -199,7 +199,7 @@ ipv4_connected:
>   		      NULL);
>   
>   	sk->sk_state = TCP_ESTABLISHED;
> -	ip6_set_txhash(sk);
> +	sk_set_txhash(sk);
>   out:
>   	fl6_sock_release(flowlabel);
>   	return err;
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index d540846..52dd0d9 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
>   	if (err)
>   		goto late_failure;
>   
> -	ip6_set_txhash(sk);
> +	sk_set_txhash(sk);
>   
>   	if (!tp->write_seq && likely(!tp->repair))
>   		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
> @@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>   	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
>   	newsk->sk_bound_dev_if = ireq->ir_iif;
>   
> -	ip6_set_txhash(newsk);
> +	sk_set_txhash(newsk);
>   
>   	/* Now IPv6 options...
>   

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
@ 2015-12-08 13:15     ` Eric Dumazet
  2015-12-08 16:33       ` Tom Herbert
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-12-08 13:15 UTC (permalink / raw)
  To: Alexander Drozdov; +Cc: Tom Herbert, davem, netdev, kernel-team

On Tue, 2015-12-08 at 11:33 +0300, Alexander Drozdov wrote:
> 29.07.2015 02:02, Tom Herbert wrote:
> > This patch creates sk_set_txhash and eliminates protocol specific
> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > random number instead of performing flow dissection. sk_set_txash
> > is also allowed to be called multiple times for the same socket,
> > we'll need this when redoing the hash for negative routing advice.
> It seems that this patch and some previous txhash-related
> ones break af_packet hash features for outgoing packets:
> - PACKET_FANOUT_HASH
> - TP_FT_REQ_FILL_RXHASH
> 
> af_packet now thinks that hashes for for incoming and outgoing
> packets of the same TCP stream differ. That is true for TCP
> sessions initiated by the host.

There never has been such guarantee. Even rx hashes for a single TCP
flow can differ, if packets are received on two different NIC with
different RSSS keys.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-12-08 13:15     ` Eric Dumazet
@ 2015-12-08 16:33       ` Tom Herbert
  2015-12-09 11:14         ` Alexander Drozdov
  0 siblings, 1 reply; 15+ messages in thread
From: Tom Herbert @ 2015-12-08 16:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Alexander Drozdov, David S. Miller,
	Linux Kernel Network Developers, Kernel Team

On Tue, Dec 8, 2015 at 5:15 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2015-12-08 at 11:33 +0300, Alexander Drozdov wrote:
>> 29.07.2015 02:02, Tom Herbert wrote:
>> > This patch creates sk_set_txhash and eliminates protocol specific
>> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
>> > random number instead of performing flow dissection. sk_set_txash
>> > is also allowed to be called multiple times for the same socket,
>> > we'll need this when redoing the hash for negative routing advice.
>> It seems that this patch and some previous txhash-related
>> ones break af_packet hash features for outgoing packets:
>> - PACKET_FANOUT_HASH
>> - TP_FT_REQ_FILL_RXHASH
>>
>> af_packet now thinks that hashes for for incoming and outgoing
>> packets of the same TCP stream differ. That is true for TCP
>> sessions initiated by the host.
>
> There never has been such guarantee. Even rx hashes for a single TCP
> flow can differ, if packets are received on two different NIC with
> different RSSS keys.
>
+1, it is a salient property that hashes can differ in each direction
for a flow and that the hash for a flow can change over time.

>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-12-08 16:33       ` Tom Herbert
@ 2015-12-09 11:14         ` Alexander Drozdov
  0 siblings, 0 replies; 15+ messages in thread
From: Alexander Drozdov @ 2015-12-09 11:14 UTC (permalink / raw)
  To: Tom Herbert, Eric Dumazet
  Cc: David S. Miller, Linux Kernel Network Developers, Kernel Team

08.12.2015 19:33, Tom Herbert wrote:
> On Tue, Dec 8, 2015 at 5:15 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> There never has been such guarantee. Even rx hashes for a single TCP
>> flow can differ, if packets are received on two different NIC with
>> different RSSS keys.
>>
> +1, it is a salient property that hashes can differ in each direction
> for a flow and that the hash for a flow can change over time.
Thanks! I'll then try to move onto BPF fanout.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2015-12-09 11:14 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
2015-07-29  9:13   ` Thomas Graf
2015-07-29  9:29     ` Eric Dumazet
2015-07-29  9:54       ` Thomas Graf
2015-07-29 10:06         ` Eric Dumazet
2015-07-29 10:47           ` Thomas Graf
2015-07-29 15:58         ` Tom Herbert
2015-07-29 20:02           ` Thomas Graf
2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
2015-12-08 13:15     ` Eric Dumazet
2015-12-08 16:33       ` Tom Herbert
2015-12-09 11:14         ` Alexander Drozdov
2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
2015-07-30  5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller

All the mail mirrored from lore.kernel.org

This inbox may be cloned and mirrored by anyone:

	git clone --mirror http://yhbt.net/lore/all

Example config snippet for mirrors.


AGPL code for this site: git clone http://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/public-inbox.git