All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs
@ 2015-07-28 23:02 Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-team

This patch set implements a common function to simply set sk_txhash to
a random number instead of going through the trouble to call flow
dissector. From dst_negative_advice we now reset the sk_txhash in hopes
of finding a better ECMP path through the network. Changing sk_txhash
affects:
  - IPv6 flow label and UDP source port which affect ECMP in the network
  - Local EMCP route selection (pending changes to use sk_txhash)

Tom Herbert (2):
  net: Set sk_txhash from a random number
  net: Recompute sk_txhash on negative routing advice

 include/net/ip.h    | 16 ----------------
 include/net/ipv6.h  | 19 -------------------
 include/net/sock.h  | 16 ++++++++++++++++
 net/ipv4/datagram.c |  2 +-
 net/ipv4/tcp_ipv4.c |  4 ++--
 net/ipv6/datagram.c |  2 +-
 net/ipv6/tcp_ipv6.c |  4 ++--
 7 files changed, 22 insertions(+), 41 deletions(-)

-- 
1.8.1

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
@ 2015-07-28 23:02 ` Tom Herbert
  2015-07-29  9:13   ` Thomas Graf
  2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
  2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
  2015-07-30  5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller
  2 siblings, 2 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-team

This patch creates sk_set_txhash and eliminates protocol specific
inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
random number instead of performing flow dissection. sk_set_txash
is also allowed to be called multiple times for the same socket,
we'll need this when redoing the hash for negative routing advice.

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/net/ip.h    | 16 ----------------
 include/net/ipv6.h  | 19 -------------------
 include/net/sock.h  |  8 ++++++++
 net/ipv4/datagram.c |  2 +-
 net/ipv4/tcp_ipv4.c |  4 ++--
 net/ipv6/datagram.c |  2 +-
 net/ipv6/tcp_ipv6.c |  4 ++--
 7 files changed, 14 insertions(+), 41 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index d5fe9f2..bee5f35 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -370,22 +370,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
 	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 }
 
-static inline void inet_set_txhash(struct sock *sk)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	keys.addrs.v4addrs.src = inet->inet_saddr;
-	keys.addrs.v4addrs.dst = inet->inet_daddr;
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
-	keys.ports.src = inet->inet_sport;
-	keys.ports.dst = inet->inet_dport;
-
-	sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
 static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
 {
 	const struct iphdr *iph = skb_gro_network_header(skb);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 82dbdb0..7c79798 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -707,25 +707,6 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static inline void ip6_set_txhash(struct sock *sk)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flow_keys keys;
-
-	memset(&keys, 0, sizeof(keys));
-
-	memcpy(&keys.addrs.v6addrs.src, &np->saddr,
-	       sizeof(keys.addrs.v6addrs.src));
-	memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
-	       sizeof(keys.addrs.v6addrs.dst));
-	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
-	keys.ports.src = inet->inet_sport;
-	keys.ports.dst = inet->inet_dport;
-
-	sk->sk_txhash = flow_hash_from_keys(&keys);
-}
-
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 					__be32 flowlabel, bool autolabel)
 {
diff --git a/include/net/sock.h b/include/net/sock.h
index 4353ef7..fe735c4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1687,6 +1687,14 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
 kuid_t sock_i_uid(struct sock *sk);
 unsigned long sock_i_ino(struct sock *sk);
 
+static inline void sk_set_txhash(struct sock *sk)
+{
+	sk->sk_txhash = prandom_u32();
+
+	if (unlikely(!sk->sk_txhash))
+		sk->sk_txhash = 1;
+}
+
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 574fad9..f915abf 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 	inet->inet_daddr = fl4->daddr;
 	inet->inet_dport = usin->sin_port;
 	sk->sk_state = TCP_ESTABLISHED;
-	inet_set_txhash(sk);
+	sk_set_txhash(sk);
 	inet->inet_id = jiffies;
 
 	sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 486ba96..d27eb54 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		goto failure;
 
-	inet_set_txhash(sk);
+	sk_set_txhash(sk);
 
 	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
 			       inet->inet_sport, inet->inet_dport, sk);
@@ -1277,7 +1277,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
 	newinet->rcv_tos      = ip_hdr(skb)->tos;
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	inet_set_txhash(newsk);
+	sk_set_txhash(newsk);
 	if (inet_opt)
 		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
 	newinet->inet_id = newtp->write_seq ^ jiffies;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2572a32..9aadd57 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -199,7 +199,7 @@ ipv4_connected:
 		      NULL);
 
 	sk->sk_state = TCP_ESTABLISHED;
-	ip6_set_txhash(sk);
+	sk_set_txhash(sk);
 out:
 	fl6_sock_release(flowlabel);
 	return err;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d540846..52dd0d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	if (err)
 		goto late_failure;
 
-	ip6_set_txhash(sk);
+	sk_set_txhash(sk);
 
 	if (!tp->write_seq && likely(!tp->repair))
 		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
@@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
 	newsk->sk_bound_dev_if = ireq->ir_iif;
 
-	ip6_set_txhash(newsk);
+	sk_set_txhash(newsk);
 
 	/* Now IPv6 options...
 
-- 
1.8.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice
  2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
@ 2015-07-28 23:02 ` Tom Herbert
  2015-07-30  5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller
  2 siblings, 0 replies; 15+ messages in thread
From: Tom Herbert @ 2015-07-28 23:02 UTC (permalink / raw)
  To: davem, netdev; +Cc: kernel-team

When a connection is failing a transport protocol calls
dst_negative_advice to try to get a better route. This patch includes
changing the sk_txhash in that function. This provides a rudimentary
method to try to find a different path in the network since sk_txhash
affects ECMP on the local host and through the network (via flow labels
or UDP source port in encapsulation).

Signed-off-by: Tom Herbert <tom@herbertland.com>
---
 include/net/sock.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/include/net/sock.h b/include/net/sock.h
index fe735c4..24aa75c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1695,6 +1695,12 @@ static inline void sk_set_txhash(struct sock *sk)
 		sk->sk_txhash = 1;
 }
 
+static inline void sk_rethink_txhash(struct sock *sk)
+{
+	if (sk->sk_txhash)
+		sk_set_txhash(sk);
+}
+
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
@@ -1719,6 +1725,8 @@ static inline void dst_negative_advice(struct sock *sk)
 {
 	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
 
+	sk_rethink_txhash(sk);
+
 	if (dst && dst->ops->negative_advice) {
 		ndst = dst->ops->negative_advice(dst);
 
-- 
1.8.1

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
@ 2015-07-29  9:13   ` Thomas Graf
  2015-07-29  9:29     ` Eric Dumazet
  2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
  1 sibling, 1 reply; 15+ messages in thread
From: Thomas Graf @ 2015-07-29  9:13 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, kernel-team

On 07/28/15 at 04:02pm, Tom Herbert wrote:
> This patch creates sk_set_txhash and eliminates protocol specific
> inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> random number instead of performing flow dissection. sk_set_txash
> is also allowed to be called multiple times for the same socket,
> we'll need this when redoing the hash for negative routing advice.
> 
> Signed-off-by: Tom Herbert <tom@herbertland.com>

Doesn't this break TX hashing with SO_REUSEPORT?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:13   ` Thomas Graf
@ 2015-07-29  9:29     ` Eric Dumazet
  2015-07-29  9:54       ` Thomas Graf
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-07-29  9:29 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Tom Herbert, davem, netdev, kernel-team

On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
> On 07/28/15 at 04:02pm, Tom Herbert wrote:
> > This patch creates sk_set_txhash and eliminates protocol specific
> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > random number instead of performing flow dissection. sk_set_txash
> > is also allowed to be called multiple times for the same socket,
> > we'll need this when redoing the hash for negative routing advice.
> > 
> > Signed-off-by: Tom Herbert <tom@herbertland.com>
> 
> Doesn't this break TX hashing with SO_REUSEPORT?


AFAIK nothing uses sk_txhash yet.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:29     ` Eric Dumazet
@ 2015-07-29  9:54       ` Thomas Graf
  2015-07-29 10:06         ` Eric Dumazet
  2015-07-29 15:58         ` Tom Herbert
  0 siblings, 2 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29  9:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, kernel-team

On 07/29/15 at 11:29am, Eric Dumazet wrote:
> On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
> > On 07/28/15 at 04:02pm, Tom Herbert wrote:
> > > This patch creates sk_set_txhash and eliminates protocol specific
> > > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > > random number instead of performing flow dissection. sk_set_txash
> > > is also allowed to be called multiple times for the same socket,
> > > we'll need this when redoing the hash for negative routing advice.
> > > 
> > > Signed-off-by: Tom Herbert <tom@herbertland.com>
> > 
> > Doesn't this break TX hashing with SO_REUSEPORT?
> 
> 
> AFAIK nothing uses sk_txhash yet.

skb_set_hash_from_sk()
skb_get_hash()

Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
to assume that different sockets may go to different queues even if
the L4 tuple is identical.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:54       ` Thomas Graf
@ 2015-07-29 10:06         ` Eric Dumazet
  2015-07-29 10:47           ` Thomas Graf
  2015-07-29 15:58         ` Tom Herbert
  1 sibling, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-07-29 10:06 UTC (permalink / raw)
  To: Thomas Graf; +Cc: Tom Herbert, davem, netdev, kernel-team

On Wed, 2015-07-29 at 11:54 +0200, Thomas Graf wrote:

> skb_set_hash_from_sk()
> skb_get_hash()
> 
> Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> to assume that different sockets may go to different queues even if
> the L4 tuple is identical.

skb_set_hash_from_sk() sets skb->hash in output path. Nothing uses it
then later. bonding uses its own hash functions.

SO_REUSEPORT is on input path, and uses its own hash anyway.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29 10:06         ` Eric Dumazet
@ 2015-07-29 10:47           ` Thomas Graf
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 10:47 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Tom Herbert, davem, netdev, kernel-team

On 07/29/15 at 12:06pm, Eric Dumazet wrote:
> On Wed, 2015-07-29 at 11:54 +0200, Thomas Graf wrote:
> 
> > skb_set_hash_from_sk()
> > skb_get_hash()
> > 
> > Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> > to assume that different sockets may go to different queues even if
> > the L4 tuple is identical.
> 
> skb_set_hash_from_sk() sets skb->hash in output path. Nothing uses it
> then later. bonding uses its own hash functions.
> 
> SO_REUSEPORT is on input path, and uses its own hash anyway.

Yes, I'm talking about the output path only but after further reading,
the only case that could be a problem is if two SO_REUSEPORT sockets
connect to the same destination address and port which will be prevented
by connect anyway.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29  9:54       ` Thomas Graf
  2015-07-29 10:06         ` Eric Dumazet
@ 2015-07-29 15:58         ` Tom Herbert
  2015-07-29 20:02           ` Thomas Graf
  1 sibling, 1 reply; 15+ messages in thread
From: Tom Herbert @ 2015-07-29 15:58 UTC (permalink / raw)
  To: Thomas Graf
  Cc: Eric Dumazet, David S. Miller, Linux Kernel Network Developers,
	Kernel Team

On Wed, Jul 29, 2015 at 2:54 AM, Thomas Graf <tgraf@suug.ch> wrote:
> On 07/29/15 at 11:29am, Eric Dumazet wrote:
>> On Wed, 2015-07-29 at 11:13 +0200, Thomas Graf wrote:
>> > On 07/28/15 at 04:02pm, Tom Herbert wrote:
>> > > This patch creates sk_set_txhash and eliminates protocol specific
>> > > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
>> > > random number instead of performing flow dissection. sk_set_txash
>> > > is also allowed to be called multiple times for the same socket,
>> > > we'll need this when redoing the hash for negative routing advice.
>> > >
>> > > Signed-off-by: Tom Herbert <tom@herbertland.com>
>> >
>> > Doesn't this break TX hashing with SO_REUSEPORT?
>>
>>
>> AFAIK nothing uses sk_txhash yet.
>
> skb_set_hash_from_sk()
> skb_get_hash()
>
> Am I misreading this? I'm not using SO_REUSEPORT and it might be OK
> to assume that different sockets may go to different queues even if
> the L4 tuple is identical.

Hi Thomas,

The salient property of both sk_txhash and skb->hash is that they
provide a uniform distribution over flows. It is incorrect to assume
that either of these immutable during the lifetime of a flow, so yes
this means that packets of a flow may go to different receive queues
when hashes change. SO_REUSEPORT is a process in the receive path but
uses ehashfn over the ports. But even with SO_REUSEPORT we provide no
guarantee that packets of a "flow" will always hit the same socket,
the hashing is not consistent when new reuseport sockets are added or
removed-- this is actually a long standing issue with SO_REUSEPORT in
the TCP case since it is possible to orphan connections in SYN-RECV. I
believe Eric was working toward fixing that, so maybe in the future we
can use skb->hash if it is a savings.

Thanks,
Tom

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 1/2] net: Set sk_txhash from a random number
  2015-07-29 15:58         ` Tom Herbert
@ 2015-07-29 20:02           ` Thomas Graf
  0 siblings, 0 replies; 15+ messages in thread
From: Thomas Graf @ 2015-07-29 20:02 UTC (permalink / raw)
  To: Tom Herbert
  Cc: Eric Dumazet, David S. Miller, Linux Kernel Network Developers,
	Kernel Team

On 07/29/15 at 08:58am, Tom Herbert wrote:
> The salient property of both sk_txhash and skb->hash is that they
> provide a uniform distribution over flows. It is incorrect to assume
> that either of these immutable during the lifetime of a flow, so yes
> this means that packets of a flow may go to different receive queues
> when hashes change. SO_REUSEPORT is a process in the receive path but
> uses ehashfn over the ports. But even with SO_REUSEPORT we provide no
> guarantee that packets of a "flow" will always hit the same socket,
> the hashing is not consistent when new reuseport sockets are added or
> removed-- this is actually a long standing issue with SO_REUSEPORT in
> the TCP case since it is possible to orphan connections in SYN-RECV. I
> believe Eric was working toward fixing that, so maybe in the future we
> can use skb->hash if it is a savings.

Thanks for the explanation. I have no objections to the changes then.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs
  2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
  2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
@ 2015-07-30  5:44 ` David Miller
  2 siblings, 0 replies; 15+ messages in thread
From: David Miller @ 2015-07-30  5:44 UTC (permalink / raw)
  To: tom; +Cc: netdev, kernel-team

From: Tom Herbert <tom@herbertland.com>
Date: Tue, 28 Jul 2015 16:02:04 -0700

> This patch set implements a common function to simply set sk_txhash to
> a random number instead of going through the trouble to call flow
> dissector. From dst_negative_advice we now reset the sk_txhash in hopes
> of finding a better ECMP path through the network. Changing sk_txhash
> affects:
>   - IPv6 flow label and UDP source port which affect ECMP in the network
>   - Local EMCP route selection (pending changes to use sk_txhash)

This looks fine, series applied, thanks Tom.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
  2015-07-29  9:13   ` Thomas Graf
@ 2015-12-08  8:33   ` Alexander Drozdov
  2015-12-08 13:15     ` Eric Dumazet
  1 sibling, 1 reply; 15+ messages in thread
From: Alexander Drozdov @ 2015-12-08  8:33 UTC (permalink / raw)
  To: Tom Herbert, davem, netdev; +Cc: kernel-team


29.07.2015 02:02, Tom Herbert wrote:
> This patch creates sk_set_txhash and eliminates protocol specific
> inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> random number instead of performing flow dissection. sk_set_txash
> is also allowed to be called multiple times for the same socket,
> we'll need this when redoing the hash for negative routing advice.
It seems that this patch and some previous txhash-related
ones break af_packet hash features for outgoing packets:
- PACKET_FANOUT_HASH
- TP_FT_REQ_FILL_RXHASH

af_packet now thinks that hashes for for incoming and outgoing
packets of the same TCP stream differ. That is true for TCP
sessions initiated by the host.

>
> Signed-off-by: Tom Herbert <tom@herbertland.com>
> ---
>   include/net/ip.h    | 16 ----------------
>   include/net/ipv6.h  | 19 -------------------
>   include/net/sock.h  |  8 ++++++++
>   net/ipv4/datagram.c |  2 +-
>   net/ipv4/tcp_ipv4.c |  4 ++--
>   net/ipv6/datagram.c |  2 +-
>   net/ipv6/tcp_ipv6.c |  4 ++--
>   7 files changed, 14 insertions(+), 41 deletions(-)
>
> diff --git a/include/net/ip.h b/include/net/ip.h
> index d5fe9f2..bee5f35 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -370,22 +370,6 @@ static inline void iph_to_flow_copy_v4addrs(struct flow_keys *flow,
>   	flow->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
>   }
>   
> -static inline void inet_set_txhash(struct sock *sk)
> -{
> -	struct inet_sock *inet = inet_sk(sk);
> -	struct flow_keys keys;
> -
> -	memset(&keys, 0, sizeof(keys));
> -
> -	keys.addrs.v4addrs.src = inet->inet_saddr;
> -	keys.addrs.v4addrs.dst = inet->inet_daddr;
> -	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
> -	keys.ports.src = inet->inet_sport;
> -	keys.ports.dst = inet->inet_dport;
> -
> -	sk->sk_txhash = flow_hash_from_keys(&keys);
> -}
> -
>   static inline __wsum inet_gro_compute_pseudo(struct sk_buff *skb, int proto)
>   {
>   	const struct iphdr *iph = skb_gro_network_header(skb);
> diff --git a/include/net/ipv6.h b/include/net/ipv6.h
> index 82dbdb0..7c79798 100644
> --- a/include/net/ipv6.h
> +++ b/include/net/ipv6.h
> @@ -707,25 +707,6 @@ static inline void iph_to_flow_copy_v6addrs(struct flow_keys *flow,
>   }
>   
>   #if IS_ENABLED(CONFIG_IPV6)
> -static inline void ip6_set_txhash(struct sock *sk)
> -{
> -	struct inet_sock *inet = inet_sk(sk);
> -	struct ipv6_pinfo *np = inet6_sk(sk);
> -	struct flow_keys keys;
> -
> -	memset(&keys, 0, sizeof(keys));
> -
> -	memcpy(&keys.addrs.v6addrs.src, &np->saddr,
> -	       sizeof(keys.addrs.v6addrs.src));
> -	memcpy(&keys.addrs.v6addrs.dst, &sk->sk_v6_daddr,
> -	       sizeof(keys.addrs.v6addrs.dst));
> -	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
> -	keys.ports.src = inet->inet_sport;
> -	keys.ports.dst = inet->inet_dport;
> -
> -	sk->sk_txhash = flow_hash_from_keys(&keys);
> -}
> -
>   static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
>   					__be32 flowlabel, bool autolabel)
>   {
> diff --git a/include/net/sock.h b/include/net/sock.h
> index 4353ef7..fe735c4 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -1687,6 +1687,14 @@ static inline void sock_graft(struct sock *sk, struct socket *parent)
>   kuid_t sock_i_uid(struct sock *sk);
>   unsigned long sock_i_ino(struct sock *sk);
>   
> +static inline void sk_set_txhash(struct sock *sk)
> +{
> +	sk->sk_txhash = prandom_u32();
> +
> +	if (unlikely(!sk->sk_txhash))
> +		sk->sk_txhash = 1;
> +}
> +
>   static inline struct dst_entry *
>   __sk_dst_get(struct sock *sk)
>   {
> diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
> index 574fad9..f915abf 100644
> --- a/net/ipv4/datagram.c
> +++ b/net/ipv4/datagram.c
> @@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
>   	inet->inet_daddr = fl4->daddr;
>   	inet->inet_dport = usin->sin_port;
>   	sk->sk_state = TCP_ESTABLISHED;
> -	inet_set_txhash(sk);
> +	sk_set_txhash(sk);
>   	inet->inet_id = jiffies;
>   
>   	sk_dst_set(sk, &rt->dst);
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 486ba96..d27eb54 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
>   	if (err)
>   		goto failure;
>   
> -	inet_set_txhash(sk);
> +	sk_set_txhash(sk);
>   
>   	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
>   			       inet->inet_sport, inet->inet_dport, sk);
> @@ -1277,7 +1277,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>   	newinet->mc_ttl	      = ip_hdr(skb)->ttl;
>   	newinet->rcv_tos      = ip_hdr(skb)->tos;
>   	inet_csk(newsk)->icsk_ext_hdr_len = 0;
> -	inet_set_txhash(newsk);
> +	sk_set_txhash(newsk);
>   	if (inet_opt)
>   		inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
>   	newinet->inet_id = newtp->write_seq ^ jiffies;
> diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
> index 2572a32..9aadd57 100644
> --- a/net/ipv6/datagram.c
> +++ b/net/ipv6/datagram.c
> @@ -199,7 +199,7 @@ ipv4_connected:
>   		      NULL);
>   
>   	sk->sk_state = TCP_ESTABLISHED;
> -	ip6_set_txhash(sk);
> +	sk_set_txhash(sk);
>   out:
>   	fl6_sock_release(flowlabel);
>   	return err;
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index d540846..52dd0d9 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
>   	if (err)
>   		goto late_failure;
>   
> -	ip6_set_txhash(sk);
> +	sk_set_txhash(sk);
>   
>   	if (!tp->write_seq && likely(!tp->repair))
>   		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
> @@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
>   	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
>   	newsk->sk_bound_dev_if = ireq->ir_iif;
>   
> -	ip6_set_txhash(newsk);
> +	sk_set_txhash(newsk);
>   
>   	/* Now IPv6 options...
>   

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
@ 2015-12-08 13:15     ` Eric Dumazet
  2015-12-08 16:33       ` Tom Herbert
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2015-12-08 13:15 UTC (permalink / raw)
  To: Alexander Drozdov; +Cc: Tom Herbert, davem, netdev, kernel-team

On Tue, 2015-12-08 at 11:33 +0300, Alexander Drozdov wrote:
> 29.07.2015 02:02, Tom Herbert wrote:
> > This patch creates sk_set_txhash and eliminates protocol specific
> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
> > random number instead of performing flow dissection. sk_set_txash
> > is also allowed to be called multiple times for the same socket,
> > we'll need this when redoing the hash for negative routing advice.
> It seems that this patch and some previous txhash-related
> ones break af_packet hash features for outgoing packets:
> - PACKET_FANOUT_HASH
> - TP_FT_REQ_FILL_RXHASH
> 
> af_packet now thinks that hashes for for incoming and outgoing
> packets of the same TCP stream differ. That is true for TCP
> sessions initiated by the host.

There never has been such guarantee. Even rx hashes for a single TCP
flow can differ, if packets are received on two different NIC with
different RSSS keys.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-12-08 13:15     ` Eric Dumazet
@ 2015-12-08 16:33       ` Tom Herbert
  2015-12-09 11:14         ` Alexander Drozdov
  0 siblings, 1 reply; 15+ messages in thread
From: Tom Herbert @ 2015-12-08 16:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Alexander Drozdov, David S. Miller,
	Linux Kernel Network Developers, Kernel Team

On Tue, Dec 8, 2015 at 5:15 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Tue, 2015-12-08 at 11:33 +0300, Alexander Drozdov wrote:
>> 29.07.2015 02:02, Tom Herbert wrote:
>> > This patch creates sk_set_txhash and eliminates protocol specific
>> > inet_set_txhash and ip6_set_txhash. sk_set_txhash simply sets a
>> > random number instead of performing flow dissection. sk_set_txash
>> > is also allowed to be called multiple times for the same socket,
>> > we'll need this when redoing the hash for negative routing advice.
>> It seems that this patch and some previous txhash-related
>> ones break af_packet hash features for outgoing packets:
>> - PACKET_FANOUT_HASH
>> - TP_FT_REQ_FILL_RXHASH
>>
>> af_packet now thinks that hashes for for incoming and outgoing
>> packets of the same TCP stream differ. That is true for TCP
>> sessions initiated by the host.
>
> There never has been such guarantee. Even rx hashes for a single TCP
> flow can differ, if packets are received on two different NIC with
> different RSSS keys.
>
+1, it is a salient property that hashes can differ in each direction
for a flow and that the hash for a flow can change over time.

>
>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [net-next,1/2] net: Set sk_txhash from a random number
  2015-12-08 16:33       ` Tom Herbert
@ 2015-12-09 11:14         ` Alexander Drozdov
  0 siblings, 0 replies; 15+ messages in thread
From: Alexander Drozdov @ 2015-12-09 11:14 UTC (permalink / raw)
  To: Tom Herbert, Eric Dumazet
  Cc: David S. Miller, Linux Kernel Network Developers, Kernel Team

08.12.2015 19:33, Tom Herbert wrote:
> On Tue, Dec 8, 2015 at 5:15 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>> There never has been such guarantee. Even rx hashes for a single TCP
>> flow can differ, if packets are received on two different NIC with
>> different RSSS keys.
>>
> +1, it is a salient property that hashes can differ in each direction
> for a flow and that the hash for a flow can change over time.
Thanks! I'll then try to move onto BPF fanout.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2015-12-09 11:14 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-28 23:02 [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs Tom Herbert
2015-07-28 23:02 ` [PATCH net-next 1/2] net: Set sk_txhash from a random number Tom Herbert
2015-07-29  9:13   ` Thomas Graf
2015-07-29  9:29     ` Eric Dumazet
2015-07-29  9:54       ` Thomas Graf
2015-07-29 10:06         ` Eric Dumazet
2015-07-29 10:47           ` Thomas Graf
2015-07-29 15:58         ` Tom Herbert
2015-07-29 20:02           ` Thomas Graf
2015-12-08  8:33   ` [net-next,1/2] " Alexander Drozdov
2015-12-08 13:15     ` Eric Dumazet
2015-12-08 16:33       ` Tom Herbert
2015-12-09 11:14         ` Alexander Drozdov
2015-07-28 23:02 ` [PATCH net-next 2/2] net: Recompute sk_txhash on negative routing advice Tom Herbert
2015-07-30  5:44 ` [PATCH net-next 0/2] net: Initialize sk_hash to random value and reset for failing cnxs David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.