commit a5902f243cd245d615fc0a443c370559db9117f9 (patch)
parent 2fa3153 raindrops 0.20.0
tree a3191e0747314f3acadf3063a2560c9984fc52a7
author Dale Hamel <dale.hamel@shopify.com> 2023-02-24 13:35:44 -0500
committer Eric Wong <bofh@yhbt.net> 2023-02-24 19:02:15 +0000
Fix queue stats for sockets with SO_REUSEPORT
SO_REUSEPORT was introduced in 2013, see https://lwn.net/Articles/542629/.
However, the current logic for calculating socket queue backlog stats
is from 2011, predating the advent of SO_REUSEPORT.
The current strategy was thus written before the notion that the mapping of
INET_ADDR:socket could potentially be 1:N instead of always 1:1.
This causes raindrops to provide invalid socket backlog values when
SO_REUSEPORT is used, which Unicorn supports since 2013. The current behaviour
will set the queue metric to the queue depth of the last inet_diag_msg
it processes matching the INET_ADDR. In practice, this will result in
the backlog being off by a factor N, assuming relatively even
distribution of requests to sockets, which the kernel should guarantee
through consistent hashing.
The fix here is to accumulate the socket queue depth as we iterate
over the the socket diagnostics, rather than reset it each time.
I have a provided a test, but it only checks the queues, not the
accept metrics, as those are not affected by this bug, and it is
not possible to know which of the listeners will be dispatched the
request by the kernel, and thus which should call accept.
---
ext/raindrops/linux_inet_diag.c | 4 +--
test/test_linux_reuseport_tcp_listen_stats.rb | 51 +++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 2 deletions(-)
diff --git a/ext/raindrops/linux_inet_diag.c b/ext/raindrops/linux_inet_diag.c
index cabd427..2a2360c 100644
--- a/ext/raindrops/linux_inet_diag.c
+++ b/ext/raindrops/linux_inet_diag.c
@@ -306,7 +306,7 @@ static void table_set_queued(st_table *table, struct inet_diag_msg *r)
{
struct listen_stats *stats = stats_for(table, r);
stats->listener_p = 1;
- stats->queued = r->idiag_rqueue;
+ stats->queued += r->idiag_rqueue;
}
/* inner loop of inet_diag, called for every socket returned by netlink */
@@ -328,7 +328,7 @@ static inline void r_acc(struct nogvl_args *args, struct inet_diag_msg *r)
if (args->table)
table_set_queued(args->table, r);
else
- args->stats.queued = r->idiag_rqueue;
+ args->stats.queued += r->idiag_rqueue;
}
/*
* we wont get anything else because of the idiag_states filter
diff --git a/test/test_linux_reuseport_tcp_listen_stats.rb b/test/test_linux_reuseport_tcp_listen_stats.rb
new file mode 100644
index 0000000..c977c43
--- /dev/null
+++ b/test/test_linux_reuseport_tcp_listen_stats.rb
@@ -0,0 +1,51 @@
+# -*- encoding: binary -*-
+require "./test/rack_unicorn"
+require 'test/unit'
+require 'socket'
+require 'raindrops'
+$stderr.sync = $stdout.sync = true
+
+class TestLinuxReuseportTcpListenStats < Test::Unit::TestCase
+ include Raindrops::Linux
+ include Unicorn::SocketHelper
+ TEST_ADDR = ENV['UNICORN_TEST_ADDR'] || '127.0.0.1'
+ DEFAULT_BACKLOG = 10
+
+ def setup
+ @socks = []
+ end
+
+ def teardown
+ @socks.each { |io| io.closed? or io.close }
+ end
+
+ def new_socket_server(**kwargs)
+ s = new_tcp_server TEST_ADDR, kwargs[:port] || 0, kwargs
+ s.listen(kwargs[:backlog] || DEFAULT_BACKLOG)
+ @socks << s
+ [ s, s.addr[1] ]
+ end
+
+ def new_client(port)
+ s = TCPSocket.new("127.0.0.1", port)
+ @socks << s
+ s
+ end
+
+ def test_reuseport_queue_stats
+ listeners = 10
+ _, port = new_socket_server(reuseport: true)
+ addr = "#{TEST_ADDR}:#{port}"
+ listeners.times do
+ new_socket_server(reuseport: true, port: port)
+ end
+
+ listeners.times do |i|
+ all = Raindrops::Linux.tcp_listener_stats
+ assert_equal [0, i], all[addr].to_a
+ new_client(port)
+ all = Raindrops::Linux.tcp_listener_stats
+ assert_equal [0, i+1], all[addr].to_a
+ end
+ end
+end if RUBY_PLATFORM =~ /linux/
glossary
--------
Commit objects reference one tree, and zero or more parents.
Single parent commits can typically generate a patch in
unified diff format via `git format-patch'.
Multiple parents means the commit is a merge.
Root commits have no ancestor. Note that it is
possible to have multiple root commits when merging independent histories.
Every commit references one top-level tree object.
git clone http://yhbt.net/raindrops.git