raindrops.git  about / heads / tags
real-time stats for preforking Rack servers
blob 8fc0772cdd15fd76d1cfdbb18ea206ba46ea5db6 12871 bytes (raw)
$ git show HEAD:lib/raindrops/watcher.rb	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
 
# -*- encoding: binary -*-
# frozen_string_literal: false
require "thread"
require "time"
require "socket"
require "rack"
require "aggregate"

# Raindrops::Watcher is a stand-alone Rack application for watching
# any number of TCP and UNIX listeners (all of them by default).
#
# It depends on the {Aggregate RubyGem}[https://rubygems.org/gems/aggregate]
#
# In your Rack config.ru:
#
#    run Raindrops::Watcher.new(options = {})
#
# It takes the following options hash:
#
# - :listeners - an array of listener names, (e.g. %w(0.0.0.0:80 /tmp/sock))
# - :delay - interval between stats updates in seconds (default: 1)
#
# Raindrops::Watcher is compatible with any thread-safe/thread-aware Rack
# middleware.  It does not work well with multi-process web servers
# but can be used to monitor them.  It consumes minimal resources
# with the default :delay.
#
# == HTTP endpoints
#
# === GET /
#
# Returns an HTML summary listing of all listen interfaces watched on
#
# === GET /active/$LISTENER.txt
#
# Returns a plain text summary + histogram with X-* HTTP headers for
# active connections.
#
# e.g.: curl https://yhbt.net/raindrops-demo/active/0.0.0.0%3A80.txt
#
# === GET /active/$LISTENER.html
#
# Returns an HTML summary + histogram with X-* HTTP headers for
# active connections.
#
# e.g.: curl https://yhbt.net/raindrops-demo/active/0.0.0.0%3A80.html
#
# === GET /queued/$LISTENER.txt
#
# Returns a plain text summary + histogram with X-* HTTP headers for
# queued connections.
#
# e.g.: curl https://yhbt.net/raindrops-demo/queued/0.0.0.0%3A80.txt
#
# === GET /queued/$LISTENER.html
#
# Returns an HTML summary + histogram with X-* HTTP headers for
# queued connections.
#
# e.g.: curl https://yhbt.net/raindrops-demo/queued/0.0.0.0%3A80.html
#
# === POST /reset/$LISTENER
#
# Resets the active and queued statistics for the given listener.
#
# === GET /tail/$LISTENER.txt?active_min=1&queued_min=1
#
# Streams a chunked response to the client.
# Interval is the preconfigured +:delay+ of the application (default 1 second)
#
# The response is plain text in the following format:
#
#   ISO8601_TIMESTAMP LISTENER_NAME ACTIVE_COUNT QUEUED_COUNT LINEFEED
#
# Query parameters:
#
# - active_min - do not stream a line until this active count is reached
# - queued_min - do not stream a line until this queued count is reached
#
# == Response headers (mostly the same names as Raindrops::LastDataRecv)
#
# - X-Count   - number of samples polled
# - X-Last-Reset - date since the last reset
#
# The following headers are always sent, but most of them are only
# meaningful once X-Count is greater than one.
#
# - X-Min     - lowest number of connections recorded
# - X-Max     - highest number of connections recorded
# - X-Mean    - mean number of connections recorded
# - X-Std-Dev - standard deviation of connection count
# - X-Outliers-Low - number of low outliers (hopefully many for queued)
# - X-Outliers-High - number of high outliers (hopefully zero for queued)
# - X-Current - current number of connections
# - X-First-Peak-At - date of when X-Max was first reached
# - X-Last-Peak-At - date of when X-Max was last reached
#
# = Demo Server
#
# There is a server running this app at https://yhbt.net/raindrops-demo/
# The Raindrops::Middleware demo is also accessible at
# https://yhbt.net/raindrops-demo/_raindrops
#
# The demo server is only limited to 30 users, so be sure not to abuse it
# by using the /tail/ endpoint too much.
class Raindrops::Watcher
  # :stopdoc:
  attr_reader :snapshot
  include Rack::Utils
  include Raindrops::Linux
  DOC_URL = "https://yhbt.net/raindrops/Raindrops/Watcher.html"
  Peak = Struct.new(:first, :last)

  # Sets up the bookkeeping for a watcher.  No background thread is started
  # here; #call starts it on the first request.
  #
  # Recognized +opts+ keys:
  #
  # - :listeners - one or more listener names.  Names starting with "/" are
  #   collected as UNIX socket paths, everything else as TCP addresses.
  #   When the option is missing or yields no names, both listener lists
  #   stay +nil+.
  # - :agg_class - class instantiated per listener to aggregate samples
  #   (default: Aggregate)
  # - :delay - seconds between stats updates (default: 1)
  def initialize(opts = {})
    @tcp_listeners = nil
    @unix_listeners = nil
    requested = opts[:listeners]
    if requested
      unix_paths, tcp_addrs =
        Array(requested).partition { |name| name =~ %r{\A/} }
      if !tcp_addrs.empty? || !unix_paths.empty?
        @tcp_listeners = tcp_addrs
        @unix_listeners = unix_paths
      end
    end

    @agg_class = opts[:agg_class] || Aggregate
    @start_time = Time.now.utc

    # every per-listener table creates its entry lazily on first access
    @active = Hash.new { |table, addr| table[addr] = @agg_class.new }
    @queued = Hash.new { |table, addr| table[addr] = @agg_class.new }
    @resets = Hash.new { |table, addr| table[addr] = @start_time }
    @peak_active = Hash.new do |table, addr|
      table[addr] = Peak.new(@start_time, @start_time)
    end
    @peak_queued = Hash.new do |table, addr|
      table[addr] = Peak.new(@start_time, @start_time)
    end

    @snapshot = [ @start_time, {} ]
    @delay = opts[:delay] || 1
    @lock = Mutex.new   # guards the tables above and @snapshot
    @start = Mutex.new  # serializes creation of the aggregator thread
    @cond = ConditionVariable.new
    @thr = nil
  end

  # Returns this machine's host name as reported by Socket.gethostname.
  # Used in the <title> of the HTML pages built by #index and
  # #histogram_html.
  def hostname
    Socket.gethostname
  end

  # rack endpoint
  # Starts the background aggregator thread on the first request, then
  # dispatches on the request method.
  #
  # env - the Rack environment hash
  #
  # Returns a Rack response triplet.  HEAD is answered like GET with the
  # body replaced by an empty array.  Any method other than GET, HEAD or
  # POST gets a 405 response that lists the supported methods in its
  # Allow header, as HTTP requires for that status.
  def call(env)
    # @start serializes thread creation so that concurrent first requests
    # cannot spawn two aggregator threads
    @start.synchronize { @thr ||= aggregator_thread(env["rack.logger"]) }
    case env["REQUEST_METHOD"]
    when "GET"
      get env
    when "HEAD"
      r = get(env)
      r[2] = []
      r
    when "POST"
      post env
    else
      headers = { "Allow" => "GET, HEAD, POST" }
      Rack::Response.new(["Method Not Allowed"], 405, headers).finish
    end
  end

  # Records one sample for +addr+ and keeps the peak timestamps current.
  #
  # agg_hash  - maps addr to an aggregate responding to +max+ and +<<+
  # peak_hash - maps addr to an object with +first=+ and +last=+
  # number    - the sampled connection count
  # now       - timestamp to store when the sample ties or beats the maximum
  #
  # The peak is only touched when a previous maximum exists, the sample is
  # positive and it is at least as large as that maximum: +last+ is always
  # refreshed, +first+ only when the maximum is exceeded.
  #
  # Returns whatever +<<+ returns on the aggregate.
  def aggregate!(agg_hash, peak_hash, addr, number, now)
    samples = agg_hash[addr]
    previous_max = samples.max
    if previous_max && number > 0 && number >= previous_max
      record = peak_hash[addr]
      record.last = now
      record.first = now if number > previous_max
    end
    samples << number
  end

  # Spawns the background thread that polls listener statistics every
  # +@delay+ seconds, feeds them into the aggregates and publishes a new
  # +@snapshot+ (waking everybody blocked on +@cond+).
  #
  # logger - object responding to +error+, or +nil+.  Without a logger,
  #          polling errors are written with Kernel#warn instead, so that
  #          a missing "rack.logger" cannot kill the thread from inside
  #          its own error handler.
  #
  # Blocks in #wait_snapshot before returning the Thread.  The loop ends
  # once +@socket+ is cleared (see #shutdown); the diag socket is closed
  # on the way out, whatever the reason for leaving.
  def aggregator_thread(logger) # :nodoc:
    @socket = sock = Raindrops::InetDiagSocket.new
    thr = Thread.new do
      begin
        begin
          # NOTE(review): tcp_listener_stats/unix_listener_stats are not
          # defined in this file; presumably they come from the included
          # Raindrops::Linux module
          combined = tcp_listener_stats(@tcp_listeners, sock)
          combined.merge!(unix_listener_stats(@unix_listeners))
          @lock.synchronize do
            now = Time.now.utc
            combined.each do |addr,stats|
              aggregate!(@active, @peak_active, addr, stats.active, now)
              aggregate!(@queued, @peak_queued, addr, stats.queued, now)
            end
            @snapshot = [ now, combined ]
            @cond.broadcast
          end
        rescue => e
          msg = "#{e.class} #{e.inspect}"
          if logger
            logger.error msg
          else
            warn msg
          end
        end while sleep(@delay) && @socket
      ensure
        sock.close
      end
    end
    wait_snapshot
    thr
  end

  # Builds the placeholder returned for a listener that is missing from the
  # current snapshot: a fresh, empty aggregate, a current count of zero and
  # a peak pinned to the watcher's start time.
  #
  # time - timestamp of the snapshot that was consulted
  #
  # Returns [ time, start_time, empty_aggregate, 0, peak ].
  def non_existent_stats(time)
    empty_aggregate = @agg_class.new
    untouched_peak = Peak.new(@start_time, @start_time)
    [ time, @start_time, empty_aggregate, 0, untouched_peak ]
  end

  # Returns [ snapshot_time, reset_time, aggregate_copy, current_active,
  # peak_copy ] for +addr+, or the #non_existent_stats placeholder when
  # +addr+ is not part of the latest snapshot.
  def active_stats(addr) # :nodoc:
    listener_stats(addr, @active, @peak_active, :active)
  end

  # Same as #active_stats, but for queued connections.
  def queued_stats(addr) # :nodoc:
    listener_stats(addr, @queued, @peak_queued, :queued)
  end

  # Shared implementation of #active_stats and #queued_stats.
  #
  # agg_hash/peak_hash - the per-listener tables to read from
  # field - name of the reader to call on the snapshot entry
  #
  # Both the aggregate and the peak are duplicated while +@lock+ is held,
  # so the caller never reads an object the aggregator thread may be
  # updating at the same time.
  def listener_stats(addr, agg_hash, peak_hash, field) # :nodoc:
    @lock.synchronize do
      time, combined = @snapshot
      stats = combined[addr] or return non_existent_stats(time)
      [ time, @resets[addr], agg_hash[addr].dup, stats.public_send(field),
        peak_hash[addr].dup ]
    end
  end
  private :listener_stats

  # Blocks the calling thread until +@snapshot+ has been replaced, then
  # returns the new snapshot.
  #
  # The wait is repeated for as long as +@snapshot+ is still the very
  # object seen on entry: a wakeup that did not come with a fresh snapshot
  # (for example an early or signal-induced wakeup of the condition
  # variable) must not hand the stale snapshot back to the caller.
  def wait_snapshot
    @lock.synchronize do
      seen = @snapshot
      @cond.wait(@lock) while seen.equal?(@snapshot)
      @snapshot
    end
  end

  # Returns the standard deviation of +agg+ as a String, or "NaN" when it
  # cannot be computed.
  #
  # Besides Errno::EDOM this also rescues Math::DomainError, which is what
  # current Ruby Math functions raise for out-of-domain arguments (e.g.
  # the square root of a slightly negative variance).
  def std_dev(agg)
    agg.std_dev.to_s
  rescue Errno::EDOM, Math::DomainError
    "NaN"
  end

  # Converts one listener's statistics into the X-* response headers.
  #
  # reset_at - Time of the last reset
  # agg      - aggregate responding to count, min, max, mean, std_dev,
  #            outliers_low and outliers_high
  # current  - current number of connections
  # peak     - object whose +first+ and +last+ are Times
  #
  # Returns a Hash of header name => String value.  The insertion order is
  # the order in which #histogram_html lists the rows.
  def agg_to_hash(reset_at, agg, current, peak)
    stats = {}
    stats["X-Count"] = agg.count.to_s
    stats["X-Min"] = agg.min.to_s
    stats["X-Max"] = agg.max.to_s
    stats["X-Mean"] = agg.mean.to_s
    stats["X-Std-Dev"] = std_dev(agg)
    stats["X-Outliers-Low"] = agg.outliers_low.to_s
    stats["X-Outliers-High"] = agg.outliers_high.to_s
    stats["X-Last-Reset"] = reset_at.httpdate
    stats["X-Current"] = current.to_s
    stats["X-First-Peak-At"] = peak.first.httpdate
    stats["X-Last-Peak-At"] = peak.last.httpdate
    stats
  end

  # Renders one listener's statistics as a plain-text Rack response.
  #
  # agg - the five-element array produced by #active_stats/#queued_stats:
  #       [ updated_at, reset_at, aggregate, current, peak ]
  #
  # Returns [ 200, headers, [ body ] ]: the body is the aggregate's +to_s+
  # output, the headers are the X-* statistics plus Content-Type, Expires
  # (one +@delay+ after the snapshot) and Content-Length.
  def histogram_txt(agg)
    updated_at, reset_at, histogram, current, peak = *agg
    headers = agg_to_hash(reset_at, histogram, current, peak)
    body = histogram.to_s
    headers["Content-Type"] = "text/plain"
    headers["Expires"] = (updated_at + @delay).httpdate
    # Content-Length counts bytes; String#size counts characters and would
    # under-report a body that contains multi-byte characters
    headers["Content-Length"] = body.bytesize.to_s
    [ 200, headers, [ body ] ]
  end

  # Renders one listener's statistics as an HTML Rack response.
  #
  # agg  - the five-element array produced by #active_stats/#queued_stats:
  #        [ updated_at, reset_at, aggregate, current, peak ]
  # addr - the (already unescaped) listener name from the request path
  #
  # The page holds a table of the X-* statistics (with the "X-" prefix
  # dropped), the escaped +to_s+ output of the aggregate and a form that
  # POSTs to ../reset/ADDR.
  #
  # Returns [ 200, headers, [ body ] ].
  def histogram_html(agg, addr)
    updated_at, reset_at, histogram, current, peak = *agg
    headers = agg_to_hash(reset_at, histogram, current, peak)
    # built before Content-Type & co. are added, so that the table only
    # lists the statistics
    rows = headers.map do |name, value|
      "<tr><td>#{name.sub(/\AX-/, '')}</td><td>#{value}</td></tr>"
    end.join
    body = "<html>" \
      "<head><title>#{hostname} - #{escape_html addr}</title></head>" \
      "<body><table>#{rows}</table><pre>#{escape_html histogram}</pre>" \
      "<form action='../reset/#{escape addr}' method='post'>" \
      "<input type='submit' name='x' value='reset' /></form>" \
      "</body></html>"
    headers["Content-Type"] = "text/html"
    headers["Expires"] = (updated_at + @delay).httpdate
    # +addr+ comes from the request and may contain multi-byte characters,
    # so the length has to be counted in bytes, not characters
    headers["Content-Length"] = body.bytesize.to_s
    [ 200, headers, [ body ] ]
  end

  # Routes a GET request by PATH_INFO:
  #
  #   /                       -> #index
  #   /active/LISTENER.txt    -> #histogram_txt of #active_stats
  #   /active/LISTENER.html   -> #histogram_html of #active_stats
  #   /queued/LISTENER.txt    -> #histogram_txt of #queued_stats
  #   /queued/LISTENER.html   -> #histogram_html of #queued_stats
  #   /tail/LISTENER.txt      -> #tail
  #   anything else           -> #not_found
  #
  # LISTENER is URL-unescaped before use.
  #
  # If rendering fails with a math domain error (Errno::EDOM, or
  # Math::DomainError as raised by current Ruby Math functions), the
  # request waits for the next snapshot and is retried exactly once; a
  # second failure is re-raised.
  def get(env)
    retried = false
    begin
      case env["PATH_INFO"]
      when "/"
        index
      when %r{\A/active/(.+)\.txt\z}
        histogram_txt(active_stats(unescape($1)))
      when %r{\A/active/(.+)\.html\z}
        addr = unescape $1
        histogram_html(active_stats(addr), addr)
      when %r{\A/queued/(.+)\.txt\z}
        histogram_txt(queued_stats(unescape($1)))
      when %r{\A/queued/(.+)\.html\z}
        addr = unescape $1
        histogram_html(queued_stats(addr), addr)
      when %r{\A/tail/(.+)\.txt\z}
        tail(unescape($1), env)
      else
        not_found
      end
    rescue Errno::EDOM, Math::DomainError
      raise if retried
      retried = true
      wait_snapshot
      retry
    end
  end

  # Returns the finished Rack::Response used for every unknown path or
  # listener: status 404 with the body "Not Found".
  def not_found
    Rack::Response.new(["Not Found"], 404).finish
  end

  # Routes a POST request.  The only supported path is /reset/LISTENER,
  # which is handed to #reset! with LISTENER URL-unescaped; every other
  # path is answered by #not_found.
  def post(env)
    matched = %r{\A/reset/(.+)\z}.match(env["PATH_INFO"])
    return not_found unless matched

    reset!(env, unescape(matched[1]))
  end

  # Discards the collected statistics of +addr+ and redirects the client.
  #
  # Under +@lock+ the active and queued aggregates are dropped, both peak
  # records are restarted at the reset time (so that a peak timestamp can
  # never predate X-Last-Reset) and the reset time is stored.  The call
  # then waits on +@cond+ so that the page the client is sent to is
  # rendered from data gathered after the reset.
  #
  # Returns #not_found if nothing has been recorded for +addr+, otherwise
  # a redirect to the Referer or, without one, to the application root.
  # The fallback is an absolute URL (scheme, host and SCRIPT_NAME): a bare
  # "host:port/" would be resolved by clients as a relative reference.
  def reset!(env, addr)
    @lock.synchronize do
      @active.include?(addr) or return not_found
      now = Time.now.utc
      @active.delete addr
      @queued.delete addr
      @peak_active[addr] = Peak.new(now, now)
      @peak_queued[addr] = Peak.new(now, now)
      @resets[addr] = now
      @cond.wait @lock
    end
    req = Rack::Request.new(env)
    res = Rack::Response.new
    url = req.referer ||
          "#{req.scheme}://#{req.host_with_port}#{req.script_name}/"
    res.redirect(url)
    res["Content-Type"] = "text/plain"
    res.write "Redirecting to #{url}"
    res.finish
  end

  # Renders the HTML overview of every listener in the current snapshot,
  # sorted by address.  Each row links to the tail, active and queued
  # pages of the listener and carries a reset form.
  #
  # Returns [ 200, headers, [ body ] ] with Content-Type, Last-Modified,
  # Expires (one +@delay+ after the snapshot) and Content-Length set.
  def index
    updated_at, all = snapshot
    headers = {
      "Content-Type" => "text/html",
      "Last-Modified" => updated_at.httpdate,
      "Expires" => (updated_at + @delay).httpdate,
    }
    rows = all.sort_by { |addr, _stats| addr }.map do |addr, stats|
      e_addr = escape addr
      "<tr>" \
        "<td><a href='tail/#{e_addr}.txt' " \
          "title='&quot;tail&quot; output in real time'" \
          ">#{escape_html addr}</a></td>" \
        "<td><a href='active/#{e_addr}.html' " \
          "title='show active connection stats'>#{stats.active}</a></td>" \
        "<td><a href='queued/#{e_addr}.html' " \
          "title='show queued connection stats'>#{stats.queued}</a></td>" \
        "<td><form action='reset/#{e_addr}' method='post'>" \
          "<input title='reset statistics' " \
            "type='submit' name='x' value='x' /></form></td>" \
      "</tr>"
    end.join
    body = "<html><head>" \
      "<title>#{hostname} - all interfaces</title>" \
      "</head><body><h3>Updated at #{updated_at.iso8601}</h3>" \
      "<table><tr>" \
        "<th>address</th><th>active</th><th>queued</th><th>reset</th>" \
      "</tr>#{rows}</table>" \
      "<p>" \
        "This is running the #{self.class} service, see " \
        "<a href='#{DOC_URL}'>#{DOC_URL}</a> " \
        "for more information and options." \
      "</p>" \
      "</body></html>"
    # listener names may contain multi-byte characters, so the length has
    # to be counted in bytes, not characters
    headers["Content-Length"] = body.bytesize.to_s
    [ 200, headers, [ body ] ]
  end

  # Builds the streaming response for "/tail/ADDR.txt": a Tailer bound to
  # this watcher, the listener +addr+ and the request +env+.  Returns what
  # Tailer#finish returns, i.e. a Rack triplet whose body is the Tailer.
  def tail(addr, env)
    Tailer.new(self, addr, env).finish
  end

  # This is the response body returned for "/tail/$ADDRESS.txt".  This
  # must use a multi-threaded Rack server with streaming response support.
  # It is an internal class and not expected to be used directly
  class Tailer
    # rdmon - object whose +wait_snapshot+ blocks until a new
    #         [ time, { addr => stats } ] pair is available
    # addr  - name of the listener to follow
    # env   - Rack environment; QUERY_STRING supplies the optional
    #         active_min/queued_min thresholds and HTTP_VERSION decides
    #         whether the body is chunk-encoded (not for "HTTP/1.0" or
    #         when the key is missing)
    def initialize(rdmon, addr, env) # :nodoc:
      @rdmon = rdmon
      @addr = addr
      q = Rack::Utils.parse_query env["QUERY_STRING"]
      @active_min = min_param(q, "active_min")
      @queued_min = min_param(q, "queued_min")
      len = [ addr.size, 35 ].min # cap the width of the address column
      @fmt = "%20s % #{len}s % 10u % 10u\n"
      @chunk = ![ "HTTP/1.0", nil ].include?(env["HTTP_VERSION"])
    end

    # Returns the Rack triplet; the Tailer itself is the response body.
    def finish
      headers = {
        "Content-Type" => "text/plain",
        "Cache-Control" => "no-transform",
        "Expires" => Time.at(0).httpdate,
      }
      headers["Transfer-Encoding"] = "chunked" if @chunk
      [ 200, headers, self ]
    end

    # called by the Rack server
    #
    # Yields one line per snapshot in which the listener is present and
    # both thresholds are met.  Kernel#loop ends quietly on StopIteration,
    # so a monitor that raises it from +wait_snapshot+ finishes the stream
    # and, for chunked responses, the terminating chunk is sent.
    def each # :nodoc:
      loop do
        time, all = @rdmon.wait_snapshot
        stats = all[@addr]
        next unless stats
        next unless stats.queued >= @queued_min
        next unless stats.active >= @active_min
        line = sprintf(@fmt, time.iso8601, @addr, stats.active, stats.queued)
        # the chunk header announces the payload length in bytes, which
        # differs from String#size for multi-byte listener names
        line = "#{line.bytesize.to_s(16)}\r\n#{line}\r\n" if @chunk
        yield line
      end
      yield "0\r\n\r\n" if @chunk
    end

    private

    # Reads an integer threshold from the parsed query.  A parameter given
    # more than once is parsed into an Array; the last occurrence wins.
    # Missing or non-numeric values count as 0.
    def min_param(query, key)
      Array(query[key]).last.to_i
    end
  end

  # shuts down the background thread, only for tests
  # Stops the background thread: clearing +@socket+ makes the aggregator
  # loop end after its current sleep, then the thread (if one was ever
  # started) is joined and forgotten.  Returns +nil+.
  def shutdown
    @socket = nil
    if @thr
      @thr.join
    end
    @thr = nil
  end
  # :startdoc:
end

git clone https://yhbt.net/raindrops.git