diff options
Diffstat (limited to 'lib/unicorn')
-rw-r--r-- | lib/unicorn/app/exec_cgi.rb | 48 | ||||
-rw-r--r-- | lib/unicorn/app/inetd.rb | 5 | ||||
-rw-r--r-- | lib/unicorn/app/old_rails.rb | 1 | ||||
-rw-r--r-- | lib/unicorn/app/old_rails/static.rb | 2 | ||||
-rw-r--r-- | lib/unicorn/cgi_wrapper.rb | 1 | ||||
-rw-r--r-- | lib/unicorn/configurator.rb | 332 | ||||
-rw-r--r-- | lib/unicorn/const.rb | 69 | ||||
-rw-r--r-- | lib/unicorn/http_request.rb | 124 | ||||
-rw-r--r-- | lib/unicorn/http_response.rb | 56 | ||||
-rw-r--r-- | lib/unicorn/http_server.rb | 733 | ||||
-rw-r--r-- | lib/unicorn/launcher.rb | 8 | ||||
-rw-r--r-- | lib/unicorn/oob_gc.rb | 2 | ||||
-rw-r--r-- | lib/unicorn/preread_input.rb | 33 | ||||
-rw-r--r-- | lib/unicorn/socket_helper.rb | 74 | ||||
-rw-r--r-- | lib/unicorn/stream_input.rb | 145 | ||||
-rw-r--r-- | lib/unicorn/tee_input.rb | 176 | ||||
-rw-r--r-- | lib/unicorn/tmpio.rb | 29 | ||||
-rw-r--r-- | lib/unicorn/util.rb | 133 | ||||
-rw-r--r-- | lib/unicorn/worker.rb | 47 |
19 files changed, 1479 insertions, 539 deletions
diff --git a/lib/unicorn/app/exec_cgi.rb b/lib/unicorn/app/exec_cgi.rb index ff5f53a..232b681 100644 --- a/lib/unicorn/app/exec_cgi.rb +++ b/lib/unicorn/app/exec_cgi.rb @@ -1,5 +1,5 @@ # -*- encoding: binary -*- - +# :enddoc: require 'unicorn' module Unicorn::App @@ -28,6 +28,24 @@ module Unicorn::App SERVER_SOFTWARE ).map { |x| x.freeze } # frozen strings are faster for Hash assignments + class Body < Unicorn::TmpIO + def body_offset=(n) + sysseek(@body_offset = n) + end + + def each + sysseek @body_offset + # don't use a preallocated buffer for sysread since we can't + # guarantee an actual socket is consuming the yielded string + # (or if somebody is pushing to an array for eventual concatenation + begin + yield sysread(CHUNK_SIZE) + rescue EOFError + break + end while true + end + end + # Intializes the app, example of usage in a config.ru # map "/cgit" do # run Unicorn::App::ExecCgi.new("/path/to/cgit.cgi") @@ -43,7 +61,7 @@ module Unicorn::App # Calls the app def call(env) - out, err = Unicorn::Util.tmpio, Unicorn::Util.tmpio + out, err = Body.new, Unicorn::TmpIO.new inp = force_file_input(env) pid = fork { run_child(inp, out, err, env) } inp.close @@ -67,9 +85,9 @@ module Unicorn::App ENV['GATEWAY_INTERFACE'] = 'CGI/1.1' env.keys.grep(/^HTTP_/) { |key| ENV[key] = env[key] } - a = IO.new(0).reopen(inp) - b = IO.new(1).reopen(out) - c = IO.new(2).reopen(err) + $stdin.reopen(inp) + $stdout.reopen(out) + $stderr.reopen(err) exec(*args) end @@ -87,23 +105,7 @@ module Unicorn::App offset = 4 end offset += head.length - - # Allows +out+ to be used as a Rack body. - out.instance_eval { class << self; self; end }.instance_eval { - define_method(:each) { |&blk| - sysseek(offset) - - # don't use a preallocated buffer for sysread since we can't - # guarantee an actual socket is consuming the yielded string - # (or if somebody is pushing to an array for eventual concatenation - begin - blk.call(sysread(CHUNK_SIZE)) - rescue EOFError - break - end while true - } - } - + out.body_offset = offset size -= offset prev = nil headers = Rack::Utils::HeaderHash.new @@ -125,7 +127,7 @@ module Unicorn::App if inp.respond_to?(:size) && inp.size == 0 ::File.open('/dev/null', 'rb') else - tmp = Unicorn::Util.tmpio + tmp = Unicorn::TmpIO.new buf = inp.read(CHUNK_SIZE) begin diff --git a/lib/unicorn/app/inetd.rb b/lib/unicorn/app/inetd.rb index 9bfa7cb..2a212a2 100644 --- a/lib/unicorn/app/inetd.rb +++ b/lib/unicorn/app/inetd.rb @@ -1,10 +1,9 @@ # -*- encoding: binary -*- - +# :enddoc: # Copyright (c) 2009 Eric Wong # You can redistribute it and/or modify it under the same terms as Ruby. # this class *must* be used with Rack::Chunked - module Unicorn::App class Inetd < Struct.new(:cmd) @@ -47,7 +46,7 @@ module Unicorn::App } end - def each(&block) + def each begin rd, = IO.select([err_rd, out_rd]) rd && rd.first or next diff --git a/lib/unicorn/app/old_rails.rb b/lib/unicorn/app/old_rails.rb index e674d78..5f04ce7 100644 --- a/lib/unicorn/app/old_rails.rb +++ b/lib/unicorn/app/old_rails.rb @@ -1,5 +1,6 @@ # -*- encoding: binary -*- +# :enddoc: # This code is based on the original Rails handler in Mongrel # Copyright (c) 2005 Zed A. Shaw # Copyright (c) 2009 Eric Wong diff --git a/lib/unicorn/app/old_rails/static.rb b/lib/unicorn/app/old_rails/static.rb index 13a435e..1d53717 100644 --- a/lib/unicorn/app/old_rails/static.rb +++ b/lib/unicorn/app/old_rails/static.rb @@ -1,5 +1,5 @@ # -*- encoding: binary -*- - +# :enddoc: # This code is based on the original Rails handler in Mongrel # Copyright (c) 2005 Zed A. Shaw # Copyright (c) 2009 Eric Wong diff --git a/lib/unicorn/cgi_wrapper.rb b/lib/unicorn/cgi_wrapper.rb index b6eeb33..0dc3f33 100644 --- a/lib/unicorn/cgi_wrapper.rb +++ b/lib/unicorn/cgi_wrapper.rb @@ -1,5 +1,6 @@ # -*- encoding: binary -*- +# :enddoc: # This code is based on the original CGIWrapper from Mongrel # Copyright (c) 2005 Zed A. Shaw # Copyright (c) 2009 Eric Wong diff --git a/lib/unicorn/configurator.rb b/lib/unicorn/configurator.rb index fb37c56..b6ad022 100644 --- a/lib/unicorn/configurator.rb +++ b/lib/unicorn/configurator.rb @@ -8,14 +8,26 @@ require 'logger' # example configuration files. An example config file for use with # nginx is also available at # http://unicorn.bogomips.org/examples/nginx.conf -class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) +# +# See the link:/TUNING.html document for more information on tuning unicorn. +class Unicorn::Configurator + include Unicorn + + # :stopdoc: + attr_accessor :set, :config_file, :after_reload + # used to stash stuff for deferred processing of cli options in # config.ru after "working_directory" is bound. Do not rely on # this being around later on... - RACKUP = {} # :nodoc: + RACKUP = { + :daemonize => false, + :host => Unicorn::Const::DEFAULT_HOST, + :port => Unicorn::Const::DEFAULT_PORT, + :set_listener => false, + :options => { :listeners => [] } + } # Default settings for Unicorn - # :stopdoc: DEFAULTS = { :timeout => 60, :logger => Logger.new($stderr), @@ -31,6 +43,9 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) }, :pid => nil, :preload_app => false, + :rewindable_input => true, # for Rack 2.x: (Rack::VERSION[0] <= 1), + :client_body_buffer_size => Unicorn::Const::MAX_BODY, + :trust_x_forwarded => true, } #:startdoc: @@ -58,6 +73,9 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) parse_rackup_file + RACKUP[:set_listener] and + set[:listeners] << "#{RACKUP[:host]}:#{RACKUP[:port]}" + # unicorn_rails creates dirs here after working_directory is bound after_reload.call if after_reload @@ -87,20 +105,24 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) set[key] end - # sets object to the +new+ Logger-like object. The new logger-like + # sets object to the +obj+ Logger-like object. The new Logger-like # object must respond to the following methods: - # +debug+, +info+, +warn+, +error+, +fatal+ + # * debug + # * info + # * warn + # * error + # * fatal # The default Logger will log its output to the path specified # by +stderr_path+. If you're running Unicorn daemonized, then # you must specify a path to prevent error messages from going # to /dev/null. - def logger(new) + def logger(obj) %w(debug info warn error fatal).each do |m| - new.respond_to?(m) and next - raise ArgumentError, "logger=#{new} does not respond to method=#{m}" + obj.respond_to?(m) and next + raise ArgumentError, "logger=#{obj} does not respond to method=#{m}" end - set[:logger] = new + set[:logger] = obj end # sets after_fork hook to a given block. This block will be called by @@ -116,11 +138,6 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) # # Existing options for Unicorn::Configurator#listen such as # # :backlog, :rcvbuf, :sndbuf are available here as well. # server.listen(addr, :tries => -1, :delay => 5, :backlog => 128) - # - # # drop permissions to "www-data" in the worker - # # generally there's no reason to start Unicorn as a priviledged user - # # as it is not recommended to expose Unicorn to public clients. - # worker.user('www-data', 'www-data') if Process.euid == 0 # end def after_fork(*args, &block) set_hook(:after_fork, block_given? ? block : args[0]) @@ -168,11 +185,7 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) # server 192.168.0.9:8080 fail_timeout=0; # } def timeout(seconds) - Numeric === seconds or raise ArgumentError, - "not numeric: timeout=#{seconds.inspect}" - seconds >= 3 or raise ArgumentError, - "too low: timeout=#{seconds.inspect}" - set[:timeout] = seconds + set_int(:timeout, seconds, 3) end # sets the current number of worker_processes to +nr+. Each worker @@ -182,11 +195,7 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) # the rest of your Unicorn configuration. See the SIGNALS document # for more information. def worker_processes(nr) - Integer === nr or raise ArgumentError, - "not an integer: worker_processes=#{nr.inspect}" - nr >= 0 or raise ArgumentError, - "not non-negative: worker_processes=#{nr.inspect}" - set[:worker_processes] = nr + set_int(:worker_processes, nr, 1) end # sets listeners to the given +addresses+, replacing or augmenting the @@ -200,131 +209,165 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) set[:listeners] = addresses end - # adds an +address+ to the existing listener set. + # Adds an +address+ to the existing listener set. May be specified more + # than once. +address+ may be an Integer port number for a TCP port, an + # "IP_ADDRESS:PORT" for TCP listeners or a pathname for UNIX domain sockets. + # + # listen 3000 # listen to port 3000 on all TCP interfaces + # listen "127.0.0.1:3000" # listen to port 3000 on the loopback interface + # listen "/tmp/.unicorn.sock" # listen on the given Unix domain socket + # listen "[::1]:3000" # listen to port 3000 on the IPv6 loopback interface # # The following options may be specified (but are generally not needed): # - # +:backlog+: this is the backlog of the listen() syscall. + # [:backlog => number of clients] + # + # This is the backlog of the listen() syscall. + # + # Some operating systems allow negative values here to specify the + # maximum allowable value. In most cases, this number is only + # recommendation and there are other OS-specific tunables and + # variables that can affect this number. See the listen(2) + # syscall documentation of your OS for the exact semantics of + # this. + # + # If you are running unicorn on multiple machines, lowering this number + # can help your load balancer detect when a machine is overloaded + # and give requests to a different machine. + # + # Default: 1024 + # + # [:rcvbuf => bytes, :sndbuf => bytes] + # + # Maximum receive and send buffer sizes (in bytes) of sockets. + # + # These correspond to the SO_RCVBUF and SO_SNDBUF settings which + # can be set via the setsockopt(2) syscall. Some kernels + # (e.g. Linux 2.4+) have intelligent auto-tuning mechanisms and + # there is no need (and it is sometimes detrimental) to specify them. + # + # See the socket API documentation of your operating system + # to determine the exact semantics of these settings and + # other operating system-specific knobs where they can be + # specified. + # + # Defaults: operating system defaults + # + # [:tcp_nodelay => true or false] + # + # Disables Nagle's algorithm on TCP sockets if +true+. # - # Some operating systems allow negative values here to specify the - # maximum allowable value. In most cases, this number is only - # recommendation and there are other OS-specific tunables and - # variables that can affect this number. See the listen(2) - # syscall documentation of your OS for the exact semantics of - # this. + # Setting this to +true+ can make streaming responses in Rails 3.1 + # appear more quickly at the cost of slightly higher bandwidth usage. + # The effect of this option is most visible if nginx is not used, + # but nginx remains highly recommended with \Unicorn. # - # If you are running unicorn on multiple machines, lowering this number - # can help your load balancer detect when a machine is overloaded - # and give requests to a different machine. + # This has no effect on UNIX sockets. # - # Default: 1024 + # Default: +false+ (Nagle's algorithm enabled) in \Unicorn, + # +true+ in Rainbows! # - # +:rcvbuf+, +:sndbuf+: maximum receive and send buffer sizes of sockets + # [:tcp_nopush => true or false] # - # These correspond to the SO_RCVBUF and SO_SNDBUF settings which - # can be set via the setsockopt(2) syscall. Some kernels - # (e.g. Linux 2.4+) have intelligent auto-tuning mechanisms and - # there is no need (and it is sometimes detrimental) to specify them. + # Enables/disables TCP_CORK in Linux or TCP_NOPUSH in FreeBSD # - # See the socket API documentation of your operating system - # to determine the exact semantics of these settings and - # other operating system-specific knobs where they can be - # specified. + # This prevents partial TCP frames from being sent out and reduces + # wakeups in nginx if it is on a different machine. Since \Unicorn + # is only designed for applications that send the response body + # quickly without keepalive, sockets will always be flushed on close + # to prevent delays. # - # Defaults: operating system defaults + # This has no effect on UNIX sockets. # - # +:tcp_nodelay+: disables Nagle's algorithm on TCP sockets + # Default: +true+ in \Unicorn 3.4+, +false+ in Rainbows! # - # This has no effect on UNIX sockets. + # [:tries => Integer] # - # Default: operating system defaults (usually Nagle's algorithm enabled) + # Times to retry binding a socket if it is already in use # - # +:tcp_nopush+: enables TCP_CORK in Linux or TCP_NOPUSH in FreeBSD + # A negative number indicates we will retry indefinitely, this is + # useful for migrations and upgrades when individual workers + # are binding to different ports. # - # This will prevent partial TCP frames from being sent out. - # Enabling +tcp_nopush+ is generally not needed or recommended as - # controlling +tcp_nodelay+ already provides sufficient latency - # reduction whereas Unicorn does not know when the best times are - # for flushing corked sockets. + # Default: 5 # - # This has no effect on UNIX sockets. + # [:delay => seconds] # - # +:tries+: times to retry binding a socket if it is already in use + # Seconds to wait between successive +tries+ # - # A negative number indicates we will retry indefinitely, this is - # useful for migrations and upgrades when individual workers - # are binding to different ports. + # Default: 0.5 seconds # - # Default: 5 + # [:umask => mode] # - # +:delay+: seconds to wait between successive +tries+ + # Sets the file mode creation mask for UNIX sockets. If specified, + # this is usually in octal notation. # - # Default: 0.5 seconds + # Typically UNIX domain sockets are created with more liberal + # file permissions than the rest of the application. By default, + # we create UNIX domain sockets to be readable and writable by + # all local users to give them the same accessibility as + # locally-bound TCP listeners. # - # +:umask+: sets the file mode creation mask for UNIX sockets + # This has no effect on TCP listeners. # - # Typically UNIX domain sockets are created with more liberal - # file permissions than the rest of the application. By default, - # we create UNIX domain sockets to be readable and writable by - # all local users to give them the same accessibility as - # locally-bound TCP listeners. + # Default: 0000 (world-read/writable) # - # This has no effect on TCP listeners. + # [:tcp_defer_accept => Integer] # - # Default: 0 (world read/writable) + # Defer accept() until data is ready (Linux-only) # - # +:tcp_defer_accept:+ defer accept() until data is ready (Linux-only) + # For Linux 2.6.32 and later, this is the number of retransmits to + # defer an accept() for if no data arrives, but the client will + # eventually be accepted after the specified number of retransmits + # regardless of whether data is ready. # - # For Linux 2.6.32 and later, this is the number of retransmits to - # defer an accept() for if no data arrives, but the client will - # eventually be accepted after the specified number of retransmits - # regardless of whether data is ready. + # For Linux before 2.6.32, this is a boolean option, and + # accepts are _always_ deferred indefinitely if no data arrives. + # This is similar to <code>:accept_filter => "dataready"</code> + # under FreeBSD. # - # For Linux before 2.6.32, this is a boolean option, and - # accepts are _always_ deferred indefinitely if no data arrives. - # This is similar to <code>:accept_filter => "dataready"</code> - # under FreeBSD. + # Specifying +true+ is synonymous for the default value(s) below, + # and +false+ or +nil+ is synonymous for a value of zero. # - # Specifying +true+ is synonymous for the default value(s) below, - # and +false+ or +nil+ is synonymous for a value of zero. + # A value of +1+ is a good optimization for local networks + # and trusted clients. For Rainbows! and Zbatery users, a higher + # value (e.g. +60+) provides more protection against some + # denial-of-service attacks. There is no good reason to ever + # disable this with a +zero+ value when serving HTTP. # - # A value of +1+ is a good optimization for local networks - # and trusted clients. For Rainbows! and Zbatery users, a higher - # value (e.g. +60+) provides more protection against some - # denial-of-service attacks. There is no good reason to ever - # disable this with a +zero+ value when serving HTTP. + # Default: 1 retransmit for \Unicorn, 60 for Rainbows! 0.95.0\+ # - # Default: 1 retransmit for \Unicorn, 60 for Rainbows! 0.95.0\+ + # [:accept_filter => String] # - # +:accept_filter: defer accept() until data is ready (FreeBSD-only) + # defer accept() until data is ready (FreeBSD-only) # - # This enables either the "dataready" or (default) "httpready" - # accept() filter under FreeBSD. This is intended as an - # optimization to reduce context switches with common GET/HEAD - # requests. For Rainbows! and Zbatery users, this provides - # some protection against certain denial-of-service attacks, too. + # This enables either the "dataready" or (default) "httpready" + # accept() filter under FreeBSD. This is intended as an + # optimization to reduce context switches with common GET/HEAD + # requests. For Rainbows! and Zbatery users, this provides + # some protection against certain denial-of-service attacks, too. # - # There is no good reason to change from the default. + # There is no good reason to change from the default. # - # Default: "httpready" - def listen(address, opt = {}) + # Default: "httpready" + def listen(address, options = {}) address = expand_addr(address) if String === address [ :umask, :backlog, :sndbuf, :rcvbuf, :tries ].each do |key| - value = opt[key] or next + value = options[key] or next Integer === value or raise ArgumentError, "not an integer: #{key}=#{value.inspect}" end [ :tcp_nodelay, :tcp_nopush ].each do |key| - (value = opt[key]).nil? and next + (value = options[key]).nil? and next TrueClass === value || FalseClass === value or raise ArgumentError, "not boolean: #{key}=#{value.inspect}" end - unless (value = opt[:delay]).nil? + unless (value = options[:delay]).nil? Numeric === value or raise ArgumentError, "not numeric: delay=#{value.inspect}" end - set[:listener_opts][address].merge!(opt) + set[:listener_opts][address].merge!(options) end set[:listeners] << address @@ -362,12 +405,30 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) # cause the master process to exit with an error. def preload_app(bool) - case bool - when TrueClass, FalseClass - set[:preload_app] = bool - else - raise ArgumentError, "preload_app=#{bool.inspect} not a boolean" - end + set_bool(:preload_app, bool) + end + + # Toggles making \env[\"rack.input\"] rewindable. + # Disabling rewindability can improve performance by lowering + # I/O and memory usage for applications that accept uploads. + # Keep in mind that the Rack 1.x spec requires + # \env[\"rack.input\"] to be rewindable, so this allows + # intentionally violating the current Rack 1.x spec. + # + # +rewindable_input+ defaults to +true+ when used with Rack 1.x for + # Rack conformance. When Rack 2.x is finalized, this will most + # likely default to +false+ while still conforming to the newer + # (less demanding) spec. + def rewindable_input(bool) + set_bool(:rewindable_input, bool) + end + + # The maximum size (in +bytes+) to buffer in memory before + # resorting to a temporary file. Default is 112 kilobytes. + # This option has no effect if "rewindable_input" is set to + # +false+. + def client_body_buffer_size(bytes) + set_int(:client_body_buffer_size, bytes, 0) end # Allow redirecting $stderr to a given path. Unlike doing this from @@ -417,6 +478,7 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) # The master process always stays running as the user who started it. # This switch will occur after calling the after_fork hook, and only # if the Worker#user method is not called in the after_fork hook + # +group+ is optional and will not change if unspecified. def user(user, group = nil) # raises ArgumentError on invalid user/group Etc.getpwnam(user) @@ -424,10 +486,22 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) set[:user] = [ user, group ] end + # Sets whether or not the parser will trust X-Forwarded-Proto and + # X-Forwarded-SSL headers and set "rack.url_scheme" to "https" accordingly. + # Rainbows!/Zbatery installations facing untrusted clients directly + # should set this to +false+. This is +true+ by default as Unicorn + # is designed to only sit behind trusted nginx proxies. + # + # This has never been publically documented and is subject to removal + # in future releases. + def trust_x_forwarded(bool) # :nodoc: + set_bool(:trust_x_forwarded, bool) + end + # expands "unix:path/to/foo" to a socket relative to the current path # expands pathnames of sockets if relative to "~" or "~username" # expands "*:port and ":port" to "0.0.0.0:port" - def expand_addr(address) #:nodoc + def expand_addr(address) #:nodoc: return "0.0.0.0:#{address}" if Integer === address return address unless String === address @@ -438,16 +512,27 @@ class Unicorn::Configurator < Struct.new(:set, :config_file, :after_reload) File.expand_path(address) when %r{\A(?:\*:)?(\d+)\z} "0.0.0.0:#$1" - when %r{\A(.*):(\d+)\z} - # canonicalize the name - packed = Socket.pack_sockaddr_in($2.to_i, $1) - Socket.unpack_sockaddr_in(packed).reverse!.join(':') + when %r{\A\[([a-fA-F0-9:]+)\]\z}, %r/\A((?:\d+\.){3}\d+)\z/ + canonicalize_tcp($1, 80) + when %r{\A\[([a-fA-F0-9:]+)\]:(\d+)\z}, %r{\A(.*):(\d+)\z} + canonicalize_tcp($1, $2.to_i) else address end end private + def set_int(var, n, min) #:nodoc: + Integer === n or raise ArgumentError, "not an integer: #{var}=#{n.inspect}" + n >= min or raise ArgumentError, "too low (< #{min}): #{var}=#{n.inspect}" + set[var] = n + end + + def canonicalize_tcp(addr, port) + packed = Socket.pack_sockaddr_in(port, addr) + port, addr = Socket.unpack_sockaddr_in(packed) + /:/ =~ addr ? "[#{addr}]:#{port}" : "#{addr}:#{port}" + end def set_path(var, path) #:nodoc: case path @@ -458,6 +543,15 @@ private end end + def set_bool(var, bool) #:nodoc: + case bool + when true, false + set[var] = bool + else + raise ArgumentError, "#{var}=#{bool.inspect} not a boolean" + end + end + def set_hook(var, my_proc, req_arity = 2) #:nodoc: case my_proc when Proc @@ -495,23 +589,15 @@ private /^#\\(.*)/ =~ File.read(ru) or return RACKUP[:optparse].parse!($1.split(/\s+/)) - # XXX ugly as hell, WILL FIX in 2.x (along with Rainbows!/Zbatery) - host, port, set_listener, options, daemonize = - eval("[ host, port, set_listener, options, daemonize ]", - TOPLEVEL_BINDING) - - # XXX duplicate code from bin/unicorn{,_rails} - set[:listeners] << "#{host}:#{port}" if set_listener - - if daemonize + if RACKUP[:daemonize] # unicorn_rails wants a default pid path, (not plain 'unicorn') if after_reload spid = set[:pid] pid('tmp/pids/unicorn.pid') if spid.nil? || spid == :unset end unless RACKUP[:daemonized] - Unicorn::Launcher.daemonize!(options) - RACKUP[:ready_pipe] = options.delete(:ready_pipe) + Unicorn::Launcher.daemonize!(RACKUP[:options]) + RACKUP[:ready_pipe] = RACKUP[:options].delete(:ready_pipe) end end end diff --git a/lib/unicorn/const.rb b/lib/unicorn/const.rb index b428be5..c65c242 100644 --- a/lib/unicorn/const.rb +++ b/lib/unicorn/const.rb @@ -1,36 +1,39 @@ # -*- encoding: binary -*- -module Unicorn - - # Frequently used constants when constructing requests or responses. Many times - # the constant just refers to a string with the same contents. Using these constants - # gave about a 3% to 10% performance improvement over using the strings directly. - # Symbols did not really improve things much compared to constants. - module Const - - # The current version of Unicorn, currently 1.1.7 - UNICORN_VERSION="1.1.7" - - DEFAULT_HOST = "0.0.0.0" # default TCP listen host address - DEFAULT_PORT = 8080 # default TCP listen port - DEFAULT_LISTEN = "#{DEFAULT_HOST}:#{DEFAULT_PORT}" - - # The basic max request size we'll try to read. - CHUNK_SIZE=(16 * 1024) - - # Maximum request body size before it is moved out of memory and into a - # temporary file for reading (112 kilobytes). - MAX_BODY=1024 * 112 - - # common errors we'll send back - ERROR_400_RESPONSE = "HTTP/1.1 400 Bad Request\r\n\r\n" - ERROR_500_RESPONSE = "HTTP/1.1 500 Internal Server Error\r\n\r\n" - EXPECT_100_RESPONSE = "HTTP/1.1 100 Continue\r\n\r\n" - - # A frozen format for this is about 15% faster - REMOTE_ADDR="REMOTE_ADDR".freeze - RACK_INPUT="rack.input".freeze - HTTP_EXPECT="HTTP_EXPECT" - end - +# :enddoc: +# Frequently used constants when constructing requests or responses. +# Many times the constant just refers to a string with the same +# contents. Using these constants gave about a 3% to 10% performance +# improvement over using the strings directly. Symbols did not really +# improve things much compared to constants. +module Unicorn::Const + + # The current version of Unicorn, currently 3.4.0 + UNICORN_VERSION = "3.4.0" + + # default TCP listen host address (0.0.0.0, all interfaces) + DEFAULT_HOST = "0.0.0.0" + + # default TCP listen port (8080) + DEFAULT_PORT = 8080 + + # default TCP listen address and port (0.0.0.0:8080) + DEFAULT_LISTEN = "#{DEFAULT_HOST}:#{DEFAULT_PORT}" + + # The basic request body size we'll try to read at once (16 kilobytes). + CHUNK_SIZE = 16 * 1024 + + # Maximum request body size before it is moved out of memory and into a + # temporary file for reading (112 kilobytes). This is the default + # value of of client_body_buffer_size. + MAX_BODY = 1024 * 112 + + # :stopdoc: + # common errors we'll send back + ERROR_400_RESPONSE = "HTTP/1.1 400 Bad Request\r\n\r\n" + ERROR_500_RESPONSE = "HTTP/1.1 500 Internal Server Error\r\n\r\n" + EXPECT_100_RESPONSE = "HTTP/1.1 100 Continue\r\n\r\n" + + HTTP_EXPECT = "HTTP_EXPECT" + # :startdoc: end diff --git a/lib/unicorn/http_request.rb b/lib/unicorn/http_request.rb index 65870ed..e72f571 100644 --- a/lib/unicorn/http_request.rb +++ b/lib/unicorn/http_request.rb @@ -1,71 +1,79 @@ # -*- encoding: binary -*- - +# :enddoc: +# no stable API here require 'unicorn_http' -module Unicorn - class HttpRequest +# TODO: remove redundant names +Unicorn.const_set(:HttpRequest, Unicorn::HttpParser) +class Unicorn::HttpParser + + # default parameters we merge into the request env for Rack handlers + DEFAULTS = { + "rack.errors" => $stderr, + "rack.multiprocess" => true, + "rack.multithread" => false, + "rack.run_once" => false, + "rack.version" => [1, 1], + "SCRIPT_NAME" => "", + + # this is not in the Rack spec, but some apps may rely on it + "SERVER_SOFTWARE" => "Unicorn #{Unicorn::Const::UNICORN_VERSION}" + } - # default parameters we merge into the request env for Rack handlers - DEFAULTS = { - "rack.errors" => $stderr, - "rack.multiprocess" => true, - "rack.multithread" => false, - "rack.run_once" => false, - "rack.version" => [1, 1], - "SCRIPT_NAME" => "", + NULL_IO = StringIO.new("") - # this is not in the Rack spec, but some apps may rely on it - "SERVER_SOFTWARE" => "Unicorn #{Const::UNICORN_VERSION}" - } + # :stopdoc: + # A frozen format for this is about 15% faster + REMOTE_ADDR = 'REMOTE_ADDR'.freeze + RACK_INPUT = 'rack.input'.freeze + @@input_class = Unicorn::TeeInput - NULL_IO = StringIO.new("") - LOCALHOST = '127.0.0.1' + def self.input_class + @@input_class + end - # Being explicitly single-threaded, we have certain advantages in - # not having to worry about variables being clobbered :) - BUF = "" - PARSER = HttpParser.new - REQ = {} + def self.input_class=(klass) + @@input_class = klass + end + # :startdoc: - # Does the majority of the IO processing. It has been written in - # Ruby using about 8 different IO processing strategies. - # - # It is currently carefully constructed to make sure that it gets - # the best possible performance for the common case: GET requests - # that are fully complete after a single read(2) - # - # Anyone who thinks they can make it faster is more than welcome to - # take a crack at it. - # - # returns an environment hash suitable for Rack if successful - # This does minimal exception trapping and it is up to the caller - # to handle any socket errors (e.g. user aborted upload). - def read(socket) - REQ.clear - PARSER.reset + # Does the majority of the IO processing. It has been written in + # Ruby using about 8 different IO processing strategies. + # + # It is currently carefully constructed to make sure that it gets + # the best possible performance for the common case: GET requests + # that are fully complete after a single read(2) + # + # Anyone who thinks they can make it faster is more than welcome to + # take a crack at it. + # + # returns an environment hash suitable for Rack if successful + # This does minimal exception trapping and it is up to the caller + # to handle any socket errors (e.g. user aborted upload). + def read(socket) + clear + e = env - # From http://www.ietf.org/rfc/rfc3875: - # "Script authors should be aware that the REMOTE_ADDR and - # REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9) - # may not identify the ultimate source of the request. They - # identify the client for the immediate request to the server; - # that client may be a proxy, gateway, or other intermediary - # acting on behalf of the actual source client." - REQ[Const::REMOTE_ADDR] = - TCPSocket === socket ? socket.peeraddr[-1] : LOCALHOST + # From http://www.ietf.org/rfc/rfc3875: + # "Script authors should be aware that the REMOTE_ADDR and + # REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9) + # may not identify the ultimate source of the request. They + # identify the client for the immediate request to the server; + # that client may be a proxy, gateway, or other intermediary + # acting on behalf of the actual source client." + e[REMOTE_ADDR] = socket.kgio_addr - # short circuit the common case with small GET requests first - if PARSER.headers(REQ, socket.readpartial(Const::CHUNK_SIZE, BUF)).nil? - # Parser is not done, queue up more data to read and continue parsing - # an Exception thrown from the PARSER will throw us out of the loop - begin - BUF << socket.readpartial(Const::CHUNK_SIZE) - end while PARSER.headers(REQ, BUF).nil? - end - REQ[Const::RACK_INPUT] = 0 == PARSER.content_length ? - NULL_IO : Unicorn::TeeInput.new(socket, REQ, PARSER, BUF) - REQ.update(DEFAULTS) + # short circuit the common case with small GET requests first + socket.kgio_read!(16384, buf) + if parse.nil? + # Parser is not done, queue up more data to read and continue parsing + # an Exception thrown from the parser will throw us out of the loop + begin + buf << socket.kgio_read!(16384) + end while parse.nil? end - + e[RACK_INPUT] = 0 == content_length ? + NULL_IO : @@input_class.new(socket, self) + e.merge!(DEFAULTS) end end diff --git a/lib/unicorn/http_response.rb b/lib/unicorn/http_response.rb index f3b5a82..b781e20 100644 --- a/lib/unicorn/http_response.rb +++ b/lib/unicorn/http_response.rb @@ -1,23 +1,13 @@ # -*- encoding: binary -*- -require 'time' - +# :enddoc: # Writes a Rack response to your client using the HTTP/1.1 specification. # You use it by simply doing: # # status, headers, body = rack_app.call(env) -# HttpResponse.write(socket, [ status, headers, body ]) +# http_response_write(socket, status, headers, body) # # Most header correctness (including Content-Length and Content-Type) -# is the job of Rack, with the exception of the "Connection: close" -# and "Date" headers. -# -# A design decision was made to force the client to not pipeline or -# keepalive requests. HTTP/1.1 pipelining really kills the -# performance due to how it has to be handled and how unclear the -# standard is. To fix this the HttpResponse always gives a -# "Connection: close" header which forces the client to close right -# away. The bonus for this is that it gives a pretty nice speed boost -# to most clients since they can close their connection immediately. +# is the job of Rack, with the exception of the "Date" and "Status" header. module Unicorn::HttpResponse # Every standard HTTP code mapped to the appropriate message. @@ -25,41 +15,27 @@ module Unicorn::HttpResponse hash[code] = "#{code} #{msg}" hash } - - # Rack does not set/require a Date: header. We always override the - # Connection: and Date: headers no matter what (if anything) our - # Rack application sent us. - SKIP = { 'connection' => true, 'date' => true, 'status' => true } + CRLF = "\r\n" # writes the rack_response to socket as an HTTP response - def self.write(socket, rack_response, have_header = true) - status, headers, body = rack_response - - if have_header - status = CODES[status.to_i] || status - out = [] - - # Don't bother enforcing duplicate supression, it's a Hash most of - # the time anyways so just hope our app knows what it's doing + def http_response_write(socket, status, headers, body) + status = CODES[status.to_i] || status + + if headers + buf = "HTTP/1.1 #{status}\r\n" \ + "Date: #{httpdate}\r\n" \ + "Status: #{status}\r\n" \ + "Connection: close\r\n" headers.each do |key, value| - next if SKIP.include?(key.downcase) + next if %r{\A(?:Date\z|Connection\z)}i =~ key if value =~ /\n/ # avoiding blank, key-only cookies with /\n+/ - out.concat(value.split(/\n+/).map! { |v| "#{key}: #{v}\r\n" }) + buf << value.split(/\n+/).map! { |v| "#{key}: #{v}\r\n" }.join else - out << "#{key}: #{value}\r\n" + buf << "#{key}: #{value}\r\n" end end - - # Rack should enforce Content-Length or chunked transfer encoding, - # so don't worry or care about them. - # Date is required by HTTP/1.1 as long as our clock can be trusted. - # Some broken clients require a "Status" header so we accomodate them - socket.write("HTTP/1.1 #{status}\r\n" \ - "Date: #{Time.now.httpdate}\r\n" \ - "Status: #{status}\r\n" \ - "Connection: close\r\n" \ - "#{out.join('')}\r\n") + socket.write(buf << CRLF) end body.each { |chunk| socket.write(chunk) } diff --git a/lib/unicorn/http_server.rb b/lib/unicorn/http_server.rb new file mode 100644 index 0000000..3077b95 --- /dev/null +++ b/lib/unicorn/http_server.rb @@ -0,0 +1,733 @@ +# -*- encoding: binary -*- + +# This is the process manager of Unicorn. This manages worker +# processes which in turn handle the I/O and application process. +# Listener sockets are started in the master process and shared with +# forked worker children. +# +# Users do not need to know the internals of this class, but reading the +# {source}[http://bogomips.org/unicorn.git/tree/lib/unicorn/http_server.rb] +# is education for programmers wishing to learn how \Unicorn works. +# See Unicorn::Configurator for information on how to configure \Unicorn. +class Unicorn::HttpServer + # :stopdoc: + attr_accessor :app, :request, :timeout, :worker_processes, + :before_fork, :after_fork, :before_exec, + :listener_opts, :preload_app, + :reexec_pid, :orig_app, :init_listeners, + :master_pid, :config, :ready_pipe, :user + attr_reader :pid, :logger + include Unicorn::SocketHelper + include Unicorn::HttpResponse + + # backwards compatibility with 1.x + Worker = Unicorn::Worker + + # prevents IO objects in here from being GC-ed + IO_PURGATORY = [] + + # all bound listener sockets + LISTENERS = [] + + # This hash maps PIDs to Workers + WORKERS = {} + + # We use SELF_PIPE differently in the master and worker processes: + # + # * The master process never closes or reinitializes this once + # initialized. Signal handlers in the master process will write to + # it to wake up the master from IO.select in exactly the same manner + # djb describes in http://cr.yp.to/docs/selfpipe.html + # + # * The workers immediately close the pipe they inherit from the + # master and replace it with a new pipe after forking. This new + # pipe is also used to wakeup from IO.select from inside (worker) + # signal handlers. However, workers *close* the pipe descriptors in + # the signal handlers to raise EBADF in IO.select instead of writing + # like we do in the master. We cannot easily use the reader set for + # IO.select because LISTENERS is already that set, and it's extra + # work (and cycles) to distinguish the pipe FD from the reader set + # once IO.select returns. So we're lazy and just close the pipe when + # a (rare) signal arrives in the worker and reinitialize the pipe later. + SELF_PIPE = [] + + # signal queue used for self-piping + SIG_QUEUE = [] + + # list of signals we care about and trap in master. + QUEUE_SIGS = [ :WINCH, :QUIT, :INT, :TERM, :USR1, :USR2, :HUP, :TTIN, :TTOU ] + + # :startdoc: + # We populate this at startup so we can figure out how to reexecute + # and upgrade the currently running instance of Unicorn + # This Hash is considered a stable interface and changing its contents + # will allow you to switch between different installations of Unicorn + # or even different installations of the same applications without + # downtime. Keys of this constant Hash are described as follows: + # + # * 0 - the path to the unicorn/unicorn_rails executable + # * :argv - a deep copy of the ARGV array the executable originally saw + # * :cwd - the working directory of the application, this is where + # you originally started Unicorn. + # + # To change your unicorn executable to a different path without downtime, + # you can set the following in your Unicorn config file, HUP and then + # continue with the traditional USR2 + QUIT upgrade steps: + # + # Unicorn::HttpServer::START_CTX[0] = "/home/bofh/1.9.2/bin/unicorn" + START_CTX = { + :argv => ARGV.map { |arg| arg.dup }, + 0 => $0.dup, + } + # We favor ENV['PWD'] since it is (usually) symlink aware for Capistrano + # and like systems + START_CTX[:cwd] = begin + a = File.stat(pwd = ENV['PWD']) + b = File.stat(Dir.pwd) + a.ino == b.ino && a.dev == b.dev ? pwd : Dir.pwd + rescue + Dir.pwd + end + # :stopdoc: + + # Creates a working server on host:port (strange things happen if + # port isn't a Number). Use HttpServer::run to start the server and + # HttpServer.run.join to join the thread that's processing + # incoming requests on the socket. + def initialize(app, options = {}) + @app = app + @request = Unicorn::HttpRequest.new + self.reexec_pid = 0 + options = options.dup + @ready_pipe = options.delete(:ready_pipe) + self.init_listeners = options[:listeners] ? options[:listeners].dup : [] + options[:use_defaults] = true + self.config = Unicorn::Configurator.new(options) + self.listener_opts = {} + + # we try inheriting listeners first, so we bind them later. + # we don't write the pid file until we've bound listeners in case + # unicorn was started twice by mistake. Even though our #pid= method + # checks for stale/existing pid files, race conditions are still + # possible (and difficult/non-portable to avoid) and can be likely + # to clobber the pid if the second start was in quick succession + # after the first, so we rely on the listener binding to fail in + # that case. Some tests (in and outside of this source tree) and + # monitoring tools may also rely on pid files existing before we + # attempt to connect to the listener(s) + config.commit!(self, :skip => [:listeners, :pid]) + self.orig_app = app + end + + # Runs the thing. Returns self so you can run join on it + def start + BasicSocket.do_not_reverse_lookup = true + + # inherit sockets from parents, they need to be plain Socket objects + # before they become Kgio::UNIXServer or Kgio::TCPServer + inherited = ENV['UNICORN_FD'].to_s.split(/,/).map do |fd| + io = Socket.for_fd(fd.to_i) + set_server_sockopt(io, listener_opts[sock_name(io)]) + IO_PURGATORY << io + logger.info "inherited addr=#{sock_name(io)} fd=#{fd}" + server_cast(io) + end + + config_listeners = config[:listeners].dup + LISTENERS.replace(inherited) + + # we start out with generic Socket objects that get cast to either + # Kgio::TCPServer or Kgio::UNIXServer objects; but since the Socket + # objects share the same OS-level file descriptor as the higher-level + # *Server objects; we need to prevent Socket objects from being + # garbage-collected + config_listeners -= listener_names + if config_listeners.empty? && LISTENERS.empty? + config_listeners << Unicorn::Const::DEFAULT_LISTEN + init_listeners << Unicorn::Const::DEFAULT_LISTEN + START_CTX[:argv] << "-l#{Unicorn::Const::DEFAULT_LISTEN}" + end + config_listeners.each { |addr| listen(addr) } + raise ArgumentError, "no listeners" if LISTENERS.empty? + + # this pipe is used to wake us up from select(2) in #join when signals + # are trapped. See trap_deferred. + init_self_pipe! + + # setup signal handlers before writing pid file in case people get + # trigger happy and send signals as soon as the pid file exists. + # Note that signals don't actually get handled until the #join method + QUEUE_SIGS.each { |sig| trap(sig) { SIG_QUEUE << sig; awaken_master } } + trap(:CHLD) { awaken_master } + self.pid = config[:pid] + + self.master_pid = $$ + build_app! if preload_app + maintain_worker_count + self + end + + # replaces current listener set with +listeners+. This will + # close the socket if it will not exist in the new listener set + def listeners=(listeners) + cur_names, dead_names = [], [] + listener_names.each do |name| + if ?/ == name[0] + # mark unlinked sockets as dead so we can rebind them + (File.socket?(name) ? cur_names : dead_names) << name + else + cur_names << name + end + end + set_names = listener_names(listeners) + dead_names.concat(cur_names - set_names).uniq! + + LISTENERS.delete_if do |io| + if dead_names.include?(sock_name(io)) + IO_PURGATORY.delete_if do |pio| + pio.fileno == io.fileno && (pio.close rescue nil).nil? # true + end + (io.close rescue nil).nil? # true + else + set_server_sockopt(io, listener_opts[sock_name(io)]) + false + end + end + + (set_names - cur_names).each { |addr| listen(addr) } + end + + def stdout_path=(path); redirect_io($stdout, path); end + def stderr_path=(path); redirect_io($stderr, path); end + + def logger=(obj) + Unicorn::HttpRequest::DEFAULTS["rack.logger"] = @logger = obj + end + + # sets the path for the PID file of the master process + def pid=(path) + if path + if x = valid_pid?(path) + return path if pid && path == pid && x == $$ + if x == reexec_pid && pid =~ /\.oldbin\z/ + logger.warn("will not set pid=#{path} while reexec-ed "\ + "child is running PID:#{x}") + return + end + raise ArgumentError, "Already running on PID:#{x} " \ + "(or pid=#{path} is stale)" + end + end + unlink_pid_safe(pid) if pid + + if path + fp = begin + tmp = "#{File.dirname(path)}/#{rand}.#$$" + File.open(tmp, File::RDWR|File::CREAT|File::EXCL, 0644) + rescue Errno::EEXIST + retry + end + fp.syswrite("#$$\n") + File.rename(fp.path, path) + fp.close + end + @pid = path + end + + # add a given address to the +listeners+ set, idempotently + # Allows workers to add a private, per-process listener via the + # after_fork hook. Very useful for debugging and testing. + # +:tries+ may be specified as an option for the number of times + # to retry, and +:delay+ may be specified as the time in seconds + # to delay between retries. + # A negative value for +:tries+ indicates the listen will be + # retried indefinitely, this is useful when workers belonging to + # different masters are spawned during a transparent upgrade. + def listen(address, opt = {}.merge(listener_opts[address] || {})) + address = config.expand_addr(address) + return if String === address && listener_names.include?(address) + + delay = opt[:delay] || 0.5 + tries = opt[:tries] || 5 + begin + io = bind_listen(address, opt) + unless Kgio::TCPServer === io || Kgio::UNIXServer === io + IO_PURGATORY << io + io = server_cast(io) + end + logger.info "listening on addr=#{sock_name(io)} fd=#{io.fileno}" + LISTENERS << io + io + rescue Errno::EADDRINUSE => err + logger.error "adding listener failed addr=#{address} (in use)" + raise err if tries == 0 + tries -= 1 + logger.error "retrying in #{delay} seconds " \ + "(#{tries < 0 ? 'infinite' : tries} tries left)" + sleep(delay) + retry + rescue => err + logger.fatal "error adding listener addr=#{address}" + raise err + end + end + + # monitors children and receives signals forever + # (or until a termination signal is sent). This handles signals + # one-at-a-time time and we'll happily drop signals in case somebody + # is signalling us too often. + def join + respawn = true + last_check = Time.now + + proc_name 'master' + logger.info "master process ready" # test_exec.rb relies on this message + if @ready_pipe + @ready_pipe.syswrite($$.to_s) + @ready_pipe.close rescue nil + @ready_pipe = nil + end + begin + reap_all_workers + case SIG_QUEUE.shift + when nil + # avoid murdering workers after our master process (or the + # machine) comes out of suspend/hibernation + if (last_check + @timeout) >= (last_check = Time.now) + sleep_time = murder_lazy_workers + else + # wait for workers to wakeup on suspend + sleep_time = @timeout/2.0 + 1 + end + maintain_worker_count if respawn + master_sleep(sleep_time) + when :QUIT # graceful shutdown + break + when :TERM, :INT # immediate shutdown + stop(false) + break + when :USR1 # rotate logs + logger.info "master reopening logs..." + Unicorn::Util.reopen_logs + logger.info "master done reopening logs" + kill_each_worker(:USR1) + when :USR2 # exec binary, stay alive in case something went wrong + reexec + when :WINCH + if Process.ppid == 1 || Process.getpgrp != $$ + respawn = false + logger.info "gracefully stopping all workers" + kill_each_worker(:QUIT) + self.worker_processes = 0 + else + logger.info "SIGWINCH ignored because we're not daemonized" + end + when :TTIN + respawn = true + self.worker_processes += 1 + when :TTOU + self.worker_processes -= 1 if self.worker_processes > 0 + when :HUP + respawn = true + if config.config_file + load_config! + else # exec binary and exit if there's no config file + logger.info "config_file not present, reexecuting binary" + reexec + end + end + rescue Errno::EINTR + rescue => e + logger.error "Unhandled master loop exception #{e.inspect}." + logger.error e.backtrace.join("\n") + end while true + stop # gracefully shutdown all workers on our way out + logger.info "master complete" + unlink_pid_safe(pid) if pid + end + + # Terminates all workers, but does not exit master process + def stop(graceful = true) + self.listeners = [] + limit = Time.now + timeout + until WORKERS.empty? || Time.now > limit + kill_each_worker(graceful ? :QUIT : :TERM) + sleep(0.1) + reap_all_workers + end + kill_each_worker(:KILL) + end + + def rewindable_input + Unicorn::HttpRequest.input_class.method_defined?(:rewind) + end + + def rewindable_input=(bool) + Unicorn::HttpRequest.input_class = bool ? + Unicorn::TeeInput : Unicorn::StreamInput + end + + def client_body_buffer_size + Unicorn::TeeInput.client_body_buffer_size + end + + def client_body_buffer_size=(bytes) + Unicorn::TeeInput.client_body_buffer_size = bytes + end + + def trust_x_forwarded + Unicorn::HttpParser.trust_x_forwarded? + end + + def trust_x_forwarded=(bool) + Unicorn::HttpParser.trust_x_forwarded = bool + end + + private + + # wait for a signal hander to wake us up and then consume the pipe + def master_sleep(sec) + IO.select([ SELF_PIPE[0] ], nil, nil, sec) or return + SELF_PIPE[0].kgio_tryread(11) + end + + def awaken_master + SELF_PIPE[1].kgio_trywrite('.') # wakeup master process from select + end + + # reaps all unreaped workers + def reap_all_workers + begin + wpid, status = Process.waitpid2(-1, Process::WNOHANG) + wpid or return + if reexec_pid == wpid + logger.error "reaped #{status.inspect} exec()-ed" + self.reexec_pid = 0 + self.pid = pid.chomp('.oldbin') if pid + proc_name 'master' + else + worker = WORKERS.delete(wpid) and worker.tmp.close rescue nil + m = "reaped #{status.inspect} worker=#{worker.nr rescue 'unknown'}" + status.success? ? logger.info(m) : logger.error(m) + end + rescue Errno::ECHILD + break + end while true + end + + # reexecutes the START_CTX with a new binary + def reexec + if reexec_pid > 0 + begin + Process.kill(0, reexec_pid) + logger.error "reexec-ed child already running PID:#{reexec_pid}" + return + rescue Errno::ESRCH + self.reexec_pid = 0 + end + end + + if pid + old_pid = "#{pid}.oldbin" + begin + self.pid = old_pid # clear the path for a new pid file + rescue ArgumentError + logger.error "old PID:#{valid_pid?(old_pid)} running with " \ + "existing pid=#{old_pid}, refusing rexec" + return + rescue => e + logger.error "error writing pid=#{old_pid} #{e.class} #{e.message}" + return + end + end + + self.reexec_pid = fork do + listener_fds = LISTENERS.map { |sock| sock.fileno } + ENV['UNICORN_FD'] = listener_fds.join(',') + Dir.chdir(START_CTX[:cwd]) + cmd = [ START_CTX[0] ].concat(START_CTX[:argv]) + + # avoid leaking FDs we don't know about, but let before_exec + # unset FD_CLOEXEC, if anything else in the app eventually + # relies on FD inheritence. + (3..1024).each do |io| + next if listener_fds.include?(io) + io = IO.for_fd(io) rescue next + IO_PURGATORY << io + io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) + end + logger.info "executing #{cmd.inspect} (in #{Dir.pwd})" + before_exec.call(self) + exec(*cmd) + end + proc_name 'master (old)' + end + + # forcibly terminate all workers that haven't checked in in timeout + # seconds. The timeout is implemented using an unlinked File + # shared between the parent process and each worker. The worker + # runs File#chmod to modify the ctime of the File. If the ctime + # is stale for >timeout seconds, then we'll kill the corresponding + # worker. + def murder_lazy_workers + t = @timeout + next_sleep = 1 + WORKERS.dup.each_pair do |wpid, worker| + stat = worker.tmp.stat + # skip workers that disable fchmod or have never fchmod-ed + stat.mode == 0100600 and next + diff = Time.now - stat.ctime + if diff <= t + tmp = t - diff + next_sleep < tmp and next_sleep = tmp + next + end + logger.error "worker=#{worker.nr} PID:#{wpid} timeout " \ + "(#{diff}s > #{t}s), killing" + kill_worker(:KILL, wpid) # take no prisoners for timeout violations + end + next_sleep + end + + def after_fork_internal + @ready_pipe.close if @ready_pipe + self.ready_pipe = nil # XXX Rainbows! compat, change for Unicorn 4.x + srand # http://redmine.ruby-lang.org/issues/4338 + + # The OpenSSL PRNG is seeded with only the pid, and apps with frequently + # dying workers can recycle pids + OpenSSL::Random.seed(rand.to_s) if defined?(OpenSSL::Random) + end + + def spawn_missing_workers + (0...worker_processes).each do |worker_nr| + WORKERS.values.include?(worker_nr) and next + worker = Worker.new(worker_nr, Unicorn::TmpIO.new) + before_fork.call(self, worker) + WORKERS[fork { + after_fork_internal + worker_loop(worker) + }] = worker + end + end + + def maintain_worker_count + (off = WORKERS.size - worker_processes) == 0 and return + off < 0 and return spawn_missing_workers + WORKERS.dup.each_pair { |wpid,w| + w.nr >= worker_processes and kill_worker(:QUIT, wpid) rescue nil + } + end + + # if we get any error, try to write something back to the client + # assuming we haven't closed the socket, but don't get hung up + # if the socket is already closed or broken. We'll always ensure + # the socket is closed at the end of this function + def handle_error(client, e) + msg = case e + when EOFError,Errno::ECONNRESET,Errno::EPIPE,Errno::EINVAL,Errno::EBADF + Unicorn::Const::ERROR_500_RESPONSE + when Unicorn::HttpParserError # try to tell the client they're bad + Unicorn::Const::ERROR_400_RESPONSE + else + logger.error "Read error: #{e.inspect}" + logger.error e.backtrace.join("\n") + Unicorn::Const::ERROR_500_RESPONSE + end + client.kgio_trywrite(msg) + client.close + rescue + end + + # once a client is accepted, it is processed in its entirety here + # in 3 easy steps: read request, call app, write app response + def process_client(client) + status, headers, body = @app.call(env = @request.read(client)) + + if 100 == status.to_i + client.write(Unicorn::Const::EXPECT_100_RESPONSE) + env.delete(Unicorn::Const::HTTP_EXPECT) + status, headers, body = @app.call(env) + end + @request.headers? or headers = nil + http_response_write(client, status, headers, body) + client.close # flush and uncork socket immediately, no keepalive + rescue => e + handle_error(client, e) + end + + # gets rid of stuff the worker has no business keeping track of + # to free some resources and drops all sig handlers. + # traps for USR1, USR2, and HUP may be set in the after_fork Proc + # by the user. + def init_worker_process(worker) + QUEUE_SIGS.each { |sig| trap(sig, nil) } + trap(:CHLD, 'DEFAULT') + SIG_QUEUE.clear + proc_name "worker[#{worker.nr}]" + START_CTX.clear + init_self_pipe! + WORKERS.values.each { |other| other.tmp.close rescue nil } + WORKERS.clear + LISTENERS.each { |sock| sock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) } + worker.tmp.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) + after_fork.call(self, worker) # can drop perms + worker.user(*user) if user.kind_of?(Array) && ! worker.switched + self.timeout /= 2.0 # halve it for select() + build_app! unless preload_app + end + + def reopen_worker_logs(worker_nr) + logger.info "worker=#{worker_nr} reopening logs..." + Unicorn::Util.reopen_logs + logger.info "worker=#{worker_nr} done reopening logs" + init_self_pipe! + rescue => e + logger.error(e) rescue nil + exit!(77) # EX_NOPERM in sysexits.h + end + + # runs inside each forked worker, this sits around and waits + # for connections and doesn't die until the parent dies (or is + # given a INT, QUIT, or TERM signal) + def worker_loop(worker) + ppid = master_pid + init_worker_process(worker) + nr = 0 # this becomes negative if we need to reopen logs + alive = worker.tmp # tmp is our lifeline to the master process + ready = LISTENERS + + # closing anything we IO.select on will raise EBADF + trap(:USR1) { nr = -65536; SELF_PIPE[0].close rescue nil } + trap(:QUIT) { alive = nil; LISTENERS.each { |s| s.close rescue nil }.clear } + [:TERM, :INT].each { |sig| trap(sig) { exit!(0) } } # instant shutdown + logger.info "worker=#{worker.nr} ready" + m = 0 + + begin + nr < 0 and reopen_worker_logs(worker.nr) + nr = 0 + + # we're a goner in timeout seconds anyways if alive.chmod + # breaks, so don't trap the exception. Using fchmod() since + # futimes() is not available in base Ruby and I very strongly + # prefer temporary files to be unlinked for security, + # performance and reliability reasons, so utime is out. No-op + # changes with chmod doesn't update ctime on all filesystems; so + # we change our counter each and every time (after process_client + # and before IO.select). + alive.chmod(m = 0 == m ? 1 : 0) + + ready.each do |sock| + if client = sock.kgio_tryaccept + process_client(client) + nr += 1 + alive.chmod(m = 0 == m ? 1 : 0) + end + break if nr < 0 + end + + # make the following bet: if we accepted clients this round, + # we're probably reasonably busy, so avoid calling select() + # and do a speculative non-blocking accept() on ready listeners + # before we sleep again in select(). + redo unless nr == 0 # (nr < 0) => reopen logs + + ppid == Process.ppid or return + alive.chmod(m = 0 == m ? 1 : 0) + + # timeout used so we can detect parent death: + ret = IO.select(LISTENERS, nil, SELF_PIPE, timeout) and ready = ret[0] + rescue Errno::EINTR + ready = LISTENERS + rescue Errno::EBADF + nr < 0 or return + rescue => e + if alive + logger.error "Unhandled listen loop exception #{e.inspect}." + logger.error e.backtrace.join("\n") + end + end while alive + end + + # delivers a signal to a worker and fails gracefully if the worker + # is no longer running. + def kill_worker(signal, wpid) + Process.kill(signal, wpid) + rescue Errno::ESRCH + worker = WORKERS.delete(wpid) and worker.tmp.close rescue nil + end + + # delivers a signal to each worker + def kill_each_worker(signal) + WORKERS.keys.each { |wpid| kill_worker(signal, wpid) } + end + + # unlinks a PID file at given +path+ if it contains the current PID + # still potentially racy without locking the directory (which is + # non-portable and may interact badly with other programs), but the + # window for hitting the race condition is small + def unlink_pid_safe(path) + (File.read(path).to_i == $$ and File.unlink(path)) rescue nil + end + + # returns a PID if a given path contains a non-stale PID file, + # nil otherwise. + def valid_pid?(path) + wpid = File.read(path).to_i + wpid <= 0 and return + Process.kill(0, wpid) + wpid + rescue Errno::ESRCH, Errno::ENOENT + # don't unlink stale pid files, racy without non-portable locking... + end + + def load_config! + loaded_app = app + logger.info "reloading config_file=#{config.config_file}" + config[:listeners].replace(init_listeners) + config.reload + config.commit!(self) + kill_each_worker(:QUIT) + Unicorn::Util.reopen_logs + self.app = orig_app + build_app! if preload_app + logger.info "done reloading config_file=#{config.config_file}" + rescue StandardError, LoadError, SyntaxError => e + logger.error "error reloading config_file=#{config.config_file}: " \ + "#{e.class} #{e.message} #{e.backtrace}" + self.app = loaded_app + end + + # returns an array of string names for the given listener array + def listener_names(listeners = LISTENERS) + listeners.map { |io| sock_name(io) } + end + + def build_app! + if app.respond_to?(:arity) && app.arity == 0 + if defined?(Gem) && Gem.respond_to?(:refresh) + logger.info "Refreshing Gem list" + Gem.refresh + end + self.app = app.call + end + end + + def proc_name(tag) + $0 = ([ File.basename(START_CTX[0]), tag + ]).concat(START_CTX[:argv]).join(' ') + end + + def redirect_io(io, path) + File.open(path, 'ab') { |fp| io.reopen(fp) } if path + io.sync = true + end + + def init_self_pipe! + SELF_PIPE.each { |io| io.close rescue nil } + SELF_PIPE.replace(Kgio::Pipe.new) + SELF_PIPE.each { |io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) } + end +end + diff --git a/lib/unicorn/launcher.rb b/lib/unicorn/launcher.rb index 0d415dd..5eafe5b 100644 --- a/lib/unicorn/launcher.rb +++ b/lib/unicorn/launcher.rb @@ -1,5 +1,6 @@ # -*- encoding: binary -*- +# :enddoc: $stdout.sync = $stderr.sync = true $stdin.binmode $stdout.binmode @@ -20,6 +21,7 @@ module Unicorn::Launcher # to pickup code changes if the original deployment directory # is a symlink or otherwise got replaced. def self.daemonize!(options) + cfg = Unicorn::Configurator $stdin.reopen("/dev/null") # We only start a new process group if we're not being reexecuted @@ -52,9 +54,9 @@ module Unicorn::Launcher end end # $stderr/$stderr can/will be redirected separately in the Unicorn config - Unicorn::Configurator::DEFAULTS[:stderr_path] ||= "/dev/null" - Unicorn::Configurator::DEFAULTS[:stdout_path] ||= "/dev/null" - Unicorn::Configurator::RACKUP[:daemonized] = true + cfg::DEFAULTS[:stderr_path] ||= "/dev/null" + cfg::DEFAULTS[:stdout_path] ||= "/dev/null" + cfg::RACKUP[:daemonized] = true end end diff --git a/lib/unicorn/oob_gc.rb b/lib/unicorn/oob_gc.rb index a0e8f1d..312b44c 100644 --- a/lib/unicorn/oob_gc.rb +++ b/lib/unicorn/oob_gc.rb @@ -47,9 +47,9 @@ module Unicorn::OobGC @@nr = interval self.const_set :OOBGC_PATH, path self.const_set :OOBGC_INTERVAL, interval - self.const_set :OOBGC_ENV, Unicorn::HttpRequest::REQ ObjectSpace.each_object(Unicorn::HttpServer) do |s| s.extend(self) + self.const_set :OOBGC_ENV, s.instance_variable_get(:@request).env end app # pretend to be Rack middleware since it was in the past end diff --git a/lib/unicorn/preread_input.rb b/lib/unicorn/preread_input.rb new file mode 100644 index 0000000..12eb3e8 --- /dev/null +++ b/lib/unicorn/preread_input.rb @@ -0,0 +1,33 @@ +# -*- encoding: binary -*- + +module Unicorn +# This middleware is used to ensure input is buffered to memory +# or disk (depending on size) before the application is dispatched +# by entirely consuming it (from TeeInput) beforehand. +# +# Usage (in config.ru): +# +# require 'unicorn/preread_input' +# if defined?(Unicorn) +# use Unicorn::PrereadInput +# end +# run YourApp.new +class PrereadInput + + # :stopdoc: + def initialize(app) + @app = app + end + + def call(env) + buf = "" + input = env["rack.input"] + if input.respond_to?(:rewind) + true while input.read(16384, buf) + input.rewind + end + @app.call(env) + end + # :startdoc: +end +end diff --git a/lib/unicorn/socket_helper.rb b/lib/unicorn/socket_helper.rb index 1d03eab..9f2d55c 100644 --- a/lib/unicorn/socket_helper.rb +++ b/lib/unicorn/socket_helper.rb @@ -17,9 +17,16 @@ module Unicorn # denial-of-service attacks :tcp_defer_accept => 1, - # FreeBSD, we need to override this to 'dataready' when we + # FreeBSD, we need to override this to 'dataready' if we # eventually get HTTPS support :accept_filter => 'httpready', + + # same default value as Mongrel + :backlog => 1024, + + # since we don't do keepalive, we'll always flush-on-close and + # this saves packets for everyone. + :tcp_nopush => true, } #:startdoc: @@ -41,19 +48,20 @@ module Unicorn end def set_tcp_sockopt(sock, opt) - # highly portable, but off by default because we don't do keepalive - if defined?(TCP_NODELAY) && ! (val = opt[:tcp_nodelay]).nil? + if defined?(TCP_NODELAY) + val = opt[:tcp_nodelay] + val = DEFAULTS[:tcp_nodelay] if nil == val sock.setsockopt(IPPROTO_TCP, TCP_NODELAY, val ? 1 : 0) end - unless (val = opt[:tcp_nopush]).nil? - val = val ? 1 : 0 - if defined?(TCP_CORK) # Linux - sock.setsockopt(IPPROTO_TCP, TCP_CORK, val) - elsif defined?(TCP_NOPUSH) # TCP_NOPUSH is untested (FreeBSD) - sock.setsockopt(IPPROTO_TCP, TCP_NOPUSH, val) - end + val = opt[:tcp_nopush] + val = DEFAULTS[:tcp_nopush] if nil == val + val = val ? 1 : 0 + if defined?(TCP_CORK) # Linux + sock.setsockopt(IPPROTO_TCP, TCP_CORK, val) + elsif defined?(TCP_NOPUSH) # TCP_NOPUSH is untested (FreeBSD) + sock.setsockopt(IPPROTO_TCP, TCP_NOPUSH, val) end # No good reason to ever have deferred accepts off @@ -61,26 +69,24 @@ module Unicorn if defined?(TCP_DEFER_ACCEPT) # this differs from nginx, since nginx doesn't allow us to # configure the the timeout... - tmp = DEFAULTS.merge(opt) - seconds = tmp[:tcp_defer_accept] - seconds = DEFAULTS[:tcp_defer_accept] if seconds == true + seconds = opt[:tcp_defer_accept] + seconds = DEFAULTS[:tcp_defer_accept] if [true,nil].include?(seconds) seconds = 0 unless seconds # nil/false means disable this sock.setsockopt(SOL_TCP, TCP_DEFER_ACCEPT, seconds) elsif respond_to?(:accf_arg) - tmp = DEFAULTS.merge(opt) - if name = tmp[:accept_filter] - begin - sock.setsockopt(SOL_SOCKET, SO_ACCEPTFILTER, accf_arg(name)) - rescue => e - logger.error("#{sock_name(sock)} " \ - "failed to set accept_filter=#{name} (#{e.inspect})") - end + name = opt[:accept_filter] + name = DEFAULTS[:accept_filter] if nil == name + begin + sock.setsockopt(SOL_SOCKET, SO_ACCEPTFILTER, accf_arg(name)) + rescue => e + logger.error("#{sock_name(sock)} " \ + "failed to set accept_filter=#{name} (#{e.inspect})") end end end def set_server_sockopt(sock, opt) - opt ||= {} + opt = DEFAULTS.merge(opt || {}) TCPSocket === sock and set_tcp_sockopt(sock, opt) @@ -90,7 +96,7 @@ module Unicorn sock.setsockopt(SOL_SOCKET, SO_SNDBUF, opt[:sndbuf]) if opt[:sndbuf] log_buffer_sizes(sock, " after: ") end - sock.listen(opt[:backlog] || 1024) + sock.listen(opt[:backlog]) rescue => e logger.error "error setting socket options: #{e.inspect}" logger.error e.backtrace.join("\n") @@ -126,12 +132,13 @@ module Unicorn end old_umask = File.umask(opt[:umask] || 0) begin - UNIXServer.new(address) + Kgio::UNIXServer.new(address) ensure File.umask(old_umask) end - elsif address =~ /^(\d+\.\d+\.\d+\.\d+):(\d+)$/ - TCPServer.new($1, $2.to_i) + elsif /\A(\d+\.\d+\.\d+\.\d+):(\d+)\z/ =~ address || + /\A\[([a-fA-F0-9:]+)\]:(\d+)\z/ =~ address + Kgio::TCPServer.new($1, $2.to_i) else raise ArgumentError, "Don't know how to bind: #{address}" end @@ -139,6 +146,13 @@ module Unicorn sock end + # returns rfc2732-style (e.g. "[::1]:666") addresses for IPv6 + def tcp_name(sock) + port, addr = Socket.unpack_sockaddr_in(sock.getsockname) + /:/ =~ addr ? "[#{addr}]:#{port}" : "#{addr}:#{port}" + end + module_function :tcp_name + # Returns the configuration name of a socket as a string. sock may # be a string value, in which case it is returned as-is # Warning: TCP sockets may not always return the name given to it. @@ -148,10 +162,10 @@ module Unicorn when UNIXServer Socket.unpack_sockaddr_un(sock.getsockname) when TCPServer - Socket.unpack_sockaddr_in(sock.getsockname).reverse!.join(':') + tcp_name(sock) when Socket begin - Socket.unpack_sockaddr_in(sock.getsockname).reverse!.join(':') + tcp_name(sock) rescue ArgumentError Socket.unpack_sockaddr_un(sock.getsockname) end @@ -166,9 +180,9 @@ module Unicorn def server_cast(sock) begin Socket.unpack_sockaddr_in(sock.getsockname) - TCPServer.for_fd(sock.fileno) + Kgio::TCPServer.for_fd(sock.fileno) rescue ArgumentError - UNIXServer.for_fd(sock.fileno) + Kgio::UNIXServer.for_fd(sock.fileno) end end diff --git a/lib/unicorn/stream_input.rb b/lib/unicorn/stream_input.rb new file mode 100644 index 0000000..4ca5a04 --- /dev/null +++ b/lib/unicorn/stream_input.rb @@ -0,0 +1,145 @@ +# -*- encoding: binary -*- + +# When processing uploads, Unicorn may expose a StreamInput object under +# "rack.input" of the (future) Rack (2.x) environment. +class Unicorn::StreamInput + # The I/O chunk size (in +bytes+) for I/O operations where + # the size cannot be user-specified when a method is called. + # The default is 16 kilobytes. + @@io_chunk_size = Unicorn::Const::CHUNK_SIZE + + # Initializes a new StreamInput object. You normally do not have to call + # this unless you are writing an HTTP server. + def initialize(socket, request) + @chunked = request.content_length.nil? + @socket = socket + @parser = request + @buf = request.buf + @rbuf = '' + @bytes_read = 0 + filter_body(@rbuf, @buf) unless @buf.empty? + end + + # :call-seq: + # ios.read([length [, buffer ]]) => string, buffer, or nil + # + # Reads at most length bytes from the I/O stream, or to the end of + # file if length is omitted or is nil. length must be a non-negative + # integer or nil. If the optional buffer argument is present, it + # must reference a String, which will receive the data. + # + # At end of file, it returns nil or '' depend on length. + # ios.read() and ios.read(nil) returns ''. + # ios.read(length [, buffer]) returns nil. + # + # If the Content-Length of the HTTP request is known (as is the common + # case for POST requests), then ios.read(length [, buffer]) will block + # until the specified length is read (or it is the last chunk). + # Otherwise, for uncommon "Transfer-Encoding: chunked" requests, + # ios.read(length [, buffer]) will return immediately if there is + # any data and only block when nothing is available (providing + # IO#readpartial semantics). + def read(length = nil, rv = '') + if length + if length <= @rbuf.size + length < 0 and raise ArgumentError, "negative length #{length} given" + rv.replace(@rbuf.slice!(0, length)) + else + to_read = length - @rbuf.size + rv.replace(@rbuf.slice!(0, @rbuf.size)) + until to_read == 0 || eof? || (rv.size > 0 && @chunked) + @socket.kgio_read(to_read, @buf) or eof! + filter_body(@rbuf, @buf) + rv << @rbuf + to_read -= @rbuf.size + end + @rbuf.replace('') + end + rv = nil if rv.empty? && length != 0 + else + read_all(rv) + end + rv + end + + # :call-seq: + # ios.gets => string or nil + # + # Reads the next ``line'' from the I/O stream; lines are separated + # by the global record separator ($/, typically "\n"). A global + # record separator of nil reads the entire unread contents of ios. + # Returns nil if called at the end of file. + # This takes zero arguments for strict Rack::Lint compatibility, + # unlike IO#gets. + def gets + sep = $/ + if sep.nil? + read_all(rv = '') + return rv.empty? ? nil : rv + end + re = /\A(.*?#{Regexp.escape(sep)})/ + + begin + @rbuf.sub!(re, '') and return $1 + return @rbuf.empty? ? nil : @rbuf.slice!(0, @rbuf.size) if eof? + @socket.kgio_read(@@io_chunk_size, @buf) or eof! + filter_body(once = '', @buf) + @rbuf << once + end while true + end + + # :call-seq: + # ios.each { |line| block } => ios + # + # Executes the block for every ``line'' in *ios*, where lines are + # separated by the global record separator ($/, typically "\n"). + def each + while line = gets + yield line + end + + self # Rack does not specify what the return value is here + end + +private + + def eof? + if @parser.body_eof? + while @chunked && ! @parser.parse + once = @socket.kgio_read(@@io_chunk_size) or eof! + @buf << once + end + @socket = nil + true + else + false + end + end + + def filter_body(dst, src) + rv = @parser.filter_body(dst, src) + @bytes_read += dst.size + rv + end + + def read_all(dst) + dst.replace(@rbuf) + @socket or return + until eof? + @socket.kgio_read(@@io_chunk_size, @buf) or eof! + filter_body(@rbuf, @buf) + dst << @rbuf + end + ensure + @rbuf.replace('') + end + + def eof! + # in case client only did a premature shutdown(SHUT_WR) + # we do support clients that shutdown(SHUT_WR) after the + # _entire_ request has been sent, and those will not have + # raised EOFError on us. + @socket.close if @socket + raise Unicorn::ClientShutdown, "bytes_read=#{@bytes_read}", [] + end +end diff --git a/lib/unicorn/tee_input.rb b/lib/unicorn/tee_input.rb index 540cfe0..637c583 100644 --- a/lib/unicorn/tee_input.rb +++ b/lib/unicorn/tee_input.rb @@ -11,31 +11,30 @@ # # When processing uploads, Unicorn exposes a TeeInput object under # "rack.input" of the Rack environment. -class Unicorn::TeeInput < Struct.new(:socket, :req, :parser, - :buf, :len, :tmp, :buf2) - +class Unicorn::TeeInput < Unicorn::StreamInput # The maximum size (in +bytes+) to buffer in memory before # resorting to a temporary file. Default is 112 kilobytes. @@client_body_buffer_size = Unicorn::Const::MAX_BODY - # The I/O chunk size (in +bytes+) for I/O operations where - # the size cannot be user-specified when a method is called. - # The default is 16 kilobytes. - @@io_chunk_size = Unicorn::Const::CHUNK_SIZE + # sets the maximum size of request bodies to buffer in memory, + # amounts larger than this are buffered to the filesystem + def self.client_body_buffer_size=(bytes) + @@client_body_buffer_size = bytes + end + + # returns the maximum size of request bodies to buffer in memory, + # amounts larger than this are buffered to the filesystem + def self.client_body_buffer_size + @@client_body_buffer_size + end # Initializes a new TeeInput object. You normally do not have to call # this unless you are writing an HTTP server. - def initialize(*args) - super(*args) - self.len = parser.content_length - self.tmp = len && len < @@client_body_buffer_size ? - StringIO.new("") : Unicorn::Util.tmpio - self.buf2 = "" - if buf.size > 0 - parser.filter_body(buf2, buf) and finalize_input - tmp.write(buf2) - tmp.rewind - end + def initialize(socket, request) + @len = request.content_length + super + @tmp = @len && @len <= @@client_body_buffer_size ? + StringIO.new("") : Unicorn::TmpIO.new end # :call-seq: @@ -55,16 +54,11 @@ class Unicorn::TeeInput < Struct.new(:socket, :req, :parser, # earlier. Most applications should only need to call +read+ with a # specified +length+ in a loop until it returns +nil+. def size - len and return len - - if socket - pos = tmp.pos - while tee(@@io_chunk_size, buf2) - end - tmp.seek(pos) - end - - self.len = tmp.size + @len and return @len + pos = @tmp.pos + consume! + @tmp.pos = pos + @len = @tmp.size end # :call-seq: @@ -87,24 +81,7 @@ class Unicorn::TeeInput < Struct.new(:socket, :req, :parser, # any data and only block when nothing is available (providing # IO#readpartial semantics). def read(*args) - socket or return tmp.read(*args) - - length = args.shift - if nil == length - rv = tmp.read || "" - while tee(@@io_chunk_size, buf2) - rv << buf2 - end - rv - else - rv = args.shift || "" - diff = tmp.size - tmp.pos - if 0 == diff - ensure_length(tee(length, rv), length) - else - ensure_length(tmp.read(diff > length ? length : diff, rv), length) - end - end + @socket ? tee(super) : @tmp.read(*args) end # :call-seq: @@ -117,43 +94,7 @@ class Unicorn::TeeInput < Struct.new(:socket, :req, :parser, # This takes zero arguments for strict Rack::Lint compatibility, # unlike IO#gets. def gets - socket or return tmp.gets - sep = $/ or return read - - orig_size = tmp.size - if tmp.pos == orig_size - tee(@@io_chunk_size, buf2) or return nil - tmp.seek(orig_size) - end - - sep_size = Rack::Utils.bytesize(sep) - line = tmp.gets # cannot be nil here since size > pos - sep == line[-sep_size, sep_size] and return line - - # unlikely, if we got here, then tmp is at EOF - begin - orig_size = tmp.pos - tee(@@io_chunk_size, buf2) or break - tmp.seek(orig_size) - line << tmp.gets - sep == line[-sep_size, sep_size] and return line - # tmp is at EOF again here, retry the loop - end while true - - line - end - - # :call-seq: - # ios.each { |line| block } => ios - # - # Executes the block for every ``line'' in *ios*, where lines are - # separated by the global record separator ($/, typically "\n"). - def each(&block) - while line = gets - yield line - end - - self # Rack does not specify what the return value is here + @socket ? tee(super) : @tmp.gets end # :call-seq: @@ -163,70 +104,23 @@ class Unicorn::TeeInput < Struct.new(:socket, :req, :parser, # the offset (zero) of the +ios+ pointer. Subsequent reads will # start from the beginning of the previously-buffered input. def rewind - tmp.rewind # Rack does not specify what the return value is here + return 0 if 0 == @tmp.size + consume! if @socket + @tmp.rewind # Rack does not specify what the return value is here end private - def client_error(e) - case e - when EOFError - # in case client only did a premature shutdown(SHUT_WR) - # we do support clients that shutdown(SHUT_WR) after the - # _entire_ request has been sent, and those will not have - # raised EOFError on us. - socket.close if socket - raise Unicorn::ClientShutdown, "bytes_read=#{tmp.size}", [] - when Unicorn::HttpParserError - e.set_backtrace([]) - end - raise e + # consumes the stream of the socket + def consume! + junk = "" + nil while read(@@io_chunk_size, junk) end - # tees off a +length+ chunk of data from the input into the IO - # backing store as well as returning it. +dst+ must be specified. - # returns nil if reading from the input returns nil - def tee(length, dst) - unless parser.body_eof? - if parser.filter_body(dst, socket.readpartial(length, buf)).nil? - tmp.write(dst) - tmp.seek(0, IO::SEEK_END) # workaround FreeBSD/OSX + MRI 1.8.x bug - return dst - end + def tee(buffer) + if buffer && buffer.size > 0 + @tmp.write(buffer) end - finalize_input - rescue => e - client_error(e) + buffer end - - def finalize_input - while parser.trailers(req, buf).nil? - # Don't worry about raising ClientShutdown here on EOFError, tee() - # will catch EOFError when app is processing it, otherwise in - # initialize we never get any chance to enter the app so the - # EOFError will just get trapped by Unicorn and not the Rack app - buf << socket.readpartial(@@io_chunk_size) - end - self.socket = nil - end - - # tee()s into +dst+ until it is of +length+ bytes (or until - # we've reached the Content-Length of the request body). - # Returns +dst+ (the exact object, not a duplicate) - # To continue supporting applications that need near-real-time - # streaming input bodies, this is a no-op for - # "Transfer-Encoding: chunked" requests. - def ensure_length(dst, length) - # len is nil for chunked bodies, so we can't ensure length for those - # since they could be streaming bidirectionally and we don't want to - # block the caller in that case. - return dst if dst.nil? || len.nil? - - while dst.size < length && tee(length - dst.size, buf2) - dst << buf2 - end - - dst - end - end diff --git a/lib/unicorn/tmpio.rb b/lib/unicorn/tmpio.rb new file mode 100644 index 0000000..2da05a2 --- /dev/null +++ b/lib/unicorn/tmpio.rb @@ -0,0 +1,29 @@ +# -*- encoding: binary -*- +# :stopdoc: +require 'tmpdir' + +# some versions of Ruby had a broken Tempfile which didn't work +# well with unlinked files. This one is much shorter, easier +# to understand, and slightly faster. +class Unicorn::TmpIO < File + + # creates and returns a new File object. The File is unlinked + # immediately, switched to binary mode, and userspace output + # buffering is disabled + def self.new + fp = begin + super("#{Dir::tmpdir}/#{rand}", RDWR|CREAT|EXCL, 0600) + rescue Errno::EEXIST + retry + end + unlink(fp.path) + fp.binmode + fp.sync = true + fp + end + + # for easier env["rack.input"] compatibility with Rack <= 1.1 + def size + stat.size + end unless File.method_defined?(:size) +end diff --git a/lib/unicorn/util.rb b/lib/unicorn/util.rb index e9dd57f..cde2563 100644 --- a/lib/unicorn/util.rb +++ b/lib/unicorn/util.rb @@ -1,101 +1,68 @@ # -*- encoding: binary -*- -require 'fcntl' -require 'tmpdir' +module Unicorn::Util -module Unicorn +# :stopdoc: + def self.is_log?(fp) + append_flags = File::WRONLY | File::APPEND - class TmpIO < ::File + ! fp.closed? && + fp.sync && + (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags + rescue IOError, Errno::EBADF + false + end - # for easier env["rack.input"] compatibility - def size - # flush if sync - stat.size + def self.chown_logs(uid, gid) + ObjectSpace.each_object(File) do |fp| + fp.chown(uid, gid) if is_log?(fp) end end - - module Util - class << self - - def is_log?(fp) - append_flags = File::WRONLY | File::APPEND - - ! fp.closed? && - fp.sync && - fp.path[0] == ?/ && - (fp.fcntl(Fcntl::F_GETFL) & append_flags) == append_flags - rescue IOError, Errno::EBADF - false +# :startdoc: + + # This reopens ALL logfiles in the process that have been rotated + # using logrotate(8) (without copytruncate) or similar tools. + # A +File+ object is considered for reopening if it is: + # 1) opened with the O_APPEND and O_WRONLY flags + # 2) the current open file handle does not match its original open path + # 3) unbuffered (as far as userspace buffering goes, not O_SYNC) + # Returns the number of files reopened + # + # In Unicorn 3.5.x and earlier, files must be opened with an absolute + # path to be considered a log file. + def self.reopen_logs + to_reopen = [] + nr = 0 + ObjectSpace.each_object(File) { |fp| is_log?(fp) and to_reopen << fp } + + to_reopen.each do |fp| + orig_st = begin + fp.stat + rescue IOError, Errno::EBADF + next end - def chown_logs(uid, gid) - ObjectSpace.each_object(File) do |fp| - fp.chown(uid, gid) if is_log?(fp) - end + begin + b = File.stat(fp.path) + next if orig_st.ino == b.ino && orig_st.dev == b.dev + rescue Errno::ENOENT end - # This reopens ALL logfiles in the process that have been rotated - # using logrotate(8) (without copytruncate) or similar tools. - # A +File+ object is considered for reopening if it is: - # 1) opened with the O_APPEND and O_WRONLY flags - # 2) opened with an absolute path (starts with "/") - # 3) the current open file handle does not match its original open path - # 4) unbuffered (as far as userspace buffering goes, not O_SYNC) - # Returns the number of files reopened - def reopen_logs - to_reopen = [] - nr = 0 - ObjectSpace.each_object(File) { |fp| is_log?(fp) and to_reopen << fp } - - to_reopen.each do |fp| - orig_st = begin - fp.stat - rescue IOError, Errno::EBADF - next - end - - begin - b = File.stat(fp.path) - next if orig_st.ino == b.ino && orig_st.dev == b.dev - rescue Errno::ENOENT - end - - begin - File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) } - fp.sync = true - new_st = fp.stat - - # this should only happen in the master: - if orig_st.uid != new_st.uid || orig_st.gid != new_st.gid - fp.chown(orig_st.uid, orig_st.gid) - end + begin + File.open(fp.path, 'a') { |tmpfp| fp.reopen(tmpfp) } + fp.sync = true + new_st = fp.stat - nr += 1 - rescue IOError, Errno::EBADF - # not much we can do... - end + # this should only happen in the master: + if orig_st.uid != new_st.uid || orig_st.gid != new_st.gid + fp.chown(orig_st.uid, orig_st.gid) end - nr - end - - # creates and returns a new File object. The File is unlinked - # immediately, switched to binary mode, and userspace output - # buffering is disabled - def tmpio - fp = begin - TmpIO.open("#{Dir::tmpdir}/#{rand}", - File::RDWR|File::CREAT|File::EXCL, 0600) - rescue Errno::EEXIST - retry - end - File.unlink(fp.path) - fp.binmode - fp.sync = true - fp + nr += 1 + rescue IOError, Errno::EBADF + # not much we can do... end - end - + nr end end diff --git a/lib/unicorn/worker.rb b/lib/unicorn/worker.rb new file mode 100644 index 0000000..39e9e32 --- /dev/null +++ b/lib/unicorn/worker.rb @@ -0,0 +1,47 @@ +# -*- encoding: binary -*- + +# This class and its members can be considered a stable interface +# and will not change in a backwards-incompatible fashion between +# releases of \Unicorn. Knowledge of this class is generally not +# not needed for most users of \Unicorn. +# +# Some users may want to access it in the before_fork/after_fork hooks. +# See the Unicorn::Configurator RDoc for examples. +class Unicorn::Worker < Struct.new(:nr, :tmp, :switched) + + # worker objects may be compared to just plain Integers + def ==(other_nr) # :nodoc: + nr == other_nr + end + + # In most cases, you should be using the Unicorn::Configurator#user + # directive instead. This method should only be used if you need + # fine-grained control of exactly when you want to change permissions + # in your after_fork hooks. + # + # Changes the worker process to the specified +user+ and +group+ + # This is only intended to be called from within the worker + # process from the +after_fork+ hook. This should be called in + # the +after_fork+ hook after any priviledged functions need to be + # run (e.g. to set per-worker CPU affinity, niceness, etc) + # + # Any and all errors raised within this method will be propagated + # directly back to the caller (usually the +after_fork+ hook. + # These errors commonly include ArgumentError for specifying an + # invalid user/group and Errno::EPERM for insufficient priviledges + def user(user, group = nil) + # we do not protect the caller, checking Process.euid == 0 is + # insufficient because modern systems have fine-grained + # capabilities. Let the caller handle any and all errors. + uid = Etc.getpwnam(user).uid + gid = Etc.getgrnam(group).gid if group + Unicorn::Util.chown_logs(uid, gid) + tmp.chown(uid, gid) + if gid && Process.egid != gid + Process.initgroups(user, gid) + Process::GID.change_privilege(gid) + end + Process.euid != uid and Process::UID.change_privilege(uid) + self.switched = true + end +end |