diff options
-rwxr-xr-x | bin/mog | 10 | ||||
-rw-r--r-- | lib/mogilefs.rb | 3 | ||||
-rw-r--r-- | lib/mogilefs/bigfile.rb | 147 | ||||
-rw-r--r-- | lib/mogilefs/mogilefs.rb | 1 | ||||
-rw-r--r-- | test/test_bigfile.rb | 48 |
5 files changed, 208 insertions, 1 deletions
@@ -33,6 +33,7 @@ config_file = nil
 ls_l = false
 ls_h = false
 test = {}
+cat = { :raw => false }
 
 ARGV.options do |x|
   x.banner = "Usage: #{$0} [options] <command> [<arguments>]"
@@ -47,6 +48,7 @@ ARGV.options do |x|
   end
 
   x.on('-e', 'True if key exists') { test[:e] = true }
+  x.on('-r', '--raw', 'show raw big_info file information') { cat[:raw] = true }
 
   x.on('-C', '--class=s', 'class') { |klass| cli_cfg[:class] = klass }
   x.on('-d', '--domain=s', 'domain') { |domain| cli_cfg[:domain] = domain }
@@ -123,7 +125,13 @@ begin
     store_file_retry(mg, key, cfg[:class], filename)
   when 'cat'
     ARGV.empty? and raise ArgumentError, '<key1> [<key2> ...]'
-    ARGV.each { |key| mg.get_file_data(key) { |fp| sysrwloop(fp, STDOUT) } }
+    ARGV.each do |key|
+      if (!cat[:raw] && key =~ /^_big_info:/)
+        mg.bigfile_write(key, STDOUT, {:verify => true})
+      else
+        mg.get_file_data(key) { |fp| sysrwloop(fp, STDOUT) }
+      end
+    end
   when 'ls'
     prefixes = ARGV.empty? ? [ nil ] : ARGV
     prefixes.each do |prefix|
diff --git a/lib/mogilefs.rb b/lib/mogilefs.rb
index 1b29e1d..6a568b0 100644
--- a/lib/mogilefs.rb
+++ b/lib/mogilefs.rb
@@ -13,6 +13,8 @@ module MogileFS
 
   class Error < StandardError; end
   class UnreadableSocketError < Error; end
+  class SizeMismatchError < Error; end
+  class ChecksumMismatchError < RuntimeError; end
   class ReadOnlyError < Error
     def message; 'readonly mogilefs'; end
   end
@@ -34,6 +36,7 @@ require 'mogilefs/backend'
 require 'mogilefs/nfsfile'
 require 'mogilefs/httpfile'
 require 'mogilefs/client'
+require 'mogilefs/bigfile'
 require 'mogilefs/mogilefs'
 require 'mogilefs/admin'
 
diff --git a/lib/mogilefs/bigfile.rb b/lib/mogilefs/bigfile.rb
new file mode 100644
index 0000000..8c8284b
--- /dev/null
+++ b/lib/mogilefs/bigfile.rb
@@ -0,0 +1,147 @@
+require 'zlib'
+require 'digest/md5'
+require 'uri'
+Thread.abort_on_exception = true
+
+module MogileFS::Bigfile
+  GZIP_HEADER = "\x1f\x8b".freeze # mogtool(1) has this
+  # VALID_TYPES = %w(file tarball partition).map { |x| x.freeze }.freeze
+
+  def bigfile_stat(key) # reads the data stored at +key+ via get_file_data and parses it with parse_info
+    info = get_file_data(key)
+    parse_info(info)
+  end
+
+  # returns [ total, big_info hash ] (total sums what sysrwloop returns per part) if successful, raises an exception if not
+  def bigfile_write(key, wr, opts = { :verify => false })
+    info = bigfile_stat(key)
+    zi = nil
+    md5 = opts[:verify] ? Digest::MD5.new : nil
+    total = 0
+
+    # we only decode raw zlib deflated streams that mogtool (unfortunately)
+    # generates.  tarballs and gzip(1) are up to the application to decode.
+    filter = Proc.new do |buf|
+      if zi == nil
+        if info[:compressed] && info[:type] == 'file' &&
+           buf.length >= 2 && buf[0,2] != GZIP_HEADER
+          zi = Zlib::Inflate.new
+
+          # mogtool(1) seems to have a bug that causes it to generate bogus
+          # MD5s if zlib deflate is used.  Don't trust those MD5s for now...
+          md5 = nil
+        else
+          zi = false
+        end
+      end
+      buf ||= ''
+      if zi
+        zi.inflate(buf)
+      else
+        md5 << buf if md5 # md5 is nil when :verify is off; the filter still runs for compressed info
+        buf
+      end
+    end if (info[:compressed] || md5)
+
+    info[:parts].each_with_index do |part,part_nr|
+      next if part_nr == 0 # info[:parts][0] is always empty
+      uris = verify_uris(part[:paths].map { |path| URI.parse(path) })
+      if uris.empty?
+        # part[:paths] may not be valid anymore due to rebalancing, however we
+        # can get_paths on _big:<key>,<part_nr> and retry paths if all paths fail
+        part[:paths] = get_paths("#{key.sub(/^_big_info:/, '_big:')},#{part_nr}")
+        uris = verify_uris(part[:paths].map { |path| URI.parse(path) })
+        raise MogileFS::Backend::NoDevices if uris.empty?
+      end
+
+      sock = http_get_sock(uris[0])
+      md5.reset if md5
+      w = sysrwloop(sock, wr, filter)
+
+      if md5 && md5.hexdigest != part[:md5]
+        raise MogileFS::ChecksumMismatchError, "#{md5} != #{part[:md5]}"
+      end
+      total += w
+    end
+
+    wr.syswrite(zi.finish) if zi
+
+    [ total, info ]
+  end
+
+  private
+
+  def parse_info(info = '') # builds a Hash from big_info text; lines matching no pattern below are skipped
+    rv = { :parts => [] }
+    info.each_line do |line|
+      line.chomp!
+      case line
+      when /^(des|type|filename)\s+(.+)$/
+        rv[$1.to_sym] = $2
+      when /^compressed\s+([01])$/
+        rv[:compressed] = ($1 == '1')
+      when /^(chunks|size)\s+(\d+)$/
+        rv[$1.to_sym] = $2.to_i
+      when /^part\s+(\d+)\s+bytes=(\d+)\s+md5=(.+)\s+paths:\s+(.+)$/
+        rv[:parts][$1.to_i] = {
+          :bytes => $2.to_i,
+          :md5 => $3.downcase,
+          :paths => $4.split(/\s*,\s*/),
+        }
+      end
+    end
+
+    rv
+  end
+
+end # module MogileFS::Bigfile
+
+__END__
+# Copied from mogtool:
+# http://code.sixapart.com/svn/mogilefs/utils/mogtool, r1221
+
+# this is a temporary file that we delete when we're done recording all chunks
+
+_big_pre:<key>
+
+    starttime=UNIXTIMESTAMP
+
+# when done, we write the _info file and delete the _pre.
+
+_big_info:<key>
+
+    des Cow's ljdb backup as of 2004-11-17
+    type { partition, file, tarball }
+    compressed {0, 1}
+    filename ljbinlog.305.gz
+    partblocks 234324324324
+
+
+    part 1 <bytes> <md5hex>
+    part 2 <bytes> <md5hex>
+    part 3 <bytes> <md5hex>
+    part 4 <bytes> <md5hex>
+    part 5 <bytes> <md5hex>
+
+_big:<key>,<n>
+_big:<key>,<n>
+_big:<key>,<n>
+
+
+Receipt format:
+
+BEGIN MOGTOOL RECIEPT
+type partition
+des Foo
+compressed foo
+
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+
+
+END RECIEPT
+
+
+
diff --git a/lib/mogilefs/mogilefs.rb b/lib/mogilefs/mogilefs.rb
index fcb33d8..00149ec 100644
--- a/lib/mogilefs/mogilefs.rb
+++ b/lib/mogilefs/mogilefs.rb
@@ -16,6 +16,7 @@ class MogileFS::Timeout < Timeout::Error; end
 class MogileFS::MogileFS < MogileFS::Client
 
   include MogileFS::Util
+  include MogileFS::Bigfile
 
   ##
   # The path to the local MogileFS mount point if you are using NFS mode.
diff --git a/test/test_bigfile.rb b/test/test_bigfile.rb
new file mode 100644
index 0000000..6bc78cd
--- /dev/null
+++ b/test/test_bigfile.rb
@@ -0,0 +1,48 @@
+require 'test/setup'
+require 'mogilefs/bigfile'
+
+class TestMogileFS__Bigfile < TestMogileFS
+  include MogileFS::Bigfile
+
+  def setup
+    @klass = MogileFS::MogileFS
+    super
+  end
+
+  def test_parser
+    expect = {:type=>"file",
+     :des=>"no description",
+     :chunks=>2,
+     :parts=>
+      [nil,
+       {:md5=>"d3b4d15c294b24d9f853e26095dfe3d0",
+        :paths=>
+         ["http://foo1:7500/dev2/0/000/144/0000144411.fid",
+          "http://foo2:7500/dev1/0/000/144/0000144411.fid"],
+        :bytes=>12},
+       {:md5=>"d3b4d15c294b24d9f853e26095dfe3d0",
+        :paths=>
+         ["http://foo4:7500/dev2/0/000/144/0000144411.fid",
+          "http://foo3:7500/dev1/0/000/144/0000144411.fid"],
+        :bytes=>6}],
+     :size=>18,
+     :filename=>"foo.tar",
+     :compressed=>false}
+
+    s = <<EOS
+des no description
+type file
+compressed 0
+filename foo.tar
+chunks 2
+size 18
+
+part 1 bytes=12 md5=d3b4d15c294b24d9f853e26095dfe3d0 paths: http://foo1:7500/dev2/0/000/144/0000144411.fid, http://foo2:7500/dev1/0/000/144/0000144411.fid
+part 2 bytes=6 md5=d3b4d15c294b24d9f853e26095dfe3d0 paths: http://foo4:7500/dev2/0/000/144/0000144411.fid, http://foo3:7500/dev1/0/000/144/0000144411.fid
+EOS
+    i = parse_info(s)
+    assert_equal expect, i
+  end
+
+end
+