about summary refs log tree commit homepage
path: root/lib/mogilefs
diff options
context:
space:
mode:
authorEric Wong <normalperson@yhbt.net>2008-12-02 16:33:05 -0800
committerEric Wong <normalperson@yhbt.net>2008-12-10 12:25:35 -0800
commitcb85f82e640762aea81b7ad2b8aec1efed0aa572 (patch)
treea37c1f9316b803d1e8c6bd85f1a06090efdb5e17 /lib/mogilefs
parent83dfe9644cbbc83b0f3bd05537874eecc8ad0a8c (diff)
downloadmogilefs-client-cb85f82e640762aea81b7ad2b8aec1efed0aa572.tar.gz
Needs more tests, but it seems to work...

I seem to have discovered a bug in mogtool which causes it to
generate incorrect MD5 checksums when the --gzip flag is used
(and --gzip actually just does zlib deflate, not something that
gzip(1) can actually decompress).

So right now MD5 checksums are only verified on non-zlib-deflated
files.
Diffstat (limited to 'lib/mogilefs')
-rw-r--r--lib/mogilefs/bigfile.rb147
-rw-r--r--lib/mogilefs/mogilefs.rb1
2 files changed, 148 insertions, 0 deletions
diff --git a/lib/mogilefs/bigfile.rb b/lib/mogilefs/bigfile.rb
new file mode 100644
index 0000000..8c8284b
--- /dev/null
+++ b/lib/mogilefs/bigfile.rb
@@ -0,0 +1,147 @@
+require 'zlib'
+require 'digest/md5'
+require 'uri'
+Thread.abort_on_exception = true # NOTE(review): process-wide setting, changed for every thread as soon as this file is required
+
+module MogileFS::Bigfile
+  GZIP_HEADER = "\x1f\x8b".freeze # mogtool(1) has this
+  # VALID_TYPES = %w(file tarball partition).map { |x| x.freeze }.freeze
+
+  def bigfile_stat(key)
+    info = get_file_data(key)
+    parse_info(info)
+  end
+
+  # returns the big_info hash if successful, raises an exception if not
+  def bigfile_write(key, wr, opts = { :verify => false })
+    info = bigfile_stat(key)
+    zi = nil
+    md5 = opts[:verify] ? Digest::MD5.new : nil
+    total = 0
+
+    # we only decode raw zlib deflated streams that mogtool (unfortunately)
+    # generates.  tarballs and gzip(1) are up to to the application to decrypt.
+    filter = Proc.new do |buf|
+      if zi == nil
+        if info[:compressed] && info[:type] == 'file' &&
+             buf.length >= 2 && buf[0,2] != GZIP_HEADER
+          zi = Zlib::Inflate.new
+
+          # mogtool(1) seems to have a bug that causes it to generate bogus
+          # MD5s if zlib deflate is used.  Don't trust those MD5s for now...
+          md5 = nil
+        else
+          zi = false
+        end
+      end
+      buf ||= ''
+      if zi
+        zi.inflate(buf)
+      else
+        md5 << buf
+        buf
+      end
+    end if (info[:compressed] || md5)
+
+    info[:parts].each_with_index do |part,part_nr|
+      next if part_nr == 0 # info[:parts][0] is always empty
+      uris = verify_uris(part[:paths].map { |path| URI.parse(path) })
+      if uris.empty?
+        # part[:paths] may not be valid anymore due to rebalancing, however we
+        # can get_keys on key,<part_nr> and retry paths if all paths fail
+        part[:paths] = get_paths("#{key.gsub(/^big_info:/, '')},#{part_nr}")
+        uris = verify_uris(part[:paths].map { |path| URI.parse(path) })
+        raise MogileFS::Backend::NoDevices if uris.empty?
+      end
+
+      sock = http_get_sock(uris[0])
+      md5.reset if md5
+      w = sysrwloop(sock, wr, filter)
+
+      if md5 && md5.hexdigest != part[:md5]
+        raise MogileFS::ChecksumMismatchError, "#{md5} != #{part[:md5]}"
+      end
+      total += w
+    end
+
+    wr.syswrite(zi.finish) if zi
+
+    [ total, info ]
+  end
+
+  private
+
+    def parse_info(info = '')
+      rv = { :parts => [] }
+      info.each_line do |line|
+        line.chomp!
+        case line
+        when /^(des|type|filename)\s+(.+)$/
+          rv[$1.to_sym] = $2
+        when /^compressed\s+([01])$/
+          rv[:compressed] = ($1 == '1')
+        when /^(chunks|size)\s+(\d+)$/
+          rv[$1.to_sym] = $2.to_i
+        when /^part\s+(\d+)\s+bytes=(\d+)\s+md5=(.+)\s+paths:\s+(.+)$/
+          rv[:parts][$1.to_i] = {
+            :bytes => $2.to_i,
+            :md5 => $3.downcase,
+            :paths => $4.split(/\s*,\s*/),
+          }
+        end
+      end
+
+      rv
+    end
+
+end # module MogileFS::Bigfile
+
+__END__
+# Copied from mogtool:
+# http://code.sixapart.com/svn/mogilefs/utils/mogtool, r1221
+
+# this is a temporary file that we delete when we're done recording all chunks
+
+_big_pre:<key>
+
+    starttime=UNIXTIMESTAMP
+
+# when done, we write the _info file and delete the _pre.
+
+_big_info:<key>
+
+    des Cow's ljdb backup as of 2004-11-17
+    type  { partition, file, tarball }
+    compressed {0, 1}
+    filename  ljbinlog.305.gz
+    partblocks  234324324324
+
+
+    part 1 <bytes> <md5hex>
+    part 2 <bytes> <md5hex>
+    part 3 <bytes> <md5hex>
+    part 4 <bytes> <md5hex>
+    part 5 <bytes> <md5hex>
+
+_big:<key>,<n>
+_big:<key>,<n>
+_big:<key>,<n>
+
+
+Receipt format:
+
+BEGIN MOGTOOL RECIEPT
+type partition
+des Foo
+compressed foo
+
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+part 1 bytes=23423432 md5=2349823948239423984 paths: http://dev5/2/23/23/.fid, http://dev6/23/423/4/324.fid
+
+
+END RECIEPT
+
+
+
diff --git a/lib/mogilefs/mogilefs.rb b/lib/mogilefs/mogilefs.rb
index fcb33d8..00149ec 100644
--- a/lib/mogilefs/mogilefs.rb
+++ b/lib/mogilefs/mogilefs.rb
@@ -16,6 +16,7 @@ class MogileFS::Timeout < Timeout::Error; end
 class MogileFS::MogileFS < MogileFS::Client
 
   include MogileFS::Util
+  include MogileFS::Bigfile
 
   ##
   # The path to the local MogileFS mount point if you are using NFS mode.