From RdiffBackupWiki
#!/usr/bin/ruby -w
# Take an archive created by rdiff-backup and split it into multiple separate
# archives, leaving the source intact.
#
# Copyright (C) 2007 National Fitness Financial Systems
# Written by Steven Willoughby (stevenw _at_ nffs _dot_ com)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
require 'optparse'
require 'ftools'
require 'zlib'
# Command-line switches recorded by the parser below; info() reads :verbose.
@options = {}
parser = OptionParser.new do |opts|
  opts.banner = "Usage: split-rdiff-backup [options] source dest_dir path [more paths]
This script copies the rdiff-backup archive called $source to $dest_dir/_other.
Then it removes each of the paths given from _other as and places them in
$dest_dir/new-path. The control files and increments in $source/rdiff-backup-data are split up appropriately."
  opts.on( "-v", "--verbose", "Run verbosely" ) do |flag|
    @options[:verbose] = flag
  end
  # opts.on( "-m", "--move", "Move instead of copy." ) do |m|
  # @options[:move] = m
  # end
end
# parse! strips the recognised switches out of ARGV, so only the positional
# arguments are left for the code that follows.
parser.parse!
# Name of the catch-all archive that keeps everything not split out.
other = "_other"
# Positional arguments: the source archive, the destination directory, and
# then the paths to split out (whatever remains in ARGV).
source, dest_dir = ARGV.shift, ARGV.shift
# Longest paths come first so that nested input such as
# foo
# foo/bar
# is handled properly: foo/bar is processed before its parent foo.
paths = ARGV.sort { |left, right| right.length <=> left.length }
paths << other
# Print +message+ on standard output, but only when the --verbose switch was
# given (i.e. @options[:verbose] is set).
def info(message)
  return unless @options[:verbose]
  puts message
end
# Echo the positional arguments back (shown with --verbose only).
info( "Source: '#{source}' Dest: '#{dest_dir}'" )
# Create the directory +dir+ unless something already exists at that path,
# reporting which of the two happened through info().
def mkdir( dir )
  return info( "Directory already existed: #{dir}" ) if File.exist?( dir )
  info "Making directory: #{dir}"
  Dir.mkdir( dir )
end
# Turn a (possibly nested) relative path into a single directory name by
# joining its components with "-", e.g. "foo/bar" => "foo-bar".
#
# path - the path as given on the command line.
#
# Returns +path+ unchanged when it contains no "/", otherwise the flattened
# name. Every component is joined, so "a/b/c" becomes "a-b-c"; File.split
# only separates the final component and would leave "a/b-c", which is still
# a nested path. Empty components (from a trailing or doubled "/") are
# dropped, so "foo/" becomes "foo".
def fix_path( path )
  return path unless path =~ /\//
  parts = path.split( '/' ).reject { |part| part.empty? }
  # A path made only of separators has no components to join; leave it alone.
  return path if parts.empty?
  parts.join( '-' )
end
# Check every requested path before anything is copied: each one must exist
# inside the source archive and be a directory. The catch-all entry stands
# for the source archive itself.
paths.each do |path|
  src = path == other ? source : File.join( source, path )
  if !File.exist?( src )
    raise "No such path: #{src}"
  elsif !File.directory?( src )
    raise "Not a directory: #{src}"
  end
  target = File.join( dest_dir, fix_path( path ) )
  info "#{src} => #{target}"
end
# Strip the first prefix.length characters from +original+ and return what
# is left as a relative name.
#
# original - a string that starts with +prefix+ (the caller checks this;
#            only the length of +prefix+ is used here).
# prefix   - the leading part to remove.
#
# Returns the remainder without its leading "/", or "." when nothing is
# left. The separator is stripped before the emptiness check so that
# "foo/" minus "foo" yields "." rather than an empty string.
def remove_path_prefix( original, prefix )
  remainder = original.slice( prefix.length..-1 )
  remainder = remainder.slice( 1..-1 ) if remainder[0, 1] == "/"
  remainder = "." if remainder.empty?
  remainder
end
# Copy the data and increments.
# Everything is copied to _other first; the requested paths are then moved
# out of _other to accomplish the split.
inc = "rdiff-backup-data/increments"
mkdir dest_dir
cmd = ["rsync", "-a", source + "/", File.join( dest_dir, other ) + "/" ]
info "Copying entire archive"
system( *cmd ) or raise "Could not run command: #{cmd.join( ' ' )}"
info "Splitting archive"
paths.each do |path|
  next if path == other

  # Move the mirrored data for this path into an archive of its own.
  new_root = File.join( dest_dir, fix_path( path ) )
  src = File.join( dest_dir, other, path )
  info "Renaming #{src} to #{new_root}"
  File.rename( src, new_root )
  mkdir( File.join( new_root, 'rdiff-backup-data' ) )

  # Move the increments directory that belongs to this path.
  src = File.join( dest_dir, other, inc, path )
  dest = File.join( new_root, inc )
  if File.exist?( src )
    info "Renaming #{src} to #{dest}"
    File.rename( src, dest )
  else
    # NOTE(review): presumably the increments tree has no directory for a
    # path with no recorded changes -- confirm against rdiff-backup's layout.
    # Create an empty one instead of aborting half-way through the split.
    mkdir dest
  end

  # Move the sibling increment files named "<path>.<something>". Glob
  # metacharacters in the path are backslash-escaped so they match literally.
  # FIXME Check for exact rdiff-backup file format
  pattern = src.gsub( /[\\*?\[\]{}]/ ) { |char| "\\" + char } + ".*"
  Dir.glob( pattern ).each do |file|
    foo = dest + remove_path_prefix( file, src )
    info "Renaming #{file} to #{foo}"
    File.rename( file, foo )
  end
end
# Rewrite one mirror_metadata entry so that it is relative to +path+.
#
# path  - the path being split out of the archive.
# entry - the text of one entry; its first line is "File <name>" and any
#         further lines are kept as they are.
#
# Returns false when <name> is neither +path+ itself nor something below
# it; otherwise returns the entry with +path+ stripped from <name> ("." for
# +path+ itself), every line terminated by a newline.
# Raises RuntimeError ("Bad entry: ...") when the entry has no "File " line.
def mirror_metadata_parse( path, entry )
  raise "Bad entry: #{entry}" unless entry =~ /^File (.*)$/
  name = $1
  return false unless begins_with( name, path )
  # A bare string-prefix match would also claim siblings such as "foobar/x"
  # for the path "foo"; the match has to end on a path boundary.
  on_boundary = path.empty? || path[-1, 1] == "/" ||
                name.length == path.length || name[path.length, 1] == "/"
  return false unless on_boundary
  lines = entry.split( "\n" )
  lines[0] = "File " + remove_path_prefix( name, path )
  lines.map { |line| line + "\n" }.join
end
# Rewrite one file_statistics line so that it is relative to +path+.
#
# path - the path being split out of the archive.
# line - one line of the file: a filename followed by four space-separated
#        columns, or a comment line starting with "#".
#
# Returns true for comment lines (they belong in every output file), false
# when the filename is neither +path+ itself nor something below it, and
# otherwise the line with +path+ stripped from the filename.
# Raises RuntimeError ("Bad entry: ...") when a line that starts with +path+
# does not end in four columns.
def file_statistics_parse( path, line )
  return true if line =~ /^#/
  return false unless begins_with( line, path )
  # Peel the four trailing columns off from the right; the filename is
  # whatever precedes them, so spaces inside it survive untouched.
  fields = /\A(.*)((?: +\S+){4})\s*\z/.match( line )
  raise "Bad entry: #{line}" unless fields
  name = fields[1]
  columns = fields[2]
  return false unless begins_with( name, path )
  # A bare string-prefix match would also claim siblings such as "foobar/x"
  # or "foo bar" for the path "foo"; the match has to end on a path boundary.
  on_boundary = path.empty? || path[-1, 1] == "/" ||
                name.length == path.length || name[path.length, 1] == "/"
  return false unless on_boundary
  remove_path_prefix( name, path ) + columns + "\n"
end
# Tell whether the string +haystack+ starts with the string +needle+.
# An empty +needle+ matches every +haystack+.
def begins_with( haystack, needle )
  leading = haystack[0, needle.length]
  leading == needle
end
# Clean out the "other" control files: delete every plain file directly
# inside _other/rdiff-backup-data and leave its subdirectories alone.
other_data_dir = File.join( dest_dir, other, "rdiff-backup-data" )
Dir.foreach( other_data_dir ) do |file|
  next if ['.', '..'].include?( file )
  src = File.join( other_data_dir, file )
  File.unlink( src ) unless File.directory?( src )
end
# Split the control files found in $source/rdiff-backup-data across the new
# archives. Each file is handled according to the part of its name before
# the first ".":
# * backup, chars_to_quote, current_mirror, error_log, session_statistics:
#   copied unchanged into every archive, _other included.
# * extended_attributes: copied only when the file is zero bytes long;
#   otherwise the script aborts.
# * increments: nothing is done with these here.
# * mirror_metadata, file_statistics: read (through gzip for *.gz), cut into
#   entries, and every entry goes to the archive whose path claims it, with
#   that path stripped, or to _other when no path claims it.
# * anything else: the script aborts.
info "Splitting control files"
Dir.foreach( File.join( source, "rdiff-backup-data" ) ) do |file|
  next if file == '.' || file == '..'
  info "Splitting file #{file}"
  mask = file.split( "." ).shift
  src = File.join( source, "rdiff-backup-data", file )
  paths.each do |path|
    dest = File.join( dest_dir, fix_path( path ), "rdiff-backup-data", file )
    case mask
    when 'backup', 'chars_to_quote', 'current_mirror', 'error_log', 'session_statistics'
      # NOTE(review): File.copy comes from 'ftools', which newer Rubies no
      # longer ship; FileUtils.cp is the usual replacement.
      File.copy( src, dest )
    when 'extended_attributes'
      raise "File type is not supported: #{src}" if File.size( src ) > 0
      File.copy( src, dest )
    when 'increments'
      # Nothing to copy for these names.
    when 'mirror_metadata', 'file_statistics'
      # Split entry by entry below.
    else
      raise "No support for file '#{file}'"
    end
  end
  next unless mask == 'file_statistics' || mask == 'mirror_metadata'

  # Read the whole file as a list of lines.
  data = nil
  if src =~ /\.gz$/
    Zlib::GzipReader.open( src ) do |gz|
      data = gz.readlines
    end
  else
    data = IO.readlines( src )
  end

  # Group the lines into entries. A mirror_metadata entry runs from one
  # "File " line up to the next; a file_statistics entry is a single line.
  entries = []
  if mask == 'mirror_metadata'
    current_entry = ""
    data.each do |line|
      if line =~ /^File /
        entries.push current_entry if current_entry.length > 0
        # dup, so that appending to the entry never alters the line read.
        current_entry = line.dup
      else
        current_entry << line
      end
    end
    entries.push current_entry if current_entry.length > 0
  elsif mask == 'file_statistics'
    entries = data
  end

  # Hand every entry to the first path that claims it. Output is accumulated
  # with << (in place) rather than +=, which would copy the whole
  # accumulated string once per entry.
  write_me = {}
  entries.each do |entry|
    handled = false
    paths.each do |path|
      write_me[path] ||= ""
      next if path == other
      if mask == 'file_statistics'
        result = file_statistics_parse( path, entry )
      else
        result = mirror_metadata_parse( path, entry )
      end
      # True result means we write to all
      if result == true
        write_me[path] << entry
        next
      end
      # False means not to write to this path
      next if result == false
      # Anything else must be a string of the new entry for just this path
      write_me[path] << result
      handled = true
      break
    end
    write_me[other] << entry unless handled
  end

  # Write one output file per archive.
  write_me.each do |path, contents|
    dest = File.join( dest_dir, fix_path( path ),
                      "rdiff-backup-data", file )
    # An archive that received no entries gets an empty, uncompressed file:
    # the ".gz" suffix is dropped from its name.
    dest = dest[0..-4] if dest =~ /\.gz$/ && contents.length == 0
    if dest =~ /\.gz$/
      # GzipWriter.open writes through a binary-mode handle and closes it.
      Zlib::GzipWriter.open( dest ) { |gz| gz.write( contents ) }
    else
      File.open( dest, 'w' ) { |f| f.write( contents ) }
    end
  end
end