SplitRdiffBackup

From RdiffBackupWiki

Jump to: navigation, search
#!/usr/bin/ruby -w
# Take an archive created by rdiff-backup and split it into multiple separate
# archives, leaving the source intact.
#
# Copyright (C) 2007 National Fitness Financial Systems
# Written by Steven Willoughby (stevenw _at_ nffs _dot_ com)
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#


require 'optparse'
require 'fileutils'
begin
	# ftools only ships with Ruby 1.8; file copies go through fileutils, so a
	# missing ftools is not an error on newer interpreters.
	require 'ftools'
rescue LoadError
end
require 'zlib'

# Recognised command-line flags are stored in @options.  parse! consumes the
# flags it handles from ARGV, so only positional arguments are left behind.
@options = {}
parser = OptionParser.new do |opts|
	opts.banner = "Usage: split-rdiff-backup [options] source dest_dir path [more paths]
This script copies the rdiff-backup archive called $source to $dest_dir/_other.
Then it removes each of the paths given from _other as and places them in 
$dest_dir/new-path.  The control files and increments in $source/rdiff-backup-data are split up appropriately."
	opts.on( "-v", "--verbose", "Run verbosely" ) { |flag| @options[:verbose] = flag }
# 	opts.on( "-m", "--move", "Move instead of copy." ) do |m|
# 		@options[:move] = m
# 	end
end
parser.parse!

# Name of the catch-all archive that keeps everything not split out.
other = "_other"

# First positional argument is the archive to split, the second is the
# directory that receives the new archives; whatever remains in ARGV is the
# list of paths to split out.
source, dest_dir = ARGV.shift, ARGV.shift

# Longest paths come first so that nested inputs such as
#  foo
#  foo/bar
# are handled properly (foo/bar is processed before foo).
paths = ARGV.sort { |left, right| right.length <=> left.length }

# The catch-all archive is always processed last.
paths << other

# Print +message+ on standard output, but only when the verbose option is set.
def info(message)
	return unless @options[:verbose]
	puts message
end

# Echo the source and destination that were taken from the command line;
# info only prints this when the verbose option is set.
info "Source: '#{source}'  Dest: '#{dest_dir}'"

# Create +dir+ unless something already exists at that path.  Either outcome
# is reported through info.
def mkdir( dir )
	return info( "Directory already existed: #{dir}" ) if File.exist?( dir )
	info "Making directory: #{dir}"
	Dir.mkdir( dir )
end

# Turn a path given on the command line into a single directory name that
# can be created directly below the destination directory.
#
# Every '/' separator is replaced by '-' and empty components (leading,
# trailing or doubled slashes) are dropped, so "a/b/c" becomes "a-b-c" and
# "foo/" becomes "foo".  A path without any '/' is returned unchanged.
def fix_path( path )
	return path unless path.include?( '/' )
	# File.split would only separate the last component and leave slashes in
	# the result for deeper paths, so split on every separator instead.
	return path.split( '/' ).reject { |part| part.empty? }.join( '-' )
end

# Check every requested path before anything is copied: each one must exist
# inside the source archive and be a directory.  The catch-all entry stands
# for the source archive itself.  The planned mapping is logged via info.
paths.each do |entry|
	candidate = File.join( source, entry )
	candidate = source if entry == other
	unless File.exist?( candidate )
		raise "No such path: #{candidate}"
	end
	unless File.directory?( candidate )
		raise "Not a directory: #{candidate}"
	end
	target = File.join( dest_dir, fix_path( entry ) )
	info "#{candidate} => #{target}"
end

# Return +original+ with the first prefix.length characters removed and one
# leading '/' stripped from what is left.  An empty result is reported as
# "." (the prefix itself).  The caller is expected to have checked that
# +original+ really starts with +prefix+; +original+ is not modified.
def remove_path_prefix( original, prefix )
	remainder = original.slice( prefix.length..-1 )
	# Strip the separator first so that a remainder of just "/" also ends up
	# as "." instead of an empty string.
	remainder = remainder.slice( 1..-1 ) if remainder[0, 1] == "/"
	remainder = "." if remainder.empty?
	return remainder
end

# Copy the data and increments.
# The whole archive is first copied into the catch-all directory; the split
# itself is then done by moving files out of that copy.
inc = "rdiff-backup-data/increments"

mkdir dest_dir

# Trailing slashes on both rsync arguments, exactly as built here.
copy_from = source + "/"
copy_to = File.join( dest_dir, other ) + "/"
cmd = ["rsync", "-a", copy_from, copy_to]

info "Copying entire archive"
unless system( *cmd )
	raise "Could not run command: #{cmd}"
end

info "Splitting archive"
paths.each do |path|
	# The catch-all archive stays where the copy put it.
	next if path == other

	# Move the mirrored files for this path out of the catch-all archive.
	src = File.join( dest_dir, other, path )
	dest = File.join( dest_dir, fix_path( path ) )
	info "Renaming #{src} to #{dest}"
	File.rename( src, dest )
	mkdir( File.join( dest_dir, fix_path( path ), 'rdiff-backup-data' ) )

	# Move this path's increment tree so that it becomes the new archive's
	# rdiff-backup-data/increments directory.
	src = File.join( dest_dir, other, inc, path )
	dest = File.join( dest_dir, fix_path(path), inc)
	info "Renaming #{src} to #{dest}"
	File.rename( src, dest )

	# Entries next to the old increment directory that are named
	# "<directory name>.<suffix>" are renamed to "increments.<suffix>" in the
	# new archive.  The parent directory is listed and filtered by literal
	# prefix instead of being globbed, so glob metacharacters in dest_dir or
	# path cannot change which entries are picked up.
	# FIXME Check for exact rdiff-backup file format
	inc_parent = File.dirname( src )
	sibling_prefix = File.basename( src ) + "."
	Dir.entries( inc_parent ).sort.each do |name|
		next unless name[0, sibling_prefix.length] == sibling_prefix
		file = File.join( inc_parent, name )
		# Keep the "." and everything after it as the new suffix.
		foo = dest + name[( sibling_prefix.length - 1 )..-1]
		# Reported through info so that it honours the verbose option like
		# every other progress message.
		info "Renaming #{file} to #{foo}"
		File.rename( file, foo )
	end
end

# Rewrite one mirror_metadata entry for the archive that is split out at
# +path+.
#
# +entry+ is the text of one entry: a "File <name>" line followed by any
# number of further lines.  Returns false when <name> is not +path+ itself
# or something below it.  Otherwise returns the entry with <name> made
# relative to +path+ and all other lines unchanged, ending in a newline.
# Raises a RuntimeError when the entry does not start with a "File " line.
def mirror_metadata_parse( path, entry )
	raise "Bad entry: #{entry}" unless entry =~ /\AFile (.*)$/
	name = $1
	return false unless begins_with( name, path )

	# A bare string prefix is not enough: "foobar/x" starts with "foo" but is
	# not inside it.  The match has to end at the end of the name or at a
	# separator (which may already be the last character of +path+).
	boundary = name[path.length, 1]
	return false unless boundary == "" || boundary == "/" || path[-1, 1] == "/"

	# Replace the first line in place so that an entry consisting of nothing
	# but its File line does not pick up an extra blank line.
	lines = entry.split( "\n" )
	lines[0] = "File " + remove_path_prefix( name, path )
	return lines.join( "\n" ) + "\n"
end

# Rewrite one file_statistics line for the archive that is split out at
# +path+.
#
# Returns true for comment lines (starting with '#'), which the caller
# writes unchanged.  Returns false when the line does not describe +path+
# or something below it, or when it has fewer than five whitespace-separated
# fields.  Otherwise returns the line with the name (everything before the
# last four fields) made relative to +path+, terminated by a newline.
def file_statistics_parse( path, line )
	return true if line =~ /^#/
	return false unless begins_with( line, path )

	fields = line.split( " " )
	# A name plus four trailing fields are required; anything shorter
	# cannot be rewritten.
	return false if fields.length < 5
	name = fields.slice( 0..-5 ).join( " " )
	extra = fields.slice( -4..-1 ).join( " " )

	# A bare string prefix is not enough: "foobar/x" starts with "foo" but is
	# not inside it.  The match has to end at the end of the name or at a
	# separator (which may already be the last character of +path+).
	boundary = name[path.length, 1]
	return false unless boundary == "" || boundary == "/" || path[-1, 1] == "/"

	return remove_path_prefix( name, path ) + " " + extra + "\n"
end

# True when the first needle.length characters of +haystack+ are equal to
# +needle+.
def begins_with( haystack, needle )
	leading = haystack[0, needle.length]
	leading == needle
end

# Clean out the "other" control files: every non-directory entry directly
# inside the catch-all copy's rdiff-backup-data is deleted; sub-directories
# are left alone.
# NOTE(review): this also deletes any top-level increments.* files of the
# catch-all copy, and the control-file split further down skips that file
# type -- confirm that this is intended.
other_control_dir = File.join( dest_dir, other, "rdiff-backup-data" )
Dir.foreach( other_control_dir ) do |name|
	next if ['.', '..'].include?( name )
	victim = File.join( other_control_dir, name )
	File.unlink( victim ) unless File.directory?( victim )
end

# Distribute every control file in the source's rdiff-backup-data over the
# new archives.  The file name up to the first '.' decides the treatment:
# some files are copied verbatim to every archive, mirror_metadata and
# file_statistics are split entry by entry, anything unknown is an error.
info "Splitting control files"
Dir.foreach( File.join( source, "rdiff-backup-data" ) ) do |file|
	next if file == '.' || file == '..'
	info "Splitting file #{file}"
	mask = file.split( "." ).first
	src = File.join( source, "rdiff-backup-data", file )
	paths.each do |path|
		dest = File.join( dest_dir, fix_path( path ), "rdiff-backup-data", file )
		case mask
		when 'backup', 'chars_to_quote', 'current_mirror', 'error_log', 'session_statistics'
			# FileUtils.cp instead of ftools' File.copy, which no longer exists
			# on Ruby 1.9 and later.
			FileUtils.cp( src, dest )
		when 'extended_attributes'
			# Only empty extended attribute files can be handled.
			raise "File type is not supported: #{src}" if File.size( src ) > 0
			FileUtils.cp( src, dest )
		when 'increments'
			# Nothing is written for these here.
		when 'mirror_metadata', 'file_statistics'
			# Split per entry further down.
		else
			raise "No support for file '#{file}'"
		end
	end

	next unless mask == 'file_statistics' || mask == 'mirror_metadata'

	# Read the control file line by line, transparently handling gzip.
	data = nil
	if src =~ /\.gz$/
		Zlib::GzipReader.open( src ) do |gz|
			data = gz.readlines
		end
	else
		data = IO.readlines( src )
	end

	# mirror_metadata entries span several lines and start at each "File "
	# line; file_statistics has one entry per line.
	entries = []
	if mask == 'mirror_metadata'
		current_entry = ""
		data.each do |line|
			unless line =~ /^File /
				current_entry << line
				next
			end
			entries.push current_entry if current_entry.length > 0
			# dup: the entry is appended to in place and must not alter data
			current_entry = line.dup
		end
		entries.push current_entry if current_entry.length > 0
	elsif mask == 'file_statistics'
		entries = data
	end

	# Collect the output per archive.  Strings are appended to in place; with
	# += every entry would copy everything accumulated so far.
	write_me = {}
	entries.each do |entry|
		handled = false
		paths.each do |path|
			write_me[path] ||= ""
			next if path == other

			if mask == 'file_statistics'
				result = file_statistics_parse( path, entry )
			else
				result = mirror_metadata_parse( path, entry )
			end

			# True result means we write to all
			if result == true
				write_me[path] << entry
				next
			end

			# False means not to write to this path
			next if result == false

			# Anything else must be a string of the new entry for just this path
			write_me[path] << result
			handled = true
			break
		end
		# Entries no split path claimed stay in the catch-all archive.
		write_me[other] << entry unless handled
	end

	write_me.each do |path, contents|
		dest = File.join( dest_dir, fix_path( path ),
				 "rdiff-backup-data", file )

		# With nothing to write, an empty uncompressed file is created in place
		# of a gzip file.
		if dest =~ /\.gz$/ && contents.length == 0
			dest = dest[0..-4]
		end

		if dest =~ /\.gz$/
			# The block form finishes the gzip stream and closes the file once.
			Zlib::GzipWriter.open( dest ) do |gz|
				gz.write( contents )
			end
		else
			File.open( dest, 'w' ) do |f|
				f.write( contents )
			end
		end
	end
end
Personal tools