#!/usr/bin/perl -w
#
# fastcopy.pl
#
# This script does a faster sync than a simple cluster-fork.
# It uses the already synced hosts as seeds for further copying: every sink
# that has been synced successfully is added to the pool of seeds, so the
# number of parallel copies grows with every finished transfer.
#
# Usage: fastcopy.pl --directory directory/to/synchronize
#                    [--settings 'rsync options'] [more rsync options]
#
# Options that are not recognised are appended to the rsync settings.
# The default rsync settings are '--archive'.
#
# Exit status: 0 when every node was synced, 1 on a usage error or when at
# least one node reported an error.
#
# NOTE(review): the directory is handed to rsync unchanged as both source and
# destination; rsync treats a source with and without a trailing slash
# differently -- confirm which form callers are expected to pass.
#
# Origin: commit 3d7b8868a06b3fa929ca4e75b793544e14dc2e6d
#         Jan van Haarst <jan.vanhaarst@wur.nl>, Thu, 13 Sep 2007 09:04:45 +0000
#         "script to do a faster sync than a simple cluster-fork, it uses the
#         already synced hosts as seeds for further copying"

# Modules
use strict;
use warnings;
use Getopt::Long;
use POSIX ":sys_wait_h";

# Let unknown options pass through to @ARGV (so they can be forwarded to rsync)
Getopt::Long::Configure('pass_through');

# Show progress lines immediately, also when STDOUT is not a terminal.
$| = 1;

# Variables
my $sync_dir       = '';
my $rsync_settings = '--archive';
my @errors;       # sinks whose sync exited with a non-zero status
my @seeds;        # hosts that hold the data and are currently idle
my @sinks;        # hosts that still have to be synced
my %processes;    # pid => { seed => host, sink => host } of running syncs

# Retrieve data from commandline
GetOptions(
    'directory=s' => \$sync_dir,
    'settings:s'  => \$rsync_settings,
);
if ($sync_dir eq "") {
    # No directory was given.
    print STDERR "Usage: $0 --directory directory/to/synchronize [$rsync_settings]\n";
    exit 1;
}

# Put the rest of the options into rsync_settings
$rsync_settings = join ' ', $rsync_settings, @ARGV;

# Fill arrays
# First set the frontend as seed
chomp(@seeds = `qconf -ss`);
# Then the nodes as sinks
chomp(@sinks = `qconf -sel`);

# Without a seed nothing can ever be copied; stop instead of waiting forever.
die "No seed hosts found ('qconf -ss' returned nothing)\n" unless @seeds;

# Now do the actual copying.
# Children are reaped synchronously here rather than from a SIGCHLD handler:
# a handler could run before the parent had registered the new pid, and the
# children would inherit it as well.
while (@sinks or %processes) {
    # Start a sync for every idle seed that can be paired with a sink.
    fork_sync();

    # Nothing is running and nothing could be started: avoid spinning.
    last unless %processes;

    # All sinks are being worked on, now wait for them to finish.
    if (!@sinks) {
        my $running = scalar keys %processes;
        print "waiting for " . $running;
        print " child";
        print "s" if ($running > 1);
        print " to exit\n";
    }

    # Block until one child exits; this frees its seed (and, on success,
    # turns its sink into an additional seed).
    reap_child() or last;
}

# Print out errors
if (@errors) {
    print "The following nodes gave errors and weren't synced:\n";
    print $_ . "\n" for @errors;
}

# Done
exit(@errors ? 1 : 0);

# Subs.

# Wait for one child process to exit and put its hosts in the right arrays.
# A seed always returns to @seeds; the sink is added to @seeds when the sync
# exited with status 0 and to @errors otherwise.
# Returns true when a child was reaped, false when there are no children left.
sub reap_child {
    my $pid = waitpid(-1, 0);
    if ($pid <= 0) {
        # No child left to wait for although some are still registered.
        warn "Lost track of " . (scalar keys %processes) . " child process(es)\n";
        return 0;
    }

    # Save the status right away, before anything else can overwrite $?.
    my $status = $?;

    # Remove finished process from hash
    my $job = delete $processes{$pid};

    # Not a sync we started; nothing to book-keep.
    return 1 unless $job;

    push @seeds, $job->{seed};
    if ($status != 0) {
        # Enter errors into array
        push @errors, $job->{sink};
    }
    else {
        # If no error occurred, both can be used as seed
        push @seeds, $job->{sink};
    }
    return 1;
}

# Fork one sync process for every available seed/sink pair.
# Each child replaces itself with
#   ssh <sink> "rsync <settings> -e ssh <seed>:<dir> <dir>"
# so the exit status seen by the parent is the one reported by ssh.
# Dies when fork fails.
sub fork_sync {
    while (@seeds and @sinks) {
        # grab the seed and sink from the arrays
        my $temp_seed = pop @seeds;
        my $temp_sink = pop @sinks;

        # Print where we are
        printf "%25s --> %25s\n", $temp_seed, $temp_sink;

        # Do the actual fork
        my $pid = fork();
        die "Cannot fork: $!" unless defined($pid);

        if ($pid == 0) {
            # Child
            # The output of the remote command is not shown, only errors are.
            open STDOUT, '>', '/dev/null'
                or warn "Cannot redirect STDOUT: $!\n";

            # Do the sync. The list form of exec skips the local shell; the
            # command string is still interpreted by the shell on the sink.
            my $remote_command =
                "rsync $rsync_settings -e ssh $temp_seed:$sync_dir $sync_dir";
            exec('ssh', $temp_sink, $remote_command)
                or warn "Cannot exec ssh: $!\n";

            # Only reached when exec failed; report that as an error.
            exit 255;
        }

        # Parent
        # Track the process
        $processes{$pid} = { seed => $temp_seed, sink => $temp_sink };
    }
    return;
}