#!/usr/bin/perl -w # # psinib - Perl Snapshot Is Not Incremental Backup # # written by Dobrica Pavlinusic 2003-01-03 # released under GPL v2 or later. # # Backup SMB directories using file produced by LinNeighbourhood (or some # other program [vi :-)] which produces file in format: # # smbmount service mountpoint options # # # usage: # $ psinib.pl mountscript use strict 'vars'; use Data::Dumper; use Net::Ping; use POSIX qw(strftime); use List::Compare; use Filesys::SmbClient; #use Taint; use Fcntl qw(LOCK_EX LOCK_NB); use Digest::MD5; use File::Basename; use DB_file; # configuration my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S'; # strftime format for logfile my $DIR_TIME_FMT = '%Y%m%d'; # strftime format for backup dir my $LOG = '/var/log/backup.log'; # add path here... #$LOG = '/tmp/backup.log'; # store backups in which directory my $BACKUP_DEST = '/backup/isis_backup'; # files to ignore in backup my @ignore = ('.md5sum', '.md5db', '.backupignore', 'backupignore.txt'); # open log open(L, ">> $LOG") || die "can't open log $LOG: $!"; select((select(L), $|=1)[0]); # flush output # make a lock on logfile my $c = 0; { flock L, LOCK_EX | LOCK_NB and last; sleep 1; redo if ++$c < 10; # no response for 10 sec, bail out print STDERR "can't take lock on $LOG -- another $0 running?\n"; exit 1; } # taint path: nmblookup should be there! $ENV{'PATH'} = "/usr/bin:/bin"; my $mounts = shift @ARGV || 'mountscript'; # die "usage: $0 mountscript"; my @in_backup; # shares which are backeduped this run my $p = new Net::Ping->new("tcp", 2); # ping will try tcp connect to netbios-ssn (139) $p->{port_num} = getservbyname("netbios-ssn", "tcp"); my $backup_ok = 0; my $smb; my %smb_atime; my %smb_mtime; my %file_md5; open(M, $mounts) || die "can't open $mounts: $!"; while() { chomp; next if !/^\s*smbmount\s/; my (undef,$share,undef,$opt) = split(/\s+/,$_,4); my ($user,$passwd,$workgroup); foreach (split(/,/,$opt)) { my ($n,$v) = split(/=/,$_,2); if ($n =~ m/username/i) { if ($v =~ m#^(.+)/(.+)%(.+)$#) { ($user,$passwd,$workgroup) = ($1,$2,$3); } elsif ($v =~ m#^(.+)/(.+)$#) { ($user,$workgroup) = ($1,$2); } elsif ($v =~ m#^(.+)%(.+)$#) { ($user,$passwd) = ($1,$2); } else { $user = $v; } } elsif ($n =~ m#workgroup#i) { $workgroup = $v; } } push @in_backup,$share; my ($host,$dir,$date_dir) = share2host_dir($share); my $bl = "$BACKUP_DEST/$host/$dir/latest"; # latest backup my $bc = "$BACKUP_DEST/$host/$dir/$date_dir"; # current one my $real_bl; if (-l $bl) { $real_bl=readlink($bl) || die "can't read link $bl: $!"; $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/"); if (-l $bc && $real_bl eq $bc) { print "$share allready backuped...\n"; $backup_ok++; next; } } print "working on $share\n"; my $ip = get_ip($share); if ($ip) { xlog($share,"IP is $ip"); if ($p->ping($ip)) { snap_share($share,$user,$passwd,$workgroup); $backup_ok++; } } } close(M); xlog("","$backup_ok backups completed of total ".($#in_backup+1)." this time (".int($backup_ok*100/($#in_backup+1))." %)"); 1; #------------------------------------------------------------------------- # get IP number from share sub get_ip { my $share = shift; my $host = $1 if ($share =~ m#//([^/]+)/#); my $ip = `nmblookup $host`; if ($ip =~ m/(\d+\.\d+\.\d+\.\d+)\s$host/i) { return $1; } } # write entry to screen and log sub xlog { my $share = shift; my $t = strftime $LOG_TIME_FMT, localtime; my $m = shift || '[no log entry]'; print STDERR $m,"\n"; print L "$t $share\t$m\n"; } # dump warn and dies into log BEGIN { $SIG{'__WARN__'} = sub { xlog('WARN',$_[0]) ; warn $_[0] } } BEGIN { $SIG{'__DIE__'} = sub { xlog('DIE',$_[0]) ; die $_[0] } } # split share name to host, dir and currnet date dir sub share2host_dir { my $share = shift; my ($host,$dir); if ($share =~ m#//([^/]+)/(.+)$#) { ($host,$dir) = ($1,$2); $dir =~ s/\W/_/g; $dir =~ s/^_+//; $dir =~ s/_+$//; } else { print "Can't parse share $share into host and directory!\n"; return; } return ($host,$dir,strftime $DIR_TIME_FMT, localtime); } # make a snapshot of a share sub snap_share { my $share = shift; my %param = ( debug => 0 ); $param{username} = shift || warn "can't find username for share $share"; $param{password} = shift || warn "can't find passwod for share $share"; $param{workgroup} = shift || warn "can't find workgroup for share $share"; my ($host,$dir,$date_dir) = share2host_dir($share); # latest backup directory my $bl = "$BACKUP_DEST/$host/$dir/latest"; # current backup directory my $bc = "$BACKUP_DEST/$host/$dir/$date_dir"; my $real_bl; if (-l $bl) { $real_bl=readlink($bl) || die "can't read link $bl: $!"; $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/"); } else { print "no old backup, trying to find last backup, "; if (opendir(BL_DIR, "$BACKUP_DEST/$host/$dir")) { my @bl_dirs = sort grep { !/^\./ && -d "$BACKUP_DEST/$host/$dir/$_" } readdir(BL_DIR); closedir(BL_DIR); $real_bl=pop @bl_dirs; print "using $real_bl as latest...\n"; $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/"); if ($real_bl eq $bc) { xlog($share,"latest from today (possible partial backup)"); rename $real_bl,$real_bl.".partial" || warn "can't reaname partial backup: $!"; $real_bl .= ".partial"; } } else { print "this is first run...\n"; } } if (-l $bc && $real_bl && $real_bl eq $bc) { print "$share allready backuped...\n"; return; } die "You should really create BACKUP_DEST [$BACKUP_DEST] by hand! " if (!-e $BACKUP_DEST); if (! -e "$BACKUP_DEST/$host") { mkdir "$BACKUP_DEST/$host" || die "can't make dir for host $host, $BACKUP_DEST/$host: $!"; print "created host directory $BACKUP_DEST/$host...\n"; } if (! -e "$BACKUP_DEST/$host/$dir") { mkdir "$BACKUP_DEST/$host/$dir" || die "can't make dir for share $share, $BACKUP_DEST/$host/$dir $!"; print "created dir for share $share, $BACKUP_DEST/$host/$dir...\n"; } mkdir $bc || die "can't make dir for current backup $bc: $!"; my @dirs = ( "/" ); my @smb_dirs = ( "/" ); my $transfer = 0; # bytes transfered over network # this will store all available files and sizes my @files; my %file_size; my %file_atime; my %file_mtime; #my %file_md5; my @smb_files; my %smb_size; #my %smb_atime; #my %smb_mtime; sub norm_dir { my $foo = shift; my $prefix = shift; $foo =~ s#//+#/#g; $foo =~ s#/+$##g; $foo =~ s#^/+##g; return $prefix.$foo if ($prefix); return $foo; } # read local filesystem my $di = 0; while ($di <= $#dirs && $real_bl) { my $d=$dirs[$di++]; opendir(DIR,"$real_bl/$d") || warn "opendir($real_bl/$d): $!\n"; # read .backupignore if exists if (-f "$real_bl/$d/.backupignore") { open(I,"$real_bl/$d/.backupignore"); while() { chomp; push @ignore,norm_dir("$d/$_"); } close(I); #print STDERR "ignore: ",join("|",@ignore),"\n"; link "$real_bl/$d/.backupignore","$bc/$d/.backupignore" || warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n"; } # read .md5sum if exists if (-f "$real_bl/$d/.md5sum") { open(I,"$real_bl/$d/.md5sum"); while() { chomp; my ($md5,$f) = split(/\s+/,$_,2); $file_md5{$f}=$md5; } close(I); } my @clutter = readdir(DIR); foreach my $f (@clutter) { next if ($f eq '.'); next if ($f eq '..'); my $pr = norm_dir("$d/$f"); # path relative my $pf = norm_dir("$d/$f","$real_bl/"); # path full if (grep(/^\Q$pr\E$/,@ignore) == 0) { if (-f $pf) { push @files,$pr; $file_size{$pr}=(stat($pf))[7]; $file_atime{$pr}=(stat($pf))[8]; $file_mtime{$pr}=(stat($pf))[9]; } elsif (-d $pf) { push @dirs,$pr; } else { print STDERR "unknown type: $pf\n"; } } else { print STDERR "ignored: $pr\n"; } } } xlog($share,($#files+1)." files and ".($#dirs+1)." dirs on local disk before backup"); # read smb filesystem xlog($share,"smb to $share as $param{username}/$param{workgroup}"); # FIX: how to aviod creation of ~/.smb/smb.conf ? $smb = new Filesys::SmbClient(%param) || die "SmbClient :$!\n"; $di = 0; while ($di <= $#smb_dirs) { my $d=$smb_dirs[$di]; my $pf = norm_dir($d,"smb:$share/"); # path full my $D = $smb->opendir($pf); if (! $D) { xlog($share,"FATAL: $share: $!"); # remove failing dir delete $smb_dirs[$di]; next; } $di++; my @clutter = $smb->readdir_struct($D); foreach my $item (@clutter) { my $f = $item->[1]; next if ($f eq '.'); next if ($f eq '..'); my $pr = norm_dir("$d/$f"); # path relative my $pf = norm_dir("$d/$f","smb:$share/"); # path full if (grep(/^\Q$pr\E$/,@ignore) == 0) { if ($item->[0] == main::SMBC_FILE) { push @smb_files,$pr; $smb_size{$pr}=($smb->stat($pf))[7]; $smb_atime{$pr}=($smb->stat($pf))[10]; $smb_mtime{$pr}=($smb->stat($pf))[11]; } elsif ($item->[0] == main::SMBC_DIR) { push @smb_dirs,$pr; } else { print STDERR "unknown type: $pf\n"; } } else { print STDERR "smb ignored: $pr\n"; } } } xlog($share,($#smb_files+1)." files and ".($#smb_dirs+1)." dirs on remote share"); # sync dirs my $lc = List::Compare->new(\@dirs, \@smb_dirs); my @dirs2erase = $lc->get_Lonly; my @dirs2create = $lc->get_Ronly; xlog($share,($#dirs2erase+1)." dirs to erase and ".($#dirs2create+1)." dirs to create"); # create new dirs foreach (sort @smb_dirs) { mkdir "$bc/$_" || warn "mkdir $_: $!\n"; } # sync files $lc = List::Compare->new(\@files, \@smb_files); my @files2erase = $lc->get_Lonly; my @files2create = $lc->get_Ronly; xlog($share,($#files2erase+1)." files to erase and ".($#files2create+1)." files to create"); sub smb_copy { my $smb = shift; my $from = shift; my $to = shift; my $l = 0; foreach my $f (@_) { #print "smb_copy $from/$f -> $to/$f\n"; if (! open(F,"> $to/$f")) { print STDERR "can't open new file $to/$f: $!\n"; next; } my $md5 = Digest::MD5->new; my $fd = $smb->open("$from/$f"); if (! $fd) { print STDERR "can't open smb file $from/$f: $!\n"; next; } while (defined(my $b=$smb->read($fd,4096))) { print F $b; $l += length($b); $md5->add($b); } $smb->close($fd); close(F); $file_md5{$f} = $md5->hexdigest; # FIX: this fails with -T my ($a,$m) = ($smb->stat("$from/$f"))[10,11]; utime $a, $m, "$to/$f" || warn "can't update utime on $to/$f: $!\n"; } return $l; } # copy new files foreach (@files2create) { $transfer += smb_copy($smb,"smb:$share",$bc,$_); } my $size_sync = 0; my $atime_sync = 0; my $mtime_sync = 0; my @sync_files; my @ln_files; foreach ($lc->get_intersection) { my $f; if ($file_size{$_} != $smb_size{$_}) { $f=$_; $size_sync++; } if ($file_atime{$_} != $smb_atime{$_}) { $f=$_; $atime_sync++; } if ($file_mtime{$_} != $smb_mtime{$_}) { $f=$_; $mtime_sync++; } if ($f) { push @sync_files, $f; } else { push @ln_files, $_; } } xlog($share,($#sync_files+1)." files will be updated (diff: $size_sync size, $atime_sync atime, $mtime_sync mtime), ".($#ln_files+1)." will be linked."); foreach (@sync_files) { $transfer += smb_copy($smb,"smb:$share",$bc,$_); } xlog($share,"$transfer bytes transfered..."); foreach (@ln_files) { link "$real_bl/$_","$bc/$_" || warn "link $real_bl/$_ -> $bc/$_: $!\n"; } # remove files foreach (sort @files2erase) { unlink "$bc/$_" || warn "unlink $_: $!\n"; } # remove not needed dirs (after files) foreach (sort @dirs2erase) { rmdir "$bc/$_" || warn "rmdir $_: $!\n"; } # remove old .md5sum foreach (sort @dirs) { unlink "$bc/$_/.md5sum" if (-e "$bc/$_/.md5sum"); } # create .md5sum my $last_dir = ''; my $md5; foreach my $f (sort { $file_md5{$a} cmp $file_md5{$b} } keys %file_md5) { my $dir = dirname($f); my $file = basename($f); #print "$f -- $dir / $file<--\n"; if ($dir ne $last_dir) { close($md5) if ($md5); open($md5, ">> $bc/$dir/.md5sum") || warn "can't create $bc/$dir/.md5sum: $!"; $last_dir = $dir; #print STDERR "writing $last_dir/.md5sum\n"; } print $md5 $file_md5{$f}," $file\n"; } close($md5); # create leatest link #print "ln -s $bc $real_bl\n"; if (-l $bl) { unlink $bl || warn "can't remove old latest symlink $bl: $!\n"; } symlink $bc,$bl || warn "can't create latest symlink $bl -> $bc: $!\n"; # FIX: sanity check -- remove for speedup xlog($share,"failed to create latest symlink $bl -> $bc...") if (readlink($bl) ne $bc || ! -l $bl); xlog($share,"backup completed..."); } __END__ #------------------------------------------------------------------------- =head1 NAME psinib - Perl Snapshot Is Not Incremental Backup =head1 SYNOPSIS ./psinib.pl =head1 DESCRIPTION This script in current version support just backup of Samba (or Micro$oft Winblowz) shares to central disk space. Central disk space is organized in multiple directories named after: =over 4 =item * server which is sharing files to be backed up =item * name of share on server =item * dated directory named like standard ISO date format (YYYYMMDD). =back In each dated directory you will find I of all files on exported share on that particular date. You can also use symlink I which will lead you to last completed backup. After that you can use some other backup software to transfer I to tape, CD-ROM or some other media. =head2 Design considerations Since taking of share snapshot every day requires a lot of disk space and network bandwidth, B uses several techniques to keep disk usage and network traffic at acceptable level: =over 3 =item - usage of hard-links to provide same files in each snapshot (as opposed to have multiple copies of same file) =item - usage of file size, atime and mtime to find changes of files without transferring whole file over network (just share browsing is transfered over network) =item - usage of C<.md5sum> files (compatible with command-line utility C) to keep file between snapshots hard-linked =back =head1 CONFIGURATION This section is not yet written. =head1 HACKS, TRICKS, BUGS and LIMITATIONS This chapter will have all content that doesn't fit anywhere else. =head2 Can snapshots be more frequent than daily? There is not real reason why you can't take snapshot more often than once a day. Actually, if you are using B to backup Windows workstations you already know that they tend to come-and-go during the day (reboots probably ;-), so running B several times a day increases your chance of having up-to-date backup (B will not make multiple snapshots for same day, nor will it update snapshot for current day if it already exists). However, changing B to produce snapshots which are, for example, hourly is a simple change of C<$DIR_TIME_FMT> which is currently set to C<'%Y%m%d'> (see I documentation for explanation of that format). If you change that to C<'%Y%m%d-%H> you can have hourly snapshots (if your network is fast enough, that is...). Also, some of messages in program will sound strange, but other than that it should work. I. =head2 Do I really need to share every directory which I want to snapshot? Actually, no. Due to usage of C module, you can also specify sub-directory inside your share that you want to backup. This feature is most useful if you want to use administrative shares (but, have in mind that you have to enter your Win administrator password in unencrypted file on disk to do that) like this: smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win After that you will get directories with snapshots like: server/c_WinNT_fonts/yyyymmdd/.... =head2 Won't I run out of disk space? Of course you will... Snapshots and logfiles will eventually fill-up your disk. However, you can do two things to stop that: =head3 Clean snapshort older than x days You can add following command to your C crontab: find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \; I assume that C is directory in which are your snapshots and that you don't want to keep snapshots older than 11 days (that's C<-mtime +11> part of command). =head3 Rotate your logs I will leave that to you. I relay on GNU/Debian's C to do it for me. =head2 What are I directories? If there isn't I symlink in snapshot directory, it's preatty safe to assume that previous backup from that day failed. So, that directory will be renamed to I and snapshot will be performed again, linking same files (other alternative would be to erase that dir and find second-oldest directory, but this seemed like more correct approach). =head1 AUTHOR Dobrica Pavlinusic L =head1 LICENSE This product is licensed under GNU Public License (GPL) v2 or later. =cut