#! /usr/bin/perl -w
#
# Written by Adam Byrtek, 2002
# Rewritten by David Sterba, 2009
#
# Extfs to handle patches in context and unified diff format.
# Known issues: When name of file to patch is modified during editing,
# hunk is duplicated on copyin. It is unavoidable.

use bytes;
use strict;
use POSIX;
use File::Temp 'tempfile';

# standard binaries
my $lzip = 'lzip';
my $lz4  = 'lz4';
my $lzma = 'lzma';
my $xz   = 'xz';
my $bzip = 'bzip2';
my $gzip = 'gzip';
my $fileutil = 'file -b';

# date parsing requires Date::Parse from TimeDate module
my $parsedates = eval 'require Date::Parse';

# regular expressions
my $unified_header=qr/^--- .*\t.*\n\+\+\+ .*\t.*\n$/;
my $unified_extract=qr/^--- ([^\t]+).*\n\+\+\+ ([^\t]+)\s*(.*)\n/;
my $unified_header2=qr/^--- .*\n\+\+\+ .*\n$/;
my $unified_extract2=qr/^--- ([^\s]+).*\n\+\+\+ ([^\s]+)\s*(.*)\n/;
my $unified_contents=qr/^([+\-\\ \n]|@@ .* @@)/;
my $unified_hunk=qr/@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@.*\n/;

my $context_header=qr/^\*\*\* .*\t.*\n--- .*\t.*\n$/;
my $context_extract=qr/^\*\*\* ([^\t]+).*\n--- ([^\t]+)\s*(.*)\n/;
my $context_header2=qr/^\*\*\* .*\n--- .*\n$/;
my $context_extract2=qr/^\*\*\* ([^\s]+).*\n--- ([^\s]+)\s*(.*)\n/;
my $context_contents=qr/^([!+\-\\ \n]|-{3} .* -{4}|\*{3} .* \*{4}|\*{15})/;

my $ls_extract_id=qr/^[^\s]+\s+[^\s]+\s+([^\s]+)\s+([^\s]+)/;
my $basename=qr|^(.*/)*([^/]+)$|;

# Collapse duplicate slashes and strip leading './'/'../' runs (after the
# start of the string or after a slash).  Purely textual; does not touch
# the filesystem.
sub patchfs_canonicalize_path ($) {
    my ($fname) = @_;
    $fname =~ s,/+,/,g;
    $fname =~ s,(^|/)(?:\.?\./)+,$1,;
    return $fname;
}

# output unix date in a mc-readable format (MM-DD-YYYY HH:MM, local time)
sub timef
{
    my @time=localtime($_[0]);
    return sprintf '%02d-%02d-%02d %02d:%02d',
                   $time[4]+1, $time[3], $time[5]+1900, $time[2], $time[1];
}

# parse given string as a date and return unix time
sub datetime
{
    # in case of problems fall back to 0 in unix time
    # note: str2time interprets some wrong values (eg. " ") as 'today'
    if ($parsedates && defined (my $t=str2time($_[0]))) {
        return timef($t);
    }
    return timef(time);
}

# print message on stderr and exit
sub error
{
    print STDERR $_[0], "\n";
    exit 1;
}

# (compressed) input: build a shell command that decompresses the given
# file to stdout, choosing the tool from file(1)'s type description
sub myin
{
    my ($qfname)=(quotemeta $_[0]);

    $_=`$fileutil $qfname`;
    if (/^'*lz4/) {
        return "$lz4 -dc $qfname";
    } elsif (/^'*lzip/) {
        return "$lzip -dc $qfname";
    } elsif (/^'*lzma/) {
        return "$lzma -dc $qfname";
    } elsif (/^'*xz/) {
        return "$xz -dc $qfname";
    } elsif (/^'*bzip/) {
        return "$bzip -dc $qfname";
    } elsif (/^'*gzip/) {
        return "$gzip -dc $qfname";
    } else {
        return "cat $qfname";
    }
}

# (compressed) output: build a shell command that (re)compresses stdin
# into the given file, appending when $append is true
sub myout
{
    my ($qfname,$append)=(quotemeta $_[0],$_[1]);
    my ($sep) = $append ? '>>' : '>';

    $_=`$fileutil $qfname`;
    if (/^'*lz4/) {
        return "$lz4 -c $sep $qfname";
    } elsif (/^'*lzip/) {
        return "$lzip -c $sep $qfname";
    } elsif (/^'*lzma/) {
        return "$lzma -c $sep $qfname";
    } elsif (/^'*xz/) {
        return "$xz -c $sep $qfname";
    } elsif (/^'*bzip/) {
        return "$bzip -c $sep $qfname";
    } elsif (/^'*gzip/) {
        return "$gzip -c $sep $qfname";
    } else {
        return "cat $sep $qfname";
    }
}

# select diff filename conforming with rules found in diff.info:
# fewest path components, then shortest basename, then shortest full name.
# Returns ($name, $prefix) where $prefix marks created/removed files.
sub diff_filename
{
    my ($fsrc,$fdst)= @_;
    # TODO: can remove these two calls later
    $fsrc = patchfs_canonicalize_path ($fsrc);
    $fdst = patchfs_canonicalize_path ($fdst);

    if (!$fdst && !$fsrc) {
        error 'Index: not yet implemented';
    } elsif (!$fsrc || $fsrc eq '/dev/null') {
        return ($fdst,'PATCH-CREATE/');
    } elsif (!$fdst || $fdst eq '/dev/null') {
        return ($fsrc,'PATCH-REMOVE/');
    } elsif (($fdst eq '/dev/null') && ($fsrc eq '/dev/null')) {
        error 'Malformed diff, missing a sane filename';
    } else {
        # fewest path name components
        # (s|/|/|g is a no-op substitution used only for its count)
        if ($fdst=~s|/|/|g < $fsrc=~s|/|/|g) {
            return ($fdst,'');
        } elsif ($fdst=~s|/|/|g > $fsrc=~s|/|/|g) {
            return ($fsrc,'');
        } else {
            # shorter base name
            if (($fdst=~/$basename/o,length $2) < ($fsrc=~/$basename/o,length $2)) {
                return ($fdst,'');
            } elsif (($fdst=~/$basename/o,length $2) > ($fsrc=~/$basename/o,length $2)) {
                return ($fsrc,'');
            } else {
                # shortest names
                if (length $fdst < length $fsrc) {
                    return ($fdst,'');
                } else {
                    return ($fsrc,'');
                }
            }
        }
    }
}

# Scan the patch read from the global filehandle I with a small state
# machine (0: expecting comments/headers, 1: expecting hunk headers,
# 2: inside a hunk) and either list the contained per-file diffs or
# copy/remove a single one.
# IN: diff "archive" name
# IN: file handle for output; STDOUT for list, tempfile else
# IN: filename to watch (for: copyout, rm), '' for: list
# IN: remove the file?
#     true - ... and print out the rest
#     false - ie. copyout mode, print just the file
# In list context also returns refs to the source/destination name lists.
sub parse($$$$)
{
    my $archive=quotemeta shift;
    my $fh=shift;
    my $file=shift;
    my $rmmod=shift;
    my ($state,$fsize,$time);
    my ($f,$fsrc,$fdst,$prefix);
    my ($unified,$context);
    my ($skipread, $filetoprint, $filefound);
    my ($h_add,$h_del,$h_ctx);  # hunk line counts
    my ($h_r1,$h_r2);           # hunk ranges
    my @outsrc;                 # if desired ...
    my @outdst;
    my $line;
    my %fmap_size=();
    my %fmap_time=();

    import Date::Parse if ($parsedates && $file eq '');
    $line=1;
    $state=0; $fsize=0; $f='';
    $filefound=0;
    while ($skipread || ($line++,$_=<I>)) {
        $skipread=0;
        if($state == 0) {       # expecting comments
            $unified=$context=0;
            $unified=1 if (/^--- /);
            $context=1 if (/^\*\*\* /);
            if (!$unified && !$context) {
                $filefound=0 if($file ne '' && $filetoprint);
                # shortcut for rmmod xor filefound
                # - in rmmod we print if not found
                # - in copyout (!rmmod) we print if found
                print $fh $_ if($rmmod != $filefound);
                next;
            }
            if($file eq '' && $filetoprint) {
                $fmap_size{"$prefix$f"}+=$fsize;
                $fmap_time{"$prefix$f"}=$time;
            }
            # start of new file
            $_ .= <I>;          # steal next line, both formats
            $line++;
            if($unified) {
                if(/$unified_header/o) {
                    ($fsrc,$fdst,$time) = /$unified_extract/o;
                } elsif(/$unified_header2/o) {
                    ($fsrc,$fdst,$time) = /$unified_extract2/o;
                } else {
                    error "Can't parse unified diff header";
                }
            } elsif($context) {
                if(/$context_header/o) {
                    ($fsrc,$fdst,$time) = /$context_extract/o;
                } elsif(/$context_header2/o) {
                    ($fsrc,$fdst,$time) = /$context_extract2/o;
                } else {
                    error "Can't parse context diff header";
                }
            } else {
                error "Unrecognized diff header";
            }
            $fsrc=patchfs_canonicalize_path($fsrc);
            $fdst=patchfs_canonicalize_path($fdst);
            if(wantarray) {
                push @outsrc,$fsrc;
                push @outdst,$fdst;
            }
            ($f,$prefix)=diff_filename($fsrc,$fdst);
            $filefound=($f eq $file);
            $f="$f.diff";
            $filetoprint=1;
            $fsize=length;
            print $fh $_ if($rmmod != $filefound);
            $state=1;
        } elsif($state == 1) {  # expecting diff hunk headers, end of file or comments
            if($unified) {
                my ($a,$b,$c,$d);
                ($a,$b,$h_r1,$c,$d,$h_r2)=/$unified_hunk/o;
                if(!defined($a) || !defined($c)) {
                    # hunk header does not come, a comment inside
                    # or maybe a new file, state 0 will decide
                    $skipread=1;
                    $state=0;
                    next;
                }
                $fsize+=length;
                print $fh $_ if($rmmod != $filefound);
                # a missing ",count" in a range means a one-line range
                $h_r1=1 if(!defined($b));
                $h_r2=1 if(!defined($d));
                $h_add=$h_del=$h_ctx=0;
                $state=2;
            } elsif($context) {
                if(!/$context_contents/o) {
                    $skipread=1;
                    $state=0;
                    next;
                }
                print $fh $_ if($rmmod != $filefound);
                $fsize+=length;
            }
        } elsif($state == 2) {  # expecting hunk contents
            if($h_del + $h_ctx == $h_r1 && $h_add + $h_ctx == $h_r2) {
                # hooray, end of hunk
                # we optimistically ended with a hunk before but
                # the line has been read already
                $skipread=1;
                $state=1;
                next;
            }
            print $fh $_ if($rmmod != $filefound);
            $fsize+=length;
            my ($first)= /^(.)/;
            if(!defined($first)) {
                # a completely empty line: treat it as a context line
                # whose leading space was stripped (common in patches
                # that went through mailers/editors trimming whitespace)
                $h_ctx++;
            } elsif(ord($first) == ord('+')) {
                $h_add++;
            } elsif(ord($first) == ord('-')) {
                $h_del++;
            } elsif(ord($first) == ord(' ')) {
                $h_ctx++;
            } elsif(ord($first) == ord('\\')) {
                0;              # "\ No newline at end of file" marker, ignored
            } elsif(ord($first) == ord('@')) {
                error "Malformed hunk, header came too early";
            } else {
                error "Unrecognized character in hunk";
            }
        }
    }
    if($file eq '' && $filetoprint) {
        $fmap_size{"$prefix$f"}+=$fsize;
        $fmap_time{"$prefix$f"}=$time;
    }

    # use uid and gid from file
    my ($uid,$gid)=(`ls -l $archive`=~/$ls_extract_id/o);
    # flush all file names with cumulative file size
    while(my ($fn, $fs) = each %fmap_size) {
        printf $fh "-rw-r--r-- 1 %s %s %d %s %s\n",
               $uid, $gid, $fs, datetime($fmap_time{$fn}), $fn;
    }
    close($fh) if($file ne '');
    return \(@outsrc, @outdst) if wantarray;
}

# list files affected by patch
sub list($)
{
    parse($_[0], *STDOUT, '', 0);
    close(I);
}

# extract diff from patch
# IN: diff file to find
# IN: output file name
sub copyout($$)
{
    my ($file,$out)=@_;

    $file=~s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/;
    $file = patchfs_canonicalize_path ($file);
    # three-argument open so a crafted output name cannot inject an I/O mode
    open(FH, '>', $out) or error("Cannot open output file");
    parse('', *FH, $file, 0);
}

# remove diff(s) from patch
# IN: archive
# IN: file to delete
sub rm($$)
{
    my $archive=shift;
    my ($tmp,$tmpname)=tempfile();

    @_=map {scalar(s/^(PATCH-(CREATE|REMOVE)\/)?(.*)\.diff$/$3/,$_)} @_;
    # just the first file for now
    parse($archive, $tmp, $_[0], 1);
    close I;

    # replace archive
    system("cat \Q$tmpname\E | " . myout($archive,0))==0
        or error "Can't write to archive";
    system("rm -f -- \Q$tmpname\E");
}

# append diff to archive
# IN: diff archive name
# IN: newly created file name in archive
# IN: the real source file
sub copyin($$$)
{
    # TODO: seems to be tricky. what to do?
    # copyin of file which is already there may:
    # * delete the original and copy only the new
    # * just append the new hunks to the same file
    #   problems: may not be a valid diff, unmerged hunks
    # * try to merge the two together
    #   ... but we do not want write patchutils again, right?
    error "Copying files into diff not supported";
    return;

    # NOTE: everything below is intentionally unreachable work-in-progress
    my ($archive,$name,$src)=@_;

    # in case we are appending another diff, we have
    # to delete/merge all the files
    open(DEVNULL, ">/dev/null");
    open I, myin($src).'|';
    my ($srclist,$dstlist)=parse($archive, *DEVNULL, '', 0);
    close(I);
    close(DEVNULL);
    foreach(@$srclist) {
        print("SRC: del $_\n");
    }
    foreach(@$dstlist) {
        print("DST: del $_\n");
    }
    return;

    # remove overwritten file
    open I, myin($archive).'|';
    rm ($archive, $name);
    close I;

    my $cmd1=myin("$src.diff");
    my $cmd2=myout($archive,1);
    system("$cmd1 | $cmd2")==0
        or error "Can't write to archive";
}

# main dispatch: extfs command in $ARGV[0], archive in $ARGV[1]
my $fin = $ARGV[1];

# resolve symlink; bail out on cycles instead of looping forever
# NOTE(review): readlink may return a path relative to the link's own
# directory; resolution here assumes it is usable as-is -- confirm
my $hops = 0;
while (-l $fin) {
    error "Too many levels of symbolic links" if (++$hops > 32);
    $fin = readlink $fin;
}

if ($ARGV[0] eq 'list') {
    open I, myin($fin).'|';
    list ($fin);
    exit 0;
} elsif ($ARGV[0] eq 'copyout') {
    open I, myin($fin)."|";
    copyout ($ARGV[2], $ARGV[3]);
    exit 0;
} elsif ($ARGV[0] eq 'rm') {
    open I, myin($fin)."|";
    rm ($fin, $ARGV[2]);
    exit 0;
} elsif ($ARGV[0] eq 'rmdir') {
    exit 0;
} elsif ($ARGV[0] eq 'mkdir') {
    exit 0;
} elsif ($ARGV[0] eq 'copyin') {
    copyin ($fin, $ARGV[2], $ARGV[3]);
    exit 0;
}
exit 1;