1 #!/usr/bin/perl 2 # author: seth 3 # e-mail: for e-mail-address see http://www.wg-karlsruhe.de/seth/email_address.php 4 # description: replaces strings in a text-file or a directory using regexps 5 # textre means TEXT-REplacing (REcursively) by Regular Expressions 6 # 7 # tab-size: 2 8 9 use strict; 10 use warnings; 11 use Cwd; 12 13 sub syntaxCheck{ 14 my @params = @_; 15 my @path_splitted = split(/[\/\\]/, reverse($0)); 16 my $prg_name = reverse($path_splitted[0]); 17 my $version = '1.01.20110226'; 18 my %param_hash; 19 # default values 20 $param_hash{'filesRE'} = '(\\.bas|\\.bat|\\.c|\\.cc|\\.cgi|\\.cpp|\\.css|\\.csv|\\.f|\\.h|\\.hpp|\\.html?|\\.js|\\.pas|\\.php\\d?|\\.pl|\\.tex|\\.txt|\\.vbs)$'; 21 $param_hash{'ignorecase'} = 0; # s///i (findRE) 22 $param_hash{'emodifier'} = 0; # s///e (findRE) 23 $param_hash{'icf'} = 0; # s///i (filesRE) 24 $param_hash{'lowercase'} = 0; # tr/[A-Z]/[a-z]/ and some umlauts too 25 $param_hash{'uppercase'} = 0; # tr/[a-z]/[A-Z]/ and some umlauts too 26 $param_hash{'charwise'} = 0; # read charwise (not linewise) 27 $param_hash{'linesRE'} = '.'; # lines to work at 28 $param_hash{'recursively'} = 0; # search subdirs 29 $param_hash{'germanshit'} = 0; # replace äÄöÖüÜß 30 $param_hash{'test'} = 0; # show result only (without renaming) 31 $param_hash{'verbose'} = 1; # trace; grade of verbosity 32 $param_hash{'version'} = 0; # diplay version and exit 33 my $usage = 'replaces strings in a text-file or a directory using regexps 34 35 syntax: '.$prg_name.' findRE replaceRE [options] 36 37 findRE text to be replaced 38 replaceRE replacement 39 -f, --filesRE=s files to search (default="'.$param_hash{'filesRE'}.'") 40 -c, --charwise don\'t read files linewise (default), but charwise 41 -e, --emodifier use e-modifier in findRE, i.e., s///e 42 -g, --germanshit _after_ replacing, convert äÄöÖüÜß to ae, ..., ss 43 -i, --ignorecase-find ignore case in findRE 44 -I, --ignorecase-files ignore case in filesRE 45 -l, --lower-case _after_ replacing, convert _all_ to lower case 46 -L, --lines=s replace only in lines s, s is interpreted as a regexp, default = all lines 47 -u, --upper-case _after_ replacing, convert _all_ to upper case 48 -r, --recursively for search subdirectories recursively 49 -t, --test don\'t change anything, just print possible changes 50 -V, --version display version and exit. 51 -q, --silent same as --verbose=0 52 -v, --verbose same as --verbose=1 (default) 53 -vv,--very-verbose same as --verbose=2 54 -v, --verbose=x grade of verbosity 55 x=0: no output 56 x=1: default output 57 x=2: much output 58 59 examples: '.$prg_name.' "bratwurst" "gruenkohl" 60 61 '.$prg_name.' "Blutwurst" "salat" -l 62 63 '.$prg_name.' "blutwurst" "salat" -i -l (not the same as above) 64 65 '.$prg_name.' "blutwurst" "salat" -L="^123$" 66 67 '.$prg_name.' "(/d)(/d)" "$2$1" -f="(\\.htm|\\.txt)$" -r'."\n".' 68 note that in linux you have to use single quotes instead of double quotes. 69 alternatively you can mask the dollar-signs etc. '."\n"; 70 my $syntax_correct = 0; 71 if(defined($params[1])){ 72 $param_hash{'findRE'} = shift(@params); 73 $param_hash{'replaceRE'} = shift(@params); 74 $syntax_correct = (@params==0)? 1 : preparse_options(\@params); 75 for(@params){ 76 if($_ eq '-c' || $_ eq '--charwise'){ 77 $param_hash{'charwise'} = 1; 78 next; 79 } 80 if($_=~/^-(?:f|-filesRE)=(.*)$/){ 81 $param_hash{'filesRE'} = $1; 82 next; 83 } 84 if($_ eq '-e' || $_ eq '--emodifier'){ 85 $param_hash{'emodifier'} = 1; 86 next; 87 } 88 if($_ eq '-g' || $_ eq '--germanshit'){ 89 $param_hash{'germanshit'} = 1; 90 next; 91 } 92 if($_ eq '-i' || $_ eq '--ignorecase-find'){ 93 $param_hash{'ignorecase'} = 1; 94 next; 95 } 96 if($_ eq '-I' || $_ eq '--ignorecase-files'){ 97 $param_hash{'icf'} = 1; 98 next; 99 } 100 if($_ eq '-l' || $_ eq '--lower-case'){ 101 $param_hash{'lowercase'} = 1; 102 die 'error: conversion to lowercase _and_ uppercase not possible!'."\n" if($param_hash{'uppercase'}); 103 next; 104 } 105 if($_=~/^-(?:L|-lines)=(.*)$/){ 106 $param_hash{'linesRE'} = $1; 107 next; 108 } 109 if($_ eq '-r' || $_=~/--recursive(?:ly)?/){ 110 $param_hash{'recursively'} = 1; 111 next; 112 } 113 if($_ eq '-t' || $_ eq '--test'){ 114 $param_hash{'test'} = 1; 115 next; 116 } 117 if($_ eq '-u' || $_ eq '--upper-case'){ 118 $param_hash{'uppercase'} = 1; 119 die 'error: conversion to lowercase _and_ uppercase not possible!'."\n" if($param_hash{'lowercase'}); 120 next; 121 } 122 if($_ eq '-V' || $_ eq '--version'){ 123 $param_hash{'version'} = 1; 124 next; 125 } 126 # verbosity 127 if($_ eq '-q' || $_ eq '--silent'){ 128 $param_hash{'verbose'} = 0; 129 next; 130 } 131 if($_=~/^-(?:v|-verbose)$/){ 132 $param_hash{'verbose'} = 1; 133 next; 134 } 135 if($_=~/^-(?:vv|-very-verbose)$/){ 136 $param_hash{'verbose'} = 2; 137 next; 138 } 139 if($_=~/^-(?:v|-verbose)=([0123])$/){ 140 $param_hash{'verbose'} = $1; 141 next; 142 } # else 143 $syntax_correct = 0; 144 last; 145 } 146 } 147 if($param_hash{'version'} || (defined($params[0]) && !defined($params[1]) && $params[0] =~ '^(-V|--version)$')){ 148 my $version_info = 'textre.pl '.$version."\n".' 149 this program is distributed in the hope that it will be useful, 150 but without any warranty; without even the implied warranty of 151 merchantability or fitness for a particular purpose. 152 153 originally written by (and suggestions to) seth (for e-mail-address see http://www.wg-karlsruhe.de/seth/email_address.php).'."\n"; 154 die $version_info; 155 }else{ 156 $syntax_correct || die $usage; 157 } 158 return %param_hash; 159 } 160 161 sub preparse_options{ 162 # yeah, i know that at CPAN there already exist many get-opt-modules. 163 my $unparsed_params = shift; 164 my @params = (); 165 my $l = '[a-zA-Z]'; # leading char of long param (--Xooo) 166 my $n = '[a-zA-Z-]'; # non-leading char of long param (--oXXX) 167 my $p = '.'; # param of param (--oooo=X or -o=X) 168 my $s = '[a-zA-Z]'; # short params (-X) 169 my $waiting_for_param_param = 0; 170 my $syntax_check = 1; 171 my $param_param; 172 for my $param (@$unparsed_params){ 173 if($param =~ /^--$l$n+(?:=$p+)?\z/){ # long param 174 push @params, $param; 175 $waiting_for_param_param = 0; 176 }elsif($param =~ /-($s*)($s=$p+)\z/){ # short param with param 177 $param_param = $2; 178 push @params, grep s/^/-/, split /(?=$s)/, $1; 179 push @params, '-'.$param_param; 180 $waiting_for_param_param = 0; 181 }elsif($param =~ /-$s+\z/){ # short param w/o param 182 push @params, grep s/^/-/, split /(?=$s)/, substr($param, 1); 183 $waiting_for_param_param = 1; 184 }elsif($waiting_for_param_param==1){ # separated param of param 185 $params[$#params].='='.$param; 186 $waiting_for_param_param = 0; 187 }else{ 188 $syntax_check = 0; 189 last; 190 } 191 } 192 @{$unparsed_params} = @params; 193 return $syntax_check; 194 } 195 196 sub log10{ 197 my $n = shift; 198 return ($n<=0)?0:log($n)/log(10); 199 } 200 sub max{ 201 my $a = shift; 202 my $b = shift; 203 return ($a>$b)?$a:$b; 204 } 205 sub vernichte_bloede_deutsche_umlaute_und_sz{ 206 my $str = shift; 207 $str=~s/ä/ae/g; 208 $str=~s/Ä/Ae/g; 209 $str=~s/ö/oe/g; 210 $str=~s/Ö/Oe/g; 211 $str=~s/ü/ue/g; 212 $str=~s/Ü/Ue/g; 213 $str=~s/ß/ss/g; 214 return $str; 215 } 216 217 sub loadFile{ 218 my $infile = shift; 219 open(INFILE, "<".$infile) || die "error: $!\n"; 220 my @lines = <INFILE>; 221 close(INFILE); 222 return @lines; 223 } 224 sub loadFile_charwise{ 225 my $infile = shift; 226 my $content = ''; 227 open(INFILE, "<".$infile) || die "error: $!\n"; 228 while(!eof(INFILE)){ 229 $content.=getc(INFILE); 230 } 231 close(INFILE); 232 return $content; 233 } 234 sub saveFile{ 235 my $outfile = shift; 236 print "write file ".$outfile."\n"; 237 open(OUTFILE, ">".$outfile) || die " could not write file. $!\n"; 238 print OUTFILE @_; 239 close(OUTFILE); 240 return 1; 241 } 242 243 # core 244 sub search_file{ 245 my $flag_dir_printed = shift; 246 my $work_dir = shift; 247 my $file = shift; 248 my $params = shift; 249 my $findRE = $$params{'findRE'}; 250 my $linesRE = $$params{'linesRE'}; 251 my $replaceRE = $$params{'replaceRE'}; 252 my $ignorecase = ($$params{'ignorecase'})?'i':''; 253 my $emodifier = ($$params{'emodifier'})?'e':''; 254 my $verbose = $$params{'verbose'}; 255 my $charwise = $$params{'charwise'}; # not used yet 256 my %counter = ('lines'=>0, 'changed_lines'=>0, 'changes'=>0); 257 258 if($charwise==1){ 259 my $file_content = loadFile_charwise($file); 260 my $file_new_content = ''; 261 my $found_str; 262 my $len; 263 my $old_pos = 0; 264 my $after_matched; 265 while($ignorecase eq 'i' && $file_content=~/$findRE/ig || $ignorecase ne 'i' && $file_content=~/$findRE/g){ 266 print $work_dir.$file."\n" if($counter{'changes'}==0 && $verbose>0); 267 $$flag_dir_printed = 1; 268 ++$counter{'changes'}; 269 $found_str = $&; 270 $after_matched = $'; 271 $len = length($found_str); 272 $file_new_content.=substr($file_content, $old_pos, pos($file_content)-$len-$old_pos); 273 $old_pos = pos($file_content); 274 print ' orig: '.$found_str."\n" if($verbose>0); 275 eval('$found_str=~s/'.$findRE.'/'.$replaceRE.'/'.$ignorecase.$emodifier); 276 print ' new: '.$found_str."\n" if($verbose>0); 277 $file_new_content.=$found_str; 278 } 279 if($counter{'changes'}>0){ 280 $file_content = $file_new_content.$after_matched; 281 $file_content = uc($file_content) if($$params{'uppercase'}==1); 282 $file_content = lc($file_content) if($$params{'lowercase'}==1); 283 $file_content = vernichte_bloede_deutsche_umlaute_und_sz($file_content) if($$params{'germanshit'}==1); 284 print ' '.$counter{'changes'}.' changes'."\n" if($verbose>0); 285 saveFile($file, $file_content) if($$params{'test'}==0); 286 } 287 }else{ # linewise 288 my @lines = loadFile($file); 289 my $loglines = int(log10($#lines+1)+1); 290 for(@lines){ 291 ++$counter{'lines'}; 292 if($counter{'lines'} =~ /$linesRE/ 293 && ( 294 ($ignorecase eq 'i' && $_=~/$findRE/i) 295 || ($ignorecase eq '' && $_=~/$findRE/ ) 296 ) 297 ){ 298 print $work_dir.$file."\n" if($counter{'changed_lines'}==0 && $verbose>0); 299 $$flag_dir_printed = 1; 300 printf('%0'.max($loglines, 2).'d: ', $counter{'lines'}) if($verbose>0); 301 print $_ if($verbose>0); 302 ++$counter{'changed_lines'}; 303 eval('$_ =~ s/'.$findRE.'/'.$replaceRE.'/'.$ignorecase.'g'.$emodifier); 304 print ' ' x (max(0,$loglines-2)).'->: '.$_ if($verbose>0); 305 } 306 $_ = uc($_) if($$params{'uppercase'}==1); 307 $_ = lc($_) if($$params{'lowercase'}==1); 308 $_ = vernichte_bloede_deutsche_umlaute_und_sz($_) if($$params{'germanshit'}==1); 309 } 310 if($counter{'changed_lines'}>0){ 311 print ' '.$counter{'changed_lines'}.' lines replaced'."\n" if($verbose>0); 312 saveFile($file, @lines) if($$params{'test'}==0); 313 } 314 } 315 return %counter; 316 } 317 318 sub text_replacer{ 319 my $counter = shift; 320 my $working_dir = shift; 321 my $params = shift; 322 my $filesRE = $$params{'filesRE'}; 323 my $icf = ($$params{'icf'})?'i':''; 324 my $recursively = $$params{'recursively'}; 325 my $verbose = $$params{'verbose'}; 326 my $entry; 327 my @dirs; 328 print "\n\n".' '.$working_dir.'/'."\n" if $verbose>1; 329 opendir(DIR, ".") || die $working_dir.": $!"; # read_dir and generate renaming_array 330 my $flag_first_search = 1; 331 my $flag_first_skip = 1; 332 my $flag_dir_printed = 0; 333 my %counter_present; 334 my $work_dir; 335 my @entries = sort(readdir(DIR)); # cosmetics 336 closedir(DIR); 337 for $entry (@entries){ 338 if(-d $entry){ 339 push(@dirs, $entry); 340 }else{ 341 if( (($icf eq '' && $entry=~/$filesRE/) || ($icf eq 'i' && $entry=~/$filesRE/i)) ){ 342 $work_dir = ($verbose==1 && $flag_dir_printed==0)?"\n".' '.$working_dir.'/'."\n":"\n"; 343 print 'search:' if $verbose>1 && $flag_first_search++; 344 print ' "'.$entry.'"' if $verbose>1; 345 ++$$counter{'files'}; 346 %counter_present = &search_file(\$flag_dir_printed, $work_dir, $entry, $params); 347 $$counter{'changed_files'}+=($counter_present{'changed_lines'}+$counter_present{'changes'}>0); 348 $$counter{'lines'}+=$counter_present{'lines'}; 349 $$counter{'changed_lines'}+=$counter_present{'changed_lines'}; 350 $$counter{'changes'}+=$counter_present{'changes'}; 351 }else{ 352 print 'skip:' if $verbose>2 && $flag_first_skip++; 353 print ' "'.$entry.'"' if $verbose>1; 354 } 355 } 356 } 357 # @dirs = sort(@dirs); # not necessary 358 if($recursively==1){ # search subdirectories 359 for(@dirs){ 360 if($_ ne '.' && $_ ne '..'){ 361 chdir($_); 362 ++$$counter{'dir'}; 363 text_replacer($counter, $working_dir.'/'.$_, $params); 364 chdir('..'); 365 } 366 } 367 } 368 } 369 370 sub stats{ 371 my %counter = %{shift()}; 372 my $params = shift; 373 print "\n".'stats:'."\n"; 374 print ' searched: '.$counter{'dir'}.' dirs, '.$counter{'files'}.' files'; 375 print ', '.$counter{'lines'}.' lines' if($$params{'charwise'}==0); 376 print "\n"; 377 print ' changed: '.$counter{'changed_files'}.' files, '; 378 print (($$params{'charwise'}==0)? $counter{'changed_lines'}.' lines' : $counter{'changes'}.' places'); 379 print "\n"; 380 print '(case-changing not included)'."\n" if($$params{'lowercase'}|$$params{'uppercase'}); 381 } 382 383 sub text_replacing_using_regexps{ 384 my %params = syntaxCheck(@_); # command line parameters 385 my $working_dir = cwd; 386 my %counter; 387 $counter{'dir'} = 1; # number of searched directories 388 $counter{'files'} = 0; # number of searched files 389 $counter{'lines'} = 0; # number of searched lines 390 $counter{'changed_files'} = 0; # number of changed files 391 $counter{'changed_lines'} = 0; # number of changed lines 392 $counter{'changes'} = 0; # number of changes 393 text_replacer(\%counter, $working_dir, \%params); 394 chdir($working_dir); 395 stats(\%counter, \%params) if $params{'verbose'}>0; 396 } 397 398 text_replacing_using_regexps(@ARGV);