textre.pl-txt

  1 #!/usr/bin/perl
  2 # author: seth
  3 # e-mail: for e-mail-address see http://www.wg-karlsruhe.de/seth/email_address.php 
  4 # description: replaces strings in a text-file or a directory using regexps
  5 # textre means TEXT-REplacing (REcursively) by Regular Expressions
  6 #
  7 # tab-size: 2
  8 
  9 use strict;
 10 use warnings;
 11 use Cwd;
 12 
 13 sub syntaxCheck{
 14 	my @params = @_;
 15 	my @path_splitted = split(/[\/\\]/, reverse($0));
 16 	my $prg_name = reverse($path_splitted[0]);
 17 	my $version = '1.01.20110226';
 18 	my %param_hash;
 19 	# default values
 20 	$param_hash{'filesRE'} = '(\\.bas|\\.bat|\\.c|\\.cc|\\.cgi|\\.cpp|\\.css|\\.csv|\\.f|\\.h|\\.hpp|\\.html?|\\.js|\\.pas|\\.php\\d?|\\.pl|\\.tex|\\.txt|\\.vbs)$';
 21 	$param_hash{'ignorecase'} = 0;	# s///i (findRE)
 22 	$param_hash{'emodifier'} = 0;	  # s///e (findRE)
 23 	$param_hash{'icf'} = 0;					# s///i (filesRE)
 24 	$param_hash{'lowercase'} = 0;		# tr/[A-Z]/[a-z]/ and some umlauts too
 25 	$param_hash{'uppercase'} = 0;		# tr/[a-z]/[A-Z]/ and some umlauts too
 26 	$param_hash{'charwise'} = 0;		# read charwise (not linewise)
 27 	$param_hash{'linesRE'} = '.';   # lines to work at
 28 	$param_hash{'recursively'} = 0;	# search subdirs
 29 	$param_hash{'germanshit'} = 0;	# replace äÄöÖüÜß
 30 	$param_hash{'test'} = 0;				# show result only (without renaming)
 31 	$param_hash{'verbose'} = 1;			# trace; grade of verbosity
 32 	$param_hash{'version'} = 0;			# diplay version and exit
 33 	my $usage = 'replaces strings in a text-file or a directory using regexps
 34 
 35 syntax: '.$prg_name.' findRE replaceRE [options]
 36 
 37   findRE                   text to be replaced
 38   replaceRE                replacement
 39   -f, --filesRE=s          files to search (default="'.$param_hash{'filesRE'}.'")
 40   -c, --charwise           don\'t read files linewise (default), but charwise
 41   -e, --emodifier          use e-modifier in findRE, i.e., s///e
 42   -g, --germanshit         _after_ replacing, convert äÄöÖüÜß to ae, ..., ss
 43   -i, --ignorecase-find    ignore case in findRE
 44   -I, --ignorecase-files   ignore case in filesRE
 45   -l, --lower-case         _after_ replacing, convert _all_ to lower case
 46   -L, --lines=s            replace only in lines s, s is interpreted as a regexp, default = all lines
 47   -u, --upper-case         _after_ replacing, convert _all_ to upper case
 48   -r, --recursively        for search subdirectories recursively
 49   -t, --test               don\'t change anything, just print possible changes
 50   -V, --version            display version and exit.
 51   -q, --silent             same as --verbose=0
 52   -v, --verbose            same as --verbose=1 (default)
 53   -vv,--very-verbose       same as --verbose=2
 54   -v, --verbose=x          grade of verbosity
 55                             x=0: no output
 56                             x=1: default output
 57                             x=2: much output
 58 
 59 examples: '.$prg_name.' "bratwurst" "gruenkohl"
 60 
 61           '.$prg_name.' "Blutwurst" "salat" -l
 62 
 63           '.$prg_name.' "blutwurst" "salat" -i -l  (not the same as above)
 64 
 65           '.$prg_name.' "blutwurst" "salat" -L="^123$"
 66 
 67           '.$prg_name.' "(/d)(/d)" "$2$1" -f="(\\.htm|\\.txt)$" -r'."\n".'
 68 note that in linux you have to use single quotes instead of double quotes.
 69 alternatively you can mask the dollar-signs etc. '."\n";
 70 	my $syntax_correct = 0;
 71 	if(defined($params[1])){
 72 		$param_hash{'findRE'} = shift(@params);
 73 		$param_hash{'replaceRE'} = shift(@params);
 74 		$syntax_correct = (@params==0)? 1 : preparse_options(\@params);
 75 		for(@params){
 76 			if($_ eq '-c' || $_ eq '--charwise'){
 77 				$param_hash{'charwise'} = 1;
 78 				next;
 79 			}
 80 			if($_=~/^-(?:f|-filesRE)=(.*)$/){
 81 				$param_hash{'filesRE'} = $1;
 82 				next;
 83 			}
 84 			if($_ eq '-e' || $_ eq '--emodifier'){
 85 				$param_hash{'emodifier'} = 1;
 86 				next;
 87 			}
 88 			if($_ eq '-g' || $_ eq '--germanshit'){
 89 				$param_hash{'germanshit'} = 1;
 90 				next;
 91 			}
 92 			if($_ eq '-i' || $_ eq '--ignorecase-find'){
 93 				$param_hash{'ignorecase'} = 1;
 94 				next;
 95 			}
 96 			if($_ eq '-I' || $_ eq '--ignorecase-files'){
 97 				$param_hash{'icf'} = 1;
 98 				next;
 99 			}
100 			if($_ eq '-l' || $_ eq '--lower-case'){
101 				$param_hash{'lowercase'} = 1;
102 				die 'error: conversion to lowercase _and_ uppercase not possible!'."\n" if($param_hash{'uppercase'});
103 				next;
104 			}
105 			if($_=~/^-(?:L|-lines)=(.*)$/){
106 				$param_hash{'linesRE'} = $1;
107 				next;
108 			}
109 			if($_ eq '-r' || $_=~/--recursive(?:ly)?/){
110 				$param_hash{'recursively'} = 1;
111 				next;
112 			}
113 			if($_ eq '-t' || $_ eq '--test'){
114 				$param_hash{'test'} = 1;
115 				next;
116 			}
117 			if($_ eq '-u' || $_ eq '--upper-case'){
118 				$param_hash{'uppercase'} = 1;
119 				die 'error: conversion to lowercase _and_ uppercase not possible!'."\n" if($param_hash{'lowercase'});
120 				next;
121 			}
122 			if($_ eq '-V' || $_ eq '--version'){
123 				$param_hash{'version'} = 1;
124 				next;
125 			}
126 			# verbosity
127 			if($_ eq '-q' || $_ eq '--silent'){
128 				$param_hash{'verbose'} = 0;
129 				next;
130 			}
131 			if($_=~/^-(?:v|-verbose)$/){
132 				$param_hash{'verbose'} = 1;
133 				next;
134 			}
135 			if($_=~/^-(?:vv|-very-verbose)$/){
136 				$param_hash{'verbose'} = 2;
137 				next;
138 			}
139 			if($_=~/^-(?:v|-verbose)=([0123])$/){
140 				$param_hash{'verbose'} = $1;
141 				next;
142 			} # else
143 			$syntax_correct = 0;
144 			last;
145 		}
146 	}
147 	if($param_hash{'version'} || (defined($params[0]) && !defined($params[1]) && $params[0] =~ '^(-V|--version)$')){
148 		my $version_info = 'textre.pl '.$version."\n".'
149 this program is distributed in the hope that it will be useful,
150 but without any warranty; without even the implied warranty of
151 merchantability or fitness for a particular purpose.
152 
153 originally written by (and suggestions to) seth (for e-mail-address see http://www.wg-karlsruhe.de/seth/email_address.php).'."\n";
154 		die $version_info;
155 	}else{
156 		$syntax_correct || die $usage;
157 	}
158 	return %param_hash;
159 }
160 
161 sub preparse_options{
162 	# yeah, i know that at CPAN there already exist many get-opt-modules.
163 	my $unparsed_params = shift;
164 	my @params = ();
165 	my $l = '[a-zA-Z]';           # leading char of long param      (--Xooo)
166 	my $n = '[a-zA-Z-]';          # non-leading char of long param  (--oXXX)
167 	my $p = '.';                  # param of param                  (--oooo=X or -o=X)
168 	my $s = '[a-zA-Z]';           # short params                    (-X)
169 	my $waiting_for_param_param = 0;
170 	my $syntax_check = 1;
171 	my $param_param;
172 	for my $param (@$unparsed_params){
173 		if($param =~ /^--$l$n+(?:=$p+)?\z/){  # long param
174 			push @params, $param;
175 			$waiting_for_param_param = 0;
176 		}elsif($param =~ /-($s*)($s=$p+)\z/){ # short param with param
177 			$param_param = $2;
178 			push @params, grep s/^/-/, split /(?=$s)/, $1;
179 			push @params, '-'.$param_param;
180 			$waiting_for_param_param = 0;
181 		}elsif($param =~ /-$s+\z/){          # short param w/o param
182 			push @params, grep s/^/-/, split /(?=$s)/, substr($param, 1);
183 			$waiting_for_param_param = 1;
184 		}elsif($waiting_for_param_param==1){  # separated param of param
185 			$params[$#params].='='.$param;
186 			$waiting_for_param_param = 0;
187 		}else{
188 			$syntax_check = 0;
189 			last;
190 		}
191 	}
192 	@{$unparsed_params} = @params;
193 	return $syntax_check;
194 }
195 
196 sub log10{
197 	my $n = shift;
198 	return ($n<=0)?0:log($n)/log(10);
199 }
200 sub max{
201 	my $a = shift;
202 	my $b = shift;
203 	return ($a>$b)?$a:$b;
204 }
205 sub vernichte_bloede_deutsche_umlaute_und_sz{
206 	my $str = shift;
207 	$str=~s/ä/ae/g;
208 	$str=~s/Ä/Ae/g;
209 	$str=~s/ö/oe/g;
210 	$str=~s/Ö/Oe/g;
211 	$str=~s/ü/ue/g;
212 	$str=~s/Ü/Ue/g;
213 	$str=~s/ß/ss/g;
214 	return $str;
215 }
216 
217 sub loadFile{
218 	my $infile = shift;
219 	open(INFILE, "<".$infile) || die "error: $!\n";
220 		my @lines = <INFILE>;
221 	close(INFILE);
222 	return @lines;
223 }
224 sub loadFile_charwise{
225 	my $infile = shift;
226 	my $content = '';
227 	open(INFILE, "<".$infile) || die "error: $!\n";
228 		while(!eof(INFILE)){
229 			$content.=getc(INFILE);
230 		}
231 	close(INFILE);
232 	return $content;
233 }
234 sub saveFile{
235 	my $outfile = shift;
236 	print "write file ".$outfile."\n";
237 	open(OUTFILE, ">".$outfile) || die " could not write file. $!\n";
238 		print OUTFILE @_;
239 	close(OUTFILE);
240 	return 1;
241 }
242 
243 # core
244 sub search_file{
245 	my $flag_dir_printed = shift;
246 	my $work_dir = shift;
247 	my $file = shift;
248 	my $params = shift;
249 	my $findRE = $$params{'findRE'};
250 	my $linesRE = $$params{'linesRE'};
251 	my $replaceRE = $$params{'replaceRE'};
252 	my $ignorecase = ($$params{'ignorecase'})?'i':'';
253 	my $emodifier = ($$params{'emodifier'})?'e':'';
254 	my $verbose = $$params{'verbose'};
255 	my $charwise = $$params{'charwise'}; # not used yet
256 	my %counter = ('lines'=>0, 'changed_lines'=>0, 'changes'=>0);
257 
258 	if($charwise==1){
259 		my $file_content = loadFile_charwise($file);
260 		my $file_new_content = '';
261 		my $found_str;
262 		my $len;
263 		my $old_pos = 0;
264 		my $after_matched;
265 		while($ignorecase eq 'i' && $file_content=~/$findRE/ig || $ignorecase ne 'i' && $file_content=~/$findRE/g){
266 			print $work_dir.$file."\n" if($counter{'changes'}==0 && $verbose>0);
267 			$$flag_dir_printed = 1;
268 			++$counter{'changes'};
269 			$found_str = $&;
270 			$after_matched = $';
271 			$len = length($found_str);
272 			$file_new_content.=substr($file_content, $old_pos, pos($file_content)-$len-$old_pos);
273 			$old_pos = pos($file_content);
274 			print ' orig: '.$found_str."\n" if($verbose>0);
275 			eval('$found_str=~s/'.$findRE.'/'.$replaceRE.'/'.$ignorecase.$emodifier);
276 			print ' new:  '.$found_str."\n" if($verbose>0);
277 			$file_new_content.=$found_str;
278 		}
279 		if($counter{'changes'}>0){
280 			$file_content = $file_new_content.$after_matched;
281 			$file_content = uc($file_content) if($$params{'uppercase'}==1);
282 			$file_content = lc($file_content) if($$params{'lowercase'}==1);
283 			$file_content = vernichte_bloede_deutsche_umlaute_und_sz($file_content) if($$params{'germanshit'}==1);
284 			print ' '.$counter{'changes'}.' changes'."\n" if($verbose>0);
285 			saveFile($file, $file_content) if($$params{'test'}==0);
286 		}
287 	}else{ # linewise
288 		my @lines = loadFile($file);
289 		my $loglines = int(log10($#lines+1)+1);
290 		for(@lines){
291 			++$counter{'lines'};
292 			if($counter{'lines'} =~ /$linesRE/
293 					&& (
294 					     ($ignorecase eq 'i' && $_=~/$findRE/i)
295 					  || ($ignorecase eq ''  && $_=~/$findRE/ )
296 					)
297 				){
298 				print $work_dir.$file."\n" if($counter{'changed_lines'}==0 && $verbose>0);
299 				$$flag_dir_printed = 1;
300 				printf('%0'.max($loglines, 2).'d: ', $counter{'lines'}) if($verbose>0);
301 				print $_ if($verbose>0);
302 				++$counter{'changed_lines'};
303 				eval('$_ =~ s/'.$findRE.'/'.$replaceRE.'/'.$ignorecase.'g'.$emodifier);
304 				print ' ' x (max(0,$loglines-2)).'->: '.$_ if($verbose>0);
305 			}
306 			$_ = uc($_) if($$params{'uppercase'}==1);
307 			$_ = lc($_) if($$params{'lowercase'}==1);
308 			$_ = vernichte_bloede_deutsche_umlaute_und_sz($_) if($$params{'germanshit'}==1);
309 		}
310 		if($counter{'changed_lines'}>0){
311 			print ' '.$counter{'changed_lines'}.' lines replaced'."\n" if($verbose>0);
312 			saveFile($file, @lines) if($$params{'test'}==0);
313 		}
314 	}
315 	return %counter;
316 }
317 
318 sub text_replacer{
319 	my $counter = shift;
320 	my $working_dir = shift;
321 	my $params = shift;
322 	my $filesRE = $$params{'filesRE'};
323 	my $icf = ($$params{'icf'})?'i':'';
324 	my $recursively = $$params{'recursively'};
325 	my $verbose = $$params{'verbose'};
326 	my $entry;
327 	my @dirs;
328 	print "\n\n".'  '.$working_dir.'/'."\n" if $verbose>1;
329 	opendir(DIR, ".") || die $working_dir.": $!"; # read_dir and generate renaming_array
330 	my $flag_first_search = 1;
331 	my $flag_first_skip = 1;
332 	my $flag_dir_printed = 0;
333 	my %counter_present;
334 	my $work_dir;
335 	my @entries = sort(readdir(DIR)); # cosmetics
336 	closedir(DIR);
337 	for $entry (@entries){
338 		if(-d $entry){
339 			push(@dirs, $entry);
340 		}else{
341 			if( (($icf eq '' && $entry=~/$filesRE/) || ($icf eq 'i' && $entry=~/$filesRE/i)) ){
342 				$work_dir = ($verbose==1 && $flag_dir_printed==0)?"\n".'  '.$working_dir.'/'."\n":"\n";
343 				print 'search:' if $verbose>1 && $flag_first_search++;
344 				print ' "'.$entry.'"' if $verbose>1;
345 				++$$counter{'files'};
346 				%counter_present = &search_file(\$flag_dir_printed, $work_dir, $entry, $params);
347 				$$counter{'changed_files'}+=($counter_present{'changed_lines'}+$counter_present{'changes'}>0);
348 				$$counter{'lines'}+=$counter_present{'lines'};
349 				$$counter{'changed_lines'}+=$counter_present{'changed_lines'};
350 				$$counter{'changes'}+=$counter_present{'changes'};
351 			}else{
352 				print 'skip:' if $verbose>2 && $flag_first_skip++;
353 				print ' "'.$entry.'"' if $verbose>1;
354 			}
355 		}
356 	}
357 	# @dirs = sort(@dirs); # not necessary
358 	if($recursively==1){ # search subdirectories
359 		for(@dirs){
360 			if($_ ne '.' && $_ ne '..'){
361 				chdir($_);
362 				++$$counter{'dir'};
363 				text_replacer($counter, $working_dir.'/'.$_, $params);
364 				chdir('..');
365 			}
366 		}
367 	}
368 }
369 
370 sub stats{
371 	my %counter = %{shift()};
372 	my $params = shift;
373 	print "\n".'stats:'."\n";
374 	print ' searched: '.$counter{'dir'}.' dirs, '.$counter{'files'}.' files';
375 	print ', '.$counter{'lines'}.' lines' if($$params{'charwise'}==0);
376 	print "\n";
377 	print ' changed:  '.$counter{'changed_files'}.' files, ';
378 	print (($$params{'charwise'}==0)? $counter{'changed_lines'}.' lines' : $counter{'changes'}.' places');
379 	print "\n";
380 	print '(case-changing not included)'."\n" if($$params{'lowercase'}|$$params{'uppercase'});
381 }
382 
383 sub text_replacing_using_regexps{
384 	my %params = syntaxCheck(@_);		# command line parameters
385 	my $working_dir = cwd;
386 	my %counter;
387 	$counter{'dir'} = 1;						# number of searched directories
388 	$counter{'files'} = 0;					# number of searched files
389 	$counter{'lines'} = 0;					# number of searched lines
390 	$counter{'changed_files'} = 0;	# number of changed files
391 	$counter{'changed_lines'} = 0;	# number of changed lines
392 	$counter{'changes'} = 0;				# number of changes
393 	text_replacer(\%counter, $working_dir, \%params);
394 	chdir($working_dir);
395 	stats(\%counter, \%params) if $params{'verbose'}>0;
396 }
397 
398 text_replacing_using_regexps(@ARGV);