1 #!/usr/local/bin/perl -w
  2 # author: seth
  3 # email: email_software@wg-karlsruhe.de
  4 # description: renames file using html-file xor id3-tags
  5 # todo-list:
  6 #  - support of noalbum, (--noalbum = do not search for a whole album)
  7 
  8 use MP3::Tag;
  9 use Cwd;
 10 use strict;
 11 use File::Copy;
# Default location of the html music list that get_info_from_html_file()
# reads; syntaxCheck() overwrites it when --htmlfile=x is given.
my $html_file_path='C:/Dokumente und Einstellungen/Administrator/Eigene Dateien/musik-db/music_list.htm';
 13 
 14 sub syntaxCheck{
 15 	my @params=@_;
 16 	my @path_splitted=split(/[\/\\]/, reverse($0));
 17 	my $prg_name=reverse($path_splitted[0]);
 18 	my $version='0.95.20070324';
 19 	my $usage='renames file using html-file xor id3-tags
 20 
 21 usage: '.$prg_name.' filesRE [options]
 22 
 23   filesRE               files to rename (use regular expressions!)
 24   -r,   --recursively   for searching subdirectories recursively
 25         --v[erbose]=x   verbose (x=0: no output, x=1: default output, x=2: much output)
 26   -t,   --test          don\'t change anything, just print possible changes
 27   -s,   --sortToDir     makes (and moves files to) directory artist/album/*
 28   -V,   --version       display version and exit.
 29         --htmlfile=x    use other path than default (x="path to html_music_list")
 30         --notrackno     do not use trackno in filename
 31   -u    --useid3        use id3-tags (don\'t search html-file, but id3v1 and id3v2)
 32         --useid3v1      use id3-tags (don\'t search html-file, but id3v1 only)
 33         --useid3v2      use id3-tags (don\'t search html-file, but id3v2 only)
 34   forced info:          may be (multiple) chosen from the following case-sensitive list
 35         --TPE1=s        artist (default s=..)
 36         --TALB=s        album (default s=.)
 37                    s is a string
 38                    s=. means "use current dir name"
 39                    s=.. means "use parent dir name"
 40                    (there are no bands called . or ..)
 41 
 42 examples: .../artist/album/'.$prg_name.' "^\\d\\d\\.mp3$"
 43              renames all files "xx.mp3" to "xx artist - track.mp3"
 44 
 45           .../artist/'.$prg_name.' "\\.mp3$" --TPE1=. --TALB="dick und doof"
 46              renames all files "*.mp3" to "xx artist - track.mp3"'."\n";
 47 	my $syntax_correct=0;
 48 	my %param_hash;
 49 	$param_hash{'noalbum'}=0;
 50 	$param_hash{'notrackno'}=0;
 51 	$param_hash{'recursively'}=0;
 52 	$param_hash{'test'}=0;
 53 	$param_hash{'sortToDir'}=0;
 54 	$param_hash{'useid3'}=0;
 55 	$param_hash{'useid3v1'}=0;
 56 	$param_hash{'useid3v2'}=0;
 57 	$param_hash{'verbose'}=1;
 58 	$param_hash{'version'}=0;
 59 	$param_hash{'TPE1'}='..';
 60 	$param_hash{'TALB'}='.';
 61 	if(defined($params[0])){
 62 		$param_hash{'filesRA'}=shift(@params);
 63 		$syntax_correct=1;
 64 		$syntax_correct=0 if $param_hash{'filesRA'}=~/^(-?-h|\/\?)$/; # if somebody trys -h or --h or /? for help
 65 		foreach(@params){
 66 			if($_=~/^-[^-]./){
 67 				while(length($_)>2){
 68 					push(@params, '-'.substr($_, 2, 1));
 69 					$_=substr($_, 0, 2).((length($_)>3)?substr($_, 3):'');
 70 				}
 71 			}
 72 			if($_ eq '--noalbum'){
 73 				$param_hash{'noalbum'}=1;
 74 				next;
 75 			}
 76 			if($_ eq '--notrackno'){
 77 				$param_hash{'notrackno'}=1;
 78 				next;
 79 			}
 80 			if($_ eq '-r' || $_ eq '--recursively'){
 81 				$param_hash{'recursively'}=1;
 82 				next;
 83 			}
 84 			if($_ eq '-t' || $_ eq '--test'){
 85 				$param_hash{'test'}=1;
 86 				next;
 87 			}
 88 			if($_ eq '-s' || $_ eq '--sortToDir'){
 89 				$param_hash{'sortToDir'}=1;
 90 				next;
 91 			}
 92 			if($_ eq '-u' || $_ eq '--useid3'){
 93 				$param_hash{'useid3'}=1;
 94 				next;
 95 			}
 96 			if($_ eq '--useid3v1'){
 97 				$param_hash{'useid3v1'}=1;
 98 				next;
 99 			}
100 			if($_ eq '--useid3v2'){
101 				$param_hash{'useid3v2'}=1;
102 				next;
103 			}
104 			if($_=~/^--v(erbose)?=([0123])$/){
105 				$param_hash{'verbose'}=$2;
106 				next;
107 			}
108 			if($_ eq '-V' || $_ eq '--version'){
109 				$param_hash{'version'}=1;
110 				next;
111 			}
112 			if($_=~/^--htmlfile=(.+)$/){
113 				$html_file_path=$1;
114 				next;
115 			}
116 			if($_=~/^--(T[A-Z0-9]{3})=(.*)$/){
117 				$param_hash{$1}=$2;
118 			}else{
119 				$syntax_correct=0;
120 				last;
121 			}
122 		}
123 	}
124 	$syntax_correct=0 if 1<$param_hash{'useid3'}+$param_hash{'useid3v1'}+$param_hash{'useid3v2'};
125 	if($param_hash{'version'} || ($param_hash{'filesRA'} && ($param_hash{'filesRA'} eq '-V' || $param_hash{'filesRA'} eq '--version'))){
126 		my $version_info='ren_mp3.pl '.$version."\n".'
127 this program is distributed in the hope that it will be useful,
128 but without any warranty; without even the implied warranty of
129 merchantability or fitness for a particular purpose.
130 
131 originally written by seth <email_software@wg-karlsruhe.de>.'."\n";
132 		die $version_info;
133 	}
134 	else{
135 		$syntax_correct || die $usage;
136 	}
137 	return %param_hash;
138 }
139 
# Builds the new base filename (without extension) for one mp3 file from
# its id3 tags.
#
# Parameters: $file      - path of the mp3 file
#             $verbose   - verbosity level; values >1 print the tag data
#             $notrackno - 1 to leave the track number out of the name
#             remaining  - passed on as the 'autoinfo' configuration of
#                          MP3::Tag (the callers pass 'ID3v2' and/or 'ID3v1')
# Returns:    the new name, with characters not allowed in filenames
#             replaced by '_'.
# NOTE(review): the field order of autoinfo() is presumably
# (title, track, artist, album, ...) - confirm against the MP3::Tag version
# in use.
sub get_info_from_id3{
	my ($file, $verbose, $notrackno, @tag_sources)=@_;
	my $mp3=MP3::Tag->new($file);
	$mp3->config('autoinfo', @tag_sources);
	my @tag=$mp3->autoinfo();
	# keep only the part before a '/' (e.g. '3/12' -> '3') and pad a
	# single character with a leading zero
	$tag[1]=~s{/.*}{}g;
	if(length($tag[1])==1){
		$tag[1]='0'.$tag[1];
	}
	my $numbered_name=$tag[1].' '.$tag[2].' - '.$tag[0];
	if($verbose>1){
		print '   ('.$tag[3].') '.$numbered_name;
	}
	my $new_name=($notrackno==1) ? $tag[2].' - '.$tag[0] : $numbered_name;
	if(exists $mp3->{ID3v2} && $mp3->{ID3v2}->get_frame("TYER")){
		# the year frame is only appended to the list and echoed
		push(@tag, $mp3->{ID3v2}->get_frame("TYER"));
		print ' ('.$tag[4].')' if $verbose>1;
	}
	$new_name=subst_disallowed_chars($new_name, '_');
	if($verbose>1){
		print "\n";
	}
	$mp3->close;
	return $new_name;
}
163 
# Reads a whole text file into memory.
#
# Parameters: $infile  - path of the file to read
#             $verbose - verbosity level; values >0 announce the file name
# Returns:    the list of lines, line endings included.
# Dies:       if the file cannot be opened; the message carries the system
#             error text.
sub loadFile{
	my $infile=shift;
	my $verbose=shift;
	print 'read file "'.$infile."\"\n" if $verbose>0;
	# three-argument open: the name is taken literally, so leading/trailing
	# whitespace or mode characters in it cannot change what gets opened
	open(my $in_fh, '<', $infile) or die ' datei "'.$infile.'" nicht gefunden: '.$!."\n";
	my @lines=<$in_fh>;
	close($in_fh);
	return @lines;
}
173 
# Decodes the html entities used in the music list and replaces the
# characters / ? * ! with '_'.
#
# Parameters: $s - the text to convert
# Returns:    the converted text; entities not listed below stay untouched.
sub dehtml{
	my $s=shift;
	my %char_for=(
		'amp'   => '&',
		'auml'  => 'ä',
		'ouml'  => 'ö',
		'uuml'  => 'ü',
		'Auml'  => 'Ä',
		'Ouml'  => 'Ö',
		'Uuml'  => 'Ü',
		'szlig' => 'ß',
		'AElig' => 'Æ',
	);
	# decode in a single pass, so that the '&' produced by '&amp;' cannot
	# start another entity ('&amp;auml;' is the text '&auml;', not 'ä')
	$s=~s/&(amp|auml|ouml|uuml|Auml|Ouml|Uuml|szlig|AElig);/$char_for{$1}/g;
	$s=~s/[\/\?*!]/_/g;
	# that's not all yet!
	return $s;
}
189 
# Turns an (already regex-quoted) name into a more tolerant regular
# expression: '_' and '!' match up to two special characters, '&' also
# matches 'and'/'und', and umlauts/sharp s match their two-letter
# transcriptions and vice versa.
#
# Parameters: $s - the pattern text to relax
# Returns:    the relaxed pattern text.
sub fuzzy_string{
	my $s=shift;
	# the rewrites are applied one after the other, in exactly this order
	my @rewrites=(
		[qr/[_!]/,       '[_\/\?\\\\*!]{0,2}'],
		[qr/&/,          '(?:&| ?and ?| ?und ?)'],
		[qr/(?:ä|ae|Æ)/, '(?:[äÆ]|ae)'],
		[qr/(?:ö|oe)/,   '(?:ö|oe)'],
		[qr/(?:ü|ue)/,   '(?:ü|ue)'],
		[qr/(?:Ä|Ae)/,   '(?:Ä|ae)'],
		[qr/(?:Ö|Oe)/,   '(?:Ö|oe)'],
		[qr/(?:Ü|Ue)/,   '(?:Ü|ue)'],
		[qr/(?:ß|ss)/,   '(?:ß|ss)'],
	);
	foreach my $rewrite (@rewrites){
		my ($from, $to)=@{$rewrite};
		$s=~s/$from/$to/g;
	}
	# that's not all yet!
	return $s;
}
204 
# Puts a backslash in front of each of the regex meta characters
# { } [ ] ( ) ^ $ . | * + ? \ so the text can be used literally in a
# pattern.
#
# Parameters: the text to quote
# Returns:    the quoted text.
sub quote_meta_chars{
	my ($text)=@_;
	$text=~s/([\\\[\]{}()|^\$.*+?])/\\$1/g;
	return $text;
}
210 
# Replaces every character that is not allowed in a filename
# (\ / : * ? " < > |) with the given replacement.
#
# Parameters: the text to clean, then the replacement string
# Returns:    the cleaned text.
sub subst_disallowed_chars{
	my ($str, $char)=@_;
	$str=~s{[\\/:*?"<>|]}{$char}g;
	return $str;
}
217 
# Removes a trailing file extension such as '.mp3'. Only the part after the
# last '.' is considered, and only if it is at most five characters long.
#
# Parameters: the file name
# Returns:    the name without extension, or the unchanged name.
sub cut_file_extension{
	my ($name)=@_;
	my $dot_at=rindex($name, '.');
	return $name if $dot_at<0;                 # no '.' at all
	my $extension_length=length($name)-$dot_at-1;
	return $name if $extension_length>=6;      # too long for an extension
	return substr($name, 0, $dot_at);
}
226 
# Removes the first occurrence of a substring from a string.
#
# Parameters: the string to search in, then the substring to remove
# Returns:    the string without the first occurrence of the substring, or
#             the unchanged string if the substring does not occur.
sub subtract_str{
	my ($haystack, $needle)=@_;
	my $found_at=index($haystack, $needle);
	return $haystack if $found_at<0;
	substr($haystack, $found_at, length($needle), ''); # cut the match out
	return $haystack;
}
234 
# Searches the html music list for the album "$TPE1 - $TALB" and links the
# given files with the track names listed there.
#
# Parameters: $verbose   - verbosity level (0 = silent ... 3 = tracing)
#             $noalbum   - 1 is not implemented yet and makes the sub die
#             $notrackno - 1 to leave the track number out of the new names
#             $TPE1      - artist name
#             $TALB      - album name
#             remaining  - the file names to find new names for
# Returns:    a hash that maps an old file name to its new name (callers
#             append '.mp3'). A value can be '' when an association was
#             moved to another file, see the conflict handling below.
# Reads the file named by the file-level $html_file_path; dies if it cannot
# be loaded.
sub get_info_from_html_file{
	my $verbose=shift;
	my $noalbum=shift;
	my $notrackno=shift;
	my $TPE1=shift;
	my $TALB=shift;
	my $TPE1_fuzzy=$TPE1;
	my $TALB_fuzzy=$TALB;
	# in the album headline any non-alphanumeric character of the names may
	# be matched by any other non-alphanumeric character
	$TPE1_fuzzy=~s/([^a-zA-Z0-9])/[^a-zA-Z0-9]/g;
	$TALB_fuzzy=~s/([^a-zA-Z0-9])/[^a-zA-Z0-9]/g;
	my @files=@_;
	my %info;
	my @html_file_tracknames=loadFile($html_file_path, $verbose);
	if($noalbum==1){
		die 'not implemented yet'."\n";
	}else{ # extract album from html-file
		print 'a name=([^>]+>){1,2}'.$TPE1_fuzzy.' - '.$TALB_fuzzy."\n" if $verbose>2;
		# drop lines from the front until the headline of the album was removed
		while((dehtml(shift(@html_file_tracknames))=~/a name=([^>]+>){1,2}${TPE1_fuzzy} - ${TALB_fuzzy}/i)==0){
			if(0>=@html_file_tracknames){
				print 'not found in html-file'."\n" if $verbose>0;
				last;
			}	# cut begin
		}
		my $no_tracks=0;
		foreach(@html_file_tracknames){ # reformat and count names
			if(/^\t\d\d\s-\s/){
				$_=subtract_str($_,' (ianacd)');
				# "\tNN - title..." becomes "NN $TPE1 - title"; the last five
				# characters are cut off (presumably "<br>" and the line end)
				$_=dehtml(substr($_, 1, 2)." $TPE1".substr($_, 3, length($_)-8));
				$_=substr($_, 3) if $notrackno==1;
				$no_tracks++;
			}else{
				# lines like "\tCD1<br>" are tolerated, anything else ends the album
				last if not(/^\tCD\d<br>/);
			}
		}
		while(@html_file_tracknames>$no_tracks){
			pop(@html_file_tracknames); # cut end
		}
		if($verbose>1){
			print 'found: '."\n ".@html_file_tracknames.' tracks in htmlfile'."\n";
			print ' '.@files.' files in dir'."\n";
		}
		my $temp_no_track;
		my $temp_file_trackname;
		my $html_file_trackname;
		my $html_file_trackname_quoted;
		# $track_no_bucket[n] holds the index (in @files) of the file linked
		# with track n, or -1 while track n is still free; slot 0 is unused
		my @track_no_bucket=(1); # any value but -1
		for(my $i=0;$i<@html_file_tracknames;++$i){
			$html_file_tracknames[$i]=subst_disallowed_chars($html_file_tracknames[$i], '_');
			push(@track_no_bucket,-1); # init bucket
		}
		for(my $i=0;$i<@html_file_tracknames;++$i){									# loop over all tracknames (found in html_file)
			$html_file_trackname=substr($html_file_tracknames[$i],6+length($TPE1)); # cut /\d\d $artist - /
			$html_file_trackname_quoted=quote_meta_chars($html_file_trackname);
			$html_file_trackname_quoted=fuzzy_string($html_file_trackname_quoted);
			for(my $j=0;$j<@files;++$j){															# loop over all filenames
				# album and artist are plain names, not patterns, so they are
				# quoted with \Q...\E before being used inside the regex
				if($files[$j]=~/^(?:\Q$TALB\E|\Q$TPE1\E)?[-_ ]{0,3}(\d\d)\D/){	# identification using 2-digit number in filename
					$temp_no_track=(substr($1,0,1) eq '0')?substr($1,1,1):$1;# extract this tracknumber
					if($track_no_bucket[$temp_no_track]==-1){							# if tracknumber still available
						$track_no_bucket[$temp_no_track]=$j;								# occupy tracknumber
						print 'tracing: '.$1.' ; '.$temp_no_track.' ; ' if $verbose>2;
						$info{$files[$j]}=$html_file_tracknames[$temp_no_track-1];# link file with track_name (from html_file)
						print $files[$j].' ; '.$info{$files[$j]}."\n" if $verbose>2;
						last;
					}else{																								# if tracknumber occupied already
						print 'maybe there will occur an error. mixed up filenames or something...'."\n" if $verbose>0;
					}
				}# now identification using parts of name
				$temp_file_trackname=quote_meta_chars(cut_file_extension($files[$j]));
				$temp_file_trackname=fuzzy_string($temp_file_trackname);
				
				# for debugging:
				#if($files[$j]=~/name/i && $html_file_trackname=~/name/i){
				#	print 'file               = '.$files[$j]."\n";
				#	print 'temp_file_trackname= '.$temp_file_trackname."\n";
				#	print 'html_file_trackname= '.$html_file_trackname."\n";
				#	print 'html_file_trackname_quoted= '.$html_file_trackname_quoted."\n";
				#}

				if($files[$j]=~/$html_file_trackname_quoted/i || $html_file_trackname=~/$temp_file_trackname/i){ # (filename similar to trackname)?
					$temp_no_track=(substr($html_file_tracknames[$i],0,1) eq '0')?substr($html_file_tracknames[$i],1,1):substr($html_file_tracknames[$i],0,2);
					if($track_no_bucket[$temp_no_track]==-1){				# if tracknumber still available
						$track_no_bucket[$temp_no_track]=$j;					# occupy tracknumber
						$info{$files[$j]}=$html_file_tracknames[$i];	# link file with track_name (from html_file)
						last;
					}else{ # if tracknumber occupied already
						print 'maybe there will occur an error. mixed up filenames or something...'."\n" if $verbose>0;
						if($verbose>1){
							print ' track_no         ='.substr($html_file_tracknames[$i],0,2)."\n";
							print ' html_trackname   ='.$html_file_trackname."\n";
							print ' file_trackname   ='.$files[$j]."\n";
							print ' in conflict with  '.$files[$track_no_bucket[$temp_no_track]]." (hitherto existing)\n";
						}
						if($files[$j]=~/$html_file_trackname_quoted/i){
							# if this file is named exactly like the track (apart from
							# artist and extension), it takes the association over
							$temp_file_trackname=subtract_str(cut_file_extension($files[$j]), $TPE1);
							$temp_file_trackname=substr($temp_file_trackname, 3) if(substr($temp_file_trackname, 0, 3) eq ' - ');
							if($temp_file_trackname eq $html_file_trackname){
								$info{$files[$j]}=$info{$files[$track_no_bucket[$temp_no_track]]}; # = $html_file_tracknames[$i];
								$info{$files[$track_no_bucket[$temp_no_track]]}=''; # $html_file_tracknames[$i]; # or better ='' ??
								my $swap=$files[$track_no_bucket[$temp_no_track]];
								$files[$track_no_bucket[$temp_no_track]]=$files[$j];
								$files[$j]=$swap;
								# $track_no_bucket[$temp_no_track]=$j; is not being run because of swap($files[$track_no_bucket[$temp_no_track]],$files[$j])
								print ' ...tried to correct that' if $verbose>0;
								print ' by changing association to '.$files[$track_no_bucket[$temp_no_track]] if $verbose>1;
								print ".\n" if $verbose>0;
								# $i=0 necessary (or crashes)??
							}else{
								print ' nothing changed'."\n" if $verbose>1;
							}
						}
					}
				}
			}
		}
		if($verbose>0){
			# report every track that no file was linked with
			my $i=0;
			foreach(@track_no_bucket){
				print 'trackname of track #'.$i.' not found!'."\n" if $_==-1;
				++$i;
			}
		}
	}
	return %info;
}
360 
# Renames the matching files of the current directory and, if requested,
# descends into its subdirectories.
#
# The sub works on the current directory of the process; $working_dir must
# be the absolute path of that directory (it is used for messages, for the
# artist/album defaults, for the id3 file paths and to return from
# subdirectories).
#
# Parameters: $working_dir - absolute path of the current directory
#             remaining    - the option hash as returned by syntaxCheck()
# Dies:       if the directory cannot be read, if the target directory for
#             --sortToDir exists already or cannot be created, or if the
#             directory cannot be re-entered after a recursion.
sub search_dir{
	my $working_dir=shift;
	my %params=@_;
	my $filesRE=$params{'filesRA'};
	my $recursively=$params{'recursively'};
	my $verbose=$params{'verbose'};
	my $test=$params{'test'};
	my $sortToDir=$params{'sortToDir'};
	my $noalbum=$params{'noalbum'};
	my $notrackno=$params{'notrackno'};
	my $useid3=$params{'useid3'};
	my $useid3v1=$params{'useid3v1'};
	my $useid3v2=$params{'useid3v2'};
	my $TPE1=$params{'TPE1'};
	my $TALB=$params{'TALB'};
	# the path is reversed before splitting, so element 0 is the (reversed)
	# name of the current directory and element 1 that of its parent
	my @path_splitted=split(/[\/\\]/, reverse($working_dir));
	$TPE1=reverse($path_splitted[1]) if($params{'TPE1'} eq '..');
	$TPE1=reverse($path_splitted[0]) if($params{'TPE1'} eq '.');
	$TALB=reverse($path_splitted[1]) if($params{'TALB'} eq '..');
	$TALB=reverse($path_splitted[0]) if($params{'TALB'} eq '.');
	my @files;
	my @dirs;
	print "\n".'  '.$working_dir.'/'."\n" if $verbose>1;
	opendir(my $dir_handle, '.') or die $working_dir.": $!";
	# readdir returns undef at the end of the directory
	while(defined(my $entry=readdir($dir_handle))){ # collect relevant files
		if(-d $entry){
			push(@dirs, $entry);
		}elsif($entry=~/$filesRE/){
			push(@files, $entry);
		}else{
			print 'skip: '.$entry."\n" if $verbose>1;
		}
	}
	closedir($dir_handle);
	@dirs=sort(@dirs); # optional
	my %info;
	if($useid3==1 || $useid3v1==1 || $useid3v2==1){
		my @id3_options;
		@id3_options=('ID3v2', 'ID3v1') if $useid3==1;
		@id3_options=('ID3v1') if $useid3v1==1;
		@id3_options=('ID3v2') if $useid3v2==1;
		foreach my $file (@files){ # match old filename with new ones
			# '/' is used as separator; it also works on windows
			$info{$file}=get_info_from_id3($working_dir.'/'.$file, $verbose, $notrackno, @id3_options); # read from (intern) mp3_file_id3_tags
		}
	}else{
		%info=get_info_from_html_file($verbose, $noalbum, $notrackno, $TPE1, $TALB, @files); # read from (extern) html_music_list
	}
	my $album_dir=$TPE1.'/'.$TALB;
	if($sortToDir==1 && keys(%info)>0){
		# the directories are addressed by path instead of chdir-ing into
		# them, so a failure cannot leave the process in a wrong directory
		if($test==0){
			if(not -e $TPE1){
				mkdir($TPE1, 0777) or die 'cannot create directory "'.$TPE1.'": '.$!."\n";
			}
			die 'directory exists already!'."\n" if -e $album_dir;
			mkdir($album_dir, 0777) or die 'cannot create directory "'.$album_dir.'": '.$!."\n";
		}else{
			print 'directory of album already exists!'."\n" if -e $album_dir;
		}
	}
	foreach my $old (sort keys %info){ # renaming
		my $new=$info{$old};
		if(!defined($new) || $new eq ''){
			# no new name is known for this file; renaming it would produce '.mp3'
			print 'skip: '.$old.' (no new name)'."\n" if $verbose>0;
			next;
		}
		print 'old: '.$old."\n".'  -> '.$new.'.mp3'."\n" if $verbose>0;
		next if $test!=0;
		if(!rename($old, $new.'.mp3')){
			warn 'cannot rename "'.$old.'": '.$!."\n";
			next;
		}
		if($sortToDir==1){
			move($new.'.mp3', $album_dir.'/'.$new.'.mp3')
				or warn 'cannot move "'.$new.'.mp3": '.$!."\n";
		}
	}
	if($recursively==1){
		foreach my $subdir (@dirs){
			next if $subdir eq '.' || $subdir eq '..';
			if(!chdir($subdir)){
				warn 'cannot enter "'.$working_dir.'/'.$subdir.'": '.$!."\n";
				next;
			}
			search_dir($working_dir.'/'.$subdir, %params);
			# return by path; '..' would lead elsewhere after entering a link
			chdir($working_dir) or die $working_dir.": $!";
		}
	}
}
444 
# Entry point: checks the given command line arguments, processes the
# current directory and changes back to it afterwards.
#
# Parameters: the command line arguments
sub rename_music_files{
	my @cli_args=@_;
	my %options=syntaxCheck(@cli_args); # dies on wrong syntax
	my $start_dir=cwd();
	search_dir($start_dir, %options);
	chdir($start_dir);
}

# run with the arguments of the command line
rename_music_files(@ARGV);