#!/usr/local/bin/perl -w
# author: seth
# email: email_software@wg-karlsruhe.de
# description: renames file using html-file xor id3-tags
# todo-list:
#  - support of noalbum, (--noalbum = do not search for a whole album)

use MP3::Tag;
use Cwd;
use strict;
use File::Copy;

# Default location of the html music list; can be overridden with --htmlfile=x.
my $html_file_path='C:/Dokumente und Einstellungen/Administrator/Eigene Dateien/musik-db/music_list.htm';

# Parses the command line.
#   params:  the raw argument list (normally @ARGV); the first element is the
#            regular expression selecting the files, the rest are options.
#   returns: a hash of settings (keys: filesRA, noalbum, notrackno,
#            recursively, test, sortToDir, useid3, useid3v1, useid3v2,
#            verbose, version, TPE1, TALB and any other --Txxx=s frame given).
#   dies:    with the usage text on a syntax error, with the version text if
#            the version was requested.
#   side effect: --htmlfile=x overwrites the file-level $html_file_path.
sub syntaxCheck{
    my @params=@_;
    my @path_parts=split(/[\/\\]/, $0);
    my $prg_name=@path_parts ? $path_parts[-1] : $0;
    my $version='0.95.20070324';
    my $usage='renames file using html-file xor id3-tags

usage: '.$prg_name.' filesRE [options]

  filesRE               files to rename (use regular expressions!)
  -r, --recursively     for searching subdirectories recursively
      --v[erbose]=x     verbose (x=0: no output, x=1: default output, x=2: much output)
  -t, --test            don\'t change anything, just print possible changes
  -s, --sortToDir       makes (and moves files to) directory artist/album/*
  -V, --version         display version and exit.
      --htmlfile=x      use other path than default (x="path to html_music_list")
      --notrackno       do not use trackno in filename
  -u  --useid3          use id3-tags (don\'t search html-file, but id3v1 and id3v2)
      --useid3v1        use id3-tags (don\'t search html-file, but id3v1 only)
      --useid3v2        use id3-tags (don\'t search html-file, but id3v2 only)
  forced info: may be (multiple) chosen from the following case-sensitive list
      --TPE1=s          artist (default s=..)
      --TALB=s          album  (default s=.)
    s is a string
    s=.  means "use current dir name"
    s=.. means "use parent dir name"
    (there are no bands called . or ..)

examples: .../artist/album/'.$prg_name.' "^\\d\\d\\.mp3$"
            renames all files "xx.mp3" to "xx artist - track.mp3"

          .../artist/'.$prg_name.' "\\.mp3$" --TPE1=. --TALB="dick und doof"
            renames all files "*.mp3" to "xx artist - track.mp3"'."\n";
    my $syntax_correct=0;
    my %param_hash=(
        'noalbum'     => 0,
        'notrackno'   => 0,
        'recursively' => 0,
        'test'        => 0,
        'sortToDir'   => 0,
        'useid3'      => 0,
        'useid3v1'    => 0,
        'useid3v2'    => 0,
        'verbose'     => 1,
        'version'     => 0,
        'TPE1'        => '..',
        'TALB'        => '.',
    );
    # Plain on/off switches and the setting each of them turns on.
    my %switch_for=(
        '--noalbum'     => 'noalbum',
        '--notrackno'   => 'notrackno',
        '-r'            => 'recursively',
        '--recursively' => 'recursively',
        '-t'            => 'test',
        '--test'        => 'test',
        '-s'            => 'sortToDir',
        '--sortToDir'   => 'sortToDir',
        '-u'            => 'useid3',
        '--useid3'      => 'useid3',
        '--useid3v1'    => 'useid3v1',
        '--useid3v2'    => 'useid3v2',
        '-V'            => 'version',
        '--version'     => 'version',
    );
    if(defined($params[0])){
        $param_hash{'filesRA'}=shift(@params);
        $syntax_correct=1;
        # if somebody tries -h, --h, --help or /? for help
        $syntax_correct=0 if $param_hash{'filesRA'}=~/^(-?-h|--help|\/\?)$/;
        # Expand bundled short options ("-rt" -> "-r", "-t") up front, so the
        # list is not modified while it is being iterated.
        my @options;
        foreach my $arg (@params){
            if($arg=~/^-[^-]./){
                push(@options, map { '-'.$_ } split(//, substr($arg, 1)));
            }else{
                push(@options, $arg);
            }
        }
        foreach my $option (@options){
            if(exists $switch_for{$option}){
                $param_hash{$switch_for{$option}}=1;
                next;
            }
            if($option=~/^--v(?:erbose)?=([0123])$/){
                $param_hash{'verbose'}=$1;
                next;
            }
            if($option=~/^--htmlfile=(.+)$/){
                $html_file_path=$1;
                next;
            }
            if($option=~/^--(T[A-Z0-9]{3})=(.*)$/){
                $param_hash{$1}=$2;
                next;
            }
            $syntax_correct=0; # unknown option
            last;
        }
    }
    # the three id3 modes exclude each other
    $syntax_correct=0 if 1<$param_hash{'useid3'}+$param_hash{'useid3v1'}+$param_hash{'useid3v2'};
    if($param_hash{'version'} || ($param_hash{'filesRA'} && ($param_hash{'filesRA'} eq '-V' || $param_hash{'filesRA'} eq '--version'))){
        my $version_info='ren_mp3.pl '.$version."\n".'
this program is distributed in the hope that it will be useful,
but without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose.

originally written by seth <email_software@wg-karlsruhe.de>.'."\n";
        die $version_info;
    }
    else{
        $syntax_correct || die $usage;
    }
    return %param_hash;
}

# Builds the new file name (without extension) from the id3 tags of a file.
#   params:  file path, verbosity, notrackno flag, then the list of tag
#            sources handed to MP3::Tag's 'autoinfo' configuration.
#   returns: "NN artist - title" (or "artist - title" with notrackno) with
#            characters that are not allowed in file names replaced by '_';
#            the empty string if the file has no usable tags.
sub get_info_from_id3{
    my $file=shift;
    my $verbose=shift;
    my $notrackno=shift;
    my $mp3=MP3::Tag->new($file);
    if(!defined($mp3)){
        print 'could not read tags of "'.$file."\"\n" if $verbose>0;
        return '';
    }
    $mp3->config('autoinfo', @_);
    # autoinfo() returns title, track, artist, album as its first four values
    my ($title, $track, $artist, $album)=$mp3->autoinfo();
    foreach my $field ($title, $track, $artist, $album){
        $field='' if !defined($field);
    }
    if($title eq '' && $artist eq ''){ # nothing to build a name from
        print 'no tags found in "'.$file."\"\n" if $verbose>0;
        $mp3->close;
        return '';
    }
    $track=~s~/.*~~g;                          # "3/12" -> "3"
    $track='0'.$track if(length($track)==1);
    my $result_filename=$track.' '.$artist.' - '.$title;
    print ' ('.$album.') '.$result_filename if $verbose>1;
    $result_filename=$artist.' - '.$title if $notrackno==1;
    if($mp3->{ID3v2}){
        # keep the year in its own variable; it is only shown, never used in the name
        my $year=$mp3->{ID3v2}->get_frame("TYER");
        print ' ('.$year.')' if $year && $verbose>1;
    }
    $result_filename=subst_disallowed_chars($result_filename, '_');
    print "\n" if $verbose>1;
    $mp3->close;
    return $result_filename;
}

# Reads a whole text file.
#   params:  path, verbosity.
#   returns: the list of lines (line ends included).
#   dies:    if the file cannot be opened.
sub loadFile{
    my $infile=shift;
    my $verbose=shift;
    print 'read file "'.$infile."\"\n" if $verbose>0;
    open(my $in_fh, '<', $infile) || die ' datei "'.$infile.'" nicht gefunden'.' ('.$!.")\n";
    my @lines=<$in_fh>;
    close($in_fh);
    return @lines;
}

# Decodes the html entities used in the music list and replaces / ? * !
# by '_'.
sub dehtml{
    my $s=shift;
    $s=~s/&auml;/ä/g;
    $s=~s/&ouml;/ö/g;
    $s=~s/&uuml;/ü/g;
    $s=~s/&Auml;/Ä/g;
    $s=~s/&Ouml;/Ö/g;
    $s=~s/&Uuml;/Ü/g;
    $s=~s/&szlig;/ß/g;
    $s=~s/&AElig;/Æ/g;
    $s=~s/&amp;/&/g;      # last, so that "&amp;auml;" is not decoded twice
    $s=~s/[\/\?*!]/_/g;
    # that's not all yet!
    return $s;
}

# Turns an (already meta-quoted) string into a regular expression that also
# matches common spelling variants (umlauts vs. ae/oe/ue, & vs. and/und,
# substituted special characters).
sub fuzzy_string{
    my $s=shift;
    $s=~s/[_!]/[_\\\/\\?\\\\*!]{0,2}/g;
    $s=~s/&/(?:&| ?and ?| ?und ?)/g;
    # alternations instead of a character class: they stay correct even if
    # the non-ascii characters are stored as more than one byte
    $s=~s/(?:ä|ae|Æ)/(?:ä|Æ|ae)/g;
    $s=~s/(?:ö|oe)/(?:ö|oe)/g;
    $s=~s/(?:ü|ue)/(?:ü|ue)/g;
    $s=~s/(?:Ä|Ae)/(?:Ä|ae)/g;
    $s=~s/(?:Ö|Oe)/(?:Ö|oe)/g;
    $s=~s/(?:Ü|Ue)/(?:Ü|ue)/g;
    $s=~s/(?:ß|ss)/(?:ß|ss)/g;
    # that's not all yet!
    return $s;
}

# Quotes {}[]()^$.|*+?\ (deliberately not quotemeta: '_' and '!' must stay
# unquoted for fuzzy_string).
sub quote_meta_chars{
    my $s=shift;
    $s=~s/([\{\}\[\]\(\)\^\$\.\|\*\+\?\\])/\\$1/g;
    return $s;
}

# Substitutes \/:*?"<>| with $char.
sub subst_disallowed_chars{
    my $str=shift;
    my $char=shift;
    $str=~s/[\\\/:*\?"<>\|]/$char/g;
    return $str;
}

# Deletes a trailing extension of up to five characters, e.g. '.mp3'.
sub cut_file_extension{
    my $s=shift;
    my $pt_pos=index(reverse($s),'.'); # distance of the last '.' from the end
    if($pt_pos>-1 && $pt_pos<6){
        $s=substr($s,0,length($s)-$pt_pos-1);
    }
    return $s;
}

# Removes the first occurrence of $needle from $haystack.
sub subtract_str{
    my $haystack=shift;
    my $needle=shift;
    my $pos=index($haystack, $needle);
    $haystack=substr($haystack, 0, $pos).substr($haystack, $pos+length($needle)) if($pos>-1);
    return $haystack;
}

# Searches the html music list for the album and links the given files with
# the track names found there.
#   params:  verbosity, noalbum flag, notrackno flag, artist (TPE1),
#            album (TALB), then the list of file names.
#   returns: a hash "old file name" => "new file name without extension";
#            a value may be the empty string if an association was revoked.
#   dies:    if noalbum is set (not implemented) or the list cannot be read.
sub get_info_from_html_file{
    my $verbose=shift;
    my $noalbum=shift;
    my $notrackno=shift;
    my $TPE1=shift;
    my $TALB=shift;
    my @files=@_;
    my %info;
    die 'not implemented yet'."\n" if $noalbum==1;

    # every non-alphanumeric character of artist/album may be anything similar
    my $TPE1_fuzzy=$TPE1;
    my $TALB_fuzzy=$TALB;
    $TPE1_fuzzy=~s/[^a-zA-Z0-9]/[^a-zA-Z0-9]/g;
    $TALB_fuzzy=~s/[^a-zA-Z0-9]/[^a-zA-Z0-9]/g;

    my @html_lines=loadFile($html_file_path, $verbose);
    print 'a name=([^>]+>){1,2}'.$TPE1_fuzzy.' - '.$TALB_fuzzy."\n" if $verbose>2;

    # cut begin: drop everything up to and including the album headline
    my $album_found=0;
    while(@html_lines){
        if(dehtml(shift(@html_lines))=~/a name=([^>]+>){1,2}${TPE1_fuzzy} - ${TALB_fuzzy}/i){
            $album_found=1;
            last;
        }
    }
    if(!$album_found){
        print 'not found in html-file'."\n" if $verbose>0;
        return %info;
    }

    # Collect and reformat the track lines ("\tNN - title<br>") that follow
    # the headline. "CDn<br>" lines are skipped; any other line ends the album.
    my @html_file_tracknames;
    foreach my $line (@html_lines){
        if($line=~/^\t\d\d\s-\s/){
            my $name=subtract_str($line, ' (ianacd)');
            $name=~s/\r?\n$/\n/; # same line length for CRLF and LF files
            # "\tNN - title<br>\n" -> "NN artist - title"
            $name=dehtml(substr($name, 1, 2)." $TPE1".substr($name, 3, length($name)-8));
            push(@html_file_tracknames, subst_disallowed_chars($name, '_'));
        }else{
            last if not($line=~/^\tCD\d<br>/);
        }
    }
    if($verbose>1){
        print 'found: '."\n ".scalar(@html_file_tracknames).' tracks in htmlfile'."\n";
        print ' '.scalar(@files).' files in dir'."\n";
    }

    my $temp_no_track;
    my $temp_file_trackname;
    my $html_file_trackname;
    my $html_file_trackname_quoted;
    # $track_no_bucket[n] is the index (in @files) of the file linked with
    # track number n, or -1 while the number is free; slot 0 is never free.
    my @track_no_bucket=(1, (-1) x scalar(@html_file_tracknames));
    # NOTE(review): a file that already owns its track number is examined
    # again for every later track, which repeats the "mixed up" message below.
    for(my $i=0;$i<@html_file_tracknames;++$i){ # loop over all tracknames (found in html_file)
        $html_file_trackname=substr($html_file_tracknames[$i],6+length($TPE1)); # cut /\d\d $artist - /
        $html_file_trackname_quoted=quote_meta_chars($html_file_trackname);
        $html_file_trackname_quoted=fuzzy_string($html_file_trackname_quoted);
        for(my $j=0;$j<@files;++$j){ # loop over all filenames
            # identification using 2-digit number in filename; album/artist
            # are quoted because they may contain regex meta characters
            if($files[$j]=~/^(?:\Q$TALB\E|\Q$TPE1\E)?[-_ ]{0,3}(\d\d)\D/){
                my $digits=$1;
                $temp_no_track=$digits+0; # extract this tracknumber
                if(defined($track_no_bucket[$temp_no_track]) && $track_no_bucket[$temp_no_track]==-1){ # if tracknumber still available
                    $track_no_bucket[$temp_no_track]=$j; # occupy tracknumber
                    print 'tracing: '.$digits.' ; '.$temp_no_track.' ; ' if $verbose>2;
                    $info{$files[$j]}=$html_file_tracknames[$temp_no_track-1]; # link file with track_name (from html_file)
                    print $files[$j].' ; '.$info{$files[$j]}."\n" if $verbose>2;
                    last;
                }else{ # if tracknumber occupied already
                    print 'maybe there will occur an error. mixed up filenames or something...'."\n" if $verbose>0;
                }
            } # now identification using parts of name
            $temp_file_trackname=quote_meta_chars(cut_file_extension($files[$j]));
            $temp_file_trackname=fuzzy_string($temp_file_trackname);

            if($files[$j]=~/$html_file_trackname_quoted/i || $html_file_trackname=~/$temp_file_trackname/i){ # (filename similar to trackname)?
                $temp_no_track=substr($html_file_tracknames[$i],0,2)+0;
                if(defined($track_no_bucket[$temp_no_track]) && $track_no_bucket[$temp_no_track]==-1){ # if tracknumber still available
                    $track_no_bucket[$temp_no_track]=$j; # occupy tracknumber
                    $info{$files[$j]}=$html_file_tracknames[$i]; # link file with track_name (from html_file)
                    last;
                }else{ # if tracknumber occupied already
                    print 'maybe there will occur an error. mixed up filenames or something...'."\n" if $verbose>0;
                    if($verbose>1){
                        print ' track_no ='.substr($html_file_tracknames[$i],0,2)."\n";
                        print ' html_trackname ='.$html_file_trackname."\n";
                        print ' file_trackname ='.$files[$j]."\n";
                        print ' in conflict with '.$files[$track_no_bucket[$temp_no_track]]." (hitherto existing)\n";
                    }
                    if($files[$j]=~/$html_file_trackname_quoted/i){
                        $temp_file_trackname=subtract_str(cut_file_extension($files[$j]), $TPE1);
                        $temp_file_trackname=substr($temp_file_trackname, 3) if(substr($temp_file_trackname, 0, 3) eq ' - ');
                        if($temp_file_trackname eq $html_file_trackname){
                            # exact name match wins: take the name over from the hitherto linked file
                            $info{$files[$j]}=$info{$files[$track_no_bucket[$temp_no_track]]};
                            $info{$files[$track_no_bucket[$temp_no_track]]}='';
                            my $swap=$files[$track_no_bucket[$temp_no_track]];
                            $files[$track_no_bucket[$temp_no_track]]=$files[$j];
                            $files[$j]=$swap;
                            # the bucket keeps its index; the swap above moved the file there
                            print ' ...tried to correct that' if $verbose>0;
                            print ' by changing association to '.$files[$track_no_bucket[$temp_no_track]] if $verbose>1;
                            print ".\n" if $verbose>0;
                        }else{
                            print ' nothing changed'."\n" if $verbose>1;
                        }
                    }
                }
            }
        }
    }
    if($verbose>0){
        my $i=0;
        foreach my $linked_file (@track_no_bucket){
            print 'trackname of track #'.$i.' not found!'."\n" if $linked_file==-1;
            ++$i;
        }
    }
    # The track number is needed for the matching above, so it is removed
    # only now.
    if($notrackno==1){
        foreach my $file (keys %info){
            $info{$file}=~s/^\d\d //;
        }
    }
    return %info;
}

# Renames the matching files of the current directory (which has to be
# $working_dir) and, if requested, of its subdirectories.
#   params:  absolute path of the current directory, then the settings hash
#            as returned by syntaxCheck.
#   dies:    if the directory cannot be read, the file pattern is not a valid
#            regular expression, or the artist/album directory cannot be
#            created (or the album directory exists already).
sub search_dir{
    my $working_dir=shift;
    my %params=@_;
    my $filesRE=$params{'filesRA'};
    my $recursively=$params{'recursively'};
    my $verbose=$params{'verbose'};
    my $test=$params{'test'};
    my $sortToDir=$params{'sortToDir'};
    my $noalbum=$params{'noalbum'};
    my $notrackno=$params{'notrackno'};
    my $useid3=$params{'useid3'};
    my $useid3v1=$params{'useid3v1'};
    my $useid3v2=$params{'useid3v2'};
    my $TPE1=$params{'TPE1'};
    my $TALB=$params{'TALB'};

    # '.' and '..' stand for the name of the current and the parent directory
    my @path_parts=split(/[\/\\]/, $working_dir);
    my $current_dir_name=@path_parts>0 ? $path_parts[-1] : '';
    my $parent_dir_name=@path_parts>1 ? $path_parts[-2] : '';
    $TPE1=$parent_dir_name  if($params{'TPE1'} eq '..');
    $TPE1=$current_dir_name if($params{'TPE1'} eq '.');
    $TALB=$parent_dir_name  if($params{'TALB'} eq '..');
    $TALB=$current_dir_name if($params{'TALB'} eq '.');

    my $files_qr=eval { qr/$filesRE/ };
    die 'invalid regular expression "'.$filesRE."\"\n" if !defined($files_qr);

    my @files;
    my @dirs;
    print "\n".' '.$working_dir.'/'."\n" if $verbose>1;
    opendir(my $dir_handle, ".") || die $working_dir.": $!";
    while(defined(my $entry=readdir($dir_handle))){ # collect relevant files
        if(-d $entry){
            push(@dirs, $entry);
        }elsif($entry=~$files_qr){
            push(@files, $entry);
        }else{
            print 'skip: '.$entry."\n" if $verbose>1;
        }
    }
    closedir($dir_handle);
    @dirs=sort(@dirs); # optional

    my %info;
    if($useid3==1 || $useid3v1==1 || $useid3v2==1){
        my @id3_options;
        @id3_options=('ID3v2', 'ID3v1') if $useid3==1;
        @id3_options=('ID3v1') if $useid3v1==1;
        @id3_options=('ID3v2') if $useid3v2==1;
        foreach my $file (@files){ # match old filename with new ones
            $info{$file}=get_info_from_id3($working_dir.'/'.$file, $verbose, $notrackno, @id3_options); # read from (intern) mp3_file_id3_tags
        }
    }else{
        %info=get_info_from_html_file($verbose, $noalbum, $notrackno, $TPE1, $TALB, @files); # read from (extern) html_music_list
    }

    my $album_dir=$TPE1.'/'.$TALB;
    if($sortToDir==1 && keys(%info)>0){
        if($test==0){
            if(not -e $TPE1){
                mkdir($TPE1, 0777) || die 'cannot create directory "'.$TPE1.'": '.$!."\n";
            }
            die 'directory exists already!'."\n" if -e $album_dir;
            mkdir($album_dir, 0777) || die 'cannot create directory "'.$album_dir.'": '.$!."\n";
        }else{
            print 'directory of album already exists!'."\n" if -e $album_dir;
        }
    }

    foreach my $old (sort keys %info){ # renaming
        my $new=$info{$old};
        if(!defined($new) || $new eq ''){ # no (or a revoked) association
            print 'skip: '.$old."\n" if $verbose>1;
            next;
        }
        my $target=$new.'.mp3';
        print 'old: '.$old."\n".' -> '.$target."\n" if $verbose>0;
        next if $test!=0;
        # never overwrite another file (a change of case only is a plain rename)
        if(lc($old) ne lc($target) && -e $target){
            warn 'file "'.$target.'" exists already, "'.$old.'" not renamed'."\n";
            next;
        }
        if(!rename($old, $target)){
            warn 'cannot rename "'.$old.'": '.$!."\n";
            next;
        }
        if($sortToDir==1){
            move($target, $album_dir.'/'.$target) || warn 'cannot move "'.$target.'": '.$!."\n";
        }
    }

    if($recursively==1){
        foreach my $dir (@dirs){
            next if $dir eq '.' || $dir eq '..';
            if(!chdir($dir)){
                warn 'cannot enter directory "'.$working_dir.'/'.$dir.'": '.$!."\n";
                next;
            }
            search_dir($working_dir.'/'.$dir, %params);
            # absolute path instead of '..': also right after following a link
            chdir($working_dir) || die 'cannot return to directory "'.$working_dir.'": '.$!."\n";
        }
    }
}

# Entry point: checks the arguments, processes the current directory and
# restores the working directory afterwards.
sub rename_music_files{
    my %params=syntaxCheck(@_);
    my $working_dir=cwd;
    search_dir($working_dir, %params);
    chdir($working_dir);
}

rename_music_files(@ARGV);