#!/usr/bin/env python
### mp3order - bring the mp3s of an album into the correct sequence
### Copyright (C) 2000  Arne Zellentin <arne@unix-ag.org>

### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.

### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.

### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software
### Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

### If you want to comment on this program, contact me: arne@unix-ag.org ###
### Visit the homepage: http://www.home.unix-ag.org/arne/jack/

### see CHANGELOG for recent changes in this program
### see TODO if you want to see what needs to be implemented

prog_version = "0.02"
prog_name = "mp3order"
DEBUG = 1

from string import split, replace, upper, strip
import os, sys, posix, re
from os import path

def get_cddb(s):
    """Extract the album title and the track titles from a CDDB HTML page.

    s is the complete text of the page.  The album name is the content
    of the <title> element; the tracks are the <LI> items found inside
    the <OL> element.  All tag names are matched case-insensitively.

    Returns a list whose first element is the album title, followed by
    one entry per track.  Track entries have their markup removed, "/"
    replaced by "-" and the escapes "&#038;" / "&#043;" turned back into
    "&" / "+".

    Raises ValueError when the page has no <title> or no <OL> element.
    """
    flags = re.M | re.I | re.S
    # get the title:
    m = re.match(r".*<title>(.*)</title>.*", s, flags)
    if m is None:
        raise ValueError("cddb data contains no <title> element")
    album = m.group(1).strip()
    # now cut out the ordered list:
    m = re.match(r".*<OL>(.*)</OL>.*", s, flags)
    if m is None:
        raise ValueError("cddb data contains no <OL> track list")
    # split the tracks delimited by the list-item tag; whatever precedes
    # the first tag is not a track and is dropped
    items = re.compile("<LI>", re.I).split(m.group(1).strip())[1:]
    result = [album]
    for item in items:
        # remove all html tags
        title = re.sub(r"<[^>]*>", "", item).strip()
        # we want no "/"s in filenames
        title = title.replace("/", "-")
        # unescape special chars
        for escaped, plain in (("&#038;", "&"), ("&#043;", "+")):
            title = title.replace(escaped, plain)
        result.append(title)
    return result

def magic(s):
    """Guess the toc format of the text s.

    Returns "cddb" when the text contains "cddb.com" anywhere, and the
    empty string otherwise.
    """
    pattern = r".*cddb\.com.*"
    if re.match(pattern, s, re.M | re.S) is None:
        return ""
    return "cddb"

def defile(f):
    """Turn mp3 file names into bare track titles.

    f is a sequence of file names.  A leading two-digit number followed
    by a dot ("NN.") is removed, then the ".mp3" extension, which is
    recognized regardless of case.

    Returns the list of stripped names in the order of f.  When a name
    does not end in ".mp3" a message is printed and the program exits
    with status 1.
    """
    titles = []
    for name in f:
        if re.match(r"^[0-9][0-9]\.", name):
            name = name[3:]
        if name[-4:].upper() != ".MP3":
            print("not an MP3: %s" % name)
            sys.exit(1)
        titles.append(name[:-4])
    return titles

def enfile(f, num):
    """Return the target file name "NN.<f>.mp3" for the title f."""
    # NOTE(review): the parameter num is never read.  The number put into
    # the name comes from the module-level variable j, so the result is
    # only right while a global j holds the wanted track number at call
    # time.  Before changing the format to use num, make sure every
    # caller really passes the track number as num.
    x = "%02i.%s.mp3" % (j, f)
    return x

def simplify(l, start=0):
    """Reduce titles to a canonical form so that they can be compared.

    The first start entries of l are copied unchanged.  Each later entry
    is cut off at its first "(", loses every blank and every one of the
    characters & - _ ' : ( ) . and is converted to upper case.

    Returns a new list; l itself is left untouched.
    """
    result = l[:start]
    for title in l[start:]:
        # drop parenthesized remarks such as "(live)"
        title = title.split("(")[0]
        for kill in (" ", "&", "-", "_", "'", ":", "(", ")", "."):
            title = title.replace(kill, "")
        result.append(title.upper())
    return result

def cmp2(x, y):
    """Compare two sequences by their first element only.

    Returns 0 when the first elements are equal, 1 when that of x is
    greater, and -1 otherwise.
    """
    left, right = x[0], y[0]
    if left == right:
        return 0
    return 1 if left > right else -1

def search_hard(what, list, mask, max_lendiff = 4, min_char_matching = 4, min_matching = 0.5):
    """Find an entry of list that roughly matches the string what.

    Entries whose position is flagged in mask are skipped.  The others
    are tried in order of increasing length difference to what; entries
    differing by more than max_lendiff characters are not tried at all.

    An entry is only compared when what is at least as long as the
    entry.  what is then compared position by position with the entry,
    first as it is and then with a single character removed at each
    position in turn.  The entry is accepted as soon as one comparison
    yields at least min_char_matching equal characters and the equal
    characters make up at least the fraction min_matching of the
    entry's length.

    Returns the index of the accepted entry in list, or -1 if there is
    none.
    """
    what_len = len(what)
    # candidates: [(size_diff, num in list), ... ]
    candidates = []
    for idx in range(len(list)):
        if mask[idx]:
            continue
        candidates.append((abs(what_len - len(list[idx])), idx))
    if not candidates:
        return -1
    # ascending by size difference; equal differences keep list order
    candidates.sort()
    if DEBUG > 1: print(candidates)
    for lendiff, idx in candidates:
        if lendiff > max_lendiff:
            # sorted ascending, so every remaining candidate is too far off
            break
        candidate = list[idx]
        check = len(candidate)
        if what_len < check or check == 0:
            # only a "what" that is at least as long as the candidate is
            # handled; an empty candidate offers nothing to compare
            continue
        # pass 1: count similarities
        # each variant is (text, number of leading characters to compare)
        variants = [(what, check)]
        for k in range(check - 1):
            variants.append((what[:k] + what[k + 1:], check - 1))
        for variant, span in variants:
            sim = 0
            for pos in range(span):
                if variant[pos] == candidate[pos]:
                    sim = sim + 1
            prob = float(sim) / float(check)
            if sim >= min_char_matching and prob >= min_matching:
                if DEBUG > 2: print("%s %s %s" % (candidate, prob, (lendiff, idx)))
                return idx
            if DEBUG > 2: print("sorry. %s" % prob)
    return -1

# parse argv
#
# Options -f, -F and -- take the following argument as their operand.
# Every other argument is treated as an mp3 file name; names that do not
# exist are silently ignored.

tocfile = ""
prefer_tocinfo = 1
argv = sys.argv[1:]
files = []
read_stdin = 0
force_type = ""
while argv:
    i = argv.pop(0)
    if i in ("-f", "-F", "--"):
        if not argv:
            print("%s: %s needs an argument" % (prog_name, i))
            sys.exit(1)
        operand = argv.pop(0)
        if i == "-f":
            tocfile = operand
        elif i == "-F":
            force_type = operand
        else:
            # "--": accept the next argument as a file without checking it
            files.append(operand)
    elif i == "-i":
        read_stdin = 1
    elif i == "-t":
        prefer_tocinfo = 1
    elif i == "-o":
        prefer_tocinfo = 0
    elif path.exists(i):
        files.append(i)

# without any mp3 file there is nothing to do: explain the options and quit
if not files:
    print("USAGE: " + prog_name + " [-o|-t] [-i|-f <tocfile>] [-F <format>] [-- <file>] files...")
    print("    -t : prefer tocfile names (default)")
    print("    -o : prefer original names")
    print("    -i : read toc from stdin")
    print("    -f : read toc from specified file")
    print("    -F : force input format to <format> (currently only cddb)")
    sys.exit(1)

# Build toc: element 0 is the album name, the rest are the track titles
# in album order.
if read_stdin:
    print("give raw track titles in the correct order, end with blank line")
    toc = ["unknown album",]
    while 1:
        try:
            l = raw_input().strip()
        except EOFError:
            # end of input ends the list just like a blank line
            break
        if not l:
            break
        toc.append(l)
else:
    if not tocfile:
        print("no toc given, use -i or -f <tocfile>")
        sys.exit(1)
    f = open(tocfile)
    try:
        s = f.read()
    finally:
        f.close()
    # a format forced with -F overrides the detected one
    toc_type = force_type or magic(s)
    if toc_type == "cddb":
        if DEBUG: print("%s is CDDB" % tocfile)
        toc = get_cddb(s)
    else:
        print("unknown toc: %s" % tocfile)
        sys.exit(3)


# files2: file names without number prefix and extension
files2 = defile(files)

# files3 / toc2: canonical forms used for the comparison
files3 = simplify(files2)
if DEBUG > 1: print(files3)

toc2 = simplify(toc, 1)
if DEBUG > 1: print(toc2)

# now try to match the files:

# repl[i] is the new name for files[i]; f_stat / t_stat record which
# files and toc entries have been matched (1 = exact match)
repl = []
f_stat = [0,] * len(files)
t_stat = [0,] * len(toc)
for i in range(len(files)):
    ok = 0
    # the toc index has to live in the module-level name j: enfile reads it
    for j in range(1, len(toc)):
        if files3[i] != toc2[j]:
            continue
        if ok:
            # a second toc entry fits the same file
            print("ambigous: %s" % files[i])
            sys.exit(2)
        if prefer_tocinfo:
            x = toc[j]
        else:
            x = files2[i]
        # the file is numbered by its position in the toc, not by its
        # position on the command line
        x = enfile(x, j)
        repl.append(x)
        ok = 1
        f_stat[i] = 1
        t_stat[j] = 1
    if not ok:
        x = "unknown." + files2[i]
        repl.append(x)

# check for missing files:
# every toc entry without an exact match gets a fuzzy search among the
# files that are still unmatched; hits are marked with status 2 (guessed)
missing = 0
# the toc index has to live in the module-level name j: enfile reads it
for j in range(1, len(toc)):
    if t_stat[j] != 0:
        continue
    if DEBUG > 1: print("hard: %s %s" % (toc2[j], files3))
    i = search_hard(toc2[j], files3, f_stat)
    if i < 0:
        print("missing: %s" % toc[j])
        missing = missing + 1
        continue
    t_stat[j] = 2
    f_stat[i] = 2
    if prefer_tocinfo:
        x = toc[j]
    else:
        x = files2[i]
    # number the file by its position in the toc
    x = enfile(x, j)
    repl[i] = x

# check for superfluous files:

# why?

print("")
print("-" * 80)
unrelated = 0
for i in range(len(files)):
    if f_stat[i] == 0:
        print("unrelated file: %s" % files[i])
        unrelated = unrelated + 1
# separator line, only shown when unrelated files were listed
print("#" * 80 * (unrelated > 0))

# renames collects (old name, new name) pairs for the files that change
renames = []
guessed = 0
for i in range(len(files)):
    if not f_stat[i]:
        continue
    print("           <> %s" % files[i])
    unchanged = files[i] == repl[i]
    if f_stat[i] == 2:
        # status 2: the match was found by the fuzzy search
        guessed = guessed + 1
        if unchanged:
            tag = "GUES/nop> "
        else:
            tag = "GUESSED > "
    else:
        if unchanged:
            tag = "no-op   > "
        else:
            tag = "      --> "
    if not unchanged:
        renames.append((files[i], repl[i]))
    print("%s %s" % (tag, repl[i]))
if not renames:
    print("nothing to do.")
    sys.exit(0)

# ask for confirmation, mentioning everything that deserves a second look
question = "Do it?"
if guessed > 0:
    question = question + " [%i guessed]" % guessed
if unrelated > 0:
    question = question + " [%i unrelated]" % unrelated
if missing > 0:
    question = question + " [%i missing]" % missing
x = raw_input(question + " (y/n) ")

if x != "y":
    sys.exit(0)

renamed = 0
for src, dst in renames:
    # os.rename would silently replace an existing file, so a target
    # that already exists (and is not the source itself) is left alone
    if path.exists(dst) and not path.samefile(src, dst):
        print("skipped %s: %s already exists" % (src, dst))
        continue
    os.rename(src, dst)
    renamed = renamed + 1

print("Ok, %i files renamed." % renamed)