#!/usr/bin/python
# -*- coding: ISO-8859-1 -*-

# Copyright (C) 2007 Milo Casagrande <milo@ubuntu.com>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA, 02110-1301 USA.

'''
Programma per la conversione della newsletter italiana in puro
formato testo, da usare per spedire il contenuto della newsletter
via mail.
'''

import sys, re, xmlrpclib
import codecs
import string

# Every alphabetic character known to the `string` module
# (string.letters exists in Python 2 only).
alpha = string.letters

# Program version, shown in the start-up banner.
PROGVER = "0.0.3"

WIKIBASE = "http://wiki.ubuntu-it.org/" # URL of the Italian wiki
WIKIWORLD = "http://wiki.ubuntu.com/"   # URL of the international wiki
NEWSBASE = "NewsletterItaliana/"        # page-name prefix of the newsletter
BREAK = "[[BR]]\n"                      # wiki line-break macro followed by a newline
CATEGORY = "CategoryComunita"           # category tag; lines starting with it are dropped
TYPE = "<type 'dict'>"                  # what str(type({})) yields under Python 2

ANNO = ""    # newsletter year (filled in by main())
NUMERO = ""  # newsletter number (filled in by main())


def get_newsletter():
    """
    Fetch the newsletter page from the Italian wiki and convert it.

    The page name is NEWSBASE + ANNO + "." + NUMERO and is requested through
    the XML-RPC interface of the wiki (WIKIBASE + "?action=xmlrpc2").

    When the server answers with a struct (a dict) instead of the page text,
    the page is treated as missing: an error is printed and the program exits
    with status 2.  Otherwise the text is handed to read_newsletter().
    """
    wiki = xmlrpclib.ServerProxy(WIKIBASE + "?action=xmlrpc2")
    pagina = NEWSBASE + ANNO + "." + NUMERO

    pagedata = wiki.getPage(pagina)

    # isinstance() rather than comparing str(type(...)) with a hard-coded
    # repr: the textual form of a type differs between Python versions and
    # never matches for dict subclasses.
    if isinstance(pagedata, dict):
        print("*** Errore: la pagina non esiste!")
        sys.exit(2)

    read_newsletter(pagedata)

def read_newsletter(paginawiki):
    """
    Convert the wiki text of the newsletter into a plain-text file.

    The page is first saved, UTF-8 encoded, to /tmp/<ANNO><NUMERO>.txt and
    then read back line by line.  Every line goes through check_string();
    the lines it returns are written to <ANNO><NUMERO>.txt in the current
    directory, the ones for which it returns None are skipped.

    Prints an error and exits with status 2 when a file cannot be opened.

    @paginawiki: string with the page retrieved from the wiki
    """
    inputfile = "/tmp/" + ANNO + NUMERO + ".txt"
    outputfile = ANNO + NUMERO + ".txt"

    # NOTE(review): the temporary file has a predictable name in /tmp;
    # consider the tempfile module if this ever runs on a shared machine.
    try:
        # 'w' (the former 'wr' is not a valid open() mode)
        infile = codecs.open(inputfile, 'w', 'utf-8')
    except IOError:
        print("*** Errore nell'aprire il file %s" % inputfile)
        sys.exit(2)

    # store the page UTF-8 encoded; the file is closed even if write() fails
    with infile:
        infile.write(paginawiki)

    try:
        infile = open(inputfile, 'r')
    except IOError:
        print("*** Errore nell'aprire il file %s" % inputfile)
        sys.exit(2)

    # "with" guarantees both files are closed on every exit path, including
    # the sys.exit() below and an exception raised by check_string()
    with infile:
        try:
            outfile = open(outputfile, 'w')
        except IOError:
            print("*** Errore nella'aprire il file %s" % outputfile)
            sys.exit(2)

        with outfile:
            kept = []
            for line in infile:
                converted = check_string(line)
                if converted is not None:
                    kept.append(converted)

            # single write of the whole converted text
            outfile.write("".join(kept))

    print("Newsletter creata nel file %s" % outputfile)

def _format_link(label_parts, url):
    """Return the plain-text form of a link: every label part followed by a
    blank, then the URL as "(url )"."""
    return "".join(part + " " for part in label_parts) + "(" + url + " )"


def _external_link(link):
    """Render 'URL some text' (brackets already removed) as 'some text (URL )'."""
    words = link.split()
    if not words:
        # brackets around blanks only: nothing to convert, leave it untouched
        return None
    return _format_link(words[1:], words[0])


def _named_wiki_link(link):
    """Render ':Page:Name' as 'Name (<WIKIBASE>Page )'."""
    parts = link.split(":")
    return _format_link(parts[2:], WIKIBASE + parts[1])


def _plain_wiki_link(link):
    """Render ':Page' as 'Page (<WIKIBASE>Page )'."""
    parts = link.split(":")
    return _format_link(parts[1:], WIKIBASE + parts[1])


def _interwiki_link(link):
    """Render 'wiki:Ubuntu/Page Name' as 'Name (<WIKIWORLD>Page )'."""
    words = link.split()
    # keep everything after the "wiki:Ubuntu/" prefix, so that sub-pages
    # (Page/SubPage) are not cut off from the URL
    page = words[0].split("/", 1)[1]
    return _format_link(words[1:], WIKIWORLD + page)


def _rewrite_links(text, pattern, render):
    """Replace every match of *pattern* in *text* with render(match without
    square brackets); a match is left alone when render() returns None."""
    found = re.findall(pattern, text)
    for original, bare in zip(found, replace_square(found)):
        rendered = render(bare)
        if rendered is not None:
            text = text.replace(original, rendered)
    return text


def check_string(string):
    """
    Convert one line of wiki markup to plain text.

    Returns None when the line has to be dropped (ACL and language
    directives, tables, horizontal rules, attachments, images, the category
    line), the line itself when it is a title, and otherwise the line with
    the markup removed: [[BR]] macros, bold/italic quotes, backticks and the
    various kinds of links, which become "text (URL )".

    @string: the line to analyse
    """
    # lines that are removed altogether
    dropped = (
        r'#acl',                    # permissions line
        r'#LANGUAGE',               # language directive
        r'\|\|<table',              # any table
        r'-{4,6}',                  # horizontal rules
        r'attachment',              # attachments
        r'\[\[Immagine\(.*?\]\]',   # images
    )
    for pattern in dropped:
        if re.search(pattern, string):
            return None

    # titles are kept exactly as they are
    if re.search(r'={1,3}\s.*?\s={1,3}', string):
        return string

    if re.search(r'\[\[BR\]\]\n', string):
        # explicit line break at the end of the line: keep only the newline
        string = string.replace(BREAK, "\n")
    elif re.match(CATEGORY, string):
        return None

    # bold/italic markers: strip only the delimiting runs of quotes, so that
    # apostrophes inside the emphasised text survive
    string = re.sub(r"'{2,5}(.*?)'{2,5}", r"\1", string)

    # backticks
    string = string.replace("`", "")

    # external links: [URL text]
    string = _rewrite_links(
        string, r'\[[^wiki:][^0-9][^\.][^\:][^0-9].*?[^:]\]', _external_link)

    # links inside the wiki, like [:Page:Name]
    string = _rewrite_links(string, r'\[\:+.*?\:+.*?\]', _named_wiki_link)

    # links inside the wiki, like [:Page]
    string = _rewrite_links(string, r'\[\:.*?\]', _plain_wiki_link)

    # links to the international wiki, like [wiki:Ubuntu/Page Name]
    string = _rewrite_links(
        string, r'\[wiki:Ubuntu/.*?\s.*?\]', _interwiki_link)

    return string

def replace_square(exp):
    """
    Strip the square brackets from a list of strings.

    @exp: list of the strings the brackets have to be removed from

    Returns a new list, in the same order, where every "[" and "]" has been
    deleted from each string.
    """
    return [item.replace("[", "").replace("]", "") for item in exp]

def _ask_digits(prompt, min_len, max_len, length_error):
    """Ask the user for a value until it is made of digits only and its
    length is between min_len and max_len (inclusive); return it."""
    while True:
        value = raw_input(prompt).strip()

        if not value.isdigit():
            # empty input, letters, punctuation...
            print("Dato errato!")
        elif not min_len <= len(value) <= max_len:
            print(length_error)
        else:
            return value


def main():
    """
    Interactive entry point.

    Asks for the year (exactly four digits) and the number (one to three
    digits, zero-padded to three) of the newsletter, stores them in the
    module-level ANNO and NUMERO, then starts the conversion with
    get_newsletter().  Invalid input is rejected and asked again.
    """
    global ANNO, NUMERO

    print("Welcome to the Jungle version %s!" % PROGVER)

    ANNO = _ask_digits("Inserisci l'anno della newsletter: ", 4, 4,
                       "Hai inserito un anno sbagliato!")

    numero = _ask_digits("Inserisci il numero della newsletter: ", 1, 3,
                         "Numero della newsletter inesistente!")
    # page names always carry a three-digit number: 7 -> 007, 12 -> 012
    NUMERO = numero.zfill(3)

    get_newsletter()


# Run the interactive conversion only when executed as a script, then
# terminate with a success status.
if __name__ == "__main__":
    main()
    raise SystemExit(0)