#!/usr/bin/python
# -*- coding: UTF-8 -*-

# Copyright (C) 2007 Milo Casagrande &lt;milo@ubuntu.com&gt;
# Copyright (C) 2011 Milo Casagrande &lt;milo@ubuntu.com&gt;
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA, 02110-1301 USA.

'''
Simple script to convert the Italian newsletter into plain text,
stripping all the MoinMoin wiki syntax, so that the newsletter
can be sent via email.
'''
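
# Typical invocation (script and file names here are only illustrative):
#   python newsletter2text.py -i NewsletterItaliana-2011.003.moin -o newsletter.txt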

import sys
import re
import xmlrpclib
import codecs
import string
import os

req_version = (2,6)
cur_version = sys.version_info

# Bail out if the interpreter is older than the required version
if cur_version[:2] < req_version:
    print u"You need to have at least Python 2.6"
    sys.exit(2)
else:
    from optparse import OptionParser

alpha = string.letters

prog_ver = "0.0.4"

# URL of the Italian wiki
wiki_base = "http://wiki.ubuntu-it.org/"

# URL of the International wiki
wiki_world = "http://wiki.ubuntu.com/"

# Base string for the newsletter
new_base = "NewsletterItaliana/"

line_break = "&lt;&lt;BR&gt;&gt;\n"
wiki_category = "CategoryComunita"
dict_type = "&lt;type 'dict'&gt;"

# Default name for the output file
default_output = "newsletter-output.txt"

# Used for the email version
comment = "##"

# The year of the newsletter
year = ""

# The number of the newsletter
number = ""

# XML-RPC stopped working after the upgrade of the Italian wiki
# to version 1.9. Keep this function around in case we have time
# to enable it again.
def get_newsletter():
    """
    Read the newsletter wiki text directly online
    
    Needs XML-RPC enabled on the wiki.
    """
    global new_base, year, number
    
    wiki = xmlrpclib.ServerProxy(wiki_base+"?action=xmlrpc2")

    pagina = new_base + year + "." + number

    pagedata = wiki.getPage(pagina)

    page_type = str(type(pagedata))

    if page_type == dict_type:
        print "*** Error: page does not exist."
        sys.exit(2)
    else:
        read_newsletter(pagedata)    

def read_newsletter(options):
    """
    Open the input file, create the output file and do the parsing
    """
    inputfile = options.inputfile
    
    if options.outputfile != default_output:
        outputfile = os.path.abspath(options.outputfile)
    else:
        # The default output file goes into the user's home directory
        outputfile = os.path.join(os.path.expanduser("~"), options.outputfile)

    try:
        infile = open(inputfile, 'r')
    except IOError, e:
        print "*** Error opening input file %s: %s" % (inputfile, e)
        sys.exit(2)

    try:
        outfile = open(outputfile, 'w')
    except IOError, e:
        print "*** Error opening output file %s: %s" % (outputfile, e)
        sys.exit(2)

    temp = ""
    towrite = ""

    print "Reading newsletter text from %s..." % inputfile
    # Read the input one line at a time and clean each one
    for line in infile:
        towrite = check_string(line)

        if towrite is not None:
            temp += towrite

    print "Writing output file..."

    outfile.write(temp)
    infile.close()
    outfile.close()

    print "Newsletter created in %s." % outputfile

def check_string(string):
    u"""Check the string and return it cleaned from all
    the wiki syntax
    
    @string: the string to analyze
    """
    exp = []
    nexp = []
    nnexp = []

    # Remove the ACL string
    if re.findall('\#acl',string) != []:
        return None
    # Remove the format string
    elif re.findall('\#format', string) != []:
        return None
    # Remove the language string
    elif re.findall('\#LANGUAGE', string) != []:
        return None
    # Remove all tables
    elif re.findall('\|\|\<table', string) != []:
        return None
    # Remove all horizontal rules
    elif re.findall('\-{4,6}', string) != []:
        return None
    # Line for e-mail version is kept
    elif re.findall('##Per la versione in linea', string) != []:
        string = string.replace(comment, "")
    # Remove commented lines
    elif re.findall('^#{2,2}', string) != []:
        return None
    # Remove all attachments
    elif re.findall('attachment', string) != []:
        return None
    # Remove all images
    # TODO should we process the string and keep the link to the image?
    elif re.findall('<<Immagine\(.*?>>', string) != []:
        return None
    # Remove the index macro
    elif re.findall('<<Indice\(?.*?>>', string) != []:
        return None
    # Titles are kept
    elif re.findall('\={1,3}\s.*?\s\={1,3}', string) != []:
        return ("\n") + string
    # Each break/newline is substituted with the real newline
    elif re.findall('<<BR>>\\n', string) != []:
        string = string.replace(line_break, "\n")
    # Remove the category
    elif re.match(wiki_category, string):
        return None

    # Strip emphasis markup: runs of two to five single quotes (bold/italic)
    exp = re.findall('\'{2,5}.*?\'{2,5}', string)

    if exp != []:
        for word in exp:
            nexp.append(word.replace("'", ""))

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    # Remove all back-quotes
    exp = re.findall('\`{1,2}', string)
    
    if exp != []:
        string = string.replace("`", "")

    exp = []
    nexp = []

    # Remove multiple blank lines
    exp = re.findall('^$\\n', string)
   
    if exp:
        for word in exp:
            nexp.append(word.replace("\n", ""))

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    # Remove unneeded exclamation marks (wiki escaping)
    exp = re.findall('\s\!', string)

    if exp:
        string = string.replace("!", "")

    exp = []
    nexp = []

    # Look for all the http links
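    # Illustrative example: "[[http://www.ubuntu-it.org | sito]]"
    # is rewritten as "sito ( http://www.ubuntu-it.org )"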
    exp = re.findall('\[{2,2}http[s]*\:/{2,2}[|:*\w\S]+\s*\|\s*[\#*\(*\)*\:*,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)
    
    if exp != []:
        nnexp = replace_square(exp)
        
        newstring = ""
        
        for word in nnexp:
            splitted = word.split("|")
            for split in splitted[1:]:
                newstring += split + " "
                
            newstring += "( " + splitted[0].strip() + " )"
            nexp.append(newstring)
            newstring = ""
            
        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])
            
    exp = []
    nexp = []
    nnexp = []
    
    # Look for the wiki links
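    # Illustrative example: "[[Installazione | guida]]"
    # is rewritten as "guida ( http://wiki.ubuntu-it.org/Installazione )"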
    exp = re.findall('\[{2,2}(?!http[s]*\:/{2,2})(?!Ubuntu\:)[\w\S\d]+\s*\|\s*[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)
    
    if exp != []:
        nnexp = replace_square(exp)
        
        newstring = ""
        
        for word in nnexp:
            splitted = word.split("|")
            for split in splitted[1:]:
                newstring += split + " "
                
            newstring += "( " + wiki_base + splitted[0].strip() + " )"
            nexp.append(newstring)
            newstring = ""
            
        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    exp = []
    nexp = []
    nnexp = []

    # Link to the international wiki
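    # Illustrative example: "[[Ubuntu:UbuntuWeeklyNewsletter | UWN]]"
    # is rewritten as "UWN ( http://wiki.ubuntu.com/UbuntuWeeklyNewsletter )"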
    exp = re.findall('\[{2,2}(?!http[s]*:/{2,2})Ubuntu\:[\w\S\d]+\s*\|[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)

    if exp != []:
        nnexp = replace_square(exp)

        newstring = ""

        for word in nnexp:
            splitted = word.split("|")
            for split in splitted[1:]:
                newstring += split + " "

            # Separate the real name of the page
            base = splitted[0].split(":")
            
            newstring += "( " + wiki_world + base[1].strip()  + " )"
            nexp.append(newstring)
            newstring = ""
        
        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])    

        
    return string

def replace_square(exp):
    """
    Remove the square brackets from the string
    @exp: the list with the strings to clean
    """
    nsq = []
    nnsq = []
    
    for word in exp:
        nsq.append(word.replace("[[", ""))

    for word in nsq:
        nnsq.append(word.replace("]]", ""))

    return nnsq

def get_newsletter_number():
    """
    Ask interactively for the year and the number of the newsletter
    """
    global year, number

    while True:
        year = raw_input("Please insert the newsletter year: ")

        if any(i in alpha for i in year):
            print "Bad data!"
            continue

        if len(year) != 4:
            print "The year you typed is wrong."
        else:
            break

    while True:
        number = raw_input("Please insert the newsletter number: ")

        if number == "":
            print "Bad data!"
            continue

        if any(i in alpha for i in number):
            print "Bad data!"
            continue

        if len(number) > 3:
            print "Invalid newsletter number."
            continue

        # Pad the number to three digits, e.g. "7" becomes "007"
        number = number.zfill(3)
        break

def define_optionparser():
    usage = "Usage: %prog [option] arg..."
    version = "%prog " + prog_ver
    
    parser = OptionParser(usage=usage, version=version)
    
    parser.add_option("-i", "--input", metavar="FILE", action="store", type="string", dest="inputfile", help="the input file to read")
    parser.add_option("-o", "--output", metavar="FILE", action="store", type="string", dest="outputfile", help="the name of the output file; default value is 'newsletter-out.txt' and will be written in the user dir", default=default_output)

    (options, args) = parser.parse_args()
    
    if len(sys.argv[1:]) == 0 or options.inputfile is None:
        # parser.error() prints the message and exits with status 2
        parser.error("you need to specify the input file.")
    
    return options

def main():
    # Temporarily removed since we cannot download the newsletter directly anymore
    # get_newsletter_number()
    
    options = define_optionparser()
    
    read_newsletter(options)    

if __name__ == "__main__":
    main()
    sys.exit(0)