attachment:newsletter-ml-py2.py di GruppoPromozione/SocialMedia/Newsletter/LineeGuidaEditori

Allegato "newsletter-ml-py2.py"

   1 #!/usr/bin/python
   2 # -*- coding: UTF-8 -*-
   3 
   4 # Copyright (C) 2007 Milo Casagrande <milo@ubuntu.com>
   5 # Copyright (C) 2011 Milo Casagrande <milo@ubuntu.com>
   6 #
   7 # This program is free software; you can redistribute it and/or modify it
   8 # under the terms of the GNU General Public License as published by the Free
   9 # Software Foundation; either version 2 of the License, or (at your option)
  10 # any later version
  11 #
  12 # This program is distributed in the hope that it will be useful, but
  13 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  14 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  15 # more details
  16 #
  17 # You should have received a copy of the GNU General Public License along with
  18 # this program; if not, write to the Free Software Foundation, Inc.,
  19 # 51 Franklin Street, Fifth Floor, Boston, MA, 02110-1301 USA.
  20 
  21 '''
  22 Simple script to convert the Italian newsletter into pure text
  23 cleaning all the MoinMoin wiki syntax, in order to send the newsletter
  24 via email.
  25 '''
  26 
  27 import sys
  28 import re
  29 import xmlrpclib
  30 import codecs
  31 import string
  32 import os
  33 
  34 req_version = (2,6)
  35 cur_version = sys.version_info
  36 
  37 if (cur_version[0] > req_version[0] or 
  38     (cur_version[0] == req_version[0] and cur_version[1] > cur_version[1])):
  39     print u"You need to have Python v.2.6"
  40     sys.exit(2)
  41 else:
  42     from optparse import OptionParser
  43 
  44 alpha = string.letters
  45 
  46 prog_ver = "0.0.4"
  47 
  48 # URL of the Italian wiki
  49 wiki_base = "http://wiki.ubuntu-it.org/"
  50 
  51 # URL of the International wiki
  52 wiki_world = "http://wiki.ubuntu.com/"
  53 
  54 # Base string for the newsletter
  55 new_base = "NewsletterItaliana/"
  56 
  57 line_break = "<<BR>>\n"
  58 wiki_category = "CategoryComunita"
  59 dict_type = "<type 'dict'>"
  60 
  61 # Default name for the output file
  62 default_output = "newsletter-output.txt"
  63 
  64 # Used for the email version
  65 comment = "##"
  66 
  67 # The year of the newsletter
  68 year = ""
  69 
  70 # The number of the newsletter
  71 number = ""
  72 
  73 # XML-RPC after the upgrade to version 1.9 of the Italian wiki
  74 # is not working anymore. Keep the function in case we have time
  75 # to enable it again
  76 def get_newsletter():
  77     """
  78     Read the newsletter wiki text directly online
  79     
  80     Needs XML-RPC enabled on the wiki.
  81     """
  82     global new_base, year, number
  83     
  84     wiki = xmlrpclib.ServerProxy(wiki_base+"?action=xmlrpc2")
  85 
  86     pagina = new_base + year + "." + number
  87 
  88     pagedata = wiki.getPage(pagina)
  89 
  90     page_type = str(type(pagedata))
  91 
  92     if page_type == dict_type:
  93         print "*** Error: page does not exist."
  94         sys.exit(2)
  95     else:
  96         read_newsletter(pagedata)    
  97 
  98 def read_newsletter(options):
  99     """
 100     Open the input file, create the output file and do the parsing
 101     """
 102     inputfile = options.inputfile
 103     
 104     if (options.outputfile != default_output):
 105         outputfile = os.path.abspath(options.outputfile)
 106     else:
 107         outputfile = os.path.expanduser("~" + os.sep + options.outputfile)
 108 
 109     try:
 110         infile = open(inputfile, 'r')
 111     except IOError, e:
 112         print "*** Error opening input file %s" % inputfile
 113         sys.exit(2)
 114 
 115     try:
 116         outfile = open(outputfile, 'w')
 117     except IOError, e:
 118         print "*** Error opening output file %s" % outputfile
 119         sys.exit(2)
 120 
 121     temp = ""
 122     towrite = ""
 123 
 124     print "Reading newsletter text from %s..." % inputfile
 125     while True:
 126         string = infile.readline()
 127  
 128         if string == "": # EOF
 129             False
 130             break
 131         
 132         towrite = check_string(string)
 133         
 134         if towrite == None:
 135             pass
 136         else:
 137             temp += towrite
 138 
 139     print "Writing output file..."
 140 
 141     outfile.write(temp)
 142     infile.close()
 143     outfile.close()
 144 
 145     print "Newsletter created in %s." % outputfile
 146 
 147 def check_string(string):
 148     u"""Check the string and return it cleaned from all
 149     the wiki syntax
 150     
 151     @string: the string to analyze
 152     """
 153     exp = []
 154     nexp = []
 155     nnexp = []
 156 
 157     # Remove the ACL string
 158     if re.findall('\#acl',string) != []:
 159         return None
 160     # Remove the format string
 161     elif re.findall('\#format', string) != []:
 162         return None
 163     # Remove the language string
 164     elif re.findall('\#LANGUAGE', string) != []:
 165         return None
 166     # Remove all tables
 167     elif re.findall('\|\|\<table', string) != []:
 168         return None
 169     # Remove all horizontal rules
 170     elif re.findall('\-{4,6}', string) != []:
 171         return None
 172     # Line for e-mail version is kept
 173     elif re.findall('##Per la versione in linea', string) != []:
 174         string = string.replace(comment, "")
 175     # Remove commented lines
 176     elif re.findall('^#{2,2}', string) != []:
 177         return None
 178     # Remove all attachments
 179     elif re.findall('attachment', string) != []:
 180         return None
 181     # Remove all images
 182     # TODO should we process the string and keep the link to the image?
 183     elif re.findall('<<Immagine\(.*?>>', string) != []:
 184         return None
 185     # Remove the index macro
 186     elif re.findall('<<Indice\(?.*?>>', string) != []:
 187         return None
 188     # Titles are kept
 189     elif re.findall('\={1,3}\s.*?\s\={1,3}', string) != []:
 190         return ("\n") + string
 191     # Each break/newline is substituted with the real newline
 192     elif re.findall('<<BR>>\\n', string) != []:
 193         string = string.replace(line_break, "\n")
 194     # Remove the category
 195     elif re.match(wiki_category, string):
 196         return None
 197 
 198     # Remove all single quotes from the string, they have to be at least two
 199     exp = re.findall('\'{2,5}.*?\'{2,5}', string)
 200 
 201     if exp != []:
 202         for word in exp:
 203             nexp.append(word.replace("'", ""))
 204 
 205         for i in range(len(exp)):
 206             string = string.replace(exp[i], nexp[i])
 207 
 208     # Remove all back-quotes
 209     exp = re.findall('\`{1,2}', string)
 210     
 211     if exp != []:
 212         string = string.replace("`", "")
 213 
 214     exp = []
 215     nexp = []
 216 
 217     # Remove multiple blank lines
 218     exp = re.findall('^$\\n', string)
 219    
 220     if exp:
 221         for word in exp:
 222             nexp.append(word.replace("\n", ""))
 223 
 224         for i in range(len(exp)):
 225             string = string.replace(exp[i], nexp[i])
 226 
 227     # Remove unuseful exclamation marks
 228     exp = re.findall('\s\!', string)
 229 
 230     if exp:
 231         string = string.replace("!", "")
 232 
 233     exp = []
 234     nexp = []
 235 
 236     # Look for all the http links
 237     exp = re.findall('\[{2,2}http[s]*\:/{2,2}[|:*\w\S]+\s*\|\s*[\#*\(*\)*\:*,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)
 238     
 239     if exp != []:
 240         nnexp = replace_square(exp)
 241         
 242         newstring = ""
 243         
 244         for word in nnexp:
 245             splitted = word.split("|")
 246             for split in splitted[1:]:
 247                 newstring += split + " "
 248                 
 249             newstring += "( " + splitted[0].strip() + " )"
 250             nexp.append(newstring)
 251             newstring = ""
 252             
 253         for i in range(len(exp)):
 254             string = string.replace(exp[i], nexp[i])
 255             
 256     exp = []
 257     nexp = []
 258     nnexp = []
 259     
 260     # Look for the wiki links
 261     exp = re.findall('\[{2,2}(?!http[s]*\:/{2,2})(?!Ubuntu\:)[\w\S\d]+\s*\|\s*[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)
 262     
 263     if exp != []:
 264         nnexp = replace_square(exp)
 265         
 266         newstring = ""
 267         
 268         for word in nnexp:
 269             splitted = word.split("|")
 270             for split in splitted[1:]:
 271                 newstring += split + " "
 272                 
 273             newstring += "( " + wiki_base + splitted[0].strip() + " )"
 274             nexp.append(newstring)
 275             newstring = ""
 276             
 277         for i in range(len(exp)):
 278             string = string.replace(exp[i], nexp[i])
 279 
 280     exp = []
 281     nexp = []
 282     nnexp = []
 283 
 284     # Link to the international wiki
 285     exp = re.findall('\[{2,2}(?!http[s]*:/{2,2})Ubuntu\:[\w\S\d]+\s*\|[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)
 286 
 287     if exp != []:
 288         nnexp = replace_square(exp)
 289 
 290         newstring = ""
 291 
 292         for word in nnexp:
 293             splitted = word.split("|")
 294             for split in splitted[1:]:
 295                 newstring += split + " "
 296 
 297             # Separate the real name of the page
 298             base = splitted[0].split(":")
 299             
 300             newstring += "( " + wiki_world + base[1].strip()  + " )"
 301             nexp.append(newstring)
 302             newstring = ""
 303         
 304         for i in range(len(exp)):
 305             string = string.replace(exp[i], nexp[i])    
 306 
 307         
 308     return string
 309 
 310 def replace_square(exp):
 311     """
 312     Remove the square brackets from the string
 313     @exp: the list with the strings to clean
 314     """
 315     nsq = []
 316     nnsq = []
 317     
 318     for word in exp:
 319         nsq.append(word.replace("[[", ""))
 320 
 321     for word in nsq:
 322         nnsq.append(word.replace("]]", ""))
 323 
 324     return nnsq
 325 
 326 def get_newsletter_number():
 327     global year, number
 328 
 329     while True:
 330         year = raw_input("Please insert the newsletter year: ")
 331 
 332         for i in year:
 333             if i in alpha:
 334                 print "Bad data!"
 335             
 336         if len(year) > 4 or len(year) < 4:
 337             print "The year you typed is wrong."
 338         else:
 339             break
 340 
 341     while True:
 342         number = raw_input("Please insert the newsletter number: ")
 343 
 344         # TODO: check if it is empty?
 345 
 346         for i in number:
 347             if i in alpha:
 348                 print "Bad data!"
 349 
 350         lung = len(number)
 351 
 352         if lung > 3:
 353             print "Invalid newsletter number."
 354             True
 355             continue
 356         elif lung == 2:
 357             number = "0" + number
 358             False
 359             break
 360         elif lung == 1:
 361             number = "00" + number
 362             False
 363             break
 364 
 365 def define_optionparser():
 366     usage = "Usage: %prog [option] arg..."
 367     version = "%prog " + prog_ver
 368     
 369     parser = OptionParser(usage=usage, version=version)
 370     
 371     parser.add_option("-i", "--input", metavar="FILE", action="store", type="string", dest="inputfile", help="the input file to read")
 372     parser.add_option("-o", "--output", metavar="FILE", action="store", type="string", dest="outputfile", help="the name of the output file; default value is 'newsletter-out.txt' and will be written in the user dir", default=default_output)
 373 
 374     (options, args) = parser.parse_args()
 375     
 376     if (len(sys.argv[1:]) == 0):
 377         parser.error("you need to specify the input file.")
 378         sys.exit(2)
 379     elif (options.inputfile == None):
 380         parser.error("you need to specify the input file.")
 381         sys.exit(2)
 382     
 383     return options
 384 
 385 def main():
 386     # Temporally removed since we cannot download the newsletter directly anymore 
 387     # get_newsletter_number()
 388     
 389     options = define_optionparser()
 390     
 391     read_newsletter(options)    
 392 
 393 if __name__ == "__main__":
 394     main()
 395     sys.exit(0)
Allegati

Per riferirsi agli allegati di una pagina, usare attachment:NOME_FILE, come mostrato qui sotto nell'elenco degli allegati. NON usare l'URL che si trova in corrispondenza del collegamento [scarica], potrebbe cambiare in futuro.
Non è consentito inserire allegati su questa pagina.