Allegato "newsletter-ml.py"
Scarica 1 #!/usr/bin/python3
2 # -*- coding: UTF-8 -*-
3
4 # Copyright (C) 2007 Milo Casagrande <milo@ubuntu.com>
5 # Copyright (C) 2011 Milo Casagrande <milo@ubuntu.com>
6 # Copyright (C) 2023 Mattia Rizzolo <mapreri@ubuntu.com>
7 #
8 # This program is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by the Free
10 # Software Foundation; either version 2 of the License, or (at your option)
11 # any later version
12 #
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 # more details
17 #
18 # You should have received a copy of the GNU General Public License along with
19 # this program; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA, 02110-1301 USA.
21
22 '''
23 Simple script to convert the Italian newsletter into pure text
24 cleaning all the MoinMoin wiki syntax, in order to send the newsletter
25 via email.
26 '''
27
28 import sys
29 import re
30 import string
31 import os
32 import xmlrpc.client
33 from optparse import OptionParser
34
# Alphabetic characters, used to reject non-numeric interactive input
alpha = string.ascii_letters

# Script version, shown by the --version option
prog_ver = "0.0.5"

# URL of the Italian wiki
wiki_base = "https://wiki.ubuntu-it.org/"

# URL of the International wiki
wiki_world = "https://wiki.ubuntu.com/"

# Base string for the newsletter
new_base = "NewsletterItaliana/"

# MoinMoin line-break markup (replaced by a real newline in the output)
line_break = "<<BR>>\n"
# Category line stripped from the end of the page
wiki_category = "CategoryComunita"
# NOTE(review): this is the *Python 2* repr of the dict type; under
# Python 3 str(type({})) is "<class 'dict'>", so comparisons against
# this value never match — confirm before re-enabling get_newsletter()
dict_type = "<type 'dict'>"

# Default name for the output file
default_output = "newsletter-output.txt"

# Used for the email version
comment = "##"

# The year of the newsletter
year = ""

# The number of the newsletter
number = ""
64
# XML-RPC after the upgrade to version 1.9 of the Italian wiki
# is not working anymore. Keep the function in case we have time
# to enable it again
def get_newsletter():
    """
    Read the newsletter wiki text directly online.

    Needs XML-RPC enabled on the wiki.  The page name is built from the
    module-level `year` and `number` values (set by
    get_newsletter_number()); the fetched text is handed to
    read_newsletter().
    """
    wiki = xmlrpc.client.ServerProxy(wiki_base + "?action=xmlrpc2")

    pagina = new_base + year + "." + number

    pagedata = wiki.getPage(pagina)

    # MoinMoin's XML-RPC returns a fault dict instead of the page text
    # when the page does not exist.  The old check compared
    # str(type(pagedata)) against the Python 2 repr "<type 'dict'>",
    # which can never match under Python 3 ("<class 'dict'>"), so the
    # error branch was unreachable; test the type directly instead.
    if isinstance(pagedata, dict):
        print("*** Error: page does not exist.")
        sys.exit(2)

    # NOTE(review): read_newsletter() now expects an options object with
    # .inputfile/.outputfile, not raw page text — this disabled code path
    # would need updating before being re-enabled.
    read_newsletter(pagedata)
89
90
def read_newsletter(options):
    """
    Open the input file, create the output file and do the parsing.

    Each input line goes through check_string(); lines it returns as
    None are dropped, the rest are concatenated and written out,
    followed by a hard-coded social-media footer line.

    @options: optparse options object; uses .inputfile and .outputfile
    """
    inputfile = options.inputfile

    if options.outputfile != default_output:
        outputfile = os.path.abspath(options.outputfile)
    else:
        # The default output file goes into the user's home directory
        outputfile = os.path.expanduser("~" + os.sep + options.outputfile)

    try:
        infile = open(inputfile, 'r')
    except IOError as e:
        print("*** Error opening input file %s: %s" % (inputfile, e))
        sys.exit(2)

    try:
        outfile = open(outputfile, 'w')
    except IOError as e:
        # Don't leak the already-opened input handle on this error path
        infile.close()
        print("*** Error opening output file %s: %s" % (outputfile, e))
        sys.exit(2)

    print("Reading newsletter text from %s..." % inputfile)

    # `with` guarantees both handles are closed even if parsing raises;
    # collecting pieces in a list avoids the quadratic += string build-up.
    with infile, outfile:
        pieces = []
        # Iterating the file replaces the manual readline()/EOF loop
        # (which also contained a dead bare `False` statement)
        for line in infile:
            cleaned = check_string(line)
            if cleaned is not None:
                pieces.append(cleaned)

        # Last line with links to social media accounts (special cased, as the wiki has a table we removed)
        pieces.append("Facebook ( https://www.facebook.com/ubuntu.it ), ")
        pieces.append("Twitter ( https://twitter.com/ubuntuit ), ")
        pieces.append("YouTube ( https://youtube.com/ubuntuitpromozione ) e ")
        pieces.append("Telegram ( https://telegram.me/ubuntuit ).\n")

        print("Writing output file...")
        outfile.write("".join(pieces))

    print("Newsletter created in %s." % outputfile)
144
145
def check_string(string):
    u"""Check the string and return it cleaned from all
    the wiki syntax.

    Returns None when the whole line must be dropped (wiki directives,
    tables, rules, comments, attachments, images, macros, the category
    line); otherwise returns the line with MoinMoin markup stripped or
    converted to plain text.  The branch order below matters: the first
    matching rule wins, and only non-dropped lines reach the markup
    substitutions further down.

    @string: the string to analyze (note: this parameter shadows the
    `string` module, which is only used at module level)
    """
    exp = []
    nexp = []
    nnexp = []

    # Remove the ACL string
    if re.findall(r'^#acl', string):
        return None
    # Remove the format string
    elif re.findall(r'^#format', string):
        return None
    # Remove the language string
    elif re.findall(r'^#LANGUAGE', string):
        return None
    # Remove all tables
    elif re.findall(r'^\|\|\<(?:table|row)style', string):
        return None
    # Remove all horizontal rules
    elif re.findall(r'-{4,6}', string):
        return None
    # Line for e-mail version is kept
    # (only the leading "##" comment marker is stripped; the Italian
    # text inside the pattern is part of the newsletter content)
    elif re.findall(r'##Per la versione in linea', string):
        string = string.replace(comment, "")
    # Remove commented lines
    elif re.findall('^#{2,2}', string):
        return None
    # Remove all attachments
    elif re.findall('attachment', string):
        return None
    # Remove all images
    # TODO should we process the string and keep the link to the image?
    elif re.findall(r'<<Immagine\(.*?>>', string):
        return None
    # Remove the index macro
    elif re.findall(r'<<Indice\(?.*?>>', string):
        return None
    # Remove included pieces (like the header lines)
    elif re.findall(r'^<<Include\(', string):
        return None
    # Titles are kept, preceded by a blank line for readability
    elif re.findall(r'={1,3}\s.*?\s={1,3}', string):
        return ("\n") + string
    # Each break/newline is substituted with the real newline
    elif re.findall('<<BR>>\\n', string):
        string = string.replace(line_break, "\n")
    # Remove the category
    elif re.match(wiki_category, string):
        return None

    # Remove all single quotes from the string, they have to be at least two
    # (MoinMoin bold/italic markup: ''...'' to '''''...''''')
    exp = re.findall('\'{2,5}.*?\'{2,5}', string)

    if exp != []:
        for word in exp:
            nexp.append(word.replace("'", ""))

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    # Remove all back-quotes (inline monospace markup)
    exp = re.findall(r'`{1,2}', string)

    if exp != []:
        string = string.replace("`", "")

    exp = []
    nexp = []

    # Remove multiple blank lines
    exp = re.findall('^$\\n', string)

    if exp:
        for word in exp:
            nexp.append(word.replace("\n", ""))

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    # Remove unuseful exclamation marks
    # (MoinMoin uses "!Word" to suppress CamelCase auto-linking; note
    # this removes *every* "!" from a line that contains " !")
    exp = re.findall(r'\s!', string)

    if exp:
        string = string.replace("!", "")

    exp = []
    nexp = []

    # Look for all the http links: [[url | label]] -> "label ( url )"
    exp = re.findall(r'\[{2,2}http[s]*\:/{2,2}[|:*\w\S]+\s*\|\s*[\#*\(*\)*\:*,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)

    if exp != []:
        nnexp = replace_square(exp)

        newstring = ""

        for word in nnexp:
            splitted = word.split("|")
            for split in splitted[1:]:
                newstring += split + " "

            newstring += "( " + splitted[0].strip() + " )"
            nexp.append(newstring)
            newstring = ""

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    exp = []
    nexp = []
    nnexp = []

    # Look for the wiki links: [[Page | label]] -> "label ( wiki_base/Page )"
    # Previous pattern, kept for reference:
    # exp = re.findall(r'\[{2,2}(?!http[s]*\:/{2,2})(?!Ubuntu\:)[\w\S\d]+\s*\|\s*[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)
    exp = re.findall(r"\[{2}\s*(?!Ubuntu:)\S+\s*(?:\|\s*[\s\w\d,’'.+-]+)?\]{2}", string)

    if exp:
        nnexp = replace_square(exp)

        newstring = ""

        for word in nnexp:
            splitted = word.split("|")
            if len(splitted) == 1:
                # Bare [[Page]] link: the page name doubles as the label
                newstring = word.strip() + " "
            else:
                for split in splitted[1:]:
                    newstring += split + " "

            newstring += "( " + wiki_base + splitted[0].strip() + " )"
            nexp.append(newstring)
            newstring = ""

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    exp = []
    nexp = []
    nnexp = []

    # Link to the international wiki: [[Ubuntu:Page | label]]
    exp = re.findall(r'\[{2,2}(?!http[s]*:/{2,2})Ubuntu\:[\w\S\d]+\s*\|[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)

    if exp != []:
        nnexp = replace_square(exp)

        newstring = ""

        for word in nnexp:
            splitted = word.split("|")
            for split in splitted[1:]:
                newstring += split + " "

            # Separate the real name of the page (strip the "Ubuntu:" prefix)
            base = splitted[0].split(":")

            newstring += "( " + wiki_world + base[1].strip() + " )"
            nexp.append(newstring)
            newstring = ""

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    return string
314
315
def replace_square(exp):
    """
    Strip the MoinMoin double square brackets from every entry.

    @exp: the list with the strings to clean
    """
    # Dropping "[[" and "]]" in one pass per entry is equivalent to the
    # original two-list approach and returns a fresh list.
    return [entry.replace("[[", "").replace("]]", "") for entry in exp]
331
332
def get_newsletter_number():
    """
    Interactively ask for the newsletter year and number, storing them
    in the module-level `year` and `number` variables.

    The year must be exactly 4 digits; the number is at most 3 digits
    and gets zero-padded to 3 (e.g. "7" -> "007").
    """
    global year, number

    while True:
        year = input("Please insert the newsletter year: ")

        # Reject alphabetic input outright (the old code printed
        # "Bad data!" but accepted the value anyway)
        if any(c in alpha for c in year):
            print("Bad data!")
            continue

        # "len > 4 or len < 4" simplified to != 4
        if len(year) != 4:
            print("The year you typed is wrong.")
        else:
            break

    while True:
        number = input("Please insert the newsletter number: ")

        if any(c in alpha for c in number):
            print("Bad data!")
            continue

        lung = len(number)

        # Empty input previously looped silently with no message
        if lung > 3 or lung == 0:
            print("Invalid newsletter number.")
            continue

        # Zero-pad to three digits.  The old code handled only lengths
        # 1 and 2 and never reached a break for a valid 3-digit number,
        # re-prompting forever; zfill covers all three cases.
        number = number.zfill(3)
        break
371
372
def define_optionparser():
    """
    Build the command-line parser and parse sys.argv.

    Returns the optparse options object.  Exits (status 2, via
    parser.error) when the mandatory input file is not given.
    """
    usage = "Usage: %prog [option] arg..."
    version = "%prog " + prog_ver

    parser = OptionParser(usage=usage, version=version)

    parser.add_option("-i", "--input", metavar="FILE", action="store",
                      type="string", dest="inputfile",
                      help="the input file to read")
    parser.add_option("-o", "--output", metavar="FILE", action="store",
                      type="string", dest="outputfile",
                      default=default_output,
                      help="the name of the output file; default value is "
                           "'newsletter-out.txt' and will be written in the "
                           "user dir")

    (options, args) = parser.parse_args()

    # parser.error() prints the message and exits with status 2 itself,
    # so the old sys.exit(2) calls after it were unreachable; and with no
    # arguments at all, inputfile is None anyway, so the separate
    # len(sys.argv[1:]) check was redundant.
    if options.inputfile is None:
        parser.error("you need to specify the input file.")

    return options
392
393
def main():
    """Entry point: parse the command line and convert the newsletter."""
    # Downloading the newsletter straight from the wiki is no longer
    # possible, so the interactive prompt stays disabled.
    # get_newsletter_number()

    opts = define_optionparser()
    read_newsletter(opts)
401
402
# Run only when executed as a script, then exit with success status
if __name__ == "__main__":
    main()
    sys.exit(0)
Allegati
Per riferirsi agli allegati di una pagina, usare attachment:NOME_FILE, come mostrato qui sotto nell'elenco degli allegati. NON usare l'URL che si trova in corrispondenza del collegamento [scarica], potrebbe cambiare in futuro. Non è consentito inserire allegati su questa pagina.