Allegato "newsletter-ml.py"
Scarica 1 #!/usr/bin/python3
2 # -*- coding: UTF-8 -*-
3
4 # Copyright (C) 2007 Milo Casagrande <milo@ubuntu.com>
5 # Copyright (C) 2011 Milo Casagrande <milo@ubuntu.com>
6 # Copyright (C) 2023 Mattia Rizzolo <mapreri@ubuntu.com>
7 #
8 # This program is free software; you can redistribute it and/or modify it
9 # under the terms of the GNU General Public License as published by the Free
10 # Software Foundation; either version 2 of the License, or (at your option)
11 # any later version
12 #
13 # This program is distributed in the hope that it will be useful, but
14 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 # more details
17 #
18 # You should have received a copy of the GNU General Public License along with
19 # this program; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA, 02110-1301 USA.
21
22 '''
23 Simple script to convert the Italian newsletter into pure text
24 cleaning all the MoinMoin wiki syntax, in order to send the newsletter
25 via email.
26 '''
27
28 import sys
29 import re
30 import string
31 import os
32 import xmlrpc.client
33 from optparse import OptionParser
34
# Alphabetic characters, used to reject non-numeric interactive input
alpha = string.ascii_letters

# Script version, shown by the --version option
prog_ver = "0.0.5"

# URL of the Italian wiki
wiki_base = "https://wiki.ubuntu-it.org/"

# URL of the International wiki
wiki_world = "https://wiki.ubuntu.com/"

# Base string for the newsletter
new_base = "NewsletterItaliana/"

# MoinMoin line-break markup (replaced by a real newline in the output)
line_break = "<<BR>>\n"
# Category line stripped from the end of the page
wiki_category = "CategoryComunita"
# NOTE(review): this is the *Python 2* repr of the dict type; under
# Python 3 str(type({})) is "<class 'dict'>", so comparisons against
# this value never match — confirm before re-enabling get_newsletter()
dict_type = "<type 'dict'>"

# Default name for the output file
default_output = "newsletter-output.txt"

# Used for the email version
comment = "##"

# The year of the newsletter
year = ""

# The number of the newsletter
number = ""
64
# XML-RPC after the upgrade to version 1.9 of the Italian wiki
# is not working anymore. Keep the function in case we have time
# to enable it again
def get_newsletter():
    """
    Read the newsletter wiki text directly online.

    Needs XML-RPC enabled on the wiki.  The page name is built from the
    module-level `year` and `number` values (set by
    get_newsletter_number()); the fetched text is handed to
    read_newsletter().
    """
    wiki = xmlrpc.client.ServerProxy(wiki_base + "?action=xmlrpc2")

    pagina = new_base + year + "." + number

    pagedata = wiki.getPage(pagina)

    # MoinMoin's XML-RPC returns a fault dict instead of the page text
    # when the page does not exist.  The old check compared
    # str(type(pagedata)) against the Python 2 repr "<type 'dict'>",
    # which can never match under Python 3 ("<class 'dict'>"), so the
    # error branch was unreachable; test the type directly instead.
    if isinstance(pagedata, dict):
        print("*** Error: page does not exist.")
        sys.exit(2)

    # NOTE(review): read_newsletter() now expects an options object with
    # .inputfile/.outputfile, not raw page text — this disabled code path
    # would need updating before being re-enabled.
    read_newsletter(pagedata)
89
90
def read_newsletter(options):
    """
    Open the input file, create the output file and do the parsing.

    Each input line goes through check_string(); lines it returns as
    None are dropped, the rest are concatenated and written out,
    followed by a hard-coded social-media footer line.

    @options: optparse options object; uses .inputfile and .outputfile
    """
    inputfile = options.inputfile

    if options.outputfile != default_output:
        outputfile = os.path.abspath(options.outputfile)
    else:
        # The default output file goes into the user's home directory
        outputfile = os.path.expanduser("~" + os.sep + options.outputfile)

    try:
        infile = open(inputfile, 'r')
    except IOError as e:
        print("*** Error opening input file %s: %s" % (inputfile, e))
        sys.exit(2)

    try:
        outfile = open(outputfile, 'w')
    except IOError as e:
        # Don't leak the already-opened input handle on this error path
        infile.close()
        print("*** Error opening output file %s: %s" % (outputfile, e))
        sys.exit(2)

    print("Reading newsletter text from %s..." % inputfile)

    # `with` guarantees both handles are closed even if parsing raises;
    # collecting pieces in a list avoids the quadratic += string build-up.
    with infile, outfile:
        pieces = []
        # Iterating the file replaces the manual readline()/EOF loop
        # (which also contained a dead bare `False` statement)
        for line in infile:
            cleaned = check_string(line)
            if cleaned is not None:
                pieces.append(cleaned)

        # Last line with links to social media accounts (special cased, as the wiki has a table we removed)
        pieces.append("Facebook ( https://www.facebook.com/ubuntu.it ), ")
        pieces.append("Twitter ( https://twitter.com/ubuntuit ), ")
        pieces.append("YouTube ( https://youtube.com/ubuntuitpromozione ) e ")
        pieces.append("Telegram ( https://telegram.me/ubuntuit ).\n")

        print("Writing output file...")
        outfile.write("".join(pieces))

    print("Newsletter created in %s." % outputfile)
144
145
def check_string(string):
    u"""Check the string and return it cleaned from all
    the wiki syntax.

    Returns None when the whole line must be dropped (wiki directives,
    tables, rules, comments, attachments, images, macros, the category
    line); otherwise returns the line with MoinMoin markup stripped or
    converted to plain text.  The branch order below matters: the first
    matching rule wins, and only non-dropped lines reach the markup
    substitutions further down.

    @string: the string to analyze (note: this parameter shadows the
    `string` module, which is only used at module level)
    """
    exp = []
    nexp = []
    nnexp = []

    # Remove the ACL string
    if re.findall(r'^#acl', string):
        return None
    # Remove the format string
    elif re.findall(r'^#format', string):
        return None
    # Remove the language string
    elif re.findall(r'^#LANGUAGE', string):
        return None
    # Remove all tables
    elif re.findall(r'^\|\|\<(?:table|row)style', string):
        return None
    # Remove all horizontal rules
    elif re.findall(r'-{4,6}', string):
        return None
    # Line for e-mail version is kept
    # (only the leading "##" comment marker is stripped; the Italian
    # text inside the pattern is part of the newsletter content)
    elif re.findall(r'##Per la versione in linea', string):
        string = string.replace(comment, "")
    # Remove commented lines
    elif re.findall('^#{2,2}', string):
        return None
    # Remove all attachments
    elif re.findall('attachment', string):
        return None
    # Remove all images
    # TODO should we process the string and keep the link to the image?
    elif re.findall(r'<<Immagine\(.*?>>', string):
        return None
    # Remove the index macro
    elif re.findall(r'<<Indice\(?.*?>>', string):
        return None
    # Remove included pieces (like the header lines)
    elif re.findall(r'^<<Include\(', string):
        return None
    # Titles are kept, preceded by a blank line for readability
    elif re.findall(r'={1,3}\s.*?\s={1,3}', string):
        return ("\n") + string
    # Each break/newline is substituted with the real newline
    elif re.findall('<<BR>>\\n', string):
        string = string.replace(line_break, "\n")
    # Remove the category
    elif re.match(wiki_category, string):
        return None

    # Remove all single quotes from the string, they have to be at least two
    # (MoinMoin bold/italic markup: ''...'' to '''''...''''')
    exp = re.findall('\'{2,5}.*?\'{2,5}', string)

    if exp != []:
        for word in exp:
            nexp.append(word.replace("'", ""))

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    # Remove all back-quotes (inline monospace markup)
    exp = re.findall(r'`{1,2}', string)

    if exp != []:
        string = string.replace("`", "")

    exp = []
    nexp = []

    # Remove multiple blank lines
    exp = re.findall('^$\\n', string)

    if exp:
        for word in exp:
            nexp.append(word.replace("\n", ""))

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    # Remove unuseful exclamation marks
    # (MoinMoin uses "!Word" to suppress CamelCase auto-linking; note
    # this removes *every* "!" from a line that contains " !")
    exp = re.findall(r'\s!', string)

    if exp:
        string = string.replace("!", "")

    exp = []
    nexp = []

    # Look for all the http links: [[url | label]] -> "label ( url )"
    exp = re.findall(r'\[{2,2}http[s]*\:/{2,2}[|:*\w\S]+\s*\|\s*[\#*\(*\)*\:*,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)

    if exp != []:
        nnexp = replace_square(exp)

        newstring = ""

        for word in nnexp:
            splitted = word.split("|")
            for split in splitted[1:]:
                newstring += split + " "

            newstring += "( " + splitted[0].strip() + " )"
            nexp.append(newstring)
            newstring = ""

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    exp = []
    nexp = []
    nnexp = []

    # Look for the wiki links: [[Page | label]] -> "label ( wiki_base/Page )"
    # Previous pattern, kept for reference:
    # exp = re.findall(r'\[{2,2}(?!http[s]*\:/{2,2})(?!Ubuntu\:)[\w\S\d]+\s*\|\s*[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)
    exp = re.findall(r"\[{2}\s*(?!Ubuntu:)\S+\s*(?:\|\s*[\s\w\d,’'.+-]+)?\]{2}", string)

    if exp:
        nnexp = replace_square(exp)

        newstring = ""

        for word in nnexp:
            splitted = word.split("|")
            if len(splitted) == 1:
                # Bare [[Page]] link: the page name doubles as the label
                newstring = word.strip() + " "
            else:
                for split in splitted[1:]:
                    newstring += split + " "

            newstring += "( " + wiki_base + splitted[0].strip() + " )"
            nexp.append(newstring)
            newstring = ""

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    exp = []
    nexp = []
    nnexp = []

    # Link to the international wiki: [[Ubuntu:Page | label]]
    exp = re.findall(r'\[{2,2}(?!http[s]*:/{2,2})Ubuntu\:[\w\S\d]+\s*\|[,*\{*\}*+*\w\s\d.-]+\]{2,2}', string)

    if exp != []:
        nnexp = replace_square(exp)

        newstring = ""

        for word in nnexp:
            splitted = word.split("|")
            for split in splitted[1:]:
                newstring += split + " "

            # Separate the real name of the page (strip the "Ubuntu:" prefix)
            base = splitted[0].split(":")

            newstring += "( " + wiki_world + base[1].strip() + " )"
            nexp.append(newstring)
            newstring = ""

        for i in range(len(exp)):
            string = string.replace(exp[i], nexp[i])

    return string
314
315
def replace_square(exp):
    """
    Strip the MoinMoin double square brackets from every entry.

    @exp: the list with the strings to clean
    """
    # Dropping "[[" and "]]" in one pass per entry is equivalent to the
    # original two-list approach and returns a fresh list.
    return [entry.replace("[[", "").replace("]]", "") for entry in exp]
331
332
def get_newsletter_number():
    """
    Interactively ask for the newsletter year and number, storing them
    in the module-level `year` and `number` variables.

    The year must be exactly 4 digits; the number is at most 3 digits
    and gets zero-padded to 3 (e.g. "7" -> "007").
    """
    global year, number

    while True:
        year = input("Please insert the newsletter year: ")

        # Reject alphabetic input outright (the old code printed
        # "Bad data!" but accepted the value anyway)
        if any(c in alpha for c in year):
            print("Bad data!")
            continue

        # "len > 4 or len < 4" simplified to != 4
        if len(year) != 4:
            print("The year you typed is wrong.")
        else:
            break

    while True:
        number = input("Please insert the newsletter number: ")

        if any(c in alpha for c in number):
            print("Bad data!")
            continue

        lung = len(number)

        # Empty input previously looped silently with no message
        if lung > 3 or lung == 0:
            print("Invalid newsletter number.")
            continue

        # Zero-pad to three digits.  The old code handled only lengths
        # 1 and 2 and never reached a break for a valid 3-digit number,
        # re-prompting forever; zfill covers all three cases.
        number = number.zfill(3)
        break
371
372
def define_optionparser():
    """
    Build the command-line parser and parse sys.argv.

    Returns the optparse options object.  Exits (status 2, via
    parser.error) when the mandatory input file is not given.
    """
    usage = "Usage: %prog [option] arg..."
    version = "%prog " + prog_ver

    parser = OptionParser(usage=usage, version=version)

    parser.add_option("-i", "--input", metavar="FILE", action="store",
                      type="string", dest="inputfile",
                      help="the input file to read")
    parser.add_option("-o", "--output", metavar="FILE", action="store",
                      type="string", dest="outputfile",
                      default=default_output,
                      help="the name of the output file; default value is "
                           "'newsletter-out.txt' and will be written in the "
                           "user dir")

    (options, args) = parser.parse_args()

    # parser.error() prints the message and exits with status 2 itself,
    # so the old sys.exit(2) calls after it were unreachable; and with no
    # arguments at all, inputfile is None anyway, so the separate
    # len(sys.argv[1:]) check was redundant.
    if options.inputfile is None:
        parser.error("you need to specify the input file.")

    return options
392
393
def main():
    """Entry point: parse the command line and convert the newsletter."""
    # Downloading the newsletter straight from the wiki is no longer
    # possible, so the interactive prompt stays disabled.
    # get_newsletter_number()

    opts = define_optionparser()
    read_newsletter(opts)
401
402
# Run only when executed as a script, then exit with success status
if __name__ == "__main__":
    main()
    sys.exit(0)
Allegati
Per riferirsi agli allegati di una pagina, usare attachment:NOME_FILE, come mostrato qui sotto nell'elenco degli allegati. NON usare l'URL che si trova in corrispondenza del collegamento [scarica], potrebbe cambiare in futuro. Non è consentito inserire allegati su questa pagina.