Skip to content

Commit f2683e7

Browse files
authored
Merge pull request #47 from c2corg/polish-clean-rc
Fine polish clean recent changes
2 parents eeea9bd + d19b7ef commit f2683e7

6 files changed

Lines changed: 79 additions & 47 deletions

File tree

campbot/__main__.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
CampBot, Python bot framework for camptocamp.org
33
44
Usage:
5-
campbot clean_rc <days> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
5+
campbot clean_rc <days> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
66
campbot report_rc <days> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>]
7-
campbot clean <url_or_file> <langs> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
7+
campbot clean <url_or_file> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
88
campbot report <url_or_file> <lang> [--login=<login>] [--password=<password>] [--delay=<seconds>]
99
campbot contribs [--out=<filename>] [--starts=<start_date>] [--ends=<end_date>] [--delay=<seconds>]
1010
campbot export <url> [--out=<filename>] [--delay=<seconds>]
@@ -28,7 +28,7 @@
2828
filename is also accepted, and must be like :
2929
123 | r
3030
456 | w
31-
<langs> is comma-separated lang identifiers, like fr,de for french and german.
31+
<lang> is a lang identifier, like fr for french.
3232
report Make quality report on documents.
3333
contribs Export all contribution in a CSV file. <start_date> and <end_date> are like 2018-05-12
3434
export Export all documents in a CSV file.
@@ -91,7 +91,10 @@ def main(args):
9191

9292
elif args["clean_rc"]:
9393
get_campbot(args).clean_recent_changes(
94-
days=int(args["<days>"]), lang="fr", ask_before_saving=not args["--batch"],
94+
days=float(args["<days>"]),
95+
lang=args["<lang>"],
96+
ask_before_saving=not args["--batch"],
97+
thread_url=args["<thread_url>"],
9598
)
9699

97100
elif args["report"]:
@@ -102,8 +105,9 @@ def main(args):
102105
elif args["clean"]:
103106
get_campbot(args).clean(
104107
args["<url_or_file>"],
105-
langs=args["<langs>"].split(","),
108+
lang=args["<lang>"],
106109
ask_before_saving=not args["--batch"],
110+
thread_url=args["<thread_url>"],
107111
clean_bbcode=args["--bbcode"],
108112
)
109113

campbot/checkers.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,6 @@ def report_recent_changes(bot, days, lang, thread_url):
202202
tests = get_fixed_tests(lang)
203203
tests += get_re_tests(bot.forum.get_post(url=thread_url), lang)
204204

205-
logging.info(f"Get modified documents from {oldest_date} to {newest_date}")
206205
items = bot.get_modified_documents(
207206
lang=lang, oldest_date=oldest_date, newest_date=newest_date
208207
).values()

campbot/core.py

Lines changed: 62 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import requests
88
from datetime import datetime, timedelta
99
from dateutil import parser
10-
from collections import OrderedDict
10+
from collections import OrderedDict, defaultdict
1111
import pytz
1212
import logging
1313
import time
@@ -523,24 +523,24 @@ def _get_documents_from_url(self, url):
523523
constructor, filters = _parse_filter(url)
524524
return self.wiki.get_documents(filters, constructor=constructor)
525525

526-
def clean(self, url_or_filename, langs, ask_before_saving=True, clean_bbcode=False):
526+
def clean(
527+
self, url_or_filename, lang, ask_before_saving, thread_url, clean_bbcode=False
528+
):
527529
"""
528530
Clean a set of document.
529531
530532
:param url_or_filename: Camptocamp.org URL, or filename
531-
:param langs: comma-separated list of lang identifiers
533+
:param lang: lang identifier
532534
:param ask_before_saving: Boolean
533535
:param clean_bbcode: Boolean
534536
535537
"""
536538

537-
assert len(langs) != 0
538-
539539
documents = self.get_documents(url_or_filename)
540-
processors = get_automatic_replacments(self, clean_bbcode)
540+
report_header = f"Clean documents from `{url_or_filename}`"
541541

542542
self._process_documents(
543-
documents, processors, langs, ask_before_saving, excluded_ids=[996571,]
543+
documents, lang, ask_before_saving, report_header, thread_url, clean_bbcode
544544
)
545545

546546
def report(self, url_or_filename, lang):
@@ -581,48 +581,77 @@ def report(self, url_or_filename, lang):
581581
print("\n".join(stdout_report))
582582

583583
def _process_documents(
584-
self, documents, processors, langs, ask_before_saving=True, excluded_ids=None
584+
self,
585+
documents,
586+
lang,
587+
ask_before_saving,
588+
report_header,
589+
thread_url,
590+
clean_bbcode=False,
585591
):
586592

587-
for document in documents:
593+
excluded_document_ids = [
594+
996571, # article with all automatic corrections
595+
]
596+
597+
processors = get_automatic_replacments(self, clean_bbcode=clean_bbcode)
588598

599+
report = defaultdict(int)
600+
601+
for document in documents:
589602
if "redirects_to" in document:
590-
pass # document id is not available...
603+
continue # document id is not available...
604+
605+
document_url = document.get_url()
606+
report[f"Inspected"] += 1
591607

592-
elif excluded_ids is not None and document.document_id in excluded_ids:
608+
if document.document_id in excluded_document_ids:
593609
pass
594610

595611
elif document.get("protected", False) and not self.moderator:
596-
print("{} is a protected".format(document.get_url()))
612+
logging.info(f"{document_url} is protected")
613+
report["Skipped because protected"] += 1
597614

598615
elif document.is_personal() and not self.moderator:
599-
print("{} is a personal".format(document.get_url()))
616+
logging.info(f"{document_url} is a personal document")
617+
report["Skipped because is not CC-BY-SA"] += 1
600618

601619
elif not document.is_valid():
602-
print(
603-
"{} : {}".format(
604-
document.get_url(), document.get_invalidity_reason()
605-
)
606-
)
620+
reason = document.get_invalidity_reason()
621+
logging.info(f"{document_url} : {reason}")
622+
report[f"Skipped because {reason}"] += 1
607623

608624
else:
609625
messages = []
610626
must_save = False
611627

612628
for processor in processors:
613629
if processor.ready_for_production:
614-
if processor(document, langs):
630+
if processor(document, [lang,]):
615631
messages.append(processor.comment)
616632
must_save = True
617633

618634
if must_save:
619635
comment = ", ".join(messages)
620636
try:
621-
document.save(comment, ask_before_saving=ask_before_saving)
622-
except Exception as e:
623-
print(
624-
"Error while saving {} :\n{}".format(document.get_url(), e)
637+
new_document = document.save(
638+
comment, ask_before_saving=ask_before_saving
625639
)
640+
except Exception as e:
641+
report["Unexpcted error"] += 1
642+
logging.error(f"Error while saving {document_url} :\n{e}")
643+
else:
644+
if new_document is None:
645+
report["Skipped by bot owner"] += 1
646+
else:
647+
report["Corrected"] += 1
648+
649+
log_report = "\n".join(
650+
[f"* `{bucket}`: {count}" for bucket, count in report.items()]
651+
)
652+
self.forum.post_message(f"### {report_header}\n\n{log_report}", thread_url)
653+
654+
return report
626655

627656
def export(self, url, filename=None):
628657
"""
@@ -719,6 +748,10 @@ def write(**kwargs):
719748
def get_modified_documents(
720749
self, lang, oldest_date=None, newest_date=None, excluded_users=()
721750
):
751+
logging.info(
752+
f"Get modified documents from {oldest_date} to {newest_date} in lang:{lang}"
753+
)
754+
722755
result = OrderedDict()
723756
for contrib in self.wiki.get_contributions(
724757
oldest_date=oldest_date, newest_date=newest_date
@@ -738,15 +771,11 @@ def get_modified_documents(
738771

739772
return result
740773

741-
def clean_recent_changes(self, days, lang, ask_before_saving):
774+
def clean_recent_changes(self, days, lang, ask_before_saving, thread_url):
742775
newest_date = utils.today().replace(hour=0, minute=0, second=0, microsecond=0)
743776
oldest_date = newest_date - timedelta(days=days)
744777

745-
excluded_ids = [
746-
996571,
747-
]
748-
749-
processors = get_automatic_replacments(self)
778+
report_header = f"Clean recent change from `{oldest_date}` to `{newest_date}`"
750779

751780
def get_documents():
752781

@@ -761,12 +790,11 @@ def get_documents():
761790
document_id, document_type=document_type
762791
)
763792

764-
if document_id not in excluded_ids:
765-
yield document
793+
yield document
766794

767-
print("Fix recent changes")
768-
self._process_documents(get_documents(), processors, [lang,], ask_before_saving)
769-
print("Fix recent changes finished")
795+
self._process_documents(
796+
get_documents(), lang, ask_before_saving, report_header, thread_url
797+
)
770798

771799
def get_new_contributors(self, contrib_threshold=20, outings_threshold=15):
772800
with open("contributors.txt", "r") as f:

campbot/objects.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from __future__ import print_function, unicode_literals, division
2626

2727
import re
28+
import logging
2829
from .differ import get_diff_report
2930

3031

@@ -289,7 +290,7 @@ def save(self, message, ask_before_saving=True):
289290
if _input("Save {} : {}, y/[n] ?\n".format(self.get_url(), message)) != "y":
290291
return None
291292
else:
292-
print("Saving {} : {}".format(self.get_url(), message))
293+
logging.info(f"Saving {self.get_url()} : {message}")
293294

294295
return self._campbot.wiki.put(
295296
"/{}/{}".format(self.url_path, self.document_id),

docs/CLI/clean.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ Command line
99

1010
.. code-block:: bash
1111
12-
campbot clean <url> <langs> --login=<login> --password=<password> [--delay=<seconds>] [--bbcode]
12+
campbot clean <url> <lang> <thread_url> --login=<login> --password=<password> [--delay=<seconds>] [--bbcode]
1313
1414
Options and arguments
1515
---------------------
1616

1717
* ``<url>`` is like https://www.camptocamp.org/routes#w=940468 : all routes associated to waypoint 940468 will be cleaned. Shorthand ``routes#w=940468`` is accepted.
18-
* ``<langs>`` is a comma-saprated list of langs, like fr,de. Clean procedure will impacts only this langs.
18+
* ``<lang>`` is a lang identifier, like fr or de. The clean procedure will impact only this lang.
1919

2020
Clean processors
2121
----------------

docs/CLI/index.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ Here isthe help you get by typing ``campbot`` in you command line tool :
88
CampBot, Python bot framework for camptocamp.org
99
1010
Usage:
11-
campbot clean_rc <days> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
12-
campbot report_rc <days> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
13-
campbot clean <url_or_file> <langs> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
11+
campbot clean_rc <days> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch]
12+
campbot report_rc <days> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>]
13+
campbot clean <url_or_file> <lang> <thread_url> [--login=<login>] [--password=<password>] [--delay=<seconds>] [--batch] [--bbcode]
1414
campbot contribs [--out=<filename>] [--starts=<start_date>] [--ends=<end_date>] [--delay=<seconds>]
1515
campbot export <url> [--out=<filename>] [--delay=<seconds>]
1616
@@ -33,7 +33,7 @@ Here isthe help you get by typing ``campbot`` in you command line tool :
3333
filename is also accepted, and must be like :
3434
123 | r
3535
456 | w
36-
<langs> is comma-separated lang identifiers, like fr,de for french and german.
36+
<lang> is a lang identifier, like fr for french.
3737
contribs Export all contribution in a CSV file. <start_date> and <end_date> are like 2018-05-12
3838
export Export all documents in a CSV file.
3939
<url> is like https://www.camptocamp.org/outings#u=2, or, more simply, outings#u=2

0 commit comments

Comments
 (0)