From fa1fee4964be18436ef59280480d0f9df99ccaad Mon Sep 17 00:00:00 2001 From: "Arnaud V." Date: Fri, 13 Dec 2013 22:21:47 +0100 Subject: [PATCH 1/4] Update and rename README to README.md --- README => README.md | 8 ++++++++ 1 file changed, 8 insertions(+) rename README => README.md (91%) diff --git a/README b/README.md similarity index 91% rename from README rename to README.md index f26ef74..415d366 100644 --- a/README +++ b/README.md @@ -1,3 +1,11 @@ +Wikipedia CLI +========= + + +@AnirudhBhat + + + This is quick weekend hack made to access Wikipedia from command line. From c6c3e31b5d9ffd84955ef281af595d5b4072408b Mon Sep 17 00:00:00 2001 From: "Arnaud V." Date: Fri, 13 Dec 2013 22:27:19 +0100 Subject: [PATCH 2/4] Update README.md --- README.md | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 415d366..d05b9cb 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,6 @@ Wikipedia CLI ========= +Command line interface to access wikipedia pages. -@AnirudhBhat - - - -This is quick weekend hack made to access Wikipedia from command line. - - -Its written in python.To compile it just run 'python cliwiki.py'(without quotes) from your terminal and then enter the search title to get Wikipedia page on your terminal! - - -It can also be given one of following 3 command line arguments. - -1)potd -2)featured -3)onthisday - -this is just a quick hack and there's lot of crappy code in it.So please feel free to fork and send pull request if u want to improve it. +This version is a fork of [AnirudhBhat](https://github.com/AnirudhBhat)'s and now run on python 3. From 5e95afc85f3cebf4d83742fcd1dbd8ec00ecefb4 Mon Sep 17 00:00:00 2001 From: Arnaud V Date: Fri, 13 Dec 2013 22:29:07 +0100 Subject: [PATCH 3/4] Moved to python 3, added docstrings, updated display, now show only urls for interesting links --- climovie.py | 45 ---------- cliwiki.py | 249 +++++++++++++++++++++++++++------------------------- 2 files changed, 128 insertions(+), 166 deletions(-) delete mode 100755 climovie.py mode change 100644 => 100755 cliwiki.py diff --git a/climovie.py b/climovie.py deleted file mode 100755 index 6aca2aa..0000000 --- a/climovie.py +++ /dev/null @@ -1,45 +0,0 @@ -#! /usr/bin/env python - -from __future__ import print_function -import urllib2 -import json - -API_KEY = "c5b633xj3ats73tmf9cez333" - -base_url = "http://api.rottentomatoes.com/api/public/v1.0" - - -type = "/lists/movies/box_office.lson?apikey=" - - -url = base_url + type + API_KEY - -result = json.load(urllib2.urlopen(url)) - -k=0 -z=0 - -for i in result["movies"]: - print(i["title"]) - print("SYNOPSIS",end="\n") - print(i["synopsis"],end="\n") - print("TRAILER",end="\n") - print(i["links"]["clips"],end="\n") - print("CASTS",end="\n") - for j in result["movies"][k]["abridged_cast"]: - print(j["name"]) - print(j["characters"],end="\n\n\n") - #for a in result["movies"]: - print("CRITICS_RATING:",end=" ") - print(result["movies"][z]["ratings"]["critics_rating"]) - print("CRITICS_SCORE:",end=" ") - print(result["movies"][z]["ratings"]["critics_score"]) - print("AUDIENCE_RATING:",end=" ") - print(result["movies"][z]["ratings"]["audience_rating"]) - print("AUDIENCE_SCORE:",end=" ") - print(result["movies"][z]["ratings"]["audience_score"],end="\n\n\n") - print("\t-----------------------------------------------------------") - #print(i["abridged_cast"][j]["name"]) - #print(i["abridged_cast"][j]["characters"],end="\n\n\n") - z=z+1 - k=k+1 diff --git a/cliwiki.py b/cliwiki.py old mode 100644 new mode 100755 index b87b854..d1303dd --- a/cliwiki.py +++ b/cliwiki.py @@ -1,180 +1,187 @@ -#! /usr/bin/env python +#! /usr/bin/env python3 + +""" CLI to access wikipedia informations """ -from __future__ import print_function import json -import urllib2 +import urllib.request import sys import re +# import argparse +# **** Global Variables **** +BASE_URL = "http://en.wikipedia.org/w/api.php?" +ACTION = "action=query" +FORMAT = "&format=json" +TITLES = "&titles=" -KEY = 0 -base_url = "http://en.wikipedia.org/w/api.php?" -action = "action=query" +# **** Functions **** +def get_title(): + """ Ask the user for a title and store the input """ -Format = "&format=json" + title = input('Enter the title you want to search --> \n') + global TITLES + TITLES += title.replace(' ','_') -titles="&titles=" +def wiki_search(): + """ Search function """ + prop = "&prop=extracts" + plaintext = "&explaintext" + section_format = "&exsectionformat=plain" + try: + url = (BASE_URL + ACTION + TITLES + + prop + plaintext + section_format + FORMAT) -def get_title(): - title = raw_input('enter the title you want to search\n') - title = title.replace(' ','_') - global titles - titles = titles + title + # open url, read content (bytes), convert in string via decode() + result = json.loads(urllib.request.urlopen(url).read().decode('utf-8')) + + key = list(result['query']['pages'].keys())[0][0:] + + print(result['query']['pages'][key]['extract']) + + except KeyError: + print('No wikipedia page for that title. ' + 'Wikipedia search titles are case sensitive.') def url_and_displaytitle(): - print('\ntitle and url for this wikipedia site',end="\n") - global base_url - global action - global titles - global Format - prop = "&prop=info" - inprop = "&inprop=url|displaytitle" - url = base_url + action + titles + prop + inprop + Format - result = json.load(urllib2.urlopen(url)) - key = result['query']['pages'].keys() - global KEY - KEY = (key[0][:]) - print(result['query']['pages'][str(KEY)]['title']) - print(result['query']['pages'][str(KEY)]['fullurl']) - print('\t-------------------\t') - + """ Display URL and Title for the page """ -def interesting_links(): - print('\nyou may also be interested in the following links',end="\n") - global base_url - global Format - global action - global titles - prop = "&prop=extlinks" - try: - url = base_url + action + titles + prop + Format - result =json.load(urllib2.urlopen(url)) - key = result['query']['pages'].keys() - key = key[0][0:] - j = 0 - offset = result['query-continue']['extlinks']['eloffset'] - while j < offset: - print(result['query']['pages'][str(key)]['extlinks'][j]) - j=j+1 - except: - print('sorry,couldn\'t find any links') + print('\n\nTitle and url for this Wikipedia page: \n') + prop_inprop = "&prop=info&inprop=url|displaytitle" + url = BASE_URL + ACTION + TITLES + prop_inprop + FORMAT + # open url, read content (bytes), convert in string via decode() + result = json.loads(urllib.request.urlopen(url).read().decode('utf-8')) -#def interwiki_links(): - # print('inter wiki links found for this search',end="\n") - # base_url - # action - # titles - # prop = "&prop=iwlinks" - # url = base_url + action + titles + prop - # print(url) - # result = urllib2.urlopen(url) - # for i in result: - # print(i) + # In python 3 dict_keys are not indexable, so we need to use list() + key = list(result['query']['pages'].keys())[0][:] + print('\t'+result['query']['pages'][key]['title']) + print('\t'+result['query']['pages'][key]['fullurl']) + print('\n\t-------------------\t') -def wiki_search(): - global base_url - global action - global titles - global Format - prop = "&prop=extracts" - plaintext = "&explaintext" - section_format = "&exsectionformat=plain" +def interesting_links(): + + """Fonction displaying related links => Interest on the CLI ?""" + + print('\nYou may also be interested in the following links: \n') + + prop = "&prop=extlinks" + try: - url = base_url + action + titles + prop + plaintext + section_format + Format - result = json.load(urllib2.urlopen(url)) - key = result['query']['pages'].keys() - key = key[0][0:] - print(result['query']['pages'][str(key)]['extract'],end="\n") - except: - print('oops!,no wikipedia page for that title.Wikipedia search titles are case Sensitive...') - + url = BASE_URL + ACTION + TITLES + prop + FORMAT + + # open url, read content (bytes), convert in string via decode() + result = json.loads(urllib.request.urlopen(url).read().decode('utf-8')) + + key = list(result['query']['pages'].keys())[0][0:] + + offset = result['query-continue']['extlinks']['eloffset'] + for j in range(0, offset): + + # ['*'] => elements of ....[j] are dict, and their keys are '*' + print('\t'+result['query']['pages'][key]['extlinks'][j]['*']) + + except KeyError: + print("Sorry, we couldn't find any links.") def images(): - print('\nall images related to this search',end="\n") + """ Get images urls """ + + print('\nAll images related to this search : \n') image_url = "http://en.wikipedia.org/wiki/" - global base_url - global Format - global action - global titles + prop = "&prop=images" - url = base_url + action + titles + prop + Format - result = json.load(urllib2.urlopen(url)) - key = result['query']['pages'].keys() - key = key[0][0:] + + url = BASE_URL + ACTION + TITLES + prop + FORMAT + + # open url, read content (bytes), convert in string via decode() + result = json.loads(urllib.request.urlopen(url).read().decode('utf-8')) + + key = list(result['query']['pages'].keys())[0][0:] + try: - i = 1 - while(i): - Image = str(result['query']['pages'][str(key)]['images'][i]['title']) - image = image_url + Image.replace(' ','_') - print(image) - i=i+1 - except: - print('\t------------------\t',end="\n") - pass - + for i in range(1, len(result['query']['pages'][key]['images'])): + + image = result['query']['pages'][key]['images'][i]['title'] + image = image_url + image.replace(' ','_') + print('\t'+image) + print('\n\t------------------\t') + except KeyError: + print('\n\t------------------\t') def featured_feed(): - global base_url - Format = "&format=json" - action = "&action=featuredfeed" - try: - feed = "&feed=" + str(sys.argv[1]) - url = base_url + action + feed + Format - print(url) - result = urllib2.urlopen(url).read() - res1 = re.compile('(.*)') - res2 = re.compile('(.*)en') - Result1 = re.findall(res1,result) - Result2 = re.findall(res2,result) - for i in enumerate(zip(Result1,Result2)): - print(i) - except: - print('error!') + """Featured Feed""" + + ACTION = "&action=featuredfeed" + + feed = "&feed=" + sys.argv[1] + url = BASE_URL + ACTION + feed + FORMAT + + print(url) + + result = urllib.request.urlopen(url).read().decode('utf-8') - - + res1 = re.compile('(.*)') + res2 = re.compile('(.*)en') + result1 = re.findall(res1, result) + result2 = re.findall(res2, result) + for i in enumerate(zip(result1, result2)): + print(i) -if len(sys.argv) < 2: - get_title() - wiki_search() - url_and_displaytitle() - images() - #interwiki_links() - interesting_links() -else: - featured_feed() +#def interwiki_links(): + # print('inter wiki links found for this search',end="\n") + # BASE_URL + # ACTION + # TITLES + # prop = "&prop=iwlinks" + # url = BASE_URL + ACTION + TITLES + prop + # print(url) + # result = urllib2.urlopen(url) + # for i in result: + # print(i) +def main(): + """ Main function """ + if len(sys.argv) < 2: + get_title() + wiki_search() + url_and_displaytitle() + images() + #interwiki_links() + interesting_links() + else: + featured_feed() +if __name__ == "__main__": + main() From 2d4e9e12b666a5f8f257f87e960d92ee3132f0f2 Mon Sep 17 00:00:00 2001 From: Arnaud V Date: Sat, 14 Dec 2013 12:55:59 +0100 Subject: [PATCH 4/4] Added separate aguments, corrected some display problem, removed the prompt (search now passed as argument) --- README.md | 20 ++++++++- cliwiki.py | 127 +++++++++++++++++++++++++++++++++-------------------- 2 files changed, 99 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index d05b9cb..266d4d9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,24 @@ Wikipedia CLI ========= -Command line interface to access wikipedia pages. +Command line interface to access wikipedia pages. This version is a fork of [AnirudhBhat](https://github.com/AnirudhBhat)'s and now run on python 3. + + +## Usage +~~~bash +$ ./cliwiki.py -h +usage: cliwiki.py [-h] [-d | -f | -p] search + +Access Wikipedia from Command Line + +positional arguments: + search Page to search for on Wikipedia + +optional arguments: + -h, --help show this help message and exit + -d, --today Display URLs for the "On this day" pages + -f, --featured Display the featured articles URLs + -p, --picture Display URLs for the "Picture of the day" pages +~~~ \ No newline at end of file diff --git a/cliwiki.py b/cliwiki.py index d1303dd..f67645b 100755 --- a/cliwiki.py +++ b/cliwiki.py @@ -4,9 +4,8 @@ import json import urllib.request -import sys import re -# import argparse +import argparse # **** Global Variables **** @@ -19,15 +18,6 @@ # **** Functions **** -def get_title(): - """ Ask the user for a title and store the input """ - - title = input('Enter the title you want to search --> \n') - global TITLES - TITLES += title.replace(' ','_') - - - def wiki_search(): """ Search function """ @@ -105,13 +95,12 @@ def interesting_links(): def images(): """ Get images urls """ - print('\nAll images related to this search : \n') image_url = "http://en.wikipedia.org/wiki/" - prop = "&prop=images" - url = BASE_URL + ACTION + TITLES + prop + FORMAT + print('\nAll images related to this search : \n') + # open url, read content (bytes), convert in string via decode() result = json.loads(urllib.request.urlopen(url).read().decode('utf-8')) @@ -121,7 +110,7 @@ def images(): for i in range(1, len(result['query']['pages'][key]['images'])): image = result['query']['pages'][key]['images'][i]['title'] - image = image_url + image.replace(' ','_') + image = image_url + image.replace(' ', '_') print('\t'+image) print('\n\t------------------\t') @@ -132,54 +121,98 @@ def images(): -def featured_feed(): +def featured_feed(feed): """Featured Feed""" - ACTION = "&action=featuredfeed" + url = BASE_URL + "&action=featuredfeed" + "&feed=" + feed + FORMAT + + result = urllib.request.urlopen(url).read().decode('utf-8') + + re_title = re.compile('(.*)') + re_links = re.compile('(.*)en') + + result1 = re.findall(re_title, result) + result2 = re.findall(re_links, result) + + print('\n') + + for desc, url in zip(result1, result2): + print(desc + ':\t ' + url) - feed = "&feed=" + sys.argv[1] - url = BASE_URL + ACTION + feed + FORMAT - print(url) - result = urllib.request.urlopen(url).read().decode('utf-8') - res1 = re.compile('(.*)') - res2 = re.compile('(.*)en') +def interwiki_links(): + """ Inter wiki links """ - result1 = re.findall(res1, result) - result2 = re.findall(res2, result) + print('Inter wiki links found for this search: ') - for i in enumerate(zip(result1, result2)): - print(i) + url = BASE_URL + ACTION + TITLES + "&prop=iwlinks"+ FORMAT + print(url) + + # TODO: parse the json, match it with a dict containing + # url to append depending on the key returned in the url, + # and then only show the resulting urls + # result = urllib.request.urlopen(url).read().decode('utf-8') -#def interwiki_links(): - # print('inter wiki links found for this search',end="\n") - # BASE_URL - # ACTION - # TITLES - # prop = "&prop=iwlinks" - # url = BASE_URL + ACTION + TITLES + prop - # print(url) - # result = urllib2.urlopen(url) - # for i in result: - # print(i) + # for i in reslut: + # print(i) def main(): """ Main function """ - if len(sys.argv) < 2: - get_title() - wiki_search() - url_and_displaytitle() - images() - #interwiki_links() - interesting_links() - else: - featured_feed() + + # Gestion des paramètres + parser = argparse.ArgumentParser(description = + "Access Wikipedia from Command Line") + + parser.add_argument('search', help = "Page to search for on Wikipedia") + + group = parser.add_mutually_exclusive_group() + + group.add_argument('-d', '--today', + action = 'store_const', + const = 'onthisday', + help='Display URLs for the "On this day" pages') + + group.add_argument('-f', '--featured', + action = 'store_const', + const = 'featured', + help = 'Display the featured articles URLs') + + group.add_argument('-p', '--picture', + action = 'store_const', + const = 'potd', + help='Display URLs for the "Picture of the day" pages') + + args = parser.parse_args() + + try: + if args.search : + + global TITLES + TITLES += args.search.replace(' ','_') + + wiki_search() + url_and_displaytitle() + images() + interesting_links() + # interwiki_links() + + elif args.featured: + featured_feed(args.featured) + + elif args.picture: + featured_feed(args.picture) + + elif args.today: + featured_feed(args.today) + + except KeyboardInterrupt: + print('\n\n Program interrupted')