|
| 1 | +import argparse |
| 2 | + |
| 3 | +import urllib3 |
| 4 | + |
# Command I use to check every library currently installed with pip:
#   python license_checker_v2.py --dependencies $(cut -d '=' -f 1 <<< $(pip freeze))
# Alternatively, to check a requirements.txt file directly, extract the package names with:
#   awk -F'[>=<]' '{print $1}' requirements.txt
| 9 | + |
# Command-line interface: the dependencies to inspect plus the two license
# lists (substring patterns) used to classify each dependency.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--dependencies',
    nargs='+',
    required=True,
    help="A list of python library name you want to check the license of.",
)
parser.add_argument(
    '--accepted_licenses',
    nargs='*',
    default=[
        "Apache Software License",
        "Apache 2.0",
        "BSD",
        "ZLIB",
        "MIT",
        "Unlicense",
        "CC0",
        "CC-BY",
        "PSF",
        "MPL",
        "Mozilla Public License 2.0",
    ],
    help="A list of license which are considered acceptable for your project.",
)
parser.add_argument(
    '--forbidden_licenses',
    nargs='*',
    default=["GNU", "GPL", "Commons Clause", "BY-N"],
    help="A list of license which are considered problematic for your project.",
)
args = parser.parse_args()
| 20 | + |
python_dependencies = args.dependencies

# Maps each requested library name to the raw HTML of its PyPI project page.
pypi_pages = {}

# One PoolManager is shared by every request so the connection to pypi.org is
# reused, instead of building (and never closing) a new pool per dependency.
http_pool = urllib3.PoolManager()

for library_name in python_dependencies:
    url = f"https://pypi.org/project/{library_name}/"
    result = http_pool.request('GET', url)
    # The body is stored whatever the HTTP status is; a page without license
    # information makes parse_html() raise later on.
    pypi_pages[library_name] = result.data.decode('utf-8')
| 30 | + |
| 31 | + |
def parse_html(html_page):
    """Extract the license text from the HTML of a PyPI project page.

    Two layouts are tried in order:

    1. A line containing both ``<a href=`` and ``License:``; the license is
       the second ``::``-separated field of the line that follows it.
    2. A line containing ``<strong>License:</strong> ``; the license is that
       line with the surrounding ``<p>``/``<strong>`` markup removed.

    Args:
        html_page: The page content as a single string.

    Returns:
        The license text with surrounding whitespace removed.

    Raises:
        ValueError: If neither layout yields a license.
    """
    lines = html_page.split('\n')

    # Pairing each line with its successor means a match on the very last
    # line can never index past the end of the page.
    for line, following in zip(lines, lines[1:]):
        if "<a href=" in line and "License:" in line:
            fields = following.split('::')
            if len(fields) > 1:
                return fields[1].strip()
            # No '::' on the following line: keep looking instead of failing.

    for line in lines:
        if "<strong>License:</strong> " in line:
            text = line.replace("<p><strong>License:</strong> ", "").replace('</p>', '')
            # strip() also drops the '\r' left behind by CRLF line endings.
            return text.strip()

    raise ValueError("Unable to find license in html page")
| 44 | + |
| 45 | + |
# Library name -> license text parsed from its PyPI page.
library_license_dict = {}

# Result buckets; each one collects library names.
accepted_libraries = []
refused_libraries = []
maybe_libraries = []
# Libraries whose license could not be determined.
unknown_licenses = []
| 51 | + |
| 52 | + |
def is_license_in_list(license, license_list):
    """Tell whether `license` contains any entry of `license_list`.

    Matching is a case-insensitive substring test. Empty or whitespace-only
    entries are ignored, because an empty string is a substring of every
    license and would make everything match.

    Args:
        license: The license text to inspect. (The name shadows the
            `license` builtin; it is kept for backward compatibility.)
        license_list: Iterable of license name fragments to look for.

    Returns:
        True if at least one non-blank entry occurs in `license`, else False.
    """
    haystack = license.lower()  # lowered once rather than once per entry
    return any(
        entry.lower() in haystack
        for entry in license_list
        if entry.strip()
    )
| 58 | + |
| 59 | + |
# Sort every dependency into exactly one bucket: refused, accepted, maybe
# (license found but in neither list) or unknown (anything raised).
for library_name in python_dependencies:
    try:
        library_license = parse_html(pypi_pages[library_name])
        library_license_dict[library_name] = library_license
        print(f"{library_name}: {library_license}")

        # Forbidden licenses take precedence over accepted ones.
        # TODO : Should use regex instead?
        if is_license_in_list(library_license, args.forbidden_licenses):
            bucket = refused_libraries
        elif is_license_in_list(library_license, args.accepted_licenses):
            bucket = accepted_libraries
        else:
            bucket = maybe_libraries
        bucket.append(library_name)

    except Exception as error:
        print(f"{library_name}: {error}")
        unknown_licenses.append(library_name)
| 78 | + |
| 79 | + |
def plurial(lst, _if='s', _else=''):
    """Pick a word ending depending on how many items `lst` holds.

    Args:
        lst: Any sized collection.
        _if: Text returned when `lst` has more than one element.
        _else: Text returned when `lst` has zero or one element.

    Returns:
        `_if` or `_else`, as described above.
    """
    if len(lst) > 1:
        return _if
    return _else
| 82 | + |
# Final report. plurial() returns its second argument when the list has more
# than one element, so the plural ending must come first.
if unknown_licenses:
    print(f"Couldn't find the license{plurial(unknown_licenses)} of the following dependencies: {unknown_licenses}")

print(
    f"\nThe following dependenc{plurial(accepted_libraries, 'ies', 'y')} "
    f"{plurial(accepted_libraries, 'have', 'has')} an accepted license: {accepted_libraries}"
)

if refused_libraries:
    print(
        f"The following dependenc{plurial(refused_libraries, 'ies', 'y')} "
        f"{plurial(refused_libraries, 'have', 'has')} forbidden license(s):"
    )
    for library_name in refused_libraries:
        print(f" {library_name}: {library_license_dict[library_name]}")

if maybe_libraries:
    print(
        f"The following dependenc{plurial(maybe_libraries, 'ies', 'y')} "
        f"{plurial(maybe_libraries, 'have', 'has')} license which needs to be reviewed: "
    )
    for library_name in maybe_libraries:
        print(f" {library_name}: {library_license_dict[library_name]}")


# Exit with a non-zero status unless every dependency was accepted. An explicit
# SystemExit is used instead of `assert`, which is removed under `python -O`
# and would let the check pass silently.
if refused_libraries or maybe_libraries or unknown_licenses:
    raise SystemExit("License check failed: some dependencies are refused, unreviewed or unknown.")
0 commit comments