From 95ceccb87a76720452fc6626a94ff9820fe7a2c3 Mon Sep 17 00:00:00 2001 From: TtheBC01 Date: Tue, 3 Aug 2021 05:06:37 +0000 Subject: [PATCH 1/5] fixed cli functions --- galileo_sdk/galileo_cli/jobs.py | 2 +- galileo_sdk/galileo_cli/lzs.py | 6 +++--- galileo_sdk/galileo_cli/missions.py | 8 ++++---- galileo_sdk/galileo_cli/profiles.py | 12 ++---------- galileo_sdk/galileo_cli/stations.py | 2 +- 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/galileo_sdk/galileo_cli/jobs.py b/galileo_sdk/galileo_cli/jobs.py index da75fdd..1c24a0e 100644 --- a/galileo_sdk/galileo_cli/jobs.py +++ b/galileo_sdk/galileo_cli/jobs.py @@ -122,7 +122,7 @@ def ls(index, id, receiver, sid, user_ids, status, page, items, head, spinner = Halo("Retrieving information", spinner="dots").start() #Testing purpose - my_id = galileo.profiles.self().userid + my_id = galileo.profiles.self().user_id user_ids = list(user_ids) + [my_id] receiver_ids = list(receiver) + galileo.lz.list_lz(user_ids=[my_id]) spinner.stop() diff --git a/galileo_sdk/galileo_cli/lzs.py b/galileo_sdk/galileo_cli/lzs.py index 5091785..a6de4a0 100644 --- a/galileo_sdk/galileo_cli/lzs.py +++ b/galileo_sdk/galileo_cli/lzs.py @@ -38,13 +38,13 @@ def ls(lz_ids, userid, page=0, items=10, everything=False): spinner.start() self = galileo.profiles.self() - userids = [] + user_ids = [] if not everything: - userids.append(self.userid) + user_ids.append(self.user_id) lzs = galileo.lz.list_lz( lz_ids=list(lz_ids), - user_ids=list(userids), + user_ids=list(user_ids), page=page, items=items, ) diff --git a/galileo_sdk/galileo_cli/missions.py b/galileo_sdk/galileo_cli/missions.py index 50cf194..9402d52 100644 --- a/galileo_sdk/galileo_cli/missions.py +++ b/galileo_sdk/galileo_cli/missions.py @@ -37,7 +37,7 @@ def missions(): ) @click.option( "-u", - "--userid", + "--user_id", type=str, multiple=True, help="Filter by userids, can provide multiple options.", @@ -52,7 +52,7 @@ def missions(): '--head', type=int, help="Number of Missions 
to display.") - def ls(index, id, short, name, userid, page, items, head): + def ls(index, id, short, name, user_id, page, items, head): """ List the Missions in your Galileo profile. """ @@ -60,11 +60,11 @@ def ls(index, id, short, name, userid, page, items, head): self = galileo.profiles.self() spinner.stop() spinner = Halo("Retrieving your Mission", spinner="dot").start() - userid += (self.userid, ) + user_id += (self.user_id, ) missions = galileo.missions.list_missions( mission_ids=list(id), names=list(name), - user_ids=list(userid), + user_ids=list(user_id), page=page, items=items, ) diff --git a/galileo_sdk/galileo_cli/profiles.py b/galileo_sdk/galileo_cli/profiles.py index ed91a1d..33a848d 100644 --- a/galileo_sdk/galileo_cli/profiles.py +++ b/galileo_sdk/galileo_cli/profiles.py @@ -43,13 +43,6 @@ def self(): multiple=True, help="Filter by partial usernames, can provide multiple options.", ) - @click.option( - "-k", - "--publickey", - type=str, - multiple=True, - help="Filter by public key, can provide multiple options.", - ) @click.option("--page", type=int, help="Filter by page number.") @click.option( "--items", @@ -57,7 +50,7 @@ def self(): help="Filter by number of items in the page.", ) @click.option('-n', '--head', type=int, help="Number of items to display.") - def ls(index, id, username, partialname, publickey, page, items, head): + def ls(index, id, username, partialname, page, items, head): """ List of all the profiles. 
""" @@ -66,7 +59,6 @@ def ls(index, id, username, partialname, publickey, page, items, head): user_ids=list(id), usernames=list(username), partial_usernames=list(partialname), - public_keys=list(publickey), page=page, items=items, ) @@ -83,7 +75,7 @@ def ls(index, id, username, partialname, publickey, page, items, head): users_list = [user.__dict__ for user in users_list] users_df = pandas.json_normalize(users_list) - users_df = users_df[["username", "userid", "lz_ids"]] + users_df = users_df[["username", "user_id", "lz_ids"]] spinner.stop() if head: diff --git a/galileo_sdk/galileo_cli/stations.py b/galileo_sdk/galileo_cli/stations.py index 515b0f0..4bc895f 100644 --- a/galileo_sdk/galileo_cli/stations.py +++ b/galileo_sdk/galileo_cli/stations.py @@ -93,7 +93,7 @@ def ls(index, id, name, mid, user_role, volume, description, page, items): stations_list = [station.__dict__ for station in stations_list] stations_df = pandas.json_normalize(stations_list) stations_df = stations_df[[ - "stationid", "name", "description", "users", "lz_ids", "volumes" + "station_id", "name", "description", "users", "lz_ids", "volumes" ]] spinner.stop() click.echo(stations_df) From 17a34e20de7c1b0289b8b255819e46dff1fcc790 Mon Sep 17 00:00:00 2001 From: Conrad Bailey Date: Fri, 30 Jul 2021 20:29:11 -0700 Subject: [PATCH 2/5] Updated configuration settings for LZs on slurm systems --- docs/src/landing_zone_slurm.rst | 75 ++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/docs/src/landing_zone_slurm.rst b/docs/src/landing_zone_slurm.rst index 810172b..3595bd1 100644 --- a/docs/src/landing_zone_slurm.rst +++ b/docs/src/landing_zone_slurm.rst @@ -28,10 +28,11 @@ very difficult for a containerized Landing Zone to communicate with the Slurm controller. For this reason we suggest admins use the binary linked above on Slurm systems. 
-It is also assumed by the LZ that ``/tmp`` belongs to a distributed -file system that is readable and writable by every node in the -cluster. You can override this location by setting the ``$TMPDIR``, -``$TEMP``, or ``$TMP`` environment variables. +It is also assumed by the LZ that the work directory (``-w``, +``--work-dir``) belongs to a distributed file system that is readable +and writable by every node in the cluster. You can influence the +default location (``/tmp``) by setting the ``$TMPDIR``, ``$TEMP``, or +``$TMP`` environment variables. Admins should read through the `Slurm Configuration`_ documentation to make sure memory, CPUs, and GPUs are allocated to jobs in mutually @@ -95,23 +96,18 @@ about these cgroups: after updating the file. Next you must tell Slurm to use cgroups for its task management -services. In slurm.conf make sure the task plugin is -``TaskPlugin=task/cgroup`` and that the select type parameters +services. In slurm.conf make sure ``TaskPlugin`` is ``task/cgroup`` or +``task/affinity,task/cgroup``, and that the select type parameters includes memory, e.g. ``SelectTypeParameters=CR_Core_Memory``. Any of ``CR_Core_Memory``, ``CR_CPU_Memory``, or ``CR_Socket_Memory`` will work; consult the slurm.conf `man page `_ to make a decision. Bear -in mind the *CPU* settings in Galileo get translated to ``srun ... -n -`` on the cluster. ``CR_Memory`` is discouraged since Galileo -does attempt to set constraints on CPU/core usage. +in mind the *CPU* settings in Galileo may get translated to ``srun +... -n `` on the cluster. ``CR_Memory`` is discouraged since +Galileo does attempt to set constraints on CPU/core usage. -Finally make sure that the cgroups are configured to enforce this -constraint in cgroup.conf. - -.. code-block:: bash - - ConstrainRAMSpace=yes - ConstrainSwapSpace=yes +Also check our `cgroups.conf Recommendations`_ for options affecting +memory allotment and enforcement. 
CPUs/Cores ~~~~~~~~~~ @@ -128,14 +124,14 @@ Non-distributed jobs translate the value into these flags .. code-block:: bash - --ntasks 1 --cpus-per-task + --ntasks 1 --cpus-per-task --mem Distributed jobs assume a one CPU default for tasks and translate the value into these flags .. code-block:: bash - --ntasks + --ntasks --cpus-per-task 1 --mem-per-cpu How those flags affect actual hardware usage depends on your Slurm configuration. In particular, in *slurm.conf*, the value of @@ -146,12 +142,8 @@ refer to the `slurm.conf man page `_ to investigate the hardware implications of each of these. -As noted in our `Memory`_ section, we -recommend ``task/cgroup`` for the value of ``TaskPlugin``. If that is -the case, then we need to ensure the cgroup enforces the core -constraints in cgroup.conf with ``ConstrainCores=yes``. It is also -advisable to set ``TaskAffinity=yes`` in cgroup.conf to ensure tasks -are bound to their allocated cores. +Also check our `cgroups.conf Recommendations`_ for options affecting +core allotment and affinity. .. _slurm_gpus: @@ -162,11 +154,36 @@ Galileo's support for GPU management in Slurm clusters hinges on the ``select/cons_tres`` plugin introduced in Slurm version 19.05. Admins should consult the `Slurm documentation `_ for correctly configuring this -feature. To ensure that jobs receive exclusive access to the GPUs -alloted to them, and only those GPUs, ``TaskPlugin`` should be set to -``task/cgroup`` in slurm.conf and ``ConstrainDevices=yes`` should be -included in cgroup.conf. See the `Memory`_ and `CPUs/Cores`_ sections -for more considerations regarding ``task/cgroup``. +feature. + +Also check our `cgroups.conf Recommendations`_ for options affecting +GPU allotment. + +cgroups.conf Recommendations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Setting ``TaskPlugin`` to ``task/cgroup`` enables a number of +cgroup.conf constraints that should be considered: + +* ``ConstrainCores``: + Ensures jobs get exclusive access to the cores allotted to them. 
+ +* ``ConstrainRAMSpace``, ``ConstrainKmemSpace``, and ``ConstrainSwapSpace``: + Ensures jobs will be preempted if they exceed their allotted + memory. Note that ``OverMemoryKill`` in *slurm.conf* is similar, but + operates on a per-process basis instead of a per-job basis. That + would be inappropriate because a job could use twice its allotted + memory by utilizing two processes. + +* ``ConstrainDevices``: + Ensures jobs get exclusive access to the GPUs allotted to them. + +* ``TaskAffinity=no``, ``ConstrainCores=yes``, and ``TaskPlugin=task/affinity,task/cgroup``: + Officially recommended by slurm for setting default task affinity as + well as exclusive core allotment. + +Be sure to check the `official documentation +`_ for other +considerations. How to Run the Landing Zone Daemon ---------------------------------- From 42a55cc0e8e68d72c079d6b1b9578c99353bd47a Mon Sep 17 00:00:00 2001 From: Conrad Bailey Date: Thu, 5 Aug 2021 07:29:46 +0000 Subject: [PATCH 3/5] Added documentation for the LZ's --stations flag allow autojoining to stations at LZ startup --- docs/src/landing_zone_main.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/src/landing_zone_main.rst b/docs/src/landing_zone_main.rst index 334b956..257551b 100644 --- a/docs/src/landing_zone_main.rst +++ b/docs/src/landing_zone_main.rst @@ -113,6 +113,14 @@ Command Line Flags location, then it will be utilized and the LZ will authenticate automatically without prompting the user. +``--stations`` ```` + At startup the Landing Zone will attempt to add itself to all of the + stations represented by the station ids provided to this flag. If an + attempt fails for any reason, then the Landing Zone will remain in + any stations it was successfully added to before the failure, it + will not attempt to add itself to any subsequent station ids in the + list, and the LZ will exit. + .. 
_landing_zone_main-tunnnel-overrides: ``--cloudflare-cert `` and ``--tunnel-hostname `` From a0e703167a29a359d96b3de7e080807cb78d0298 Mon Sep 17 00:00:00 2001 From: TtheBC01 Date: Tue, 10 Aug 2021 03:20:14 +0000 Subject: [PATCH 4/5] added notification script to setyp.py --- galileo_sdk/galileo_sdk.py | 7 +++++++ setup.py | 35 ++++++++++++++++------------------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/galileo_sdk/galileo_sdk.py b/galileo_sdk/galileo_sdk.py index d9a72e1..4de8440 100644 --- a/galileo_sdk/galileo_sdk.py +++ b/galileo_sdk/galileo_sdk.py @@ -45,6 +45,13 @@ NAMESPACE = "/galileo/user_interface/v1" +def notify(): + if len(sys.argv) > 1: + GalileoSdk.send_notification(GalileoSdk, sys.argv[1]) + else: + print("No message given for notification.") + + class GalileoSdk: def __init__( self, diff --git a/setup.py b/setup.py index 192a90c..c2582c4 100644 --- a/setup.py +++ b/setup.py @@ -13,20 +13,10 @@ if is_py3: install_requires = [ - "requests>=2.21.0", - "python-socketio[client]==4.3.1", - "python-engineio==3.9.0", - "chardet", - "mock", - "pathlib", - "termcolor==1.1.0", - "colorama==0.4.3", - "pyfiglet", - "click==7.1.2", - "click-shell", - "pandas", - "halo", - "curses-menu" + "requests>=2.21.0", "python-socketio[client]==4.3.1", + "python-engineio==3.9.0", "chardet", "mock", "pathlib", + "termcolor==1.1.0", "colorama==0.4.3", "pyfiglet", "click==7.1.2", + "click-shell", "pandas", "halo", "curses-menu" ] else: install_requires = [ @@ -46,8 +36,7 @@ def run(self): if tag != VERSION: info = "Git tag: {0} does not match the version of this app: {1}".format( - tag, VERSION - ) + tag, VERSION) sys.exit(info) @@ -57,7 +46,8 @@ def run(self): license="MIT", author="Hypernet Labs", author_email="galileo@hypernetlabs.io", - long_description="Galileo is a hub for modeling, simulations, and data analysis that functions as a quick and " + long_description= + "Galileo is a hub for modeling, simulations, and data analysis that functions as a 
quick and " "easy portal to cloud resources. The application streamlines computing infrastructure, " "saving engineers and researchers weeks of cloud setup time. Team and station features allow " "teams to collaborate efficiently by sharing projects and results, flexibly controlling " @@ -67,7 +57,12 @@ def run(self): "jobs, accepting jobs, and accepting members.", url="https://hypernetlabs.io/galileo/", packages=["galileo_sdk"], - entry_points={"console_scripts": ["galileo-cli = galileo_sdk.galileo_cli.cli:main",]}, + entry_points={ + "console_scripts": [ + "galileo-cli = galileo_sdk.galileo_cli.cli:main", + "galileo-notify = galileo_sdk.galileo_sdk:notify" + ] + }, package_data={ "galileo_sdk": [ "sdk/**", @@ -85,5 +80,7 @@ def run(self): install_requires=install_requires, extras_require={"docs": ["sphinx>=2.2.0", "sphinx-material"]}, tests_require=["pytest-runner", "pytest"], - cmdclass={"verify": VerifyVersionCommand,}, + cmdclass={ + "verify": VerifyVersionCommand, + }, ) From 48faca04b99109a7ce417a4209d3ae0f162ac73a Mon Sep 17 00:00:00 2001 From: TtheBC01 Date: Tue, 10 Aug 2021 03:58:34 +0000 Subject: [PATCH 5/5] bumped minor version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c2582c4..ff78114 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup from setuptools.command.install import install -VERSION = "1.0.0" +VERSION = "1.0.1" _ver = sys.version_info