Skip to content

Commit c5d906f

Browse files
committed
Stop calling Grafana directly from the Python library; use the gateway API instead
1 parent 8046a0a commit c5d906f

1 file changed

Lines changed: 23 additions & 273 deletions

File tree

dotscience/__init__.py

Lines changed: 23 additions & 273 deletions
Original file line numberDiff line numberDiff line change
@@ -257,13 +257,13 @@ def __init__(self):
257257
self._root = os.getenv('DOTSCIENCE_PROJECT_DOT_ROOT', default=os.getcwd())
258258
self._dotmesh_client = None
259259
self._hostname = None
260-
self._grafana_hostname = None
261-
self._grafana_auth = None
262260
self._auth = None
263261
self._cached_project = None
264262
self._project_name = None
263+
self._deployment = None
264+
self._deployer = None
265265

266-
def connect(self, username, apikey, project, hostname, g_hostname, g_username, g_apikey):
266+
def connect(self, username, apikey, project, hostname):
267267
# TODO: Make this fail if we're in a mode other than 'remote' mode.
268268
# TODO: make publish etc fail if we're not connected in remote mode.
269269
if not project:
@@ -275,9 +275,6 @@ def connect(self, username, apikey, project, hostname, g_hostname, g_username, g
275275
)
276276
self._hostname = hostname
277277

278-
self._grafana_hostname = g_hostname
279-
self._grafana_auth = (g_username, g_apikey)
280-
281278
self._auth = (username, apikey)
282279
self._project_name = project
283280
print("Checking connection... ", end="")
@@ -410,12 +407,12 @@ def _publish_remote_run(self, build, deploy):
410407
ret["dashboard"] = dashboard
411408
print("done")
412409
print(" -> Dashboard: %s\n" % (dashboard,))
413-
410+
414411
print("Waiting for model endpoint to become active", end="")
415412
sys.stdout.flush()
416413
self._wait_active()
417414
print(" done")
418-
415+
419416
print("=== Dotscience publish complete ===\n")
420417
return ret
421418

@@ -665,6 +662,8 @@ def _build_docker_image_on_hub(self):
665662
print(".", end="")
666663
sys.stdout.flush()
667664
time.sleep(1.0)
665+
if build is not None:
666+
print("BUILD:", build)
668667
if build is not None and build["status"] == "failed":
669668
raise Exception("Build failed: %s", build)
670669
if attempt == 60:
@@ -691,6 +690,7 @@ def _deploy_to_kube(self):
691690
if len(managed) == 0:
692691
raise Exception("Can't deploy - no managed deployers found")
693692
deployer = managed[0]
693+
self._deployer = deployer
694694
body = {
695695
# TODO fill this in
696696
"name": self._project_name.replace('-', ''),
@@ -725,6 +725,8 @@ def _deploy_to_kube(self):
725725
return "https://"+deployment.json()["host"]+"/v1/models/model:predict"
726726

727727
def _wait_active(self):
728+
if self._deployment is None:
729+
raise Exception("tried to wait for model to become active when no self._deployment was set")
728730
attempt = 0
729731
the_exc = None
730732
while attempt < 120:
@@ -749,264 +751,22 @@ def _wait_active(self):
749751
else:
750752
raise Exception("Unable to load error")
751753

752-
753754
def _setup_grafana(self):
755+
if self._deployment is None:
756+
raise Exception("tried to set up dashboard when no self._deployment was set")
757+
if self._deployer is None:
758+
raise Exception("tried to set up dashboard when no self._deployer was set")
759+
deployer_id = self._deployer["id"]
754760
deployment_id = self._deployment["id"]
755-
756-
dashboard = {
757-
"annotations": {
758-
"list": [
759-
{
760-
"builtIn": 1,
761-
"datasource": "-- Grafana --",
762-
"enable": True,
763-
"hide": True,
764-
"iconColor": "rgba(0, 211, 255, 1)",
765-
"name": "Annotations & Alerts",
766-
"type": "dashboard"
767-
}
768-
]
769-
},
770-
"editable": True,
771-
"gnetId": None,
772-
"graphTooltip": 0,
773-
# Create new dashboard (https://grafana.com/docs/http_api/dashboard/)
774-
"id": None,
775-
"links": [],
776-
"panels": [
777-
{
778-
"aliasColors": {},
779-
"bars": False,
780-
"dashLength": 10,
781-
"dashes": False,
782-
"fill": 1,
783-
"gridPos": {
784-
"h": 9,
785-
"w": 24,
786-
"x": 0,
787-
"y": 0
788-
},
789-
"id": 2,
790-
"legend": {
791-
"avg": False,
792-
"current": False,
793-
"max": False,
794-
"min": False,
795-
"show": True,
796-
"total": False,
797-
"values": False
798-
},
799-
"lines": True,
800-
"linewidth": 1,
801-
"links": [],
802-
"nullPointMode": "None",
803-
"paceLength": 10,
804-
"percentage": False,
805-
"pointradius": 2,
806-
"points": False,
807-
"renderer": "flot",
808-
"seriesOverrides": [],
809-
"stack": False,
810-
"steppedLine": False,
811-
"targets": [
812-
{
813-
"expr": f"sum(rate(model_predictions{{deployment_id=\"{deployment_id}\"}}[1m])) by (class)",
814-
"format": "time_series",
815-
"intervalFactor": 1,
816-
"legendFormat": "{{class}}",
817-
"refId": "A"
818-
}
819-
],
820-
"thresholds": [],
821-
"timeFrom": None,
822-
"timeRegions": [],
823-
"timeShift": None,
824-
"title": "Prediction rate (requests per minute)",
825-
"tooltip": {
826-
"shared": True,
827-
"sort": 2,
828-
"value_type": "individual"
829-
},
830-
"type": "graph",
831-
"xaxis": {
832-
"buckets": None,
833-
"mode": "time",
834-
"name": None,
835-
"show": True,
836-
"values": []
837-
},
838-
"yaxes": [
839-
{
840-
"format": "short",
841-
"label": None,
842-
"logBase": 1,
843-
"max": None,
844-
"min": None,
845-
"show": True
846-
},
847-
{
848-
"format": "short",
849-
"label": None,
850-
"logBase": 1,
851-
"max": None,
852-
"min": None,
853-
"show": True
854-
}
855-
],
856-
"yaxis": {
857-
"align": False,
858-
"alignLevel": None
859-
}
860-
},
861-
{
862-
"aliasColors": {},
863-
"bars": False,
864-
"dashLength": 10,
865-
"dashes": False,
866-
"fill": 1,
867-
"gridPos": {
868-
"h": 8,
869-
"w": 24,
870-
"x": 0,
871-
"y": 9
872-
},
873-
"id": 4,
874-
"legend": {
875-
"avg": False,
876-
"current": False,
877-
"max": False,
878-
"min": False,
879-
"show": True,
880-
"total": False,
881-
"values": False
882-
},
883-
"lines": True,
884-
"linewidth": 1,
885-
"links": [],
886-
"nullPointMode": "None",
887-
"paceLength": 10,
888-
"percentage": False,
889-
"pointradius": 2,
890-
"points": False,
891-
"renderer": "flot",
892-
"seriesOverrides": [],
893-
"stack": False,
894-
"steppedLine": False,
895-
"targets": [
896-
{
897-
"expr": f"histogram_quantile(0.95, sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\"{deployment_id}\"}}[1m])) by (le)) * 1e3",
898-
"format": "time_series",
899-
"intervalFactor": 1,
900-
"legendFormat": "95th percentile",
901-
"refId": "A"
902-
},
903-
{
904-
"expr": f"histogram_quantile(0.5, sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\"{deployment_id}\"}}[1m])) by(le)) * 1e3",
905-
"format": "time_series",
906-
"intervalFactor": 1,
907-
"legendFormat": "median",
908-
"refId": "B"
909-
},
910-
{
911-
"expr": f"sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\"{deployment_id}\"}}[1m])) / sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\"{deployment_id}\"}}[5m])) * 1e3",
912-
"format": "time_series",
913-
"intervalFactor": 1,
914-
"legendFormat": "mean",
915-
"refId": "C"
916-
}
917-
],
918-
"thresholds": [],
919-
"timeFrom": None,
920-
"timeRegions": [],
921-
"timeShift": None,
922-
"title": "Latencies",
923-
"tooltip": {
924-
"shared": True,
925-
"sort": 0,
926-
"value_type": "individual"
927-
},
928-
"type": "graph",
929-
"xaxis": {
930-
"buckets": None,
931-
"mode": "time",
932-
"name": None,
933-
"show": True,
934-
"values": []
935-
},
936-
"yaxes": [
937-
{
938-
"format": "short",
939-
"label": None,
940-
"logBase": 1,
941-
"max": None,
942-
"min": None,
943-
"show": True
944-
},
945-
{
946-
"format": "short",
947-
"label": None,
948-
"logBase": 1,
949-
"max": None,
950-
"min": None,
951-
"show": True
952-
}
953-
],
954-
"yaxis": {
955-
"align": False,
956-
"alignLevel": None
957-
}
958-
}
959-
],
960-
"refresh": "5s",
961-
"schemaVersion": 18,
962-
"style": "dark",
963-
"tags": [],
964-
"templating": {
965-
"list": []
966-
},
967-
"time": {
968-
"from": "now-5m",
969-
"to": "now"
970-
},
971-
"timepicker": {
972-
"refresh_intervals": [
973-
"5s",
974-
"10s",
975-
"30s",
976-
"1m",
977-
"5m",
978-
"15m",
979-
"30m",
980-
"1h",
981-
"2h",
982-
"1d"
983-
],
984-
"time_options": [
985-
"5m",
986-
"15m",
987-
"1h",
988-
"6h",
989-
"12h",
990-
"24h",
991-
"2d",
992-
"7d",
993-
"30d"
994-
]
995-
},
996-
"timezone": "",
997-
"title": f"Monitoring model {self._project_name.replace('-', '')}",
998-
"uid": ''.join(random.choices(string.ascii_letters + string.digits, k=9)),
999-
"version": 3
1000-
}
1001-
new_dashboard = requests.post(
1002-
self._grafana_hostname+"/api/dashboards/db",
1003-
auth=self._grafana_auth,
1004-
json=dict(dashboard=dashboard, folderId=0, overwrite=True),
761+
grafana = requests.post(
762+
# "/v2/deployers/{id}/deployments/{deploymentId}/dashboard"
763+
self._hostname+f"/v2/deployers/{deployer_id}/deployments/{deployment_id}/dashboard",
764+
json={},
765+
auth=self._auth,
1005766
)
1006767
# TODO check status code
1007-
self._dashboard = new_dashboard.json()
1008-
#print("new dashboard", self._dashboard)
1009-
return self._grafana_hostname+self._dashboard['url']
768+
dashboard = grafana.json()
769+
return dashboard['dashboardURL']
1010770

1011771
# Proxy things through to the current run
1012772
def start(self, description = None):
@@ -1194,22 +954,12 @@ def parameter(label, value):
1194954
def debug():
1195955
_defaultDS.debug()
1196956

1197-
def connect(username, apikey, project, hostname="",
1198-
g_hostname="", g_username="", g_apikey=""):
957+
def connect(username, apikey, project, hostname=""):
1199958
# Allow defaulting on empty string e.g. from env
1200959
if not hostname:
1201960
hostname = "https://cloud.dotscience.com"
1202-
# g_ for grafana
1203-
if not g_hostname:
1204-
g_hostname = "https://playground-grafana.dotscience.com"
1205-
# default to same creds as dotscience cloud, but overridable for
1206-
# development
1207-
if not g_username and not g_apikey:
1208-
g_username = "playground"
1209-
g_apikey = "password"
1210961
_defaultDS.connect(
1211962
username, apikey, project, hostname,
1212-
g_hostname, g_username, g_apikey,
1213963
)
1214964

1215965
from ._version import get_versions

0 commit comments

Comments
 (0)