@@ -257,13 +257,13 @@ def __init__(self):
257257 self ._root = os .getenv ('DOTSCIENCE_PROJECT_DOT_ROOT' , default = os .getcwd ())
258258 self ._dotmesh_client = None
259259 self ._hostname = None
260- self ._grafana_hostname = None
261- self ._grafana_auth = None
262260 self ._auth = None
263261 self ._cached_project = None
264262 self ._project_name = None
263+ self ._deployment = None
264+ self ._deployer = None
265265
266- def connect (self , username , apikey , project , hostname , g_hostname , g_username , g_apikey ):
266+ def connect (self , username , apikey , project , hostname ):
267267 # TODO: Make this fail if we're in a mode other than 'remote' mode.
268268 # TODO: make publish etc fail if we're not connected in remote mode.
269269 if not project :
@@ -275,9 +275,6 @@ def connect(self, username, apikey, project, hostname, g_hostname, g_username, g
275275 )
276276 self ._hostname = hostname
277277
278- self ._grafana_hostname = g_hostname
279- self ._grafana_auth = (g_username , g_apikey )
280-
281278 self ._auth = (username , apikey )
282279 self ._project_name = project
283280 print ("Checking connection... " , end = "" )
@@ -410,12 +407,12 @@ def _publish_remote_run(self, build, deploy):
410407 ret ["dashboard" ] = dashboard
411408 print ("done" )
412409 print (" -> Dashboard: %s\n " % (dashboard ,))
413-
410+
414411 print ("Waiting for model endpoint to become active" , end = "" )
415412 sys .stdout .flush ()
416413 self ._wait_active ()
417414 print (" done" )
418-
415+
419416 print ("=== Dotscience publish complete ===\n " )
420417 return ret
421418
@@ -665,6 +662,8 @@ def _build_docker_image_on_hub(self):
665662 print ("." , end = "" )
666663 sys .stdout .flush ()
667664 time .sleep (1.0 )
665+ if build is not None :
666+ print ("BUILD:" , build )
668667 if build is not None and build ["status" ] == "failed" :
669668 raise Exception ("Build failed: %s" , build )
670669 if attempt == 60 :
@@ -691,6 +690,7 @@ def _deploy_to_kube(self):
691690 if len (managed ) == 0 :
692691 raise Exception ("Can't deploy - no managed deployers found" )
693692 deployer = managed [0 ]
693+ self ._deployer = deployer
694694 body = {
695695 # TODO fill this in
696696 "name" : self ._project_name .replace ('-' , '' ),
@@ -725,6 +725,8 @@ def _deploy_to_kube(self):
725725 return "https://" + deployment .json ()["host" ]+ "/v1/models/model:predict"
726726
727727 def _wait_active (self ):
728+ if self ._deployment is None :
729+ raise Exception ("tried to wait for model to become active when no self._deployment was set" )
728730 attempt = 0
729731 the_exc = None
730732 while attempt < 120 :
@@ -749,264 +751,22 @@ def _wait_active(self):
749751 else :
750752 raise Exception ("Unable to load error" )
751753
752-
753754 def _setup_grafana (self ):
755+ if self ._deployment is None :
756+ raise Exception ("tried to set up dashboard when no self._deployment was set" )
757+ if self ._deployer is None :
758+ raise Exception ("tried to set up dashboard when no self._deployer was set" )
759+ deployer_id = self ._deployer ["id" ]
754760 deployment_id = self ._deployment ["id" ]
755-
756- dashboard = {
757- "annotations" : {
758- "list" : [
759- {
760- "builtIn" : 1 ,
761- "datasource" : "-- Grafana --" ,
762- "enable" : True ,
763- "hide" : True ,
764- "iconColor" : "rgba(0, 211, 255, 1)" ,
765- "name" : "Annotations & Alerts" ,
766- "type" : "dashboard"
767- }
768- ]
769- },
770- "editable" : True ,
771- "gnetId" : None ,
772- "graphTooltip" : 0 ,
773- # Create new dashboard (https://grafana.com/docs/http_api/dashboard/)
774- "id" : None ,
775- "links" : [],
776- "panels" : [
777- {
778- "aliasColors" : {},
779- "bars" : False ,
780- "dashLength" : 10 ,
781- "dashes" : False ,
782- "fill" : 1 ,
783- "gridPos" : {
784- "h" : 9 ,
785- "w" : 24 ,
786- "x" : 0 ,
787- "y" : 0
788- },
789- "id" : 2 ,
790- "legend" : {
791- "avg" : False ,
792- "current" : False ,
793- "max" : False ,
794- "min" : False ,
795- "show" : True ,
796- "total" : False ,
797- "values" : False
798- },
799- "lines" : True ,
800- "linewidth" : 1 ,
801- "links" : [],
802- "nullPointMode" : "None" ,
803- "paceLength" : 10 ,
804- "percentage" : False ,
805- "pointradius" : 2 ,
806- "points" : False ,
807- "renderer" : "flot" ,
808- "seriesOverrides" : [],
809- "stack" : False ,
810- "steppedLine" : False ,
811- "targets" : [
812- {
813- "expr" : f"sum(rate(model_predictions{{deployment_id=\" { deployment_id } \" }}[1m])) by (class)" ,
814- "format" : "time_series" ,
815- "intervalFactor" : 1 ,
816- "legendFormat" : "{{class}}" ,
817- "refId" : "A"
818- }
819- ],
820- "thresholds" : [],
821- "timeFrom" : None ,
822- "timeRegions" : [],
823- "timeShift" : None ,
824- "title" : "Prediction rate (requests per minute)" ,
825- "tooltip" : {
826- "shared" : True ,
827- "sort" : 2 ,
828- "value_type" : "individual"
829- },
830- "type" : "graph" ,
831- "xaxis" : {
832- "buckets" : None ,
833- "mode" : "time" ,
834- "name" : None ,
835- "show" : True ,
836- "values" : []
837- },
838- "yaxes" : [
839- {
840- "format" : "short" ,
841- "label" : None ,
842- "logBase" : 1 ,
843- "max" : None ,
844- "min" : None ,
845- "show" : True
846- },
847- {
848- "format" : "short" ,
849- "label" : None ,
850- "logBase" : 1 ,
851- "max" : None ,
852- "min" : None ,
853- "show" : True
854- }
855- ],
856- "yaxis" : {
857- "align" : False ,
858- "alignLevel" : None
859- }
860- },
861- {
862- "aliasColors" : {},
863- "bars" : False ,
864- "dashLength" : 10 ,
865- "dashes" : False ,
866- "fill" : 1 ,
867- "gridPos" : {
868- "h" : 8 ,
869- "w" : 24 ,
870- "x" : 0 ,
871- "y" : 9
872- },
873- "id" : 4 ,
874- "legend" : {
875- "avg" : False ,
876- "current" : False ,
877- "max" : False ,
878- "min" : False ,
879- "show" : True ,
880- "total" : False ,
881- "values" : False
882- },
883- "lines" : True ,
884- "linewidth" : 1 ,
885- "links" : [],
886- "nullPointMode" : "None" ,
887- "paceLength" : 10 ,
888- "percentage" : False ,
889- "pointradius" : 2 ,
890- "points" : False ,
891- "renderer" : "flot" ,
892- "seriesOverrides" : [],
893- "stack" : False ,
894- "steppedLine" : False ,
895- "targets" : [
896- {
897- "expr" : f"histogram_quantile(0.95, sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\" { deployment_id } \" }}[1m])) by (le)) * 1e3" ,
898- "format" : "time_series" ,
899- "intervalFactor" : 1 ,
900- "legendFormat" : "95th percentile" ,
901- "refId" : "A"
902- },
903- {
904- "expr" : f"histogram_quantile(0.5, sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\" { deployment_id } \" }}[1m])) by(le)) * 1e3" ,
905- "format" : "time_series" ,
906- "intervalFactor" : 1 ,
907- "legendFormat" : "median" ,
908- "refId" : "B"
909- },
910- {
911- "expr" : f"sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\" { deployment_id } \" }}[1m])) / sum(rate(interceptor_request_duration_milliseconds_bucket{{deployment_id=\" { deployment_id } \" }}[5m])) * 1e3" ,
912- "format" : "time_series" ,
913- "intervalFactor" : 1 ,
914- "legendFormat" : "mean" ,
915- "refId" : "C"
916- }
917- ],
918- "thresholds" : [],
919- "timeFrom" : None ,
920- "timeRegions" : [],
921- "timeShift" : None ,
922- "title" : "Latencies" ,
923- "tooltip" : {
924- "shared" : True ,
925- "sort" : 0 ,
926- "value_type" : "individual"
927- },
928- "type" : "graph" ,
929- "xaxis" : {
930- "buckets" : None ,
931- "mode" : "time" ,
932- "name" : None ,
933- "show" : True ,
934- "values" : []
935- },
936- "yaxes" : [
937- {
938- "format" : "short" ,
939- "label" : None ,
940- "logBase" : 1 ,
941- "max" : None ,
942- "min" : None ,
943- "show" : True
944- },
945- {
946- "format" : "short" ,
947- "label" : None ,
948- "logBase" : 1 ,
949- "max" : None ,
950- "min" : None ,
951- "show" : True
952- }
953- ],
954- "yaxis" : {
955- "align" : False ,
956- "alignLevel" : None
957- }
958- }
959- ],
960- "refresh" : "5s" ,
961- "schemaVersion" : 18 ,
962- "style" : "dark" ,
963- "tags" : [],
964- "templating" : {
965- "list" : []
966- },
967- "time" : {
968- "from" : "now-5m" ,
969- "to" : "now"
970- },
971- "timepicker" : {
972- "refresh_intervals" : [
973- "5s" ,
974- "10s" ,
975- "30s" ,
976- "1m" ,
977- "5m" ,
978- "15m" ,
979- "30m" ,
980- "1h" ,
981- "2h" ,
982- "1d"
983- ],
984- "time_options" : [
985- "5m" ,
986- "15m" ,
987- "1h" ,
988- "6h" ,
989- "12h" ,
990- "24h" ,
991- "2d" ,
992- "7d" ,
993- "30d"
994- ]
995- },
996- "timezone" : "" ,
997- "title" : f"Monitoring model { self ._project_name .replace ('-' , '' )} " ,
998- "uid" : '' .join (random .choices (string .ascii_letters + string .digits , k = 9 )),
999- "version" : 3
1000- }
1001- new_dashboard = requests .post (
1002- self ._grafana_hostname + "/api/dashboards/db" ,
1003- auth = self ._grafana_auth ,
1004- json = dict (dashboard = dashboard , folderId = 0 , overwrite = True ),
761+ grafana = requests .post (
762+ # "/v2/deployers/{id}/deployments/{deploymentId}/dashboard"
763+ self ._hostname + f"/v2/deployers/{ deployer_id } /deployments/{ deployment_id } /dashboard" ,
764+ json = {},
765+ auth = self ._auth ,
1005766 )
1006767 # TODO check status code
1007- self ._dashboard = new_dashboard .json ()
1008- #print("new dashboard", self._dashboard)
1009- return self ._grafana_hostname + self ._dashboard ['url' ]
768+ dashboard = grafana .json ()
769+ return dashboard ['dashboardURL' ]
1010770
1011771 # Proxy things through to the current run
1012772 def start (self , description = None ):
@@ -1194,22 +954,12 @@ def parameter(label, value):
1194954def debug ():
1195955 _defaultDS .debug ()
1196956
1197- def connect (username , apikey , project , hostname = "" ,
1198- g_hostname = "" , g_username = "" , g_apikey = "" ):
957+ def connect (username , apikey , project , hostname = "" ):
1199958 # Allow defaulting on empty string e.g. from env
1200959 if not hostname :
1201960 hostname = "https://cloud.dotscience.com"
1202- # g_ for grafana
1203- if not g_hostname :
1204- g_hostname = "https://playground-grafana.dotscience.com"
1205- # default to same creds as dotscience cloud, but overridable for
1206- # development
1207- if not g_username and not g_apikey :
1208- g_username = "playground"
1209- g_apikey = "password"
1210961 _defaultDS .connect (
1211962 username , apikey , project , hostname ,
1212- g_hostname , g_username , g_apikey ,
1213963 )
1214964
1215965from ._version import get_versions
0 commit comments