@@ -11,6 +11,7 @@ import play.api.{Configuration, Logger}
1111import java .nio .file ._
1212import java .util
1313import javax .inject .{Inject , Singleton }
14+ import scala .annotation .tailrec
1415import scala .concurrent .duration .{DurationInt , FiniteDuration }
1516import scala .jdk .CollectionConverters ._
1617import scala .util .{Failure , Success , Try }
@@ -34,6 +35,27 @@ class K8sJobRunnerSrv(
3435
3536 lazy val logger : Logger = Logger (getClass)
3637
38+ /** Kubernetes label values must be ≤63 chars and start/end with [A-Za-z0-9] (see label value validation). */
39+ private def isKubernetesLabelAlphanum (c : Char ): Boolean =
40+ (c >= 'a' && c <= 'z' ) || (c >= 'A' && c <= 'Z' ) || (c >= '0' && c <= '9' )
41+
42+ private def kubernetesLabelValue (raw : String ): String = {
43+ @ tailrec
44+ def trimEnds (s : String ): String = {
45+ if (s.isEmpty) s
46+ else if (! isKubernetesLabelAlphanum(s.head)) trimEnds(s.tail)
47+ else if (! isKubernetesLabelAlphanum(s.last)) trimEnds(s.init)
48+ else s
49+ }
50+ val trimmed = trimEnds(raw)
51+ if (trimmed.isEmpty) " 0"
52+ else if (trimmed.length <= 63 ) trimmed
53+ else trimEnds(trimmed.take(63 )) match {
54+ case t if t.nonEmpty => t
55+ case _ => " 0"
56+ }
57+ }
58+
3759 lazy val isAvailable : Boolean =
3860 Try {
3961 val ver = client.getVersion
@@ -55,10 +77,11 @@ class K8sJobRunnerSrv(
5577 // make the default longer than likely values, but still not infinite
5678 val timeout_or_default = timeout.getOrElse(8 .hours)
5779 // https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
58- // FIXME: this collapses case, jeopardizing the uniqueness of the identifier.
59- // LDH: lowercase, digits, hyphens.
60- val ldh_jobid = job.id.toLowerCase().replace('_' , '-' )
80+ // RFC 1123 names: must start/end with alphanumeric; same trimming as label values.
81+ val ldh_jobid = kubernetesLabelValue(job.id.toLowerCase().replace('_' , '-' ))
6182 val kjobName = " neuron-job-" + ldh_jobid
83+ val jobLabel = kubernetesLabelValue(job.id)
84+ val workerLabel = kubernetesLabelValue(job.workerId())
6285 val pvcvs = new PersistentVolumeClaimVolumeSourceBuilder ()
6386 .withClaimName(persistentVolumeClaimName.get)
6487 .withReadOnly(false )
@@ -69,8 +92,8 @@ class K8sJobRunnerSrv(
6992 .withNewMetadata()
7093 .withName(kjobName)
7194 .withLabels(Map (
72- " cortex-job-id" -> job.id ,
73- " cortex-worker-id" -> job.workerId() ,
95+ " cortex-job-id" -> jobLabel ,
96+ " cortex-worker-id" -> workerLabel ,
7497 " cortex-neuron-job" -> " true" ).asJava)
7598 .endMetadata()
7699 .withNewSpec()
@@ -124,7 +147,7 @@ class K8sJobRunnerSrv(
124147 s " image : $dockerImage\n " +
125148 s " mount : pvc $persistentVolumeClaimName subdir $relativeJobDirectory as /job " +
126149 created_env.map(ev => s " \n env : ${ev.getName} = ${ev.getValue}" ).mkString)
127- val ended_kjob = client.batch().v1().jobs().withLabel(" cortex-job-id" , job.id )
150+ val ended_kjob = client.batch().v1().jobs().withLabel(" cortex-job-id" , jobLabel )
128151 .waitUntilCondition(x => Option (x).flatMap(j =>
129152 Option (j.getStatus).flatMap(s =>
130153 Some (s.getConditions.asScala.map(_.getType).exists(t =>
@@ -140,7 +163,7 @@ class K8sJobRunnerSrv(
140163 }
141164 // let's find the job by the attribute we know is fundamentally
142165 // unique, rather than one constructed from it
143- val deleted : util.List [StatusDetails ] = client.batch().v1().jobs().withLabel(" cortex-job-id" , job.id ).delete()
166+ val deleted : util.List [StatusDetails ] = client.batch().v1().jobs().withLabel(" cortex-job-id" , jobLabel ).delete()
144167 if (! deleted.isEmpty) {
145168 logger.info(s " Deleted Kubernetes Job for job ${job.id}" )
146169 } else {
0 commit comments