diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 7343b932c..2c96e44c7 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -31,8 +31,7 @@ jobs: - name: Add Helm repos run: | helm repo add elasticsearch https://helm.elastic.co - helm repo add neo4j https://neo4j-contrib.github.io/neo4j-helm - helm repo add neo4j-community https://equinor.github.io/helm-charts/charts + helm repo add neo4j https://helm.neo4j.com/neo4j helm repo add mysql https://charts.bitnami.com/bitnami helm repo add cp-helm-charts https://confluentinc.github.io/cp-helm-charts helm repo add kafka https://charts.bitnami.com/bitnami diff --git a/README.md b/README.md index 49027c0cb..337d0cd7d 100644 --- a/README.md +++ b/README.md @@ -32,22 +32,19 @@ The main components are powered by 4 external dependencies: - Kafka - Local DB (MySQL, Postgres, MariaDB) - Search Index (Elasticsearch) -- Graph Index (Supports either Neo4j or Elasticsearch) +- Graph Index (Supports either Elasticsearch or Neo4j) The dependencies must be deployed before deploying Datahub. We created a separate [chart](https://github.com/acryldata/datahub-helm/tree/master/charts/prerequisites) for deploying the dependencies with example configuration. They could also be deployed -separately on-prem or leveraged as managed services. To remove your dependency on Neo4j, -set enabled to false in the `datahub-kubernetes/prerequisites/values.yaml` file. -Then, override the `graph_service_impl` field in `datahub-kubernetes/datahub/values.yaml` to -have the value `elasticsearch` instead of `neo4j`. +separately on-prem or leveraged as managed services. ## Quickstart Assuming kubectl context points to the correct kubernetes cluster, first create kubernetes secrets that contain MySQL and Neo4j passwords. 
```(shell) -kubectl create secret generic mysql-secrets --from-literal=mysql-root-password=datahub -kubectl create secret generic neo4j-secrets --from-literal=neo4j-password=datahub +kubectl create secret generic mysql-secrets --from-literal=mysql-root-password=datahub --from-literal=mysql-password=datahub +kubectl create secret generic neo4j-secrets --from-literal=neo4j-password=datahub --from-literal=NEO4J_AUTH=neo4j/datahub ``` The above commands sets the passwords to "datahub" as an example. Change to any password of choice. @@ -79,7 +76,7 @@ elasticsearch-master-0 1/1 Running 0 prerequisites-cp-schema-registry-cf79bfccf-kvjtv 2/2 Running 1 63m prerequisites-kafka-0 1/1 Running 2 62m prerequisites-mysql-0 1/1 Running 1 62m -prerequisites-neo4j-community-0 1/1 Running 0 52m +prerequisites-neo4j-0 1/1 Running 0 52m prerequisites-zookeeper-0 1/1 Running 0 62m ``` @@ -109,7 +106,7 @@ elasticsearch-master-0 1/1 Running 0 prerequisites-cp-schema-registry-cf79bfccf-kvjtv 2/2 Running 1 99m prerequisites-kafka-0 1/1 Running 2 97m prerequisites-mysql-0 1/1 Running 1 97m -prerequisites-neo4j-community-0 1/1 Running 0 88m +prerequisites-neo4j-0 1/1 Running 0 88m prerequisites-zookeeper-0 1/1 Running 0 97m ``` diff --git a/charts/datahub/Chart.yaml b/charts/datahub/Chart.yaml index 5c32de242..0f5897209 100644 --- a/charts/datahub/Chart.yaml +++ b/charts/datahub/Chart.yaml @@ -4,33 +4,33 @@ description: A Helm chart for LinkedIn DataHub type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.187 +version: 0.4.10 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
-appVersion: 0.11.0 +appVersion: 0.13.2 dependencies: - name: datahub-gms - version: 0.2.151 + version: 0.2.167 repository: file://./subcharts/datahub-gms condition: datahub-gms.enabled - name: datahub-frontend - version: 0.2.141 + version: 0.2.157 repository: file://./subcharts/datahub-frontend condition: datahub-frontend.enabled - name: datahub-mae-consumer - version: 0.2.147 + version: 0.2.158 repository: file://./subcharts/datahub-mae-consumer condition: global.datahub_standalone_consumers_enabled - name: datahub-mce-consumer - version: 0.2.150 + version: 0.2.160 repository: file://./subcharts/datahub-mce-consumer condition: global.datahub_standalone_consumers_enabled - name: datahub-ingestion-cron - version: 0.2.136 + version: 0.2.143 repository: file://./subcharts/datahub-ingestion-cron condition: datahub-ingestion-cron.enabled - name: acryl-datahub-actions - version: 0.2.138 + version: 0.2.144 repository: file://./subcharts/acryl-datahub-actions condition: acryl-datahub-actions.enabled maintainers: diff --git a/charts/datahub/README.md b/charts/datahub/README.md index 986a84bd4..e23b2ccfd 100644 --- a/charts/datahub/README.md +++ b/charts/datahub/README.md @@ -18,155 +18,184 @@ helm install datahub datahub/datahub --values <> ## Chart Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| datahub-frontend.enabled | bool | `true` | Enable Datahub Front-end | -| datahub-frontend.image.repository | string | `"linkedin/datahub-frontend-react"` | Image repository for datahub-frontend | -| datahub-frontend.image.tag | string | `"v0.11.0"` | Image tag for datahub-frontend | -| datahub-frontend.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-frontend | -| datahub-gms.enabled | bool | `true` | Enable GMS | -| datahub-gms.image.repository | string | `"linkedin/datahub-gms"` | Image repository for datahub-gms | -| datahub-gms.image.tag | string | `"v0.11.0"` | Image tag for datahub-gms | -| 
datahub-gms.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-gms | -| datahub-mae-consumer.image.repository | string | `"linkedin/datahub-mae-consumer"` | Image repository for datahub-mae-consumer | -| datahub-mae-consumer.image.tag | string | `"v0.11.0"` | Image tag for datahub-mae-consumer | -| datahub-mae-consumer.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-mae-consumer | -| datahub-mce-consumer.image.repository | string | `"linkedin/datahub-mce-consumer"` | Image repository for datahub-mce-consumer | -| datahub-mce-consumer.image.tag | string | `"v0.11.0"` | Image tag for datahub-mce-consumer | -| datahub-mce-consumer.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-mce-consumer | -| datahub-ingestion-cron.enabled | bool | `false` | Enable cronjob for periodic ingestion | -| datahubUpgrade.podSecurityContext | object | `{}` | Pod security context for datahubUpgrade jobs | -| datahubUpgrade.securityContext | object | `{}` | Container security context for datahubUpgrade jobs | -| datahubUpgrade.podAnnotations | object | `{}` | Pod annotations for datahubUpgrade jobs | -| datahubUpgrade.cleanupJob.resources | object | '{}' | Kube Resource definitions for the datahub upgrade job 'cleanupJob' | -| datahubUpgrade.cleanupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | -| datahubUpgrade.restoreIndices.resources | object | '{}' | Kube Resource definitions for the datahub upgrade job 'restore indices' | -| datahubUpgrade.restoreIndices.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | -| elasticsearchSetupJob.enabled | bool | `true` | Enable setup job for elasicsearch | -| elasticsearchSetupJob.image.repository | string | `"linkedin/datahub-elasticsearch-setup"` | Image repository for elasticsearchSetupJob | -| elasticsearchSetupJob.image.tag | string | `"v0.11.0"` | Image repository for elasticsearchSetupJob | -| 
elasticsearchSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for elasticsearchSetupJob | -| elasticsearchSetupJob.resources | object | '{}' | Kube Resource definitions for elasticsearchSetupJob | -| elasticsearchSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for elasticsearchSetupJob | -| elasticsearchSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for elasticsearchSetupJob | -| elasticsearchSetupJob.podAnnotations | object | `{}` | Pod annotations for elasticsearchSetupJob | -| elasticsearchSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | -| kafkaSetupJob.enabled | bool | `true` | Enable setup job for kafka | -| kafkaSetupJob.image.repository | string | `"linkedin/datahub-kafka-setup"` | Image repository for kafkaSetupJob | -| kafkaSetupJob.image.tag | string | `"v0.11.0"` | Image repository for kafkaSetupJob | -| kafkaSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for kafkaSetupJob| -| kafkaSetupJob.resources | object | '{}' | Kube Resource definitions for kafkaSetupJob | -| kafkaSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for kafkaSetupJob | -| kafkaSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for kafkaSetupJob | -| kafkaSetupJob.podAnnotations | object | `{}` | Pod annotations for kafkaSetupJob | -| kafkaSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | -| mysqlSetupJob.enabled | bool | `false` | Enable setup job for mysql | -| mysqlSetupJob.image.repository | string | `"acryldata/datahub-mysql-setup"` | Image repository for mysqlSetupJob | -| mysqlSetupJob.image.tag | string | `"v0.11.0"` | Image repository for mysqlSetupJob | -| mysqlSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for mysqlSetupJob| -| mysqlSetupJob.resources | object | '{}' | Kube 
Resource definitions for mysqlSetupJob | -| mysqlSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for mysqlSetupJob | -| mysqlSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for mysqlSetupJob | -| mysqlSetupJob.podAnnotations | object | `{}` | Pod annotations for mysqlSetupJob | -| mysqlSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | -| postgresqlSetupJob.enabled | bool | `false` | Enable setup job for postgresql | -| postgresqlSetupJob.image.repository | string | `"acryldata/datahub-postgres-setup"` | Image repository for postgresqlSetupJob | -| postgresqlSetupJob.image.tag | string | `"v0.11.0"` | Image repository for postgresqlSetupJob | -| postgresqlSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for postgresqlSetupJob| -| postgresqlSetupJob.resources | object | '{}' | Kube Resource definitions for postgresqlSetupJob | -| postgresqlSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for mysqlSetupJob | -| postgresqlSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for mysqlSetupJob | -| postgresqlSetupJob.podAnnotations | object | `{}` | Pod annotations for mysqlSetupJob | -| postgresqlSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | -| datahubSystemUpdate.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | -| global.strict_mode | boolean | true | Enables validations in helm charts to ensure features work as expected. Recommended NOT TO CHANGE. 
| -| global.datahub_standalone_consumers_enabled | boolean | true | Enable standalone consumers for kafka | -| global.datahub_analytics_enabled | boolean | true | Enable datahub usage analytics | -| global.datahub.appVersion | string | `"1.0"` | App version for annotation | -| global.datahub.gms.port | string | `"8080"` | Port of GMS service | -| global.elasticsearch.host | string | `"elasticsearch-master"` | Elasticsearch host name (endpoint) | -| global.elasticsearch.port | string | `"9200"` | Elasticsearch port | -| global.kafka.bootstrap.server | string | `"prerequisites-broker:9092"` | Kafka bootstrap servers (with port) | -| global.kafka.zookeeper.server | string | `"prerequisites-zookeeper:2181"` | Kafka zookeeper servers (with port) | -| global.kafka.topics.metadata_change_event_name | string | `"MetadataChangeEvent_v4"` | Kafka topic name for Metadata Change Events (deprecated) | -| global.kafka.topics.failed_metadata_change_event_name | string | `"FailedMetadataChangeEvent_v4"` | Kafka topic name for Failed Metadata Change events (deprecated) | -| global.kafka.topics.metadata_audit_event_name | string | `"MetadataAuditEvent_v4"` | Kafka topic name for Metadata Audit events (deprecated) | -| global.kafka.topics.datahub_usage_event_name | string | `"DataHubUsageEvent_v1"` | Kafka topic name for DataHub Usage events | -| global.kafka.topics.metadata_change_proposal_topic_name | string | `"MetadataChangeProposal_v1"` | Kafka topic name for Metadata Change Proposal events | -| global.kafka.topics.failed_metadata_change_proposal_topic_name | string | `"FailedMetadataChangeProposal_v1"` | Kafka topic name for Failed Metadata Change Proposal events | -| global.kafka.topics.metadata_change_log_versioned_topic_name | string | `"MetadataChangeLog_Versioned_v1"` | Kafka topic name for Versioned Metadata Change Log events | -| global.kafka.topics.metadata_change_log_timeseries_topic_name | string | `"MetadataChangeLog_Timeseries_v1"` | Kafka topic name for Timeseries 
Metadata Change Log events | -| global.kafka.topics.platform_event_topic_name | string | `"PlatformEvent_v1"` | Kafka topic name for Platform events | -| global.kafka.schemaregistry.url | string | `` | URL to kafka schema registry if using `KAFKA` type | -| global.neo4j.host | string | `"prerequisites-neo4j:7474"` | Neo4j host address (with port) | -| global.neo4j.uri | string | `"bolt://prerequisites-neo4j"` | Neo4j URI | -| global.neo4j.username | string | `"neo4j"` | Neo4j user name | -| global.neo4j.password.secretRef | string | `"neo4j-secrets"` | Secret that contains the Neo4j password | -| global.neo4j.password.secretKey | string | `"neo4j-password"` | Secret key that contains the Neo4j password | -| global.sql.datasource.driver | string | `"com.mysql.cj.jdbc.Driver"` | Driver for the SQL database | -| global.sql.datasource.host | string | `"prerequisites-mysql:3306"` | SQL database host (with port) | -| global.sql.datasource.hostForMysqlClient | string | `"prerequisites-mysql"` | SQL database host (without port) | -| global.sql.datasource.port | string | `"3306"` | SQL database port | -| global.sql.datasource.url | string | `"jdbc:mysql://prerequisites-mysql:3306/datahub?verifyServerCertificate=false\u0026useSSL=true"` | URL to access SQL database | -| global.sql.datasource.username | string | `"root"` | SQL user name | -| global.sql.datasource.username.secretRef | string | `"mysql-secrets"` | Secret that contains the MySQL username | -| global.sql.datasource.username.secretKey | string | `"mysql-username"` | Secret key that contains the MySQL username | -| global.sql.datasource.password.secretRef | string | `"mysql-secrets"` | Secret that contains the MySQL password | -| global.sql.datasource.password.secretKey | string | `"mysql-password"` | Secret key that contains the MySQL password | -| global.sql.datasource.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value instead | -| global.graph_service_impl 
| string | `neo4j` | One of `neo4j` or `elasticsearch`. Determines which backend to use for the GMS graph service. Elastic is recommended for a simplified deployment. Neo4j will be the default for now to maintain backwards compatibility | +| Key | Type | Default | Description | +|----------------------------------------------------------------|---------|--------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| +| datahub-frontend.enabled | bool | `true` | Enable Datahub Front-end | +| datahub-frontend.image.repository | string | `"linkedin/datahub-frontend-react"` | Image repository for datahub-frontend | +| datahub-frontend.image.tag | string | `"v0.11.0"` | Image tag for datahub-frontend | +| datahub-frontend.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-frontend | +| datahub-frontend.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| datahub-frontend.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| datahub-gms.enabled | bool | `true` | Enable GMS | +| datahub-gms.image.repository | string | `"linkedin/datahub-gms"` | Image repository for datahub-gms | +| datahub-gms.image.tag | string | `"v0.11.0"` | Image tag for datahub-gms | +| datahub-gms.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-gms | +| datahub-gms.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| datahub-gms.image.command | list | `[]` | Override the image's command. 
Used to configure custom startup or shutdown behavior | +| datahub-mae-consumer.image.repository | string | `"linkedin/datahub-mae-consumer"` | Image repository for datahub-mae-consumer | +| datahub-mae-consumer.image.tag | string | `"v0.11.0"` | Image tag for datahub-mae-consumer | +| datahub-mae-consumer.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-mae-consumer | +| datahub-mae-consumer.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| datahub-mae-consumer.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| datahub-mce-consumer.image.repository | string | `"linkedin/datahub-mce-consumer"` | Image repository for datahub-mce-consumer | +| datahub-mce-consumer.image.tag | string | `"v0.11.0"` | Image tag for datahub-mce-consumer | +| datahub-mce-consumer.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for datahub-mce-consumer | +| datahub-mce-consumer.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| datahub-mce-consumer.image.command | list | `[]` | Override the image's command. 
Used to configure custom startup or shutdown behavior | +| datahub-ingestion-cron.enabled | bool | `false` | Enable cronjob for periodic ingestion | +| datahubUpgrade.podSecurityContext | object | `{}` | Pod security context for datahubUpgrade jobs | +| datahubUpgrade.securityContext | object | `{}` | Container security context for datahubUpgrade jobs | +| datahubUpgrade.podAnnotations | object | `{}` | Pod annotations for datahubUpgrade jobs | +| datahubUpgrade.cleanupJob.resources | object | `{}` | Kube Resource definitions for the datahub upgrade job 'cleanupJob' | +| datahubUpgrade.cleanupJob.concurrencyPolicy | string | `Allow, Forbid, Replace` | Add concurrencyPolicy for the clean up cron job | +| datahubUpgrade.cleanupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | +| datahubUpgrade.cleanupJob.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| datahubUpgrade.cleanupJob.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| datahubUpgrade.noCodeDataMigration.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| datahubUpgrade.noCodeDataMigration.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| datahubUpgrade.restoreIndices.resources | object | `{}` | Kube Resource definitions for the datahub upgrade job 'restore indices' | +| datahubUpgrade.restoreIndices.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | +| datahubUpgrade.restoreIndices.concurrencyPolicy | string | `Allow, Forbid, Replace` | Add concurrencyPolicy for the restoreIndices cron job | +| datahubUpgrade.restoreIndices.image.args | list | `[]` | Override the image's args. 
Used to configure custom startup or shutdown behavior | +| datahubUpgrade.restoreIndices.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| elasticsearchSetupJob.enabled | bool | `true` | Enable setup job for elasticsearch | +| elasticsearchSetupJob.image.repository | string | `"linkedin/datahub-elasticsearch-setup"` | Image repository for elasticsearchSetupJob | +| elasticsearchSetupJob.image.tag | string | `"v0.11.0"` | Image tag for elasticsearchSetupJob | +| elasticsearchSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for elasticsearchSetupJob | +| elasticsearchSetupJob.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| elasticsearchSetupJob.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| elasticsearchSetupJob.resources | object | `{}` | Kube Resource definitions for elasticsearchSetupJob | +| elasticsearchSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for elasticsearchSetupJob | +| elasticsearchSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for elasticsearchSetupJob | +| elasticsearchSetupJob.podAnnotations | object | `{}` | Pod annotations for elasticsearchSetupJob | +| elasticsearchSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | +| kafkaSetupJob.enabled | bool | `true` | Enable setup job for kafka | +| kafkaSetupJob.image.repository | string | `"linkedin/datahub-kafka-setup"` | Image repository for kafkaSetupJob | +| kafkaSetupJob.image.tag | string | `"v0.11.0"` | Image tag for kafkaSetupJob | +| kafkaSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for kafkaSetupJob | +| kafkaSetupJob.image.args | list | `[]` | Override the image's args. 
Used to configure custom startup or shutdown behavior | +| kafkaSetupJob.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| kafkaSetupJob.resources | object | `{}` | Kube Resource definitions for kafkaSetupJob | +| kafkaSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for kafkaSetupJob | +| kafkaSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for kafkaSetupJob | +| kafkaSetupJob.podAnnotations | object | `{}` | Pod annotations for kafkaSetupJob | +| kafkaSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | +| mysqlSetupJob.enabled | bool | `false` | Enable setup job for mysql | +| mysqlSetupJob.image.repository | string | `"acryldata/datahub-mysql-setup"` | Image repository for mysqlSetupJob | +| mysqlSetupJob.image.tag | string | `"v0.11.0"` | Image tag for mysqlSetupJob | +| mysqlSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for mysqlSetupJob | +| mysqlSetupJob.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| mysqlSetupJob.image.command | list | `[]` | Override the image's command. 
Used to configure custom startup or shutdown behavior | +| mysqlSetupJob.resources | object | `{}` | Kube Resource definitions for mysqlSetupJob | +| mysqlSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for mysqlSetupJob | +| mysqlSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for mysqlSetupJob | +| mysqlSetupJob.podAnnotations | object | `{}` | Pod annotations for mysqlSetupJob | +| mysqlSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | +| postgresqlSetupJob.enabled | bool | `false` | Enable setup job for postgresql | +| postgresqlSetupJob.image.repository | string | `"acryldata/datahub-postgres-setup"` | Image repository for postgresqlSetupJob | +| postgresqlSetupJob.image.tag | string | `"v0.11.0"` | Image tag for postgresqlSetupJob | +| postgresqlSetupJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy for postgresqlSetupJob | +| postgresqlSetupJob.image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| postgresqlSetupJob.image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | +| postgresqlSetupJob.resources | object | `{}` | Kube Resource definitions for postgresqlSetupJob | +| postgresqlSetupJob.podSecurityContext | object | `{"fsGroup": 1000}` | Pod security context for postgresqlSetupJob | +| postgresqlSetupJob.securityContext | object | `{"runAsUser": 1000}` | Container security context for postgresqlSetupJob | +| postgresqlSetupJob.podAnnotations | object | `{}` | Pod annotations for postgresqlSetupJob | +| postgresqlSetupJob.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | +| datahubSystemUpdate.extraSidecars | list | `[]` | Add additional sidecar containers to the job pod | +| global.strict_mode | boolean | true | Enables validations in helm charts to ensure features work as expected. 
Recommended NOT TO CHANGE. | +| global.datahub_standalone_consumers_enabled | boolean | false | Enable standalone consumers for kafka | +| global.datahub_analytics_enabled | boolean | true | Enable datahub usage analytics | +| global.datahub.appVersion | string | `"1.0"` | App version for annotation | +| global.datahub.gms.protocol | string | `"http"` | Protocol of GMS service | +| global.datahub.gms.host | string | `"datahub-datahub-gms"` | Host of GMS service | +| global.datahub.gms.port | string | `"8080"` | Port of GMS service | +| global.datahub.monitoring.portName | string | `jmx` | Name of Kube port for monitoring | +| global.elasticsearch.host | string | `"elasticsearch-master"` | Elasticsearch host name (endpoint) | +| global.elasticsearch.port | string | `"9200"` | Elasticsearch port | +| global.kafka.bootstrap.server | string | `"prerequisites-broker:9092"` | Kafka bootstrap servers (with port) | +| global.kafka.zookeeper.server | string | `"prerequisites-zookeeper:2181"` | Kafka zookeeper servers (with port) | +| global.kafka.consumer.stopContainerOnDeserializationError | boolean | `true` | Determines whether or not to halt progress when encountering a deserialization error. Halting prevents data loss but blocks progress until the error is fixed | +| global.kafka.topics.metadata_change_event_name | string | `"MetadataChangeEvent_v4"` | Kafka topic name for Metadata Change Events (deprecated) | +| global.kafka.topics.failed_metadata_change_event_name | string | `"FailedMetadataChangeEvent_v4"` | Kafka topic name for Failed Metadata Change events (deprecated) | +| global.kafka.topics.metadata_audit_event_name | string | `"MetadataAuditEvent_v4"` | Kafka topic name for Metadata Audit events (deprecated) | +| global.kafka.topics.datahub_usage_event_name | string | `"DataHubUsageEvent_v1"` | Kafka topic name for DataHub Usage events | +| global.kafka.topics.metadata_change_proposal_topic_name | string | `"MetadataChangeProposal_v1"` | Kafka topic name for Metadata 
Change Proposal events | +| global.kafka.topics.failed_metadata_change_proposal_topic_name | string | `"FailedMetadataChangeProposal_v1"` | Kafka topic name for Failed Metadata Change Proposal events | +| global.kafka.topics.metadata_change_log_versioned_topic_name | string | `"MetadataChangeLog_Versioned_v1"` | Kafka topic name for Versioned Metadata Change Log events | +| global.kafka.topics.metadata_change_log_timeseries_topic_name | string | `"MetadataChangeLog_Timeseries_v1"` | Kafka topic name for Timeseries Metadata Change Log events | +| global.kafka.topics.platform_event_topic_name | string | `"PlatformEvent_v1"` | Kafka topic name for Platform events | +| global.kafka.schemaregistry.url | string | `` | URL to kafka schema registry if using `KAFKA` type | +| global.neo4j.host | string | `"prerequisites-neo4j:7474"` | Neo4j host address (with port) | +| global.neo4j.uri | string | `"bolt://prerequisites-neo4j"` | Neo4j URI | +| global.neo4j.database | string | `"graph.db"` | Neo4J database | +| global.neo4j.username | string | `"neo4j"` | Neo4j user name | +| global.neo4j.password.secretRef | string | `"neo4j-secrets"` | Secret that contains the Neo4j password | +| global.neo4j.password.secretKey | string | `"neo4j-password"` | Secret key that contains the Neo4j password | +| global.sql.datasource.driver | string | `"com.mysql.cj.jdbc.Driver"` | Driver for the SQL database | +| global.sql.datasource.host | string | `"prerequisites-mysql:3306"` | SQL database host (with port) | +| global.sql.datasource.hostForMysqlClient | string | `"prerequisites-mysql"` | SQL database host (without port) | +| global.sql.datasource.port | string | `"3306"` | SQL database port | +| global.sql.datasource.url | string | `"jdbc:mysql://prerequisites-mysql:3306/datahub?verifyServerCertificate=false\u0026useSSL=true"` | URL to access SQL database | +| global.sql.datasource.username | string | `"root"` | SQL user name | +| global.sql.datasource.username.secretRef | string | 
`"mysql-secrets"` | Secret that contains the MySQL username | +| global.sql.datasource.username.secretKey | string | `"mysql-username"` | Secret key that contains the MySQL username | +| global.sql.datasource.password.secretRef | string | `"mysql-secrets"` | Secret that contains the MySQL password | +| global.sql.datasource.password.secretKey | string | `"mysql-password"` | Secret key that contains the MySQL password | +| global.sql.datasource.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value instead | +| global.graph_service_impl | string | `elasticsearch` | One of `elasticsearch` or `neo4j`. Determines which backend to use for the GMS graph service. Elasticsearch is recommended for a simplified deployment. | ## Optional Chart Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| datahub-gms.sql.datasource.username | string | `root` | SQL username for GMS (overrides global value) | -| datahub-gms.sql.datasource.username.secretRef | string | `"mysql-secrets"` | Secret that contains the GMS SQL username (overrides global value) | -| datahub-gms.sql.datasource.username.secretKey | string | `"mysql-username"` | Secret key that contains the GMS SQL username (overrides global value) | -| datahub-gms.sql.datasource.password.secretRef | string | `"mysql-secrets"` | Secret that contains the GMS SQL password (overrides global value) | -| datahub-gms.sql.datasource.password.secretKey | string | `"mysql-password"` | Secret key that contains the GMS SQL password (overrides global value) | -| datahub-gms.sql.datasource.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value for GMS SQL login (overrides global value) | -| mysqlSetupJob.username | string | `root` | SQL username for mysqlSetupJob (overrides global value) | -| mysqlSetupJob.password.secretRef | string | `"mysql-secrets"` | Secret that contains the mysqlSetupJob SQL 
password (overrides global value) | -| mysqlSetupJob.password.secretKey | string | `"mysql-password"` | Secret key that contains the mysqlSetupJob SQL password (overrides global value) | -| mysqlSetupJob.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value for mysqlSetupJob SQL login (overrides global value) | -| postgresqlSetupJob.username | string | `root` | SQL username for postgresqlSetupJob (overrides global value) | -| postgresqlSetupJob.password.secretRef | string | `"mysql-secrets"` | Secret that contains the postgresqlSetupJob SQL password (overrides global value) | -| postgresqlSetupJob.password.secretKey | string | `"mysql-password"` | Secret key that contains the postgresqlSetupJob SQL password (overrides global value) | -| postgresqlSetupJob.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value for postgresqlSetupJob SQL login (overrides global value) | -| acryl-datahub-actions.ingestionSecretFiles.name | string | `""` | Name of the k8s secret that holds any secret files (e.g., SSL certificates and private keys) that are used in your ingestion recipes. 
The keys in the secret will be mounted as individual files under `/etc/datahub/ingestion-secret-files` | -| acryl-datahub-actions.ingestionSecretFiles.defaultMode | string | `""` | The permission mode for the volume that mounts k8s secret under `/etc/datahub/ingestion-secret-files`, default value is 0444 which allows read access by owner, group, and other users | -| global.credentialsAndCertsSecrets.name | string | `""` | Name of the secret that holds SSL certificates (keystores, truststores) | -| global.credentialsAndCertsSecrets.path | string | `"/mnt/certs"` | Path to mount the SSL certificates | -| global.credentialsAndCertsSecrets.secureEnv | map | `{}` | Map of SSL config name and the corresponding value in the secret | -| global.springKafkaConfigurationOverrides | map | `{}` | Map of configuration overrides for accessing kafka | -| global.elasticsearch.useSSL | bool | `false` | Whether to enable SSL for accessing elasticsearch | -| global.elasticsearch.auth.username | string | `""` | Elasticsearch username | -| global.elasticsearch.auth.password.secretRef | string | `""` | Secret that contains the elasticsearch password | -| global.elasticsearch.auth.password.secretKey | string | `""` | Secret key that contains the elasticsearch password | -| global.elasticsearch.auth.password.value | string | `""` | Alternative to using the secret above, uses raw string value instead | -| global.kafka.schemaregistry.type | string | `"INTERNAL"` | Type of schema registry (INTERNAL, KAFKA, or AWS_GLUE) | -| global.kafka.schemaregistry.glue.region | string | `""` | Region of the AWS Glue schema registry | -| global.kafka.schemaregistry.glue.registry | string | `""` | Name of the AWS Glue schema registry | -| datahub.metadata_service_authentication.enabled | bool | `false` | Whether Metadata Service Authentication is enabled. 
| -| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `datahub-auth-secrets` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret called `datahub-auth-secrets`. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `system_client_secret` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `system_client_secret` within a secret named `datahub-auth-secrets`. | -| global.datahub.metadata_service_authentication.tokenService.signingKey.secretRef | string | `datahub-auth-secrets` | The reference to a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret called `datahub-auth-secrets`. | -| global.datahub.metadata_service_authentication.tokenService.signingKey.secretKey | string | `token_service_signing_key` | The key of a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `token_service_signing_key` within a secret named `datahub-auth-secrets`. 
| -| global.datahub.metadata_service_authentication.tokenService.salt.secretRef | string | `datahub-auth-secrets` | The reference to a secret containing the internal system secret that is used to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret called `datahub-auth-secrets`. | -| global.datahub.metadata_service_authentication.tokenService.salt.secretKey | string | `token_service_salt` | The key of a secret containing the internal system secret that is used to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `token_service_salt` within a secret named `datahub-auth-secrets`. | -| global.datahub.metadata_service_authentication.provisionSecrets.enabled | bool | `true` | Whether auth secrets (system client secret, token signing key & token service salt) should be provisioned on the first deployment for you. Set this to false if you are overriding `global.datahub.metadata_service_authentication.tokenService.signingKey.secretRef` or `global.datahub.metadata_service_authentication systemClientSecret.secretRef`. | -| global.datahub.metadata_service_authentication.provisionSecrets.autoGenerate | bool | `true` | Whether auth secrets (token signing key, system client secret & token service salt) should be provisioned on the first deployment for you **with a random seed** on the first deployment for you. Set this to false and use `global.datahub.metadata_service_authentication.provisionSecrets.secretValues.*` if you would like to specify the secret values directly. | -| global.datahub.encryptionKey.provisionSecrets.secretValues.secret | string | `` | The system client secret key value to be used if specified directly. 
| -| global.datahub.encryptionKey.provisionSecrets.secretValues.signingkey | string | `` | The system signing key value to be used if specified directly. | -| global.datahub.encryptionKey.provisionSecrets.secretValues.salt | string | `` | The token service salt value to be used if specified directly. | -| global.datahub.managed_ingestion.enabled | bool | `true` | Whether or not UI-based ingestion experience is enabled. | -| global.datahub.encryptionKey.secretRef | string | `datahub-encryption-secrets` | The reference to a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret named `datahub-encryption-secrets`. | -| global.datahub.encryptionKey.secretKey | string | `encryption_key_secret` | The key of a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `encryption_key_secret` within a secret named `datahub-encryption-secrets`. | -| global.datahub.managed_ingestion.defaultCliVersion | string | `` |0.11.0 This is the version of the DataHub CLI to use for UI ingestion, by default. | -| global.datahub.encryptionKey.provisionSecret.enabled | bool | `true` | Whether an encryption key secret should be provisioned on the first deployment for you. Set this to false if you are overriding global.datahub.encryptionKey.secretRef. | -| global.datahub.encryptionKey.provisionSecret.autoGenerate | bool | `true` | Whether an encryption key secret should be provisioned for you **with a random seed** on the first deployment for you. Set this to false and use `global.datahub.encryptionKey.provisionSecret.secretValues.encryptionKey` if you would like to specify the secret values directly. 
| -| global.datahub.encryptionKey.provisionSecret.secretValues.encryptionKey | string | `` | The encryption key value to be used if specified directly. | -| global.datahub.enable_retention | bool | `false` | Whether or not to enable retention on local DB | -| global.sql.datasource.hostForpostgresqlClient | string | `""` | SQL database host (without port) when using postgresqlSetupJob | +| Key | Type | Default | Description | +|----------------------------------------------------------------------------------|--------|------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| datahub-gms.sql.datasource.username | string | `root` | SQL username for GMS (overrides global value) | +| datahub-gms.sql.datasource.username.secretRef | string | `"mysql-secrets"` | Secret that contains the GMS SQL username (overrides global value) | +| datahub-gms.sql.datasource.username.secretKey | string | `"mysql-username"` | Secret key that contains the GMS SQL username (overrides global value) | +| datahub-gms.sql.datasource.password.secretRef | string | `"mysql-secrets"` | Secret that contains the GMS SQL password (overrides global value) | +| datahub-gms.sql.datasource.password.secretKey | string | `"mysql-password"` | Secret key that contains the GMS SQL password (overrides global value) | +| datahub-gms.sql.datasource.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value for GMS SQL login (overrides global value) | +| mysqlSetupJob.username | string | `root` | SQL username for mysqlSetupJob (overrides global value) | +| mysqlSetupJob.password.secretRef | string | `"mysql-secrets"` | Secret that 
contains the mysqlSetupJob SQL password (overrides global value) | +| mysqlSetupJob.password.secretKey | string | `"mysql-password"` | Secret key that contains the mysqlSetupJob SQL password (overrides global value) | +| mysqlSetupJob.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value for mysqlSetupJob SQL login (overrides global value) | +| postgresqlSetupJob.username | string | `root` | SQL username for postgresqlSetupJob (overrides global value) | +| postgresqlSetupJob.password.secretRef | string | `"mysql-secrets"` | Secret that contains the postgresqlSetupJob SQL password (overrides global value) | +| postgresqlSetupJob.password.secretKey | string | `"mysql-password"` | Secret key that contains the postgresqlSetupJob SQL password (overrides global value) | +| postgresqlSetupJob.password.value | string | `"mysql-password"` | Alternative to using the secret above, uses raw string value for postgresqlSetupJob SQL login (overrides global value) | +| acryl-datahub-actions.ingestionSecretFiles.name | string | `""` | Name of the k8s secret that holds any secret files (e.g., SSL certificates and private keys) that are used in your ingestion recipes. 
The keys in the secret will be mounted as individual files under `/etc/datahub/ingestion-secret-files` | +| acryl-datahub-actions.ingestionSecretFiles.defaultMode | string | `""` | The permission mode for the volume that mounts k8s secret under `/etc/datahub/ingestion-secret-files`, default value is 0444 which allows read access by owner, group, and other users | +| global.credentialsAndCertsSecrets.name | string | `""` | Name of the secret that holds SSL certificates (keystores, truststores) | +| global.credentialsAndCertsSecrets.path | string | `"/mnt/certs"` | Path to mount the SSL certificates | +| global.credentialsAndCertsSecrets.secureEnv | map | `{}` | Map of SSL config name and the corresponding value in the secret | +| global.springKafkaConfigurationOverrides | map | `{}` | Map of configuration overrides for accessing kafka | +| global.elasticsearch.useSSL | bool | `false` | Whether to enable SSL for accessing elasticsearch | +| global.elasticsearch.auth.username | string | `""` | Elasticsearch username | +| global.elasticsearch.auth.password.secretRef | string | `""` | Secret that contains the elasticsearch password | +| global.elasticsearch.auth.password.secretKey | string | `""` | Secret key that contains the elasticsearch password | +| global.elasticsearch.auth.password.value | string | `""` | Alternative to using the secret above, uses raw string value instead | +| global.kafka.schemaregistry.type | string | `"INTERNAL"` | Type of schema registry (INTERNAL, KAFKA, or AWS_GLUE) | +| global.kafka.schemaregistry.glue.region | string | `""` | Region of the AWS Glue schema registry | +| global.kafka.schemaregistry.glue.registry | string | `""` | Name of the AWS Glue schema registry | +| datahub.metadata_service_authentication.enabled | bool | `true` | Whether Metadata Service Authentication is enabled. 
| +| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `datahub-auth-secrets` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret called `datahub-auth-secrets`. | +| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `system_client_secret` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `system_client_secret` within a secret named `datahub-auth-secrets`. | +| global.datahub.metadata_service_authentication.tokenService.signingKey.secretRef | string | `datahub-auth-secrets` | The reference to a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret called `datahub-auth-secrets`. | +| global.datahub.metadata_service_authentication.tokenService.signingKey.secretKey | string | `token_service_signing_key` | The key of a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `token_service_signing_key` within a secret named `datahub-auth-secrets`. 
| +| global.datahub.metadata_service_authentication.tokenService.salt.secretRef | string | `datahub-auth-secrets` | The reference to a secret containing the internal system secret that is used to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret called `datahub-auth-secrets`. | +| global.datahub.metadata_service_authentication.tokenService.salt.secretKey | string | `token_service_salt` | The key of a secret containing the internal system secret that is used to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `token_service_salt` within a secret named `datahub-auth-secrets`. | +| global.datahub.metadata_service_authentication.provisionSecrets.enabled | bool | `true` | Whether auth secrets (system client secret, token signing key & token service salt) should be provisioned on the first deployment for you. Set this to false if you are overriding `global.datahub.metadata_service_authentication.tokenService.signingKey.secretRef` or `global.datahub.metadata_service_authentication.systemClientSecret.secretRef`. | +| global.datahub.metadata_service_authentication.provisionSecrets.autoGenerate | bool | `true` | Whether auth secrets (token signing key, system client secret & token service salt) should be provisioned for you **with a random seed** on the first deployment. Set this to false and use `global.datahub.metadata_service_authentication.provisionSecrets.secretValues.*` if you would like to specify the secret values directly. | +| global.datahub.encryptionKey.provisionSecrets.secretValues.secret | string | `` | The system client secret key value to be used if specified directly. 
| +| global.datahub.encryptionKey.provisionSecrets.secretValues.signingkey | string | `` | The system signing key value to be used if specified directly. | +| global.datahub.encryptionKey.provisionSecrets.secretValues.salt | string | `` | The token service salt value to be used if specified directly. | +| global.datahub.managed_ingestion.enabled | bool | `true` | Whether or not UI-based ingestion experience is enabled. | +| global.datahub.encryptionKey.secretRef | string | `datahub-encryption-secrets` | The reference to a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret named `datahub-encryption-secrets`. | +| global.datahub.encryptionKey.secretKey | string | `encryption_key_secret` | The key of a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. If a secret reference is not provided, a random one will be generated for you in a Kubernetes secret value named `encryption_key_secret` within a secret named `datahub-encryption-secrets`. | +| global.datahub.managed_ingestion.defaultCliVersion | string | `0.11.0` | This is the version of the DataHub CLI to use for UI ingestion, by default. | +| global.datahub.encryptionKey.provisionSecret.enabled | bool | `true` | Whether an encryption key secret should be provisioned on the first deployment for you. Set this to false if you are overriding global.datahub.encryptionKey.secretRef. | +| global.datahub.encryptionKey.provisionSecret.autoGenerate | bool | `true` | Whether an encryption key secret should be provisioned for you **with a random seed** on the first deployment for you. Set this to false and use `global.datahub.encryptionKey.provisionSecret.secretValues.encryptionKey` if you would like to specify the secret values directly. 
| +| global.datahub.encryptionKey.provisionSecret.secretValues.encryptionKey | string | `` | The encryption key value to be used if specified directly. | +| global.datahub.enable_retention | bool | `false` | Whether or not to enable retention on local DB | +| global.sql.datasource.hostForpostgresqlClient | string | `""` | SQL database host (without port) when using postgresqlSetupJob | diff --git a/charts/datahub/quickstart-values-with-neo4j.yaml b/charts/datahub/quickstart-values-with-neo4j.yaml index 2d96a889c..cfd2737a2 100644 --- a/charts/datahub/quickstart-values-with-neo4j.yaml +++ b/charts/datahub/quickstart-values-with-neo4j.yaml @@ -87,9 +87,10 @@ global: url: "http://prerequisites-cp-schema-registry:8081" neo4j: - host: "prerequisites-neo4j-community:7474" - uri: "bolt://prerequisites-neo4j-community" + host: "prerequisites-neo4j:7474" + uri: "bolt://prerequisites-neo4j" username: "neo4j" + database: "graph.db" password: secretRef: neo4j-secrets secretKey: neo4j-password diff --git a/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml b/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml index 7d2f93904..b6caf5c53 100644 --- a/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml +++ b/charts/datahub/subcharts/acryl-datahub-actions/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.138 +version: 0.2.144 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
appVersion: 0.0.11 diff --git a/charts/datahub/subcharts/acryl-datahub-actions/README.md b/charts/datahub/subcharts/acryl-datahub-actions/README.md index e08afdbd0..a160efa37 100644 --- a/charts/datahub/subcharts/acryl-datahub-actions/README.md +++ b/charts/datahub/subcharts/acryl-datahub-actions/README.md @@ -6,41 +6,43 @@ Current chart version is `0.0.3` ## Chart Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| affinity | object | `{}` | | -| exporters.jmx.enabled | boolean | false | | -| extraLabels | object | `{}` | Extra labels for deployment configuration | -| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | -| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | -| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | -| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | -| fullnameOverride | string | `"acryl-datahub-actions"` | | -| global.datahub.gms.port | string | `"8080"` | | -| image.pullPolicy | string | `"IfNotPresent"` | | -| image.repository | string | `"acryldata/datahub-actions"` | | -| image.tag | string | `"v0.0.6"` | | -| imagePullSecrets | list | `[]` | | -| nameOverride | string | `""` | | -| nodeSelector | object | `{}` | | -| podAnnotations | object | `{}` | | -| podSecurityContext | object | `{}` | | -| replicaCount | int | `1` | | -| resources | object | `{}` | | -| securityContext | object | `{}` | | -| service.port | int | `9093` | | -| service.nodePort | int | `""` | | -| service.type | string | `"ClusterIP"` | | -| serviceAccount.annotations | object | `{}` | | -| serviceAccount.create | bool | `false` | | -| serviceAccount.name | string | `nil` | | -| tolerations | list | `[]` | | -| global.kafka.bootstrap.server | string | `nil` | | -| global.kafka.schemaregistry.url | string | 
`nil` | | -| actions.kafkaAutoOffsetPolicy | string | `"latest"` | | -| datahub.metadata_service_authentication.enabled | bool | `false` | Whether Metadata Service Authentication is enabled. | -| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| ingestionSecretFiles.name | string | `""` | Name of the k8s secret that holds any secret files (e.g., SSL certificates and private keys) that are used in your ingestion recipes. 
The keys in the secret will be mounted as individual files under `/etc/datahub/ingestion-secret-files` | -| ingestionSecretFiles.defaultMode | string | `""` | The permission mode for the volume that mounts k8s secret under `/etc/datahub/ingestion-secret-files`, default value is 0444 which allows read access by owner, group, and other users | +| Key | Type | Default | Description | +|-----------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------|-------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| affinity | object | `{}` | | +| exporters.jmx.enabled | boolean | false | | +| extraLabels | object | `{}` | Extra labels for deployment configuration | +| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | +| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | +| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | +| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | +| fullnameOverride | string | `"acryl-datahub-actions"` | | +| global.datahub.gms.port | string | `"8080"` | | +| image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| image.command | list | `[]` | Override the image's command. 
Used to configure custom startup or shutdown behavior | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"acryldata/datahub-actions"` | | +| image.tag | string | `"v0.0.6"` | | +| imagePullSecrets | list | `[]` | | +| nameOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | | +| podSecurityContext | object | `{}` | | +| replicaCount | int | `1` | | +| resources | object | `{}` | | +| securityContext | object | `{}` | | +| service.port | int | `9093` | | +| service.nodePort | int | `""` | | +| service.type | string | `"ClusterIP"` | | +| serviceAccount.annotations | object | `{}` | | +| serviceAccount.create | bool | `false` | | +| serviceAccount.name | string | `nil` | | +| tolerations | list | `[]` | | +| global.kafka.bootstrap.server | string | `nil` | | +| global.kafka.schemaregistry.url | string | `nil` | | +| actions.kafkaAutoOffsetPolicy | string | `"latest"` | | +| datahub.metadata_service_authentication.enabled | bool | `true` | Whether Metadata Service Authentication is enabled. | +| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. 
| +| ingestionSecretFiles.name | string | `""` | Name of the k8s secret that holds any secret files (e.g., SSL certificates and private keys) that are used in your ingestion recipes. The keys in the secret will be mounted as individual files under `/etc/datahub/ingestion-secret-files` | +| ingestionSecretFiles.defaultMode | string | `""` | The permission mode for the volume that mounts k8s secret under `/etc/datahub/ingestion-secret-files`, default value is 0444 which allows read access by owner, group, and other users | diff --git a/charts/datahub/subcharts/acryl-datahub-actions/templates/_helpers.tpl b/charts/datahub/subcharts/acryl-datahub-actions/templates/_helpers.tpl index 3ca075049..2e0ca72e2 100644 --- a/charts/datahub/subcharts/acryl-datahub-actions/templates/_helpers.tpl +++ b/charts/datahub/subcharts/acryl-datahub-actions/templates/_helpers.tpl @@ -61,3 +61,24 @@ Create the name of the service account to use {{ default "default" .Values.serviceAccount.name }} {{- end -}} {{- end -}} + +{{/* +Datahub GMS protocol: uses the chart-level `datahub.gms.protocol` value when it is set (non-empty), otherwise falls back to `global.datahub.gms.protocol`. +*/}} +{{- define "acryl-datahub-actions.datahubGmsProtocol" -}} +{{ ((.Values.datahub).gms).protocol | default .Values.global.datahub.gms.protocol }} +{{- end -}} + +{{/* +Datahub GMS host: uses the chart-level `datahub.gms.host` value when set, then `global.datahub.gms.host`, and finally "<release name>-datahub-gms"; the result is truncated to 63 characters and any trailing "-" is trimmed. +*/}} +{{- define "acryl-datahub-actions.datahubGmsHost" -}} +{{ (((.Values.datahub).gms).host | default ((.Values.global.datahub).gms).host) | default (printf "%s-%s" .Release.Name "datahub-gms") | trunc 63 | trimSuffix "-"}} +{{- end -}} + +{{/* +Datahub GMS port: uses the chart-level `datahub.gms.port` value when it is set (non-empty), otherwise falls back to `global.datahub.gms.port`. +*/}} +{{- define "acryl-datahub-actions.datahubGmsPort" -}} +{{ ((.Values.datahub).gms).port | default .Values.global.datahub.gms.port }} +{{- end -}} diff --git a/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml b/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml index 8a52768d4..b26654c4c 100644 --- a/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml +++ b/charts/datahub/subcharts/acryl-datahub-actions/templates/deployment.yaml 
@@ -16,6 +16,8 @@ spec: metadata: {{- with .Values.podAnnotations }} annotations: + # Ensures resource is only deployed after GMS is deployed, since there is a dependency on GMS being up. + "helm.sh/hook-weight": "1" {{- toYaml . | nindent 8 }} {{- end }} labels: @@ -27,6 +29,7 @@ spec: {{ $key }}: {{ $value | quote }} {{- end }} spec: + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} {{- with .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} @@ -50,6 +53,9 @@ spec: {{- if .Values.extraVolumes }} {{ toYaml .Values.extraVolumes | nindent 8 }} {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} initContainers: {{- if .Values.extraInitContainers }} {{- .Values.extraInitContainers | toYaml | nindent 6 }} @@ -64,21 +70,37 @@ spec: - name: http containerPort: 9093 protocol: TCP + {{- if .Values.image.command }} + command: {{ .Values.image.command | toRawJson }} + {{- end }} + {{- if .Values.image.args }} + args: {{ .Values.image.args | toRawJson }} + {{- end }} env: + - name: DATAHUB_GMS_PROTOCOL + value: {{ include "acryl-datahub-actions.datahubGmsProtocol" . }} - name: DATAHUB_GMS_HOST - value: {{ printf "%s-%s" .Release.Name "datahub-gms" }} + value: {{ include "acryl-datahub-actions.datahubGmsHost" . }} - name: DATAHUB_GMS_PORT - value: "{{ .Values.global.datahub.gms.port }}" + value: {{ include "acryl-datahub-actions.datahubGmsPort" . 
| quote }} # Deprecated in favour of DATAHUB_* variables - name: GMS_HOST value: {{ printf "%s-%s" .Release.Name "datahub-gms" }} - name: GMS_PORT value: "{{ .Values.global.datahub.gms.port }}" + {{- if .Values.actions.executorId }} + - name: EXECUTOR_ID + value: "{{ .Values.actions.executorId }}" + {{- end }} + {{- if .Values.debug.enabled }} + - name: DATAHUB_DEBUG + value: "true" + {{- end }} - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: SCHEMA_REGISTRY_URL - value: {{ printf "http://%s-%s:%s/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port }} + value: {{ printf "%s://%s:%s/schema-registry/api/" (include "acryl-datahub-actions.datahubGmsProtocol" .) (include "acryl-datahub-actions.datahubGmsHost" .) (include "acryl-datahub-actions.datahubGmsPort" .) | quote }} {{- else if eq .Values.global.kafka.schemaregistry.type "KAFKA" }} - name: SCHEMA_REGISTRY_URL value: "{{ .Values.global.kafka.schemaregistry.url }}" @@ -150,7 +172,7 @@ spec: {{- with .Values.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -158,7 +180,7 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.tolerations }} + {{- with default .Values.global.tolerations .Values.tolerations }} tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/charts/datahub/subcharts/acryl-datahub-actions/values.yaml b/charts/datahub/subcharts/acryl-datahub-actions/values.yaml index 2f30cf4e1..e2aca2f15 100644 --- a/charts/datahub/subcharts/acryl-datahub-actions/values.yaml +++ b/charts/datahub/subcharts/acryl-datahub-actions/values.yaml @@ -7,11 +7,17 @@ image: repository: acryldata/datahub-actions tag: "v0.0.1" pullPolicy: IfNotPresent + # Override the image's command & args with a new one. + # This may be necessary for custom startup or shutdown behaviors + command: + args: imagePullSecrets: [] nameOverride: "" fullnameOverride: "" +terminationGracePeriodSeconds: 150 + serviceAccount: # Specifies whether a service account should be created create: false @@ -30,6 +36,8 @@ podSecurityContext: {} securityContext: {} +priorityClassName: + service: type: ClusterIP port: 9093 @@ -69,14 +77,19 @@ affinity: {} actions: kafkaAutoOffsetPolicy: "latest" + # Configure a custom executor id that will be set as the EXECUTOR_ID environment variable + # executorId: "" # mount the k8s secret as a volume in the container, each key name is mounted as a file on the mount path /etc/datahub/ingestion-secret-files # ingestionSecretFiles: # name: ${K8S_SECRET_NAME} # defaultMode: "0444" -global: +debug: + # Setting enabled to true sets the DATAHUB_DEBUG env var to "true" + enabled: false +global: kafka: bootstrap: server: "broker:9092" @@ -87,7 +100,7 @@ global: gms: port: "8080" metadata_service_authentication: - enabled: false + enabled: true systemClientId: "__datahub_system" # systemClientSecret: # secretRef: diff --git a/charts/datahub/subcharts/datahub-frontend/Chart.yaml b/charts/datahub/subcharts/datahub-frontend/Chart.yaml index 1c07a61b5..4e1ad07c8 100644 --- a/charts/datahub/subcharts/datahub-frontend/Chart.yaml +++ b/charts/datahub/subcharts/datahub-frontend/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. 
This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.141 +version: 0.2.157 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: v0.11.0 +appVersion: v0.13.1 diff --git a/charts/datahub/subcharts/datahub-frontend/README.md b/charts/datahub/subcharts/datahub-frontend/README.md index 3b04d39e7..c1a533d27 100644 --- a/charts/datahub/subcharts/datahub-frontend/README.md +++ b/charts/datahub/subcharts/datahub-frontend/README.md @@ -1,5 +1,6 @@ datahub-frontend ================ + A Helm chart for datahub-frontend Current chart version is `0.2.0` @@ -20,6 +21,9 @@ Current chart version is `0.2.0` | fullnameOverride | string | `"datahub-frontend"` | | | global.datahub_analytics_enabled | boolean | true | | | global.datahub.gms.port | string | `"8080"` | | +| global.datahub.frontend.validateSignUpEmail | boolean | true | Enforces user sign up through invite link to use a valid email. | +| image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| image.command | list | `[]` | Override the image's command. Used to configure custom startup or shutdown behavior | | image.pullPolicy | string | `"IfNotPresent"` | | | image.repository | string | `"linkedin/datahub-frontend-react"` | | | image.tag | string | `"head"` | | @@ -45,6 +49,8 @@ Current chart version is `0.2.0` | oidcAuthentication.clientSecretRef.secretKey | string | `"nil"` | Optional, this is the key of the shared secret to use for exchange between you and your identity provider | | oidcAuthentication.oktaDomain | string | `""` | Okta domain, e.g. 
`dev-12345.okta.com`; needed only if `provider` is set to `okta` | | oidcAuthentication.azureTenantId | string | `""` | Azure directory (tenant) ID; neede only if `provider` is set to `azure` | +| oidcAuthentication.user_name_claim | string | `""` | The attribute that will contain the username used on the DataHub platform | +| oidcAuthentication.user_name_claim_regex | string | `""` | A regex string used for extracting the username from the userNameClaim attribute | | podAnnotations | object | `{}` | | | podSecurityContext | object | `{}` | | | readinessProbe.initialDelaySeconds | int | `60` | | @@ -58,6 +64,7 @@ Current chart version is `0.2.0` | service.port | int | `9001` | | | service.nodePort | int | `""` | | | service.type | string | `"LoadBalancer"` | | +| service.extraLabels | object | `{}` | | | serviceAccount.annotations | object | `{}` | | | serviceAccount.create | bool | `true` | | | serviceAccount.name | string | `nil` | | @@ -66,7 +73,7 @@ Current chart version is `0.2.0` | global.elasticsearch.host | string | `"elasticsearch"` | | | global.elasticsearch.port | string | `"9200"` | | | global.kafka.bootstrap.server | string | `"broker:9092"` | | -| datahub.metadata_service_authentication.enabled | bool | `false` | Whether Metadata Service Authentication is enabled. | +| datahub.metadata_service_authentication.enabled | bool | `true` | Whether Metadata Service Authentication is enabled. | | global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | | global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. 
| | global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | diff --git a/charts/datahub/subcharts/datahub-frontend/templates/deployment.yaml b/charts/datahub/subcharts/datahub-frontend/templates/deployment.yaml index 5db08afbb..c7aaa5657 100644 --- a/charts/datahub/subcharts/datahub-frontend/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-frontend/templates/deployment.yaml @@ -15,10 +15,13 @@ spec: {{- include "datahub-frontend.selectorLabels" . | nindent 6 }} template: metadata: - {{- with .Values.podAnnotations }} annotations: + {{- with .Values.podAnnotations }} {{- toYaml . | nindent 8 }} - {{- end }} + {{- end }} + {{- if .Values.defaultUserCredentials }} + checksum/secret: {{ include (print $.Template.BasePath "/user-secrets.yaml") . | sha256sum }} + {{- end }} labels: {{- include "datahub-frontend.selectorLabels" . | nindent 8 }} {{- range $key, $value := .Values.global.podLabels }} @@ -28,6 +31,7 @@ spec: {{ $key }}: {{ $value | quote }} {{- end }} spec: + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} {{- with .Values.imagePullSecrets }} imagePullSecrets: {{- toYaml . | nindent 8 }} @@ -51,6 +55,9 @@ spec: {{- with .Values.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} initContainers: {{- with .Values.extraInitContainers }} {{- toYaml . 
| nindent 8 }} @@ -61,14 +68,20 @@ spec: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ required "Global or specific tag is required" (.Values.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.image.command }} + command: {{ .Values.image.command | toRawJson }} + {{- end }} + {{- if .Values.image.args }} + args: {{ .Values.image.args | toRawJson }} + {{- end }} lifecycle: {{- toYaml .Values.lifecycle | nindent 12 }} ports: - name: http - containerPort: 9002 + containerPort: {{ .Values.service.containerPort }} protocol: TCP {{- if or .Values.global.datahub.monitoring.enablePrometheus .Values.global.datahub.monitoring.enableJMXPort }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} containerPort: 4318 protocol: TCP {{- end }} @@ -92,9 +105,9 @@ spec: value: "true" {{- end }} - name: DATAHUB_GMS_HOST - value: {{ printf "%s-%s" .Release.Name "datahub-gms" }} + value: {{ (((.Values.datahub).gms).host | default ((.Values.global.datahub).gms).host) | default (printf "%s-%s" .Release.Name "datahub-gms") | trunc 63 | trimSuffix "-"}} - name: DATAHUB_GMS_PORT - value: "{{ .Values.global.datahub.gms.port }}" + value: "{{ ((.Values.datahub).gms).port | default .Values.global.datahub.gms.port }}" - name: DATAHUB_SECRET valueFrom: {{- if .Values.existingGmsSecret }} @@ -114,6 +127,20 @@ spec: value: "{{ .Values.global.datahub_analytics_enabled }}" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" + - name: ENFORCE_VALID_EMAIL + value: "{{ .Values.global.datahub.frontend.validateSignUpEmail }}" + {{- with .Values.global.kafka.producer.compressionType }} + - name: KAFKA_PRODUCER_COMPRESSION_TYPE + value: "{{ . }}" + {{- end }} + {{- with .Values.global.kafka.producer.maxRequestSize }} + - name: KAFKA_PRODUCER_MAX_REQUEST_SIZE + value: {{ . 
| quote }} + {{- end }} + {{- with .Values.global.kafka.consumer.maxPartitionFetchBytes }} + - name: KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES + value: {{ . | quote }} + {{- end }} {{- if .Values.global.springKafkaConfigurationOverrides }} {{- range $configName, $configValue := .Values.global.springKafkaConfigurationOverrides }} - name: KAFKA_PROPERTIES_{{ $configName | replace "." "_" | upper }} @@ -193,15 +220,15 @@ spec: {{- end }} - name: AUTH_OIDC_BASE_URL value: https://{{ (first $.Values.ingress.hosts).host }} + - name: AUTH_OIDC_USER_NAME_CLAIM + value: {{ .user_name_claim | default "email" }} + - name: AUTH_OIDC_USER_NAME_CLAIM_REGEX + value: {{ .user_name_claim_regex | default "([^@]+)" }} {{- if eq .provider "google" }} - name: AUTH_OIDC_DISCOVERY_URI value: https://accounts.google.com/.well-known/openid-configuration - name: AUTH_OIDC_SCOPE value: {{ .scope | default "openid profile email" }} - - name: AUTH_OIDC_USER_NAME_CLAIM - value: email - - name: AUTH_OIDC_USER_NAME_CLAIM_REGEX - value: ([^@]+) {{- else if eq .provider "okta" }} - name: AUTH_OIDC_DISCOVERY_URI value: https://{{ .oktaDomain }}/.well-known/openid-configuration @@ -238,7 +265,7 @@ spec: {{- with .Values.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -246,7 +273,7 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.tolerations }} + {{- with default .Values.global.tolerations .Values.tolerations }} tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-frontend/templates/secrets.yaml b/charts/datahub/subcharts/datahub-frontend/templates/secrets.yaml index 9faeb32e0..e3bc54221 100644 --- a/charts/datahub/subcharts/datahub-frontend/templates/secrets.yaml +++ b/charts/datahub/subcharts/datahub-frontend/templates/secrets.yaml @@ -7,5 +7,5 @@ metadata: {{- include "datahub-frontend.labels" . | nindent 4 }} type: Opaque data: - datahub.gms.secret: {{ randAlphaNum 10 | b64enc | quote }} + datahub.gms.secret: {{ randAscii 32 | b64enc | quote }} {{- end -}} \ No newline at end of file diff --git a/charts/datahub/subcharts/datahub-frontend/templates/service.yaml b/charts/datahub/subcharts/datahub-frontend/templates/service.yaml index 4a46cc0fb..5d7246f2f 100644 --- a/charts/datahub/subcharts/datahub-frontend/templates/service.yaml +++ b/charts/datahub/subcharts/datahub-frontend/templates/service.yaml @@ -4,6 +4,9 @@ metadata: name: {{ include "datahub-frontend.fullname" . }} labels: {{- include "datahub-frontend.labels" . | nindent 4 }} + {{- range $key, $val := .Values.service.extraLabels }} + {{ $key }}: {{ $val | quote }} + {{- end }} {{- with .Values.service.annotations }} annotations: {{- toYaml . 
| nindent 4 }} @@ -21,9 +24,9 @@ spec: {{- end }} {{- end }} {{- if .Values.global.datahub.monitoring.enablePrometheus }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} port: 4318 - targetPort: jmx + targetPort: {{ .Values.global.datahub.monitoring.portName }} protocol: TCP {{- end }} selector: diff --git a/charts/datahub/subcharts/datahub-frontend/templates/servicemonitor.yaml b/charts/datahub/subcharts/datahub-frontend/templates/servicemonitor.yaml index 5df3ce9be..8e3a7e5fb 100644 --- a/charts/datahub/subcharts/datahub-frontend/templates/servicemonitor.yaml +++ b/charts/datahub/subcharts/datahub-frontend/templates/servicemonitor.yaml @@ -11,7 +11,7 @@ metadata: {{- end }} spec: endpoints: - - port: jmx + - port: {{ .Values.global.datahub.monitoring.portName }} relabelings: - separator: / sourceLabels: diff --git a/charts/datahub/subcharts/datahub-frontend/values.yaml b/charts/datahub/subcharts/datahub-frontend/values.yaml index 520622b12..1730b886f 100644 --- a/charts/datahub/subcharts/datahub-frontend/values.yaml +++ b/charts/datahub/subcharts/datahub-frontend/values.yaml @@ -7,14 +7,20 @@ replicaCount: 1 revisionHistoryLimit: 10 image: - repository: linkedin/datahub-frontend-react + repository: acryldata/datahub-frontend-react tag: pullPolicy: IfNotPresent + # Override the image's command & args with a new one. 
+ # This may be necessary for custom startup or shutdown behaviors + command: + args: imagePullSecrets: [] nameOverride: "" fullnameOverride: "" +terminationGracePeriodSeconds: 150 + serviceAccount: # Specifies whether a service account should be created create: true @@ -38,16 +44,20 @@ securityContext: {} # runAsNonRoot: true # runAsUser: 1000 +priorityClassName: + service: type: LoadBalancer # ClusterIP or NodePort port: 9002 targetPort: http + containerPort: 9002 protocol: TCP name: http # Annotations to add to the service, this will help in adding # Internal load balancer or various other annotation support in AWS annotations: {} # service.beta.kubernetes.io/aws-load-balancer-internal: "true" + extraLabels: {} serviceMonitor: create: false @@ -89,6 +99,11 @@ oidcAuthentication: # if needed, it should set meaningful defaults from provider # scope: "openid profile email" + # The attribute that will contain the username used on the DataHub platform. + # user_name_claim: "email" + # A regex string used for extracting the username from the userNameClaim attribute. + # user_name_claim_regex: "([^@]+)" + # Extra labels for Deployment extraLabels: {} # owner: myteam @@ -223,9 +238,12 @@ global: port: "8080" monitoring: enablePrometheus: true + portName: "jmx" appVersion: "1.0" + frontend: + validateSignUpEmail: true metadata_service_authentication: - enabled: false + enabled: true systemClientId: "__datahub_system" # systemClientSecret: # secretRef: diff --git a/charts/datahub/subcharts/datahub-gms/Chart.yaml b/charts/datahub/subcharts/datahub-gms/Chart.yaml index b9fcd685d..3f36af1f2 100644 --- a/charts/datahub/subcharts/datahub-gms/Chart.yaml +++ b/charts/datahub/subcharts/datahub-gms/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for LinkedIn DataHub's datahub-gms component type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. 
-version: 0.2.151 +version: 0.2.167 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. -appVersion: v0.11.0 +appVersion: v0.13.1 diff --git a/charts/datahub/subcharts/datahub-gms/README.md b/charts/datahub/subcharts/datahub-gms/README.md index e258611cd..a11bebf48 100644 --- a/charts/datahub/subcharts/datahub-gms/README.md +++ b/charts/datahub/subcharts/datahub-gms/README.md @@ -6,80 +6,83 @@ Current chart version is `0.2.0` ## Chart Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| affinity | object | `{}` | | -| extraLabels | object | `{}` | Extra labels for deployment configuration | -| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | -| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | -| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | -| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | -| fullnameOverride | string | `"datahub-gms-deployment"` | | -| global.datahub.appVersion | string | `"1.0"` | | -| global.datahub.gms.port | string | `"8080"` | | -| global.datahub.gms.nodePort | string | `""` | | -| global.elasticsearch.host | string | `"elasticsearch"` | | -| global.elasticsearch.port | string | `"9200"` | | -| global.hostAliases[0].hostnames[0] | string | `"broker"` | | -| global.hostAliases[0].hostnames[1] | string | `"mysql"` | | -| global.hostAliases[0].hostnames[2] | string | `"elasticsearch"` | | -| global.hostAliases[0].hostnames[3] | string | `"neo4j"` | | -| global.hostAliases[0].ip | string | `"192.168.0.104"` | | -| global.kafka.bootstrap.server | string | `"broker:9092"` | | -| global.kafka.schemaregistry.url | string | `"http://schema-registry:8081"` | | -| global.neo4j.host | string 
| `"neo4j:7474"` | | -| global.neo4j.uri | string | `"bolt://neo4j"` | | -| global.neo4j.username | string | `"neo4j"` | | -| global.neo4j.password.secretRef | string | `"neo4j-secrets"` | | -| global.neo4j.password.secretKey | string | `"neo4j-password"` | | -| global.sql.datasource.driver | string | `"com.mysql.cj.jdbc.Driver"` | | -| global.sql.datasource.host | string | `"mysql"` | | -| global.sql.datasource.url | string | `"jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false\u0026useSSL=true"` | | -| global.sql.datasource.username | string | `"datahub"` | | -| global.sql.datasource.password.secretRef | string | `"mysql-secrets"` | | -| global.sql.datasource.password.secretKey | string | `"mysql-password"` | | -| global.graph_service_impl | string | `neo4j` | One of `neo4j` or `elasticsearch`. Determines which backend to use for the GMS graph service. Elastic is recommended for a simplified deployment. Neo4j will be the default for now to maintain backwards compatibility | -| image.pullPolicy | string | `"IfNotPresent"` | | -| image.repository | string | `"linkedin/datahub-gms"` | | -| image.tag | string | `"head"` | | -| imagePullSecrets | list | `[]` | | -| ingress.annotations | object | `{}` | | -| ingress.enabled | bool | `false` | | -| ingress.extraLabels | object | `{}` | provides extra labels for ingress configuration | -| ingress.hosts[0].host | string | `"chart-example.local"` | | -| ingress.hosts[0].paths | list | `[]` | | -| ingress.tls | list | `[]` | | -| livenessProbe.initialDelaySeconds | int | `60` | | -| livenessProbe.periodSeconds | int | `30` | | -| livenessProbe.failureThreshold | int | `8` | | -| nameOverride | string | `""` | | -| nodeSelector | object | `{}` | | -| podAnnotations | object | `{}` | | -| podSecurityContext | object | `{}` | | -| readinessProbe.initialDelaySeconds | int | `60` | | -| readinessProbe.periodSeconds | int | `30` | | -| readinessProbe.failureThreshold | int | `8` | | -| replicaCount | int | `1` | | -| 
revisionHistoryLimit | int | `10` | | -| resources | object | `{}` | | -| securityContext | object | `{}` | | -| service.port | int | `8080` | | -| service.type | string | `"LoadBalancer"` | | -| serviceAccount.annotations | object | `{}` | | -| serviceAccount.create | bool | `true` | | -| serviceAccount.name | string | `nil` | | -| serviceMonitor.create | bool | `false` | If set true and `global.datahub.monitoring.enablePrometheus` is set `true` it will create a ServiceMonitor resource | -| tolerations | list | `[]` | | -| global.datahub.metadata_service_authentication.enabled | bool | `false` | Whether Metadata Service Authentication is enabled. | -| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.tokenService.signingKey.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. | -| global.datahub.metadata_service_authentication.tokenService.signingKey.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. 
| -| global.datahub.metadata_service_authentication.tokenService.salt.secretRef | string | `nil` | The reference to a secret containing the internal system salt that is used to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. | -| global.datahub.metadata_service_authentication.tokenService.salt.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. | -| global.datahub.managed_ingestion.enabled | bool | `true` | Whether or not UI-based ingestion experience is enabled. | -| global.datahub.encryptionKey.secretRef | string | `nil` | The reference to a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. Required if managed_ingestion_enabled is 'true'. | -| global.datahub.encryptionKey.secretKey | string | `nil` | The key of a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. Required if managed_ingestion_enabled is 'true'. | -| global.datahub.managed_ingestion.defaultCliVersion | string | `0.11.0` | This is the version of the DataHub CLI to use for UI ingestion, by default. You do not need to explicitly provide this. By default the underlying datahub-gms container will provide a latest version compatible with the server. 
| -| global.datahub.enable_retention | bool | `false` | Whether or not to enable retention on local DB | +| Key | Type | Default | Description | +|----------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| affinity | object | `{}` | | +| extraLabels | object | `{}` | Extra labels for deployment configuration | +| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | +| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | +| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | +| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | +| fullnameOverride | string | `"datahub-gms-deployment"` | | +| global.datahub.appVersion | string | `"1.0"` | | +| global.datahub.gms.port | string | `"8080"` | | +| global.datahub.gms.nodePort | string | `""` | | +| global.elasticsearch.host | string | `"elasticsearch"` | | +| global.elasticsearch.port | string | `"9200"` | | +| global.hostAliases[0].hostnames[0] | string | `"broker"` | | +| global.hostAliases[0].hostnames[1] | string | `"mysql"` | | +| global.hostAliases[0].hostnames[2] | string | `"elasticsearch"` | | +| global.hostAliases[0].hostnames[3] | string | `"neo4j"` | | +| global.hostAliases[0].ip | string | `"192.168.0.104"` | | +| global.kafka.bootstrap.server | string | `"broker:9092"` | | +| global.kafka.schemaregistry.url | string | `"http://schema-registry:8081"` | | +| 
global.neo4j.host | string | `"neo4j:7474"` | | +| global.neo4j.uri | string | `"bolt://neo4j"` | | +| global.neo4j.database | string | `"graph.db"` | Neo4J database | +| global.neo4j.username | string | `"neo4j"` | | +| global.neo4j.password.secretRef | string | `"neo4j-secrets"` | | +| global.neo4j.password.secretKey | string | `"neo4j-password"` | | +| global.sql.datasource.driver | string | `"com.mysql.cj.jdbc.Driver"` | | +| global.sql.datasource.host | string | `"mysql"` | | +| global.sql.datasource.url | string | `"jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false\u0026useSSL=true"` | | +| global.sql.datasource.username | string | `"datahub"` | | +| global.sql.datasource.password.secretRef | string | `"mysql-secrets"` | | +| global.sql.datasource.password.secretKey | string | `"mysql-password"` | | +| global.graph_service_impl | string | `elasticsearch` | One of `elasticsearch` or `neo4j`. Determines which backend to use for the GMS graph service. Elasticsearch is recommended for a simplified deployment. | +| image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| image.command | list | `[]` | Override the image's command. 
Used to configure custom startup or shutdown behavior | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"linkedin/datahub-gms"` | | +| image.tag | string | `"head"` | | +| imagePullSecrets | list | `[]` | | +| ingress.annotations | object | `{}` | | +| ingress.enabled | bool | `false` | | +| ingress.extraLabels | object | `{}` | provides extra labels for ingress configuration | +| ingress.hosts[0].host | string | `"chart-example.local"` | | +| ingress.hosts[0].paths | list | `[]` | | +| ingress.tls | list | `[]` | | +| livenessProbe.initialDelaySeconds | int | `60` | | +| livenessProbe.periodSeconds | int | `30` | | +| livenessProbe.failureThreshold | int | `8` | | +| nameOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | | +| podSecurityContext | object | `{}` | | +| readinessProbe.initialDelaySeconds | int | `60` | | +| readinessProbe.periodSeconds | int | `30` | | +| readinessProbe.failureThreshold | int | `8` | | +| replicaCount | int | `1` | | +| revisionHistoryLimit | int | `10` | | +| resources | object | `{}` | | +| securityContext | object | `{}` | | +| service.port | int | `8080` | | +| service.type | string | `"LoadBalancer"` | | +| serviceAccount.annotations | object | `{}` | | +| serviceAccount.create | bool | `true` | | +| serviceAccount.name | string | `nil` | | +| serviceMonitor.create | bool | `false` | If set true and `global.datahub.monitoring.enablePrometheus` is set `true` it will create a ServiceMonitor resource | +| tolerations | list | `[]` | | +| global.datahub.metadata_service_authentication.enabled | bool | `true` | Whether Metadata Service Authentication is enabled. | +| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. 
| +| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| global.datahub.metadata_service_authentication.tokenService.signingKey.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. | +| global.datahub.metadata_service_authentication.tokenService.signingKey.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to sign JWT auth tokens issued by DataHub GMS. | +| global.datahub.metadata_service_authentication.tokenService.salt.secretRef | string | `nil` | The reference to a secret containing the internal system salt that is used to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. | +| global.datahub.metadata_service_authentication.tokenService.salt.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to salt JWT auth tokens signatures issued by DataHub GMS that is part of the metadata graph. | +| global.datahub.managed_ingestion.enabled | bool | `true` | Whether or not UI-based ingestion experience is enabled. | +| global.datahub.encryptionKey.secretRef | string | `nil` | The reference to a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. Required if managed_ingestion_enabled is 'true'. 
| +| global.datahub.encryptionKey.secretKey | string | `nil` | The key of a secret containing an alpha-numeric encryption key, which is used to encrypt Secrets on DataHub. Required if managed_ingestion_enabled is 'true'. | +| global.datahub.managed_ingestion.defaultCliVersion | string | `0.11.0` | This is the version of the DataHub CLI to use for UI ingestion, by default. You do not need to explicitly provide this. By default the underlying datahub-gms container will provide a latest version compatible with the server. | +| global.datahub.enable_retention | bool | `false` | Whether or not to enable retention on local DB | diff --git a/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml b/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml index d4d55367b..cca0b2f81 100644 --- a/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-gms/templates/deployment.yaml @@ -32,6 +32,7 @@ spec: {{ $key }}: {{ $value | quote }} {{- end }} spec: + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} {{- with .Values.global.hostAliases }} hostAliases: {{- toYaml . | nindent 8 }} @@ -58,6 +59,9 @@ spec: {{- with .Values.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} initContainers: {{- with .Values.extraInitContainers }} {{- toYaml . 
| nindent 8 }} @@ -68,12 +72,18 @@ spec: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ required "Global or specific tag is required" (.Values.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.image.command }} + command: {{ .Values.image.command | toRawJson }} + {{- end }} + {{- if .Values.image.args }} + args: {{ .Values.image.args | toRawJson }} + {{- end }} ports: - name: http containerPort: 8080 protocol: TCP {{- if or .Values.global.datahub.monitoring.enablePrometheus .Values.global.datahub.monitoring.enableJMXPort }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} containerPort: 4318 protocol: TCP {{- end }} @@ -91,7 +101,7 @@ spec: failureThreshold: {{ .Values.livenessProbe.failureThreshold }} readinessProbe: httpGet: - path: /health/check/ready + path: /health port: http initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.readinessProbe.periodSeconds }} @@ -113,7 +123,7 @@ spec: - name: SEARCH_SERVICE_CACHE_IMPLEMENTATION value: "hazelcast" - name: SEARCH_SERVICE_HAZELCAST_SERVICE_NAME - value: {{ printf "%s-%s-%s" .Release.Name (regexReplaceAll "\\W+" .Values.global.datahub.version "-") "hazelcast-svc" | trunc 63 | trimSuffix "-" }} + value: {{ printf "%s-%s-%s" .Release.Name (regexReplaceAll "[^-a-z0-9]+" .Values.global.datahub.version "-") "hazelcast-svc" | trunc 63 | trimSuffix "-" }} {{- end}} {{- if .Values.global.datahub.systemUpdate.enabled }} - name: DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID @@ -165,6 +175,22 @@ spec: value: "{{ .Values.global.sql.datasource.driver }}" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" + {{- with .Values.global.kafka.producer.compressionType }} + - name: KAFKA_PRODUCER_COMPRESSION_TYPE + value: "{{ . 
}}" + {{- end }} + {{- with .Values.global.kafka.consumer.stopContainerOnDeserializationError}} + - name: KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR + value: "{{ . }}" + {{- end }} + {{- with .Values.global.kafka.producer.maxRequestSize }} + - name: KAFKA_PRODUCER_MAX_REQUEST_SIZE + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.kafka.consumer.maxPartitionFetchBytes }} + - name: KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES + value: {{ . | quote }} + {{- end }} {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: KAFKA_SCHEMAREGISTRY_URL value: {{ printf "http://localhost:%s/schema-registry/api/" .Values.global.datahub.gms.port }} @@ -218,6 +244,8 @@ spec: value: "{{ .Values.global.neo4j.host }}" - name: NEO4J_URI value: "{{ .Values.global.neo4j.uri }}" + - name: NEO4J_DATABASE + value: "{{ .Values.global.neo4j.database | default "graph.db" }}" - name: NEO4J_USERNAME value: "{{ .Values.global.neo4j.username }}" - name: NEO4J_PASSWORD @@ -283,6 +311,14 @@ spec: secretKeyRef: name: {{ .Values.global.datahub.metadata_service_authentication.systemClientSecret.secretRef }} key: {{ .Values.global.datahub.metadata_service_authentication.systemClientSecret.secretKey }} + {{- if .Values.global.datahub.metadata_service_authentication.view.authorization.enabled }} + - name: VIEW_AUTHORIZATION_ENABLED + value: "true" + {{- if .Values.global.datahub.metadata_service_authentication.view.authorization.recommendations.peerGroupEnabled }} + - name: VIEW_AUTHORIZATION_RECOMMENDATIONS_PEER_GROUP_ENABLED + value: "true" + {{- end }} + {{- end }} {{- end }} {{- if .Values.global.datahub.managed_ingestion.enabled }} - name: UI_INGESTION_ENABLED @@ -374,7 +410,7 @@ spec: {{- with .Values.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -382,7 +418,7 @@ spec: affinity: {{- toYaml . 
| nindent 8 }} {{- end }} - {{- with .Values.tolerations }} + {{- with default .Values.global.tolerations .Values.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-gms/templates/hazelcastService.yaml b/charts/datahub/subcharts/datahub-gms/templates/hazelcastService.yaml index b4a1aa427..76034ff18 100644 --- a/charts/datahub/subcharts/datahub-gms/templates/hazelcastService.yaml +++ b/charts/datahub/subcharts/datahub-gms/templates/hazelcastService.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Service metadata: - name: {{ printf "%s-%s-%s" .Release.Name (regexReplaceAll "\\W+" .Values.global.datahub.version "-") "hazelcast-svc" | trunc 63 | trimSuffix "-" }} + name: {{ printf "%s-%s-%s" .Release.Name (regexReplaceAll "[^-a-z0-9]+" .Values.global.datahub.version "-") "hazelcast-svc" | trunc 63 | trimSuffix "-" }} spec: clusterIP: None ports: @@ -13,4 +13,4 @@ spec: selector: app.kubernetes.io/name: {{- include "datahub-gms.name" . | nindent 6 }} type: ClusterIP -{{- end}} \ No newline at end of file +{{- end}} diff --git a/charts/datahub/subcharts/datahub-gms/templates/service.yaml b/charts/datahub/subcharts/datahub-gms/templates/service.yaml index dbc400955..b040c4ac1 100644 --- a/charts/datahub/subcharts/datahub-gms/templates/service.yaml +++ b/charts/datahub/subcharts/datahub-gms/templates/service.yaml @@ -21,9 +21,9 @@ spec: {{- end }} {{- end }} {{- if .Values.global.datahub.monitoring.enablePrometheus }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} port: 4318 - targetPort: jmx + targetPort: {{ .Values.global.datahub.monitoring.portName }} protocol: TCP {{- end }} selector: diff --git a/charts/datahub/subcharts/datahub-gms/templates/servicemonitor.yaml b/charts/datahub/subcharts/datahub-gms/templates/servicemonitor.yaml index bf1bc4996..68436feb5 100644 --- a/charts/datahub/subcharts/datahub-gms/templates/servicemonitor.yaml +++ 
b/charts/datahub/subcharts/datahub-gms/templates/servicemonitor.yaml @@ -11,7 +11,7 @@ metadata: {{- end }} spec: endpoints: - - port: jmx + - port: {{ .Values.global.datahub.monitoring.portName }} relabelings: - separator: / sourceLabels: diff --git a/charts/datahub/subcharts/datahub-gms/values.yaml b/charts/datahub/subcharts/datahub-gms/values.yaml index f331afd82..d4c8ec9c9 100644 --- a/charts/datahub/subcharts/datahub-gms/values.yaml +++ b/charts/datahub/subcharts/datahub-gms/values.yaml @@ -7,14 +7,20 @@ replicaCount: 1 revisionHistoryLimit: 10 image: - repository: linkedin/datahub-gms + repository: acryldata/datahub-gms pullPolicy: IfNotPresent tag: + # Override the image's command & args with a new one. + # This may be necessary for custom startup or shutdown behaviors + command: + args: imagePullSecrets: [] nameOverride: "" fullnameOverride: "" +terminationGracePeriodSeconds: 150 + serviceAccount: # Specifies whether a service account should be created create: true @@ -42,6 +48,8 @@ securityContext: {} # runAsNonRoot: true # runAsUser: 1000 +priorityClassName: + service: type: LoadBalancer # ClusterIP or NodePort port: "8080" @@ -133,7 +141,7 @@ readinessProbe: # helm install datahub-gms datahub-gms/ global: datahub_analytics_enabled: true - graph_service_impl: neo4j + graph_service_impl: elasticsearch elasticsearch: host: "elasticsearch" @@ -160,6 +168,12 @@ global: server: "broker:9092" schemaregistry: url: "http://schema-registry:8081" + ## Kafka producer and consumer settings + #producer: + # compressionType: snappy + # maxRequestSize: "5242880" + #consumer: + # maxPartitionFetchBytes: "5242880" neo4j: host: "neo4j:7474" @@ -184,6 +198,7 @@ global: monitoring: enablePrometheus: false enableJMXPort: false + portName: jmx gms: port: "8080" appVersion: "1.0" @@ -198,7 +213,7 @@ global: # defaultCliVersion: "X.X.X" --> Optional: Controls the acryl-datahub package version downloaded from PyPI. 
metadata_service_authentication: - enabled: false + enabled: true # tokenService: # signingKey: # secretRef: @@ -210,6 +225,14 @@ global: # systemClientSecret: # secretRef: # secretKey: + view: + authorization: + # search/view authorization filters + enabled: false + # recommendation settings when search/view authorization is in effect + recommendations: + # whether to restrict Most Popular entities + peerGroupEnabled: true encryptionKey: secretRef: "encryption-key-secret" diff --git a/charts/datahub/subcharts/datahub-ingestion-cron/Chart.yaml b/charts/datahub/subcharts/datahub-ingestion-cron/Chart.yaml index 7f70d6aad..fd6ac1d03 100644 --- a/charts/datahub/subcharts/datahub-ingestion-cron/Chart.yaml +++ b/charts/datahub/subcharts/datahub-ingestion-cron/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.136 +version: 0.2.143 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
-appVersion: v0.11.0 +appVersion: v0.13.1 diff --git a/charts/datahub/subcharts/datahub-ingestion-cron/README.md b/charts/datahub/subcharts/datahub-ingestion-cron/README.md index 900b2b15a..9ea5cd6d4 100644 --- a/charts/datahub/subcharts/datahub-ingestion-cron/README.md +++ b/charts/datahub/subcharts/datahub-ingestion-cron/README.md @@ -4,35 +4,39 @@ A Helm chart for datahub's metadata-ingestion framework with kerberos authentica ## Chart Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | -| image.repository | string | `"linkedin/datahub-ingestion"` | DataHub Ingestion image repository | -| image.tag | string | `"head"` | DataHub Ingestion image tag | -| imagePullSecrets | array | `[]` (does not add image pull secrets to deployed pods) | Docker registry secret names as an array | -| labels | string | `{}` | Metadata labels to be added to each crawling cron job | -| crons | type | `{}` | A map of crawling parameters per different technology being crawler, the key in the object will be used as the name for the new cron job | -| crons.schedule | string | `"0 0 * * *"` | Cron expression (default is daily at midnight) for crawler jobs | -| crons.recipe | object | `{}` | Recipe configuration to be executed (required) | -| crons.recipe.configmapName | string | `""` | Name of configmap to be mounted containing recipe to be executed | -| crons.recipe.fileName | string | `""` | Name of property within configMap referenced by `recipe.configName` with the concrete recipe definition | -| crons.command | array | `["/bin/sh", "-c", "datahub ingest -c /etc/recipe/"]` | Array of strings denoting the crawling command to be invoked in the cron job. By default it will execute the recipe defined in the `crons.recipe` object. Cron crawling customization is possible by having extra volumes with custom logic to be executed. 
| -| crons.hostAliases | array | `[]` | host aliases | -| crons.env | object | `{}` | Environment variables to add to the cronjob container | -| crons.envFromSecrets | object | `{}` | Environment variables from secrets to the cronjob container | -| crons.envFromSecrets*.secret | string | | secretKeyRef.name used for environment variable | -| crons.envFromSecrets*.key | string | | secretKeyRef.key used for environment variable | -| crons.extraVolumes | array | `[]` | Additional volumes to add to the pods | -| crons.extraVolumeMounts | array | `[]` | Additional volume mounts to add to the pods | -| crons.extraInitContainers | object | `{}` | Init containers to add to the cronjob container | -| crons.serviceAccountName | string | | Service account name used for the cronjob container | -| crons.podAnnotations | object | `{}` | Annotations to add to the pods | -| crons.restartPolicy | string | `"Always"` | Pod restart policy | -| crons.concurrencyPolicy | string | `"Allow"` | Specifies how to treat concurrent executions of a job | -| crons.failedJobsHistoryLimit | integer | `1` | Number of failed finished jobs to retain | -| crons.successfulJobsHistoryLimit | integer | `3` | Number of successful finished jobs to retain | -| crons.backoffLimit | integer | `6` | Number of retries before marking job failed | -| crons.nodeSelector | object | `{}` | Node labels for pod assignment | -| crons.affinity | object | `{}` | Affinity for pod assignment | -| crons.tolerations | list | `[]` | Tolerations for pod assignment | -| crons.extraSidecars | list | `[]` | Add sidecar containers to the pod | +| Key | Type | Default | Description | 
+|----------------------------------|---------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | +| image.repository | string | `"acryldata/datahub-ingestion"` | DataHub Ingestion image repository | +| image.tag | string | `"head"` | DataHub Ingestion image tag | +| imagePullSecrets | array | `[]` (does not add image pull secrets to deployed pods) | Docker registry secret names as an array | +| labels | object | `{}` | Metadata labels to be added to each crawling cron job | +| podSecurityContext | object | `{}` | Pod security context for cron jobs | +| crons | object | `{}` | A map of crawling parameters per different technology being crawled; the key in the object will be used as the name for the new cron job | +| crons.schedule | string | `"0 0 * * *"` | Cron expression (default is daily at midnight) for crawler jobs | +| crons.recipe | object | `{}` | Recipe configuration to be executed (required) | +| crons.recipe.configmapName | string | `""` | Name of configmap to be mounted containing recipe to be executed | +| crons.recipe.fileName | string | `""` | Name of property within configMap referenced by `recipe.configmapName` with the concrete recipe definition | +| crons.recipe.fileContent | object | `{}` | Recipe for ingestion. If not present, assumes an externally managed config map | +| crons.command | array | `["/bin/sh", "-c", "datahub ingest -c /etc/recipe/"]` | Array of strings denoting the crawling command to be invoked in the cron job. By default it will execute the recipe defined in the `crons.recipe` object. Cron crawling customization is possible by having extra volumes with custom logic to be executed.
| +| crons.hostAliases | array | `[]` | host aliases | +| crons.env | object | `{}` | Environment variables to add to the cronjob container | +| crons.envFromSecrets | object | `{}` | Environment variables from secrets to the cronjob container | +| crons.envFromSecrets*.secret | string | | secretKeyRef.name used for environment variable | +| crons.envFromSecrets*.key | string | | secretKeyRef.key used for environment variable | +| crons.extraVolumes | array | `[]` | Additional volumes to add to the pods | +| crons.extraVolumeMounts | array | `[]` | Additional volume mounts to add to the pods | +| crons.extraInitContainers | object | `{}` | Init containers to add to the cronjob container | +| crons.serviceAccountName | string | | Service account name used for the cronjob container | +| crons.securityContext | object | `{}` | SecurityContext specific to each crawling cron job | +| crons.podAnnotations | object | `{}` | Annotations to add to the pods | +| crons.restartPolicy | string | `"OnFailure"` | Pod restart policy | +| crons.concurrencyPolicy | string | `"Allow"` | Specifies how to treat concurrent executions of a job | +| crons.failedJobsHistoryLimit | integer | `1` | Number of failed finished jobs to retain | +| crons.successfulJobsHistoryLimit | integer | `3` | Number of successful finished jobs to retain | +| crons.backoffLimit | integer | `6` | Number of retries before marking job failed | +| crons.nodeSelector | object | `{}` | Node labels for pod assignment | +| crons.affinity | object | `{}` | Affinity for pod assignment | +| crons.tolerations | list | `[]` | Tolerations for pod assignment | +| crons.extraSidecars | list | `[]` | Add sidecar containers to the pod | +| crons.suspend | boolean | `false` | Suspend execution of a cron | diff --git a/charts/datahub/subcharts/datahub-ingestion-cron/templates/configmap.yaml b/charts/datahub/subcharts/datahub-ingestion-cron/templates/configmap.yaml new file mode 100644 index 000000000..406aba44e --- /dev/null +++
b/charts/datahub/subcharts/datahub-ingestion-cron/templates/configmap.yaml @@ -0,0 +1,15 @@ +{{- $labels := include "datahub-ingestion-cron.labels" .}} +{{- range $jobName, $val := .Values.crons }} +{{- if $val.recipe.fileContent }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ $val.recipe.configmapName }} + labels: {{- $labels | nindent 4 }} +data: + {{ $val.recipe.fileName }}: |- + {{- toYaml $val.recipe.fileContent | nindent 4 }} +--- +{{- end }} +{{- end }} + diff --git a/charts/datahub/subcharts/datahub-ingestion-cron/templates/cron.yaml b/charts/datahub/subcharts/datahub-ingestion-cron/templates/cron.yaml index 2f8001458..a069ad7a9 100644 --- a/charts/datahub/subcharts/datahub-ingestion-cron/templates/cron.yaml +++ b/charts/datahub/subcharts/datahub-ingestion-cron/templates/cron.yaml @@ -12,6 +12,7 @@ spec: concurrencyPolicy: {{ default "Allow" .concurrencyPolicy }} successfulJobsHistoryLimit: {{ default 3 .successfulJobsHistoryLimit }} failedJobsHistoryLimit: {{ default 1 .failedJobsHistoryLimit }} + suspend: {{ default false .suspend }} jobTemplate: spec: backoffLimit: {{ default 6 $val.backoffLimit }} @@ -37,6 +38,8 @@ spec: {{- if .serviceAccountName }} serviceAccountName: {{ .serviceAccountName }} {{- end }} + securityContext: + {{- toYaml $.Values.podSecurityContext | nindent 12 }} containers: - name: {{ $jobName }}-crawler image: "{{ $.Values.image.repository }}:{{ required "Global or specific tag is required" ($.Values.image.tag | default $.Values.global.datahub.version) }}" @@ -48,6 +51,10 @@ spec: {{- toYaml .extraVolumeMounts | nindent 14 }} {{- end }} command: ["/bin/sh", "-c", {{ default $defaultCommand .command }} ] + {{- if .securityContext }} + securityContext: + {{- toYaml .securityContext | nindent 14 }} + {{- end }} env: {{- if .env }} {{- range $key,$value := .env }} @@ -68,17 +75,17 @@ spec: {{- toYaml .extraSidecars | nindent 10 }} {{- end }} restartPolicy: {{ default "OnFailure" .restartPolicy }} - {{- if .nodeSelector }} + {{- 
with default $.Values.global.nodeSelector .nodeSelector }} nodeSelector: - {{- toYaml .nodeSelector | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} {{- if .affinity }} affinity: {{- toYaml .affinity | nindent 12 }} {{- end }} - {{- if .tolerations }} + {{- with default $.Values.global.tolerations .tolerations }} tolerations: - {{- toYaml .tolerations | nindent 12 }} + {{- toYaml . | nindent 12 }} {{- end }} volumes: - name: recipe diff --git a/charts/datahub/subcharts/datahub-ingestion-cron/values.yaml b/charts/datahub/subcharts/datahub-ingestion-cron/values.yaml index 4e703a6be..509ce8b24 100644 --- a/charts/datahub/subcharts/datahub-ingestion-cron/values.yaml +++ b/charts/datahub/subcharts/datahub-ingestion-cron/values.yaml @@ -3,14 +3,17 @@ # Declare variables to be passed into your templates. image: - repository: linkedin/datahub-ingestion + repository: acryldata/datahub-ingestion tag: pullPolicy: IfNotPresent imagePullSecrets: [] +podSecurityContext: {} +# fsGroup: 2000 + crons: {} - #### Example data + #### Example data with externally managed config map #hive: ## Daily at midnight (we may want to offset this to not conflict with other processes) #schedule: "0 0 * * *" @@ -20,7 +23,32 @@ crons: {} # fileName: ## Command to be executed - #command: ["/bin/sh", "-c", "datahub ingest -c "] + #command: "datahub ingest -c " + + # Example data with helm managed config map + # mysql: + # schedule: "0 0 * * *" + # recipe: + # configmapName: datahub-mysql-ingestion + # fileName: mysql.yaml + # # Example mysql -> datahub source recipe + # fileContent: + # source: + # type: mysql + # config: + # # Coordinates + # host_port: localhost:3306 + # database: dbname + # # Credentials + # username: root + # password: example + # sink: + # type: datahub-rest + # config: + # server: http://localhost:8080 + + # Command to be executed +# command: "datahub ingest -c " ## Deployment pod host aliases ##
https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/ @@ -62,6 +90,15 @@ crons: {} ## #serviceAccountName: "my-cron-service" + ## If you want to specify container level security + #securityContext: + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + ## Add your own pod annotations. ## #podAnnotations: {} @@ -100,11 +137,9 @@ crons: {} ## #tolerations: [] - ## Add extra sidecar containers to deployment pod - #extraSidecars: - # - name: my-image-name - # image: my-image - # imagePullPolicy: Always + # Set the cron job to suspended state + ## + #suspend: true global: datahub: diff --git a/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml b/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml index e2ad31b49..57f59d004 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.147 +version: 0.2.158 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
-appVersion: v0.11.0 +appVersion: v0.13.1 diff --git a/charts/datahub/subcharts/datahub-mae-consumer/README.md b/charts/datahub/subcharts/datahub-mae-consumer/README.md index 11b733782..6957bde67 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/README.md +++ b/charts/datahub/subcharts/datahub-mae-consumer/README.md @@ -6,65 +6,68 @@ Current chart version is `0.2.0` ## Chart Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| affinity | object | `{}` | | -| exporters.jmx.enabled | boolean | false | | -| extraLabels | object | `{}` | Extra labels for deployment configuration | -| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | -| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | -| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | -| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | -| fullnameOverride | string | `"datahub-mae-consumer"` | | -| global.datahub_analytics_enabled | boolean | true | | -| global.datahub.mae_consumer.port | string | `"9091"` | | -| global.datahub.mae_consumer.nodePort | string | `"30002"` | | -| global.elasticsearch.host | string | `"elasticsearch"` | | -| global.elasticsearch.port | string | `"9200"` | | -| global.kafka.bootstrap.server | string | `"broker:9092"` | | -| global.kafka.schemaregistry.url | string | `"http://schema-registry:8081"` | | -| global.neo4j.host | string | `"neo4j:7474"` | | -| global.neo4j.uri | string | `"bolt://neo4j"` | | -| global.neo4j.username | string | `"neo4j"` | | -| global.neo4j.password.secretRef | string | `"neo4j-secrets"` | | -| global.neo4j.password.secretKey | string | `"neo4j-password"` | | -| global.hostAliases[0].hostnames[0] | string | `"broker"` | | -| global.hostAliases[0].hostnames[1] | string | `"mysql"` | | -| 
global.hostAliases[0].hostnames[2] | string | `"elasticsearch"` | | -| global.hostAliases[0].hostnames[3] | string | `"neo4j"` | | -| global.hostAliases[0].ip | string | `"192.168.0.104"` | | -| global.graph_service_impl | string | `neo4j` | One of `neo4j` or `elasticsearch`. Determines which backend to use for the GMS graph service. Elastic is recommended for a simplified deployment. Neo4j will be the default for now to maintain backwards compatibility | -| image.pullPolicy | string | `"IfNotPresent"` | | -| image.repository | string | `"linkedin/datahub-mae-consumer"` | | -| image.tag | string | `"head"` | | -| imagePullSecrets | list | `[]` | | -| ingress.annotations | object | `{}` | | -| ingress.enabled | bool | `false` | | -| ingress.hosts[0].host | string | `"chart-example.local"` | | -| ingress.hosts[0].paths | list | `[]` | | -| ingress.tls | list | `[]` | | -| livenessProbe.initialDelaySeconds | int | `60` | | -| livenessProbe.periodSeconds | int | `30` | | -| livenessProbe.failureThreshold | int | `8` | | -| nameOverride | string | `""` | | -| nodeSelector | object | `{}` | | -| podAnnotations | object | `{}` | | -| podSecurityContext | object | `{}` | | -| readinessProbe.initialDelaySeconds | int | `60` | | -| readinessProbe.periodSeconds | int | `30` | | -| readinessProbe.failureThreshold | int | `8` | | -| replicaCount | int | `1` | | -| revisionHistoryLimit | int | `10` | | -| resources | object | `{}` | | -| securityContext | object | `{}` | | -| service.port | int | `80` | | -| service.type | string | `"ClusterIP"` | | -| serviceAccount.annotations | object | `{}` | | -| serviceAccount.create | bool | `true` | | -| serviceAccount.name | string | `nil` | | -| serviceMonitor.create | bool | `false` | If set true and `global.datahub.monitoring.enablePrometheus` is set `true` it will create a ServiceMonitor resource | -| tolerations | list | `[]` | | -| datahub.metadata_service_authentication.enabled | bool | `false` | Whether Metadata Service 
Authentication is enabled. | -| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| Key | Type | Default | Description | +|-----------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------|-----------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| affinity | object | `{}` | | +| exporters.jmx.enabled | boolean | false | | +| extraLabels | object | `{}` | Extra labels for deployment configuration | +| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | +| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | +| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | +| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | +| fullnameOverride | string | `"datahub-mae-consumer"` | | +| global.datahub_analytics_enabled | boolean | true | | +| global.datahub.mae_consumer.port | string | `"9091"` | | +| 
global.datahub.mae_consumer.nodePort | string | `"30002"` | | +| global.elasticsearch.host | string | `"elasticsearch"` | | +| global.elasticsearch.port | string | `"9200"` | | +| global.kafka.bootstrap.server | string | `"broker:9092"` | | +| global.kafka.schemaregistry.url | string | `"http://schema-registry:8081"` | | +| global.neo4j.host | string | `"neo4j:7474"` | | +| global.neo4j.uri | string | `"bolt://neo4j"` | | +| global.neo4j.database | string | `"graph.db"` | Neo4J database | +| global.neo4j.username | string | `"neo4j"` | | +| global.neo4j.password.secretRef | string | `"neo4j-secrets"` | | +| global.neo4j.password.secretKey | string | `"neo4j-password"` | | +| global.hostAliases[0].hostnames[0] | string | `"broker"` | | +| global.hostAliases[0].hostnames[1] | string | `"mysql"` | | +| global.hostAliases[0].hostnames[2] | string | `"elasticsearch"` | | +| global.hostAliases[0].hostnames[3] | string | `"neo4j"` | | +| global.hostAliases[0].ip | string | `"192.168.0.104"` | | +| global.graph_service_impl | string | `elasticsearch` | One of `elasticsearch` or `neo4j`. Determines which backend to use for the GMS graph service. Elasticsearch is recommended for a simplified deployment. | +| image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| image.command | list | `[]` | Override the image's command. 
Used to configure custom startup or shutdown behavior | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"linkedin/datahub-mae-consumer"` | | +| image.tag | string | `"head"` | | +| imagePullSecrets | list | `[]` | | +| ingress.annotations | object | `{}` | | +| ingress.enabled | bool | `false` | | +| ingress.hosts[0].host | string | `"chart-example.local"` | | +| ingress.hosts[0].paths | list | `[]` | | +| ingress.tls | list | `[]` | | +| livenessProbe.initialDelaySeconds | int | `60` | | +| livenessProbe.periodSeconds | int | `30` | | +| livenessProbe.failureThreshold | int | `8` | | +| nameOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | | +| podSecurityContext | object | `{}` | | +| readinessProbe.initialDelaySeconds | int | `60` | | +| readinessProbe.periodSeconds | int | `30` | | +| readinessProbe.failureThreshold | int | `8` | | +| replicaCount | int | `1` | | +| revisionHistoryLimit | int | `10` | | +| resources | object | `{}` | | +| securityContext | object | `{}` | | +| service.port | int | `80` | | +| service.type | string | `"ClusterIP"` | | +| serviceAccount.annotations | object | `{}` | | +| serviceAccount.create | bool | `true` | | +| serviceAccount.name | string | `nil` | | +| serviceMonitor.create | bool | `false` | If set true and `global.datahub.monitoring.enablePrometheus` is set `true` it will create a ServiceMonitor resource | +| tolerations | list | `[]` | | +| global.datahub.metadata_service_authentication.enabled | bool | `true` | Whether Metadata Service Authentication is enabled. | +| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'.
| +| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | diff --git a/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml b/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml index 8d0cb142c..174328085 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/templates/deployment.yaml @@ -28,6 +28,7 @@ spec: {{ $key }}: {{ $value | quote }} {{- end }} spec: + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} {{- with .Values.global.hostAliases }} hostAliases: {{- toYaml . | nindent 8 }} @@ -49,6 +50,9 @@ spec: {{- with .Values.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} initContainers: {{- with .Values.extraInitContainers }} {{- toYaml . 
| nindent 8 }} @@ -59,12 +63,18 @@ spec: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ required "Global or specific tag is required" (.Values.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.image.command }} + command: {{ .Values.image.command | toRawJson }} + {{- end }} + {{- if .Values.image.args }} + args: {{ .Values.image.args | toRawJson }} + {{- end }} ports: - name: http containerPort: 9091 protocol: TCP {{- if or .Values.global.datahub.monitoring.enablePrometheus .Values.global.datahub.monitoring.enableJMXPort }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} containerPort: 4318 protocol: TCP {{- end }} @@ -106,11 +116,27 @@ spec: - name: ENTITY_REGISTRY_CONFIG_PATH value: /datahub/datahub-mae-consumer/resources/entity-registry.yml - name: DATAHUB_GMS_HOST - value: {{ printf "%s-%s" .Release.Name "datahub-gms" }} + value: {{ (((.Values.datahub).gms).host | default ((.Values.global.datahub).gms).host) | default (printf "%s-%s" .Release.Name "datahub-gms") | trunc 63 | trimSuffix "-"}} - name: DATAHUB_GMS_PORT - value: "{{ .Values.global.datahub.gms.port }}" + value: "{{ ((.Values.datahub).gms).port | default .Values.global.datahub.gms.port }}" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" + {{- with .Values.global.kafka.producer.compressionType }} + - name: KAFKA_PRODUCER_COMPRESSION_TYPE + value: "{{ . }}" + {{- end }} + {{- with .Values.global.kafka.producer.maxRequestSize }} + - name: KAFKA_PRODUCER_MAX_REQUEST_SIZE + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.kafka.consumer.stopContainerOnDeserializationError }} + - name: KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR + value: "{{ . }}" + {{- end }} + {{- with .Values.global.kafka.consumer.maxPartitionFetchBytes }} + - name: KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES + value: {{ . 
| quote }} + {{- end }} {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: KAFKA_SCHEMAREGISTRY_URL value: {{ printf "http://%s-%s:%s/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port }} @@ -164,6 +190,8 @@ spec: value: "{{ .Values.global.neo4j.host }}" - name: NEO4J_URI value: "{{ .Values.global.neo4j.uri }}" + - name: NEO4J_DATABASE + value: "{{ .Values.global.neo4j.database | default "graph.db" }}" - name: NEO4J_USERNAME value: "{{ .Values.global.neo4j.username }}" - name: NEO4J_PASSWORD @@ -245,7 +273,7 @@ spec: {{- with .Values.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -253,7 +281,7 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.tolerations }} + {{- with default .Values.global.tolerations .Values.tolerations }} tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-mae-consumer/templates/service.yaml b/charts/datahub/subcharts/datahub-mae-consumer/templates/service.yaml index 9bba91be5..36adf3e50 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/templates/service.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/templates/service.yaml @@ -17,9 +17,9 @@ spec: {{- end }} {{- end }} {{- if .Values.global.datahub.monitoring.enablePrometheus }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} port: 4318 - targetPort: jmx + targetPort: {{ .Values.global.datahub.monitoring.portName }} protocol: TCP {{- end }} selector: diff --git a/charts/datahub/subcharts/datahub-mae-consumer/templates/servicemonitor.yaml b/charts/datahub/subcharts/datahub-mae-consumer/templates/servicemonitor.yaml index 450fe182b..4b9ce522e 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/templates/servicemonitor.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/templates/servicemonitor.yaml @@ -11,7 +11,7 @@ metadata: {{- end }} spec: endpoints: - - port: jmx + - port: {{ .Values.global.datahub.monitoring.portName }} relabelings: - separator: / sourceLabels: diff --git a/charts/datahub/subcharts/datahub-mae-consumer/values.yaml b/charts/datahub/subcharts/datahub-mae-consumer/values.yaml index b4e1a54d5..bf8db60dd 100644 --- a/charts/datahub/subcharts/datahub-mae-consumer/values.yaml +++ b/charts/datahub/subcharts/datahub-mae-consumer/values.yaml @@ -7,14 +7,20 @@ replicaCount: 1 revisionHistoryLimit: 10 image: - repository: linkedin/datahub-mae-consumer + repository: acryldata/datahub-mae-consumer pullPolicy: IfNotPresent tag: + # Override the image's command & args with a new one. 
+ # This may be necessary for custom startup or shutdown behaviors + command: + args: imagePullSecrets: [] nameOverride: "" fullnameOverride: "" +terminationGracePeriodSeconds: 150 + serviceAccount: # Specifies whether a service account should be created create: true @@ -41,6 +47,8 @@ securityContext: {} # runAsNonRoot: true # runAsUser: 1000 +priorityClassName: + service: type: ClusterIP # NodePort port: "9091" @@ -167,7 +175,7 @@ readinessProbe: failureThreshold: 8 global: - graph_service_impl: neo4j + graph_service_impl: elasticsearch datahub_analytics_enabled: true elasticsearch: @@ -180,6 +188,12 @@ global: server: "broker:9092" schemaregistry: url: "http://schema-registry:8081" + ## Kafka producer and consumer settings + #producer: + # compressionType: snappy + # maxRequestSize: "5242880" + #consumer: + # maxPartitionFetchBytes: "5242880" neo4j: host: "neo4j:7474" @@ -195,6 +209,7 @@ global: port: "8080" monitoring: enablePrometheus: false + portName: jmx systemUpdate: ## The following options control settings for datahub-upgrade job which will @@ -208,7 +223,7 @@ global: enabled: true metadata_service_authentication: - enabled: false + enabled: true systemClientId: "__datahub_system" # systemClientSecret: # secretRef: diff --git a/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml b/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml index 647ff4d54..e25ca3208 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/Chart.yaml @@ -12,7 +12,7 @@ description: A Helm chart for Kubernetes type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.2.150 +version: 0.2.160 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application.
-appVersion: v0.11.0 +appVersion: v0.13.1 diff --git a/charts/datahub/subcharts/datahub-mce-consumer/README.md b/charts/datahub/subcharts/datahub-mce-consumer/README.md index 5f20dd759..0b7e525ca 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/README.md +++ b/charts/datahub/subcharts/datahub-mce-consumer/README.md @@ -6,55 +6,57 @@ Current chart version is `0.2.0` ## Chart Values -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| affinity | object | `{}` | | -| exporters.jmx.enabled | boolean | false | | -| extraLabels | object | `{}` | Extra labels for deployment configuration | -| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | -| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | -| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | -| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | -| fullnameOverride | string | `""` | | -| global.kafka.bootstrap.server | string | `"broker:9092"` | | -| global.kafka.schemaregistry.url | string | `"http://schema-registry:8081"` | | -| global.datahub.gms.port | string | `"8080"` | | -| global.hostAliases[0].hostnames[0] | string | `"broker"` | | -| global.hostAliases[0].hostnames[1] | string | `"mysql"` | | -| global.hostAliases[0].hostnames[2] | string | `"elasticsearch"` | | -| global.hostAliases[0].hostnames[3] | string | `"neo4j"` | | -| global.hostAliases[0].ip | string | `"192.168.0.104"` | | -| image.pullPolicy | string | `"IfNotPresent"` | | -| image.repository | string | `"linkedin/datahub-mce-consumer"` | | -| image.tag | string | `"head"` | | -| imagePullSecrets | list | `[]` | | -| ingress.annotations | object | `{}` | | -| ingress.enabled | bool | `false` | | -| ingress.hosts[0].host | string | `"chart-example.local"` | | -| ingress.hosts[0].paths | list | 
`[]` | | -| ingress.tls | list | `[]` | | -| livenessProbe.initialDelaySeconds | int | `60` | | -| livenessProbe.periodSeconds | int | `30` | | -| livenessProbe.failureThreshold | int | `4` | | -| nameOverride | string | `""` | | -| nodeSelector | object | `{}` | | -| podAnnotations | object | `{}` | | -| podSecurityContext | object | `{}` | | -| readinessProbe.initialDelaySeconds | int | `60` | | -| readinessProbe.periodSeconds | int | `30` | | -| readinessProbe.failureThreshold | int | `4` | | -| replicaCount | int | `1` | | -| revisionHistoryLimit | int | `10` | | -| resources | object | `{}` | | -| securityContext | object | `{}` | | -| service.port | int | `80` | | -| service.type | string | `"ClusterIP"` | | -| serviceAccount.annotations | object | `{}` | | -| serviceAccount.create | bool | `true` | | -| serviceAccount.name | string | `nil` | | -| serviceMonitor.create | bool | `false` | If set true and `global.datahub.monitoring.enablePrometheus` is set `true` it will create a ServiceMonitor resource | -| tolerations | list | `[]` | | -| datahub.metadata_service_authentication.enabled | bool | `false` | Whether Metadata Service Authentication is enabled. | -| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | -| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. 
| +| Key | Type | Default | Description | +|-----------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------|-----------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| affinity | object | `{}` | | +| exporters.jmx.enabled | boolean | false | | +| extraLabels | object | `{}` | Extra labels for deployment configuration | +| extraEnvs | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | +| extraSidecars | list | `[]` | Add additional sidecar containers to the deployment pod(s) | +| extraVolumes | Templatable string of additional `volumes` to be passed to the `tpl` function | "" | +| extraVolumeMounts | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | "" | +| fullnameOverride | string | `""` | | +| global.kafka.bootstrap.server | string | `"broker:9092"` | | +| global.kafka.schemaregistry.url | string | `"http://schema-registry:8081"` | | +| global.datahub.gms.port | string | `"8080"` | | +| global.hostAliases[0].hostnames[0] | string | `"broker"` | | +| global.hostAliases[0].hostnames[1] | string | `"mysql"` | | +| global.hostAliases[0].hostnames[2] | string | `"elasticsearch"` | | +| global.hostAliases[0].hostnames[3] | string | `"neo4j"` | | +| global.hostAliases[0].ip | string | `"192.168.0.104"` | | +| image.args | list | `[]` | Override the image's args. Used to configure custom startup or shutdown behavior | +| image.command | list | `[]` | Override the image's command. 
Used to configure custom startup or shutdown behavior | +| image.pullPolicy | string | `"IfNotPresent"` | | +| image.repository | string | `"acryldata/datahub-mce-consumer"` | | +| image.tag | string | `"head"` | | +| imagePullSecrets | list | `[]` | | +| ingress.annotations | object | `{}` | | +| ingress.enabled | bool | `false` | | +| ingress.hosts[0].host | string | `"chart-example.local"` | | +| ingress.hosts[0].paths | list | `[]` | | +| ingress.tls | list | `[]` | | +| livenessProbe.initialDelaySeconds | int | `60` | | +| livenessProbe.periodSeconds | int | `30` | | +| livenessProbe.failureThreshold | int | `4` | | +| nameOverride | string | `""` | | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | | +| podSecurityContext | object | `{}` | | +| readinessProbe.initialDelaySeconds | int | `60` | | +| readinessProbe.periodSeconds | int | `30` | | +| readinessProbe.failureThreshold | int | `4` | | +| replicaCount | int | `1` | | +| revisionHistoryLimit | int | `10` | | +| resources | object | `{}` | | +| securityContext | object | `{}` | | +| service.port | int | `80` | | +| service.type | string | `"ClusterIP"` | | +| serviceAccount.annotations | object | `{}` | | +| serviceAccount.create | bool | `true` | | +| serviceAccount.name | string | `nil` | | +| serviceMonitor.create | bool | `false` | If set true and `global.datahub.monitoring.enablePrometheus` is set `true` it will create a ServiceMonitor resource | +| tolerations | list | `[]` | | +| global.datahub.metadata_service_authentication.enabled | bool | `true` | Whether Metadata Service Authentication is enabled. | +| global.datahub.metadata_service_authentication.systemClientId | string | `"__datahub_system"` | The internal system id that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'.
| +| global.datahub.metadata_service_authentication.systemClientSecret.secretRef | string | `nil` | The reference to a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | +| global.datahub.metadata_service_authentication.systemClientSecret.secretKey | string | `nil` | The key of a secret containing the internal system secret that is used to communicate with DataHub GMS. Required if metadata_service_authentication is 'true'. | diff --git a/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml b/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml index 7b84f2268..355f1eb38 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/templates/deployment.yaml @@ -32,6 +32,7 @@ spec: {{ $key }}: {{ $value | quote }} {{- end }} spec: + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} {{- with .Values.global.hostAliases }} hostAliases: {{- toYaml . | nindent 8 }} @@ -53,6 +54,9 @@ spec: {{- with .Values.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" + {{- end }} initContainers: {{- with .Values.extraInitContainers }} {{- toYaml . 
| nindent 8 }} @@ -63,12 +67,18 @@ spec: {{- toYaml .Values.securityContext | nindent 12 }} image: "{{ .Values.image.repository }}:{{ required "Global or specific tag is required" (.Values.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- if .Values.image.command }} + command: {{ .Values.image.command | toRawJson }} + {{- end }} + {{- if .Values.image.args }} + args: {{ .Values.image.args | toRawJson }} + {{- end }} ports: - name: http containerPort: 9090 protocol: TCP {{- if or .Values.global.datahub.monitoring.enablePrometheus .Values.global.datahub.monitoring.enableJMXPort }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} containerPort: 4318 protocol: TCP {{- end }} @@ -107,6 +117,22 @@ spec: value: "true" - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" + {{- with .Values.global.kafka.producer.compressionType }} + - name: KAFKA_PRODUCER_COMPRESSION_TYPE + value: "{{ . }}" + {{- end }} + {{- with .Values.global.kafka.producer.maxRequestSize }} + - name: KAFKA_PRODUCER_MAX_REQUEST_SIZE + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.kafka.consumer.stopContainerOnDeserializationError }} + - name: KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR + value: "{{ . }}" + {{- end }} + {{- with .Values.global.kafka.consumer.maxPartitionFetchBytes }} + - name: KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES + value: {{ . 
| quote }} + {{- end }} {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: KAFKA_SCHEMAREGISTRY_URL value: {{ printf "http://%s-%s:%s/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port }} @@ -188,6 +214,8 @@ spec: value: "{{ .Values.global.neo4j.host }}" - name: NEO4J_URI value: "{{ .Values.global.neo4j.uri }}" + - name: NEO4J_DATABASE + value: "{{ .Values.global.neo4j.database | default "graph.db" }}" - name: NEO4J_USERNAME value: "{{ .Values.global.neo4j.username }}" - name: NEO4J_PASSWORD @@ -258,7 +286,7 @@ spec: {{- with .Values.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -266,7 +294,7 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.tolerations }} + {{- with default .Values.global.tolerations .Values.tolerations }} tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/charts/datahub/subcharts/datahub-mce-consumer/templates/service.yaml b/charts/datahub/subcharts/datahub-mce-consumer/templates/service.yaml index 67cd48d98..f9a908a9e 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/templates/service.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/templates/service.yaml @@ -17,9 +17,9 @@ spec: {{- end }} {{- end }} {{- if .Values.global.datahub.monitoring.enablePrometheus }} - - name: jmx + - name: {{ .Values.global.datahub.monitoring.portName }} port: 4318 - targetPort: jmx + targetPort: {{ .Values.global.datahub.monitoring.portName }} protocol: TCP {{- end }} selector: diff --git a/charts/datahub/subcharts/datahub-mce-consumer/templates/servicemonitor.yaml b/charts/datahub/subcharts/datahub-mce-consumer/templates/servicemonitor.yaml index 0e0a54ad3..bdc753141 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/templates/servicemonitor.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/templates/servicemonitor.yaml @@ -11,7 +11,7 @@ metadata: {{- end }} spec: endpoints: - - port: jmx + - port: {{ .Values.global.datahub.monitoring.portName }} relabelings: - separator: / sourceLabels: diff --git a/charts/datahub/subcharts/datahub-mce-consumer/values.yaml b/charts/datahub/subcharts/datahub-mce-consumer/values.yaml index b8b3d6cd8..9572f8ec3 100644 --- a/charts/datahub/subcharts/datahub-mce-consumer/values.yaml +++ b/charts/datahub/subcharts/datahub-mce-consumer/values.yaml @@ -7,14 +7,20 @@ replicaCount: 1 revisionHistoryLimit: 10 image: - repository: linkedin/datahub-mce-consumer + repository: acryldata/datahub-mce-consumer pullPolicy: IfNotPresent tag: + # Override the image's command & args with a new one. 
+ # This may be necessary for custom startup or shutdown behaviors + command: + args: imagePullSecrets: [] nameOverride: "" fullnameOverride: "" +terminationGracePeriodSeconds: 150 + serviceAccount: # Specifies whether a service account should be created create: false @@ -39,6 +45,8 @@ securityContext: {} # runAsNonRoot: true # runAsUser: 1000 +priorityClassName: + service: type: ClusterIP # NodePort port: "9090" @@ -176,15 +184,22 @@ global: server: "broker:9092" schemaregistry: url: "http://schema-registry:8081" + # Kafka producer and consumer settings + #producer: + # compressionType: snappy + # maxRequestSize: "5242880" + #consumer: + # maxPartitionFetchBytes: "5242880" datahub: version: head monitoring: enablePrometheus: false + portName: jmx gms: port: "8080" metadata_service_authentication: - enabled: false + enabled: true systemClientId: "__datahub_system" # systemClientSecret: # secretRef: diff --git a/charts/datahub/templates/datahub-upgrade/_upgrade.tpl b/charts/datahub/templates/datahub-upgrade/_upgrade.tpl index e67ea962b..6cfdc87e5 100644 --- a/charts/datahub/templates/datahub-upgrade/_upgrade.tpl +++ b/charts/datahub/templates/datahub-upgrade/_upgrade.tpl @@ -39,8 +39,29 @@ Return the env variables for upgrade jobs value: "{{ .Values.global.sql.datasource.url }}" - name: EBEAN_DATASOURCE_DRIVER value: "{{ .Values.global.sql.datasource.driver }}" +{{- if .Values.global.datahub.metadata_service_authentication.enabled }} +- name: DATAHUB_SYSTEM_CLIENT_ID + value: {{ .Values.global.datahub.metadata_service_authentication.systemClientId }} +- name: DATAHUB_SYSTEM_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: {{ .Values.global.datahub.metadata_service_authentication.systemClientSecret.secretRef }} + key: {{ .Values.global.datahub.metadata_service_authentication.systemClientSecret.secretKey }} +{{- end }} - name: KAFKA_BOOTSTRAP_SERVER value: "{{ .Values.global.kafka.bootstrap.server }}" +{{- with .Values.global.kafka.producer.compressionType }} +- 
name: KAFKA_PRODUCER_COMPRESSION_TYPE + value: "{{ . }}" +{{- end }} +{{- with .Values.global.kafka.producer.maxRequestSize }} +- name: KAFKA_PRODUCER_MAX_REQUEST_SIZE + value: {{ . | quote }} +{{- end }} +{{- with .Values.global.kafka.consumer.maxPartitionFetchBytes }} +- name: KAFKA_CONSUMER_MAX_PARTITION_FETCH_BYTES + value: {{ . | quote }} +{{- end }} {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: KAFKA_SCHEMAREGISTRY_URL value: {{ printf "http://%s-%s:%s/schema-registry/api/" .Release.Name "datahub-gms" .Values.global.datahub.gms.port }} @@ -84,6 +105,8 @@ Return the env variables for upgrade jobs value: "{{ .Values.global.neo4j.host }}" - name: NEO4J_URI value: "{{ .Values.global.neo4j.uri }}" +- name: NEO4J_DATABASE + value: "{{ .Values.global.neo4j.database | default "graph.db" }}" - name: NEO4J_USERNAME value: "{{ .Values.global.neo4j.username }}" - name: NEO4J_PASSWORD diff --git a/charts/datahub/templates/datahub-upgrade/datahub-cleanup-job-template.yml b/charts/datahub/templates/datahub-upgrade/datahub-cleanup-job-template.yml index 54d51b8f6..05aca8945 100644 --- a/charts/datahub/templates/datahub-upgrade/datahub-cleanup-job-template.yml +++ b/charts/datahub/templates/datahub-upgrade/datahub-cleanup-job-template.yml @@ -12,6 +12,7 @@ metadata: spec: schedule: "* * * * *" suspend: true + concurrencyPolicy: {{ .Values.datahubUpgrade.cleanupJob.concurrencyPolicy | default "Allow" }} jobTemplate: spec: template: @@ -55,7 +56,14 @@ spec: - name: datahub-upgrade-job image: "{{ .Values.datahubUpgrade.image.repository }}:{{ required "Global or specific tag is required" (.Values.datahubUpgrade.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.datahubUpgrade.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.datahubUpgrade.cleanupJob.image.command }} + command: {{ .Values.datahubUpgrade.cleanupJob.image.command | toRawJson }} + {{- end }} + {{- if .Values.datahubUpgrade.cleanupJob.image.args 
}} + args: {{ .Values.datahubUpgrade.cleanupJob.image.args | toRawJson }} + {{- else }} args: [ "-u", "NoCodeDataMigrationCleanup" ] + {{- end }} env: {{- include "datahub.upgrade.env" . | nindent 16}} {{- if .Values.global.datahub.metadata_service_authentication.enabled }} @@ -81,7 +89,7 @@ spec: {{- with .Values.datahubUpgrade.cleanupJob.extraSidecars }} {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.datahubUpgrade.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.datahubUpgrade.nodeSelector }} nodeSelector: {{- toYaml . | nindent 12 }} {{- end }} @@ -89,7 +97,7 @@ spec: affinity: {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.datahubUpgrade.tolerations }} + {{- with default .Values.global.tolerations .Values.datahubUpgrade.tolerations }} tolerations: {{- toYaml . | nindent 12 }} {{- end }} diff --git a/charts/datahub/templates/datahub-upgrade/datahub-nocode-migration-job.yml b/charts/datahub/templates/datahub-upgrade/datahub-nocode-migration-job.yml index 311c1f3a5..9f9018d1f 100644 --- a/charts/datahub/templates/datahub-upgrade/datahub-nocode-migration-job.yml +++ b/charts/datahub/templates/datahub-upgrade/datahub-nocode-migration-job.yml @@ -57,6 +57,12 @@ spec: - name: datahub-upgrade-job image: "{{ .Values.datahubUpgrade.image.repository }}:{{ required "Global or specific tag is required" ($.Values.datahubUpgrade.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.datahubUpgrade.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.datahubUpgrade.noCodeDataMigration.image.command }} + command: {{ .Values.datahubUpgrade.noCodeDataMigration.image.command | toRawJson }} + {{- end }} + {{- if .Values.datahubUpgrade.noCodeDataMigration.image.args }} + args: {{ .Values.datahubUpgrade.noCodeDataMigration.image.args | toRawJson }} + {{- else }} args: - "-u" - "NoCodeDataMigration" @@ -66,6 +72,7 @@ spec: - "batchDelayMs={{ .Values.datahubUpgrade.batchDelayMs }}" - "-a" - "dbType={{ 
.Values.datahubUpgrade.noCodeDataMigration.sqlDbType }}" + {{- end }} env: {{- include "datahub.upgrade.env" . | nindent 12}} {{- if .Values.global.datahub.metadata_service_authentication.enabled }} @@ -100,7 +107,7 @@ spec: {{- with .Values.datahubUpgrade.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.datahubUpgrade.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.datahubUpgrade.nodeSelector }} nodeSelector: {{- toYaml . | nindent 12 }} {{- end }} @@ -108,7 +115,7 @@ spec: affinity: {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.datahubUpgrade.tolerations }} + {{- with default .Values.global.tolerations .Values.datahubUpgrade.tolerations }} tolerations: {{- toYaml . | nindent 12 }} {{- end }} diff --git a/charts/datahub/templates/datahub-upgrade/datahub-restore-indices-job-template.yml b/charts/datahub/templates/datahub-upgrade/datahub-restore-indices-job-template.yml index 167b0d63a..c88c15624 100644 --- a/charts/datahub/templates/datahub-upgrade/datahub-restore-indices-job-template.yml +++ b/charts/datahub/templates/datahub-upgrade/datahub-restore-indices-job-template.yml @@ -10,8 +10,9 @@ metadata: labels: {{- include "datahub.labels" . 
| nindent 4 }} spec: - schedule: "* * * * *" + schedule: {{ .Values.datahubUpgrade.restoreIndices.schedule | default "0 0 * * 0" }} suspend: true + concurrencyPolicy: {{ .Values.datahubUpgrade.restoreIndices.concurrencyPolicy | default "Allow" }} jobTemplate: spec: template: @@ -61,6 +62,12 @@ spec: - name: datahub-upgrade-job image: "{{ .Values.datahubUpgrade.image.repository }}:{{ required "Global or specific tag is required" (.Values.datahubUpgrade.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.datahubUpgrade.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.datahubUpgrade.restoreIndices.image.command }} + command: {{ .Values.datahubUpgrade.restoreIndices.image.command | toRawJson }} + {{- end }} + {{- if .Values.datahubUpgrade.restoreIndices.image.args }} + args: {{ .Values.datahubUpgrade.restoreIndices.image.args | toRawJson }} + {{- else }} args: - "-u" - "RestoreIndices" @@ -68,6 +75,7 @@ spec: - "batchSize={{ .Values.datahubUpgrade.batchSize }}" - "-a" - "batchDelayMs={{ .Values.datahubUpgrade.batchDelayMs }}" + {{- end }} env: {{- include "datahub.upgrade.env" . | nindent 16}} {{- if .Values.global.datahub.metadata_service_authentication.enabled }} @@ -97,7 +105,7 @@ spec: {{- with .Values.datahubUpgrade.restoreIndices.extraSidecars }} {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.datahubUpgrade.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.datahubUpgrade.nodeSelector }} nodeSelector: {{- toYaml . | nindent 12 }} {{- end }} @@ -105,7 +113,7 @@ spec: affinity: {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.datahubUpgrade.tolerations }} + {{- with default .Values.global.tolerations .Values.datahubUpgrade.tolerations }} tolerations: {{- toYaml . 
| nindent 12 }} {{- end }} diff --git a/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml b/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml index 261f0b921..d28248b07 100644 --- a/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml +++ b/charts/datahub/templates/datahub-upgrade/datahub-system-update-job.yml @@ -2,7 +2,7 @@ apiVersion: batch/v1 kind: Job metadata: - name: {{ .Release.Name }}-datahub-system-update-job + name: {{ .Release.Name }}-system-update labels: {{- include "datahub.labels" . | nindent 4 }} {{- with .Values.datahubSystemUpdate.annotations }} @@ -57,9 +57,173 @@ spec: - name: datahub-system-update-job image: "{{ .Values.datahubSystemUpdate.image.repository }}:{{ required "Global or specific tag is required" (.Values.datahubSystemUpdate.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.datahubSystemUpdate.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.datahubSystemUpdate.image.command }} + command: {{ .Values.datahubSystemUpdate.image.command | toRawJson }} + {{- end }} + {{- if .Values.datahubSystemUpdate.image.args }} + args: {{ .Values.datahubSystemUpdate.image.args | toRawJson }} + {{- else }} args: + {{- if .Values.datahubSystemUpdate.nonblocking.enabled }} + - "-u" + - "SystemUpdateBlocking" + {{- else }} - "-u" - "SystemUpdate" + {{- end }} + {{- end }} + env: + - name: DATAHUB_REVISION + value: {{ .Release.Revision | quote }} + {{- include "datahub.upgrade.env" . 
| nindent 12}} + - name: DATAHUB_ANALYTICS_ENABLED + value: {{ .Values.global.datahub_analytics_enabled | quote }} + {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} + - name: SCHEMA_REGISTRY_SYSTEM_UPDATE + value: "true" + - name: SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS + value: "true" + - name: SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION + value: "true" + {{- end }} + {{- with .Values.global.kafka.schemaregistry.type }} + - name: SCHEMA_REGISTRY_TYPE + value: "{{ . }}" + {{- end }} + {{- with .Values.global.kafka.schemaregistry.glue }} + - name: AWS_GLUE_SCHEMA_REGISTRY_REGION + value: "{{ .region }}" + {{- with .registry }} + - name: AWS_GLUE_SCHEMA_REGISTRY_NAME + value: "{{ . }}" + {{- end }} + {{- end }} + - name: ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES + value: {{ .Values.global.elasticsearch.index.upgrade.cloneIndices | quote }} + {{- with .Values.global.elasticsearch.index.enableMappingsReindex }} + - name: ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.elasticsearch.index.enableSettingsReindex }} + - name: ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.elasticsearch.index.settingsOverrides }} + - name: ELASTICSEARCH_INDEX_BUILDER_SETTINGS_OVERRIDES + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.elasticsearch.index.entitySettingsOverrides }} + - name: ELASTICSEARCH_INDEX_BUILDER_ENTITY_SETTINGS_OVERRIDES + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.elasticsearch.index.refreshIntervalSeconds }} + - name: ELASTICSEARCH_INDEX_BUILDER_REFRESH_INTERVAL_SECONDS + value: {{ . | quote }} + {{- end }} + {{- with .Values.global.elasticsearch.index.upgrade.allowDocCountMismatch }} + - name: ELASTICSEARCH_BUILD_INDICES_ALLOW_DOC_COUNT_MISMATCH + value: {{ . | quote }} + {{- end }} + {{- with .Values.datahubSystemUpdate.extraEnvs }} + {{- toYaml . 
| nindent 12 }} + {{- end }} + securityContext: + {{- toYaml .Values.datahubSystemUpdate.securityContext | nindent 12 }} + volumeMounts: + {{- with .Values.global.credentialsAndCertsSecrets }} + - name: datahub-certs-dir + mountPath: {{ .path | default "/mnt/certs" }} + {{- end }} + {{- with .Values.datahubSystemUpdate.extraVolumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.datahubSystemUpdate.resources | nindent 12 }} + {{- with .Values.datahubSystemUpdate.extraSidecars }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with default .Values.global.nodeSelector .Values.datahubSystemUpdate.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.datahubSystemUpdate.affinity }} + affinity: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with default .Values.global.tolerations .Values.datahubSystemUpdate.tolerations }} + tolerations: + {{- toYaml . | nindent 12 }} + {{- end }} +{{- if .Values.datahubSystemUpdate.nonblocking.enabled }} +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Release.Name }}-system-update-nonblk + labels: + {{- include "datahub.labels" . | nindent 4 }} + {{- with .Values.datahubSystemUpdate.nonblocking.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + template: + {{- if or .Values.global.podLabels .Values.datahubSystemUpdate.podAnnotations}} + metadata: + {{- with .Values.datahubSystemUpdate.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.global.podLabels }} + labels: + {{- range $key, $value := . }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + {{- end }} + spec: + {{- with .Values.global.hostAliases }} + hostAliases: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.datahubSystemUpdate.serviceAccount }} + serviceAccountName: {{ . }} + {{- end }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . 
| nindent 8 }} + {{- end }} + volumes: + {{- with .Values.global.credentialsAndCertsSecrets }} + - name: datahub-certs-dir + secret: + defaultMode: 0444 + secretName: {{ .name }} + {{- end }} + {{- with .Values.datahubSystemUpdate.extraVolumes }} + {{- toYaml . | nindent 8}} + {{- end }} + restartPolicy: Never + securityContext: + {{- toYaml .Values.datahubSystemUpdate.podSecurityContext | nindent 8 }} + {{- with .Values.datahubSystemUpdate.extraInitContainers }} + initContainers: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: datahub-system-update-job + image: "{{ .Values.datahubSystemUpdate.image.repository }}:{{ required "Global or specific tag is required" (.Values.datahubSystemUpdate.image.tag | default .Values.global.datahub.version) }}" + imagePullPolicy: {{ .Values.datahubSystemUpdate.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.datahubSystemUpdate.image.command }} + command: {{ .Values.datahubSystemUpdate.image.command | toRawJson }} + {{- end }} + {{- if .Values.datahubSystemUpdate.nonblocking.image.args }} + args: {{ .Values.datahubSystemUpdate.nonblocking.image.args | toRawJson }} + {{- else }} + args: + - "-u" + - "SystemUpdateNonBlocking" + {{- end }} env: - name: DATAHUB_REVISION value: {{ .Release.Revision | quote }} @@ -130,7 +294,7 @@ spec: {{- with .Values.datahubSystemUpdate.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.datahubSystemUpdate.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.datahubSystemUpdate.nodeSelector }} nodeSelector: {{- toYaml . | nindent 12 }} {{- end }} @@ -138,8 +302,9 @@ spec: affinity: {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.datahubSystemUpdate.tolerations }} + {{- with default .Values.global.tolerations .Values.datahubSystemUpdate.tolerations }} tolerations: {{- toYaml . 
| nindent 12 }} {{- end }} +{{- end }} {{- end -}} diff --git a/charts/datahub/templates/elasticsearch-setup-job.yml b/charts/datahub/templates/elasticsearch-setup-job.yml index 8af9d1175..fabafa666 100644 --- a/charts/datahub/templates/elasticsearch-setup-job.yml +++ b/charts/datahub/templates/elasticsearch-setup-job.yml @@ -51,6 +51,12 @@ spec: - name: elasticsearch-setup-job image: "{{ .Values.elasticsearchSetupJob.image.repository }}:{{ required "Global or specific tag is required" ( .Values.elasticsearchSetupJob.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.elasticsearchSetupJob.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.elasticsearchSetupJob.image.command }} + command: {{ .Values.elasticsearchSetupJob.image.command | toRawJson }} + {{- end }} + {{- if .Values.elasticsearchSetupJob.image.args }} + args: {{ .Values.elasticsearchSetupJob.image.args | toRawJson }} + {{- end }} env: - name: ELASTICSEARCH_HOST value: {{ .Values.global.elasticsearch.host | quote }} @@ -97,7 +103,7 @@ spec: {{- with .Values.elasticsearchSetupJob.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.elasticsearchSetupJob.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.elasticsearchSetupJob.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -105,7 +111,7 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.elasticsearchSetupJob.tolerations }} + {{- with default .Values.global.tolerations .Values.elasticsearchSetupJob.tolerations }} tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/charts/datahub/templates/kafka-setup-job.yml b/charts/datahub/templates/kafka-setup-job.yml index a6551d7f1..50b635df3 100644 --- a/charts/datahub/templates/kafka-setup-job.yml +++ b/charts/datahub/templates/kafka-setup-job.yml @@ -57,11 +57,23 @@ spec: - name: kafka-setup-job image: "{{ .Values.kafkaSetupJob.image.repository }}:{{ required "Global or specific tag is required" ( .Values.kafkaSetupJob.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.kafkaSetupJob.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.kafkaSetupJob.image.command }} + command: {{ .Values.kafkaSetupJob.image.command | toRawJson }} + {{- end }} + {{- if .Values.kafkaSetupJob.image.args }} + args: {{ .Values.kafkaSetupJob.image.args | toRawJson }} + {{- end }} env: + {{- if .Values.global.kafka.zookeeper.server }} - name: KAFKA_ZOOKEEPER_CONNECT value: {{ .Values.global.kafka.zookeeper.server | quote }} + {{- end }} - name: KAFKA_BOOTSTRAP_SERVER value: {{ .Values.global.kafka.bootstrap.server | quote }} + {{- with .Values.global.kafka.maxMessageBytes }} + - name: MAX_MESSAGE_BYTES + value: {{ . | quote }} + {{- end }} {{- if eq .Values.global.kafka.schemaregistry.type "INTERNAL" }} - name: USE_CONFLUENT_SCHEMA_REGISTRY value: "false" @@ -132,7 +144,7 @@ spec: {{- with .Values.kafkaSetupJob.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.kafkaSetupJob.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.kafkaSetupJob.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -140,7 +152,7 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.kafkaSetupJob.tolerations }} + {{- with default .Values.global.tolerations .Values.kafkaSetupJob.tolerations }} tolerations: {{- toYaml . 
| nindent 8 }} {{- end }} diff --git a/charts/datahub/templates/mysql-setup-job.yml b/charts/datahub/templates/mysql-setup-job.yml index e847726ae..cfc743171 100644 --- a/charts/datahub/templates/mysql-setup-job.yml +++ b/charts/datahub/templates/mysql-setup-job.yml @@ -51,9 +51,23 @@ spec: - name: mysql-setup-job image: "{{ .Values.mysqlSetupJob.image.repository }}:{{ required "Global or specific tag is required" ( .Values.mysqlSetupJob.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.mysqlSetupJob.image.pullPolicy | default "IfNotPresent" }} + {{- if .Values.mysqlSetupJob.image.command }} + command: {{ .Values.mysqlSetupJob.image.command | toRawJson }} + {{- end }} + {{- if .Values.mysqlSetupJob.image.args }} + args: {{ .Values.mysqlSetupJob.image.args | toRawJson }} + {{- end }} env: - name: MYSQL_USERNAME - value: {{ .Values.mysqlSetupJob.username | default .Values.global.sql.datasource.username | quote }} + {{- $usernameValue := (.Values.mysqlSetupJob).username | default .Values.global.sql.datasource.username }} + {{- if and (kindIs "string" $usernameValue) $usernameValue }} + value: {{ $usernameValue | quote }} + {{- else }} + valueFrom: + secretKeyRef: + name: "{{ ($usernameValue).secretRef | default .Values.global.sql.datasource.username.secretRef }}" + key: "{{ ($usernameValue).secretKey | default .Values.global.sql.datasource.username.secretKey }}" + {{- end }} - name: MYSQL_PASSWORD {{- $passwordValue := (.Values.mysqlSetupJob.password).value | default .Values.global.sql.datasource.password.value }} {{- if $passwordValue }} @@ -82,7 +96,7 @@ spec: {{- with .Values.mysqlSetupJob.extraSidecars }} {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.mysqlSetupJob.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.mysqlSetupJob.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -90,7 +104,7 @@ spec: affinity: {{- toYaml . 
| nindent 8 }} {{- end }} - {{- with .Values.mysqlSetupJob.tolerations }} + {{- with default .Values.global.tolerations .Values.mysqlSetupJob.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} diff --git a/charts/datahub/templates/postgresql-setup-job.yml b/charts/datahub/templates/postgresql-setup-job.yml index f5c021468..3a39557cc 100644 --- a/charts/datahub/templates/postgresql-setup-job.yml +++ b/charts/datahub/templates/postgresql-setup-job.yml @@ -51,9 +51,23 @@ spec: - name: postgresql-setup-job image: "{{ .Values.postgresqlSetupJob.image.repository }}:{{ required "Global or specific tag is required" (.Values.postgresqlSetupJob.image.tag | default .Values.global.datahub.version) }}" imagePullPolicy: {{ .Values.postgresqlSetupJob.image.pullPolicy | default "Always" }} + {{- if .Values.postgresqlSetupJob.image.command }} + command: {{ .Values.postgresqlSetupJob.image.command | toRawJson }} + {{- end }} + {{- if .Values.postgresqlSetupJob.image.args }} + args: {{ .Values.postgresqlSetupJob.image.args | toRawJson }} + {{- end }} env: - name: POSTGRES_USERNAME - value: {{ .Values.postgresqlSetupJob.username | default .Values.global.sql.datasource.username | quote }} + {{- $usernameValue := (.Values.postgresqlSetupJob).username | default .Values.global.sql.datasource.username }} + {{- if and (kindIs "string" $usernameValue) $usernameValue }} + value: {{ $usernameValue | quote }} + {{- else }} + valueFrom: + secretKeyRef: + name: "{{ ($usernameValue).secretRef | default .Values.global.sql.datasource.username.secretRef }}" + key: "{{ ($usernameValue).secretKey | default .Values.global.sql.datasource.username.secretKey }}" + {{- end }} - name: POSTGRES_PASSWORD {{- $passwordValue := (.Values.postgresqlSetupJob.password).value | default .Values.global.sql.datasource.password.value }} {{- if $passwordValue }} @@ -82,7 +96,7 @@ spec: {{- with .Values.postgresqlSetupJob.extraSidecars }} {{- toYaml . 
| nindent 8 }} {{- end }} - {{- with .Values.postgresqlSetupJob.nodeSelector }} + {{- with default .Values.global.nodeSelector .Values.postgresqlSetupJob.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} @@ -90,7 +104,7 @@ spec: affinity: {{- toYaml . | nindent 8 }} {{- end }} - {{- with .Values.postgresqlSetupJob.tolerations }} + {{- with default .Values.global.tolerations .Values.postgresqlSetupJob.tolerations }} tolerations: {{- toYaml . | nindent 8 }} {{- end }} diff --git a/charts/datahub/values.yaml b/charts/datahub/values.yaml index 597993ecb..c3d9edafb 100644 --- a/charts/datahub/values.yaml +++ b/charts/datahub/values.yaml @@ -3,14 +3,29 @@ datahub-gms: enabled: true image: - repository: linkedin/datahub-gms + repository: acryldata/datahub-gms # tag: "v0.11.0 # defaults to .global.datahub.version + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: memory: 2Gi requests: cpu: 100m memory: 1Gi + livenessProbe: + initialDelaySeconds: 60 + periodSeconds: 30 + failureThreshold: 8 + readinessProbe: + initialDelaySeconds: 120 + periodSeconds: 30 + failureThreshold: 8 + # Optionally specify service type for datahub-gms: LoadBalancer, ClusterIP or NodePort, by default: LoadBalancer + # service: + # type: ClusterIP # Optionally set a GMS specific SQL login (defaults to global login) # sql: # datasource: @@ -22,8 +37,12 @@ datahub-gms: datahub-frontend: enabled: true image: - repository: linkedin/datahub-frontend-react + repository: acryldata/datahub-frontend-react # tag: "v0.11.0" # # defaults to .global.datahub.version + # Add custom command / arguments to this job.
Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: memory: 1400Mi @@ -34,17 +53,26 @@ datahub-frontend: ingress: enabled: false defaultUserCredentials: {} - # randomAdminPassword: true - # # You can also set specific passwords for default users - # # manualValues: | - # # datahub:manualPassword - # # initialViewer:manualPassword + service: + extraLabels: {} + # randomAdminPassword: true + # You can also set specific passwords for default users + # manualValues: | + # datahub:manualPassword + # initialViewer:manualPassword + # Optionally specify service type for datahub-frontend: LoadBalancer, ClusterIP or NodePort, by default: LoadBalancer + # service: + # type: ClusterIP acryl-datahub-actions: enabled: true image: repository: acryldata/datahub-actions - tag: "v0.0.11" + tag: "v0.0.15" + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] # mount the k8s secret as a volume in the container, each key name is mounted as a file on the mount path /etc/datahub/ingestion-secret-files # ingestionSecretFiles: # name: ${K8S_SECRET_NAME} @@ -58,8 +86,12 @@ acryl-datahub-actions: datahub-mae-consumer: image: - repository: linkedin/datahub-mae-consumer + repository: acryldata/datahub-mae-consumer # tag: "v0.11.0" # defaults to .global.datahub.version + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: memory: 1536Mi @@ -69,8 +101,12 @@ datahub-mae-consumer: datahub-mce-consumer: image: - repository: linkedin/datahub-mce-consumer + repository: acryldata/datahub-mce-consumer # tag: "v0.11.0" # defaults to .global.datahub.version + # Add custom command / arguments to this job.
Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: memory: 1536Mi @@ -87,8 +123,12 @@ datahub-ingestion-cron: elasticsearchSetupJob: enabled: true image: - repository: linkedin/datahub-elasticsearch-setup + repository: acryldata/datahub-elasticsearch-setup # tag: "v0.11.0" # defaults to .global.datahub.version + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: cpu: 500m @@ -117,8 +157,12 @@ elasticsearchSetupJob: kafkaSetupJob: enabled: true image: - repository: linkedin/datahub-kafka-setup + repository: acryldata/datahub-kafka-setup # tag: "v0.11.0" # defaults to .global.datahub.version + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: cpu: 500m @@ -149,6 +193,10 @@ mysqlSetupJob: image: repository: acryldata/datahub-mysql-setup # tag: "v0.11.0" # defaults to .global.datahub.version + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: cpu: 500m @@ -184,6 +232,10 @@ postgresqlSetupJob: image: repository: acryldata/datahub-postgres-setup # tag: "v0.11.0" # defaults to .global.datahub.version + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] resources: limits: cpu: 500m @@ -227,6 +279,11 @@ datahubUpgrade: batchSize: 1000 batchDelayMs: 100 noCodeDataMigration: + image: + # Add custom command / arguments to this job. 
Useful if you need a custom startup or shutdown script + # to run + command: + args: [] sqlDbType: "MYSQL" # sqlDbType: "POSTGRES" podSecurityContext: {} @@ -246,6 +303,11 @@ datahubUpgrade: # image: my-image # imagePullPolicy: Always cleanupJob: + image: + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + command: + args: [] resources: limits: cpu: 500m @@ -253,12 +315,19 @@ datahubUpgrade: requests: cpu: 300m memory: 256Mi + # Add the concurrency Policy flexibility via values + concurrencyPolicy: Allow # Add extra sidecar containers to job pod extraSidecars: [] # - name: my-image-name # image: my-image # imagePullPolicy: Always restoreIndices: + image: + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + command: + args: [] resources: limits: cpu: 500m @@ -266,6 +335,10 @@ datahubUpgrade: requests: cpu: 300m memory: 256Mi + # Schedule of CronJob when enabled + schedule: "0 0 * * 0" + # Add the concurrency Policy flexibility via values + concurrencyPolicy: Allow # Add extra sidecar containers to job pod extraSidecars: [] # - name: my-image-name @@ -283,6 +356,10 @@ datahubSystemUpdate: image: repository: acryldata/datahub-upgrade # tag: + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] podSecurityContext: {} # fsGroup: 1000 securityContext: {} @@ -293,6 +370,25 @@ datahubSystemUpdate: helm.sh/hook: pre-install,pre-upgrade helm.sh/hook-weight: "-4" helm.sh/hook-delete-policy: before-hook-creation + # !! Requires version v0.13.0 or greater + # Split the system update into 2 jobs, one that is blocking the rest of + # the deployment and the other which is non-blocking. Once the blocking + # steps are completed, the non-blocking job runs while the rest of the + # system is starting. 
+ nonblocking: + enabled: false + # When mode = 'nonblocking' the nonblocking job should not include the above helm.sh/hook annotations + annotations: + # This is what defines this resource as a hook. Without this line, the + # job is considered part of the release. + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-delete-policy: before-hook-creation + image: + args: + # Add custom command / arguments to this job. Useful if you need a custom startup or shutdown script + # to run + # command: customCommand + # args: [] podAnnotations: {} resources: limits: @@ -481,6 +577,13 @@ global: metadata_change_log_timeseries_topic_name: "MetadataChangeLog_Timeseries_v1" platform_event_topic_name: "PlatformEvent_v1" datahub_upgrade_history_topic_name: "DataHubUpgradeHistory_v1" + maxMessageBytes: "5242880" # 5MB + producer: + compressionType: none + maxRequestSize: "5242880" # 5MB + consumer: + maxPartitionFetchBytes: "5242880" # 5MB + stopContainerOnDeserializationError: true ## For AWS MSK set this to a number larger than 1 # partitions: 3 # replicationFactor: 3 @@ -554,13 +657,19 @@ global: # value: password datahub: - version: v0.11.0 + version: v0.13.2 gms: + protocol: "http" port: "8080" nodePort: "30001" + frontend: + validateSignUpEmail: true + monitoring: enablePrometheus: true + # Set a custom name for the monitoring port + portName: jmx mae_consumer: port: "9091" @@ -586,10 +695,10 @@ global: managed_ingestion: enabled: true - defaultCliVersion: "0.11.0" + defaultCliVersion: "0.13.1.2" metadata_service_authentication: - enabled: false + enabled: true systemClientId: "__datahub_system" systemClientSecret: secretRef: "datahub-auth-secrets" @@ -653,3 +762,5 @@ global: # ssl.truststore.type: JKS # ssl.protocol: TLS # ssl.endpoint.identification.algorithm: +# basic.auth.credentials.source: USER_INFO +# basic.auth.user.info: diff --git a/charts/prerequisites/Chart.yaml b/charts/prerequisites/Chart.yaml index 86b95a471..4048c2235 100644 --- 
a/charts/prerequisites/Chart.yaml +++ b/charts/prerequisites/Chart.yaml @@ -4,25 +4,20 @@ description: A Helm chart for packages that Datahub depends on type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. -version: 0.0.18 +version: 0.1.10 dependencies: - name: elasticsearch version: 7.17.3 repository: https://helm.elastic.co condition: elasticsearch.enabled - # This chart deploys an enterprise version of neo4j that requires commercial license + # This chart deploys an enterprise or community version of neo4j - name: neo4j - version: 4.2.2-1 - repository: https://neo4j-contrib.github.io/neo4j-helm/ + version: 5.11.0 + repository: https://helm.neo4j.com/neo4j condition: neo4j.enabled - # This chart deploys a community version of neo4j - - name: neo4j-community - version: 1.2.5 - repository: https://equinor.github.io/helm-charts/charts/ - condition: neo4j-community.enabled - name: mysql - version: 9.1.8 - repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami + version: 9.4.9 + repository: https://charts.bitnami.com/bitnami condition: mysql.enabled - name: postgresql version: 11.2.6 @@ -40,7 +35,7 @@ dependencies: condition: cp-helm-charts.enabled # This chart deploys a community version of kafka - name: kafka - version: 22.1.3 + version: 26.11.2 repository: https://raw.githubusercontent.com/bitnami/charts/archive-full-index/bitnami condition: kafka.enabled maintainers: diff --git a/charts/prerequisites/values.yaml b/charts/prerequisites/values.yaml index 620e1e897..26c296f69 100644 --- a/charts/prerequisites/values.yaml +++ b/charts/prerequisites/values.yaml @@ -1,7 +1,8 @@ # Default configuration for pre-requisites to get you started # Copy this file and update to the configuration of choice elasticsearch: - enabled: true # set this to false, if you want to provide your own ES instance. 
+ # set this to false, if you want to provide your own ES instance. + enabled: true # If you're running in production, set this to 3 and comment out antiAffinity below # Or alternatively if you're running production, bring your own ElasticSearch @@ -15,16 +16,16 @@ elasticsearch: clusterHealthCheckParams: "wait_for_status=yellow&timeout=1s" # # Shrink default JVM heap. - esJavaOpts: "-Xmx384m -Xms384m" + esJavaOpts: "-Xmx512m -Xms512m" # # Allocate smaller chunks of memory per pod. resources: requests: cpu: "100m" - memory: "768M" + memory: "1024M" limits: cpu: "1000m" - memory: "768M" + memory: "1024M" # # Request smaller persistent volumes. # volumeClaimTemplate: @@ -34,30 +35,52 @@ elasticsearch: # requests: # storage: 100M -# Official neo4j chart uses the Neo4j Enterprise Edition which requires a license +# Official neo4j chart, supports both community and enterprise editions +# see https://neo4j.com/docs/operations-manual/current/kubernetes/ for more information +# source: https://github.com/neo4j/helm-charts neo4j: - enabled: false # set this to true, if you have a license for the enterprise edition - acceptLicenseAgreement: "yes" - defaultDatabase: "graph.db" - neo4jPassword: "datahub" - # For better security, add password to neo4j-secrets k8s secret and uncomment below - # existingPasswordSecret: neo4j-secrets - core: - standalone: true + enabled: false + nameOverride: neo4j + neo4j: + name: neo4j + edition: "community" + acceptLicenseAgreement: "yes" + defaultDatabase: "graph.db" + password: "datahub" + # For better security, add password to neo4j-secrets k8s secret with neo4j-username, neo4j-password and NEO4J_AUTH and uncomment below + # NEO4J_AUTH: should be composed like so: {Username}/{Password} + # passwordFromSecret: neo4j-secrets + + # Set security context for pod + securityContext: + runAsNonRoot: true + runAsUser: 7474 + runAsGroup: 7474 + fsGroup: 7474 + fsGroupChangePolicy: "Always" + + # Disallow privilegeEscalation on container level +
containerSecurityContext: + allowPrivilegeEscalation: false + + # Create a volume for neo4j, SSD storage is recommended + volumes: + data: + mode: "defaultStorageClass" + # mode: "dynamic" + # dynamic: + # storageClassName: managed-csi-premium -# Deploys neo4j community version. Only supports single node -neo4j-community: - enabled: false # set this to true, if you want to run neo4j community edition - acceptLicenseAgreement: "yes" - defaultDatabase: "graph.db" - # For better security, add neo4j-secrets k8s secret with neo4j-password and uncomment below - existingPasswordSecret: neo4j-secrets + env: + NEO4J_PLUGINS: '["apoc"]' mysql: enabled: true auth: # For better security, add mysql-secrets k8s secret with mysql-root-password, mysql-replication-password and mysql-password existingSecret: mysql-secrets + primary: + extraFlags: "--character-set-server=utf8mb4 --collation-server=utf8mb4_bin" postgresql: enabled: false @@ -81,13 +104,13 @@ gcloud-sqlproxy: # use port 3306 for MySQL, or other port you set for your SQL instance. instances: # GCP Cloud SQL instance id - - instance: "" - # GCP project where the instance exists. - project: "" - # GCP region where the instance exists. - region: "" - # Port number for the proxy to expose for this instance. - port: 3306 + - instance: "" + # GCP project where the instance exists. + project: "" + # GCP region where the instance exists. + region: "" + # Port number for the proxy to expose for this instance. 
+ port: 3306 cp-helm-charts: enabled: false @@ -95,7 +118,8 @@ cp-helm-charts: cp-schema-registry: enabled: false kafka: - bootstrapServers: "prerequisites-kafka:9092" # <>-kafka:9092 + # <>-kafka:9092 + bootstrapServers: "prerequisites-kafka:9092" cp-kafka: enabled: false cp-zookeeper: @@ -112,6 +136,26 @@ cp-helm-charts: # Bitnami version of Kafka that deploys open source Kafka https://artifacthub.io/packages/helm/bitnami/kafka kafka: enabled: true + listeners: + client: + protocol: PLAINTEXT + interbroker: + protocol: PLAINTEXT + controller: + replicaCount: 0 + broker: + replicaCount: 1 + # The new minId for broker is 100. If we don't override this, the broker will have id 100 + # and cannot load the partitions. So we set minId to 0 to be backwards compatible + minId: 0 + # These server properties are no longer exposed as parameters in the bitnami kafka chart since 24.0.0 + # They need to be passed in through extraConfig. See below for reference + # https://github.com/bitnami/charts/tree/main/bitnami/kafka#to-2400 + extraConfig: | + message.max.bytes=5242880 + default.replication.factor=1 + offsets.topic.replication.factor=1 + transaction.state.log.replication.factor=1 kraft: enabled: false zookeeper: