From c482de1493d06a0fdfb0c366e9c7abdc97e05449 Mon Sep 17 00:00:00 2001 From: Alex Rakowski <20504869+agrski@users.noreply.github.com> Date: Mon, 10 Oct 2022 14:49:50 +0100 Subject: [PATCH] Use Helm chart for Kafka cluster setup (#477) * Remove trailing whitespace in CRDs in Helm chart * Add Chart manifest for Kafka cluster * Move Kafka cluster & user manifests to chart templates * Add basic parameterisation for Kafka cluster values * Add enable toggles for plaintext & TLS listeners Also change structure of Helm values to be nested for plaintext & TLS listener config. * Parameterise listener types & change to internal by default * Add Helm vars for broker probe timeouts * Add Helm vars for topic config * Add Helm vars for broker storage config * Add Helm vars for Zookeeper probe timeouts * Add Helm vars for Zookeeper storage config * Separate metrics config to separate Helm template * Reorder Ansible Kafka vars for logical grouping * Use feature gate toggle in Kafka role instead of duplicating logic * Use Kafka cluster Helm chart in Ansible * Fix whitespace chomping issues in cluster template * Add Python requirements for Ansible playbooks * Add Ansible Galaxy requirements for playbooks * Disable Grafana installation in Ansible ecosystem * Remove unused value Variables set in one role are not visible to others. Inheritance is hierarchical and does not include siblings. * Force reinstallation of Kafka Helm chart in case of changes to the chart itself This is mostly a convenience when developing, as we would otherwise expect published chart versions which should not be changing (patches are new versions). 
* Allow passing Helm values to Kafka chart via Ansible * Allow disabling Kafka metrics in Helm chart * Allow disabling KafkaUser CR creation in Helm chart --- ansible/playbooks/vars/default.yaml | 4 +- ansible/requirements.txt | 4 + ansible/requirements.yml | 6 ++ ansible/roles/ecosystem/defaults/main.yaml | 4 +- ansible/roles/ecosystem/tasks/kafka.yaml | 19 ++--- .../seldon-core-v2-setup/values.yaml | 4 +- kafka/strimzi/Chart.yaml | 14 ++++ kafka/strimzi/templates/cluster.yaml | 74 +++++++++++++++++++ .../{cluster.yaml => templates/metrics.yaml} | 74 +------------------ kafka/strimzi/{ => templates}/user.yaml | 2 + kafka/strimzi/values.yaml | 54 ++++++++++++++ 11 files changed, 172 insertions(+), 87 deletions(-) create mode 100644 ansible/requirements.txt create mode 100644 ansible/requirements.yml create mode 100644 kafka/strimzi/Chart.yaml create mode 100644 kafka/strimzi/templates/cluster.yaml rename kafka/strimzi/{cluster.yaml => templates/metrics.yaml} (76%) rename kafka/strimzi/{ => templates}/user.yaml (76%) create mode 100644 kafka/strimzi/values.yaml diff --git a/ansible/playbooks/vars/default.yaml b/ansible/playbooks/vars/default.yaml index f17feddada..08d8a91d4e 100644 --- a/ansible/playbooks/vars/default.yaml +++ b/ansible/playbooks/vars/default.yaml @@ -9,7 +9,9 @@ seldon_monitoring_prometheus_operator_values: # Strimzi Kafka Configuration strimzi_kafka_namespace: seldon-mesh +strimzi_kafka_create_cluster: false +strimzi_kafka_add_feature_gates: true strimzi_kafka_install_prometheus_operator: false strimzi_kafka_create_prometheus_instance: false -strimzi_kafka_create_cluster: false +strimzi_kafka_install_grafana: false strimzi_kafka_grafana_prometheus_url: "http://seldon-monitoring-prometheus.{{ seldon_monitoring_namespace }}.svc:9090" diff --git a/ansible/requirements.txt b/ansible/requirements.txt new file mode 100644 index 0000000000..1978575fa3 --- /dev/null +++ b/ansible/requirements.txt @@ -0,0 +1,4 @@ +ansible>=4.2.0,<5.0.0 
+docker>=5.0.0,<6.0.0 +openshift>=0.12.0,<0.13.0 +passlib>=1.7.4,<2.0.0 diff --git a/ansible/requirements.yml b/ansible/requirements.yml new file mode 100644 index 0000000000..0d45a6df5e --- /dev/null +++ b/ansible/requirements.yml @@ -0,0 +1,6 @@ +collections: + - name: https://github.com/SeldonIO/ansible-k8s-collection.git + type: git + version: ddad72dcff53dd8ca05ca477f74df7992fd489fc + - name: kubernetes.core + version: 2.3.0 diff --git a/ansible/roles/ecosystem/defaults/main.yaml b/ansible/roles/ecosystem/defaults/main.yaml index 32a558892a..8f60dca785 100644 --- a/ansible/roles/ecosystem/defaults/main.yaml +++ b/ansible/roles/ecosystem/defaults/main.yaml @@ -2,7 +2,9 @@ repo_root_dir: "{{ inventory_dir }}/.." seldon_mesh_namespace: seldon-mesh kafka_namespace: seldon-mesh -strimzi_kafka_namespace: kafka + +kafka_wait_timeout: "300s" +kafka_cluster_values: {} # Installation on/off flags install_prometheus: true diff --git a/ansible/roles/ecosystem/tasks/kafka.yaml b/ansible/roles/ecosystem/tasks/kafka.yaml index 06fc31e4bd..b8e07a3988 100644 --- a/ansible/roles/ecosystem/tasks/kafka.yaml +++ b/ansible/roles/ecosystem/tasks/kafka.yaml @@ -3,17 +3,14 @@ name: seldonio.k8s.strimzi_kafka when: install_kafka | bool -- name: Enable KRaft - shell: - " kubectl set env deployment/strimzi-cluster-operator STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft -n {{ strimzi_kafka_namespace }} " - tags: kraft - when: ecosystem_enable_kraft | bool - -- name: Create Kafka Cluster - kubernetes.core.k8s: +- name: "Create Kafka Cluster" + kubernetes.core.helm: state: present + force: yes namespace: "{{ kafka_namespace }}" - template: "{{ repo_root_dir }}/{{ item }}" - with_items: - - kafka/strimzi/cluster.yaml + name: "seldon-core-v2-kafka" + chart_ref: "{{ repo_root_dir }}/kafka/strimzi" + values: "{{ kafka_cluster_values }}" + wait: yes + wait_timeout: "{{ kafka_wait_timeout }}" when: ecosystem_configure_kafka | bool diff --git a/k8s/helm-charts/seldon-core-v2-setup/values.yaml 
b/k8s/helm-charts/seldon-core-v2-setup/values.yaml index a9515e1cb2..c4dc9fcd8b 100644 --- a/k8s/helm-charts/seldon-core-v2-setup/values.yaml +++ b/k8s/helm-charts/seldon-core-v2-setup/values.yaml @@ -62,7 +62,7 @@ security: crtPath: /tmp/certs/ddc/tls.crt caPath: /tmp/certs/ddc/ca.crt serverCaPath: /tmp/certs/dds/ca.crt - + opentelemetry: endpoint: seldon-collector:4317 enable: true @@ -153,7 +153,7 @@ scheduler: registry: docker.io repository: seldonio/seldon-scheduler tag: latest - + serverConfig: rclone: image: diff --git a/kafka/strimzi/Chart.yaml b/kafka/strimzi/Chart.yaml new file mode 100644 index 0000000000..5d07c4afa7 --- /dev/null +++ b/kafka/strimzi/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +type: application + +version: 0.1.0 +appVersion: "0.2.0-dev" +name: "seldon-core-v2-kafka" +description: "A Kafka cluster suitable for Seldon Core v2" +maintainers: + - name: Seldon Technologies Ltd + email: hello@seldon.io + url: www.seldon.io +home: https://docs.seldon.io/projects/seldon-core-v2 +sources: + - https://github.com/SeldonIO/seldon-core-v2/kafka/strimzi diff --git a/kafka/strimzi/templates/cluster.yaml b/kafka/strimzi/templates/cluster.yaml new file mode 100644 index 0000000000..62afaec504 --- /dev/null +++ b/kafka/strimzi/templates/cluster.yaml @@ -0,0 +1,74 @@ +apiVersion: kafka.strimzi.io/v1beta2 +kind: Kafka +metadata: + name: {{ .Values.cluster.name }} +spec: + entityOperator: + userOperator: {} + kafka: + version: {{ .Values.cluster.version }} + replicas: {{ .Values.broker.replicas }} + listeners: + {{- if .Values.broker.plaintext.enabled }} + - name: plain + port: {{ .Values.broker.plaintext.port }} + type: {{ .Values.broker.plaintext.listenerType }} + tls: false + {{- end }} + {{- if .Values.broker.tls.enabled }} + - name: tls + port: {{ .Values.broker.tls.port }} + type: {{ .Values.broker.tls.listenerType }} + tls: true + authentication: + type: tls + {{- end }} + readinessProbe: + initialDelaySeconds: {{ 
.Values.broker.readiness.initialDelaySeconds }} + timeoutSeconds: {{ .Values.broker.readiness.timeoutSeconds }} + livenessProbe: + initialDelaySeconds: {{ .Values.broker.liveness.initialDelaySeconds }} + timeoutSeconds: {{ .Values.broker.liveness.timeoutSeconds }} + config: + auto.create.topics.enable: {{ .Values.topic.autoCreate }} + offsets.topic.replication.factor: {{ .Values.topic.offsetReplicationFactor }} + transaction.state.log.replication.factor: {{ .Values.topic.txStateReplicationFactor }} + transaction.state.log.min.isr: {{ .Values.topic.txStateMinISR }} + default.replication.factor: {{ .Values.topic.defaultReplicationFactor }} + min.insync.replicas: {{ .Values.topic.minISR }} + inter.broker.protocol.version: {{ .Values.broker.interBrokerProtocolVersion }} + template: + pod: + tmpDirSizeLimit: {{ .Values.broker.tmpDirSizeLimit }} + storage: + type: jbod + volumes: + - id: 0 + type: persistent-claim + size: {{ .Values.broker.pvcSize }} + deleteClaim: false + metricsConfig: + type: jmxPrometheusExporter + valueFrom: + configMapKeyRef: + name: kafka-metrics + key: kafka-metrics-config.yml + # zookeeper settings should not be use in case of STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft (raft) + # to enable raft run: + # `kubectl set env deployment/strimzi-cluster-operator STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft -n kafka` + # which is the default with ansible install + zookeeper: + replicas: {{ .Values.zookeeper.replicas }} + readinessProbe: + initialDelaySeconds: {{ .Values.zookeeper.readiness.initialDelaySeconds }} + timeoutSeconds: {{ .Values.zookeeper.readiness.timeoutSeconds }} + livenessProbe: + initialDelaySeconds: {{ .Values.zookeeper.liveness.initialDelaySeconds }} + timeoutSeconds: {{ .Values.zookeeper.liveness.timeoutSeconds }} + storage: + type: persistent-claim + size: {{ .Values.zookeeper.pvcSize }} + deleteClaim: false + kafkaExporter: + topicRegex: ".*" + groupRegex: ".*" diff --git a/kafka/strimzi/cluster.yaml 
b/kafka/strimzi/templates/metrics.yaml similarity index 76% rename from kafka/strimzi/cluster.yaml rename to kafka/strimzi/templates/metrics.yaml index ce4d3a9c77..ab9787972f 100644 --- a/kafka/strimzi/cluster.yaml +++ b/kafka/strimzi/templates/metrics.yaml @@ -1,74 +1,4 @@ -apiVersion: kafka.strimzi.io/v1beta2 -kind: Kafka -metadata: - name: seldon -spec: - entityOperator: - userOperator: {} - kafka: - version: 3.2.0 - replicas: 3 - listeners: - - name: plain - port: 9092 - type: loadbalancer - tls: false - - name: tls - port: 9093 - type: loadbalancer - tls: true - authentication: - type: tls - readinessProbe: - initialDelaySeconds: 15 - timeoutSeconds: 5 - livenessProbe: - initialDelaySeconds: 15 - timeoutSeconds: 5 - config: - auto.create.topics.enable: true - offsets.topic.replication.factor: 1 - transaction.state.log.replication.factor: 1 - transaction.state.log.min.isr: 1 - default.replication.factor: 1 - min.insync.replicas: 1 - inter.broker.protocol.version: "3.2" - template: - pod: - tmpDirSizeLimit: 100Mi - storage: - type: jbod - volumes: - - id: 0 - type: persistent-claim - size: 200Gi - deleteClaim: false - metricsConfig: - type: jmxPrometheusExporter - valueFrom: - configMapKeyRef: - name: kafka-metrics - key: kafka-metrics-config.yml - # zookeeper settings should not be use in case of STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft (raft) - # to enable raft run: - # `kubectl set env deployment/strimzi-cluster-operator STRIMZI_FEATURE_GATES=+UseStrimziPodSets,+UseKRaft -n kafka` - # which is the default with ansible install - zookeeper: - replicas: 1 - readinessProbe: - initialDelaySeconds: 15 - timeoutSeconds: 5 - livenessProbe: - initialDelaySeconds: 15 - timeoutSeconds: 5 - storage: - type: persistent-claim - size: 100Gi - deleteClaim: false - kafkaExporter: - topicRegex: ".*" - groupRegex: ".*" ---- +{{- if .Values.metrics.enabled }} kind: ConfigMap apiVersion: v1 metadata: @@ -231,4 +161,4 @@ data: labels: replicaId: "$2" memberType: "$3" - 
+{{- end }} diff --git a/kafka/strimzi/user.yaml b/kafka/strimzi/templates/user.yaml similarity index 76% rename from kafka/strimzi/user.yaml rename to kafka/strimzi/templates/user.yaml index 2e9ca8d5c0..564a9d7dbd 100644 --- a/kafka/strimzi/user.yaml +++ b/kafka/strimzi/templates/user.yaml @@ -1,3 +1,4 @@ +{{- if .Values.broker.tls.enabled }} apiVersion: kafka.strimzi.io/v1beta2 kind: KafkaUser metadata: @@ -7,3 +8,4 @@ metadata: spec: authentication: type: tls +{{- end }} diff --git a/kafka/strimzi/values.yaml b/kafka/strimzi/values.yaml new file mode 100644 index 0000000000..3e5a188a43 --- /dev/null +++ b/kafka/strimzi/values.yaml @@ -0,0 +1,54 @@ +cluster: + name: "seldon" + version: "3.2.0" + +metrics: + enabled: true + +broker: + replicas: 3 + + plaintext: + enabled: true + port: 9092 + listenerType: internal + + tls: + enabled: true + port: 9093 + listenerType: internal + + readiness: + initialDelaySeconds: 15 + timeoutSeconds: 5 + + liveness: + initialDelaySeconds: 15 + timeoutSeconds: 5 + + interBrokerProtocolVersion: "3.2" + + tmpDirSizeLimit: 100Mi + + pvcSize: 100Gi + +topic: + autoCreate: true + offsetReplicationFactor: 1 + txStateReplicationFactor: 1 + txStateMinISR: 1 + defaultReplicationFactor: 1 + minISR: 1 + +zookeeper: + replicas: 1 + + readiness: + initialDelaySeconds: 15 + timeoutSeconds: 5 + + liveness: + initialDelaySeconds: 15 + timeoutSeconds: 5 + + pvcSize: 100Gi