Skip to content

Commit

Permalink
Implement External Datastore Integration (ETCD) (#47)
Browse files Browse the repository at this point in the history
* Initial ETCD implementation

* Add Integration tests

* Add Datastore verification logic

* Fix lint

* Add BlockedState on invalid datastore

* Lighter ETCD bundle

* Assert Datastore in Supported Datastores

* Fix nodes count

* Skip etcd tests

* Address linting issues

* remove extraneous file

* revert move of BootstrapConfig

* Increment timeout values

* Run etcd tests again

* Remove duplicate logging

* Parallel test runs

* Test GH keys

* Bump timeout to avoid LXD provisioning delays

---------

Co-authored-by: Adam Dyess <adam.dyess@canonical.com>
  • Loading branch information
mateoflorido and addyess committed Apr 2, 2024
1 parent 304bdf7 commit e1e6fa8
Show file tree
Hide file tree
Showing 13 changed files with 424 additions and 74 deletions.
9 changes: 8 additions & 1 deletion .github/workflows/integration_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
pull_request:

jobs:

extra-args:
runs-on: ubuntu-latest
outputs:
Expand All @@ -14,6 +15,8 @@ jobs:
id: flags
env:
TITLE: ${{ github.event.pull_request.title }}
JOB: ${{ github.job }}
WORKFLOW: ${{ github.workflow }}
run: |
EXTRA_ARGS="--crash-dump=on-failure"
if [[ "$TITLE" == *"[COS]"* ]]; then
Expand All @@ -35,14 +38,18 @@ jobs:
integration-tests:
uses: canonical/operator-workflows/.github/workflows/integration_test.yaml@main
needs: [build-all-charms, extra-args]
strategy:
matrix:
suite: ["k8s", "etcd"]
secrets: inherit
with:
provider: lxd
juju-channel: 3.3/stable
extra-arguments: ${{needs.extra-args.outputs.args}}
extra-arguments: ${{needs.extra-args.outputs.args}} -k test_${{ matrix.suite }}
load-test-enabled: false
zap-enabled: false
trivy-fs-enabled: true
trivy-image-config: "trivy.yaml"
tmate-debug: true
test-timeout: 120
test-tox-env: integration-${{ matrix.suite }}
13 changes: 12 additions & 1 deletion charms/worker/k8s/charmcraft.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,19 @@ config:
default: edge
type: string
description: Snap channel of the k8s snap
datastore:
default: dqlite
type: string
description: |
The datastore to use in Canonical Kubernetes. This cannot be changed
after deployment. Allowed values are "dqlite" and "etcd". If "etcd" is
chosen, the charm should be integrated with the etcd charm.
labels:
default: ""
type: string
description: |
Labels can be used to organize and to select subsets of nodes in the
cluster. Declare node labels in key=value format, separated by spaces.
cluster. Declare node labels in key=value format, separated by spaces.
actions:
get-kubeconfig:
Expand All @@ -87,3 +94,7 @@ provides:
interface: k8s-cluster
cos-worker-tokens:
interface: cos-tokens

requires:
etcd:
interface: etcd
218 changes: 218 additions & 0 deletions charms/worker/k8s/lib/charms/kubernetes_libs/v0/etcd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Charm library for the etcd reactive relation.
The module defines an interface for a charm that requires the etcd relation.
It encapsulates the functionality and events related to managing the etcd relation,
including connection, availability of data, and handling of TLS credentials.
It uses events to handle state changes in the etcd relation, such as when a connection is
established (`EtcdConnected`), when etcd data is available (`EtcdAvailable`), and when TLS data
for etcd is available (`EtcdTLSAvailable`).
A class `EtcdReactiveRequires` is defined, which provides an abstraction over the charm's
requires relation to etcd. It encapsulates the functionality to check the status of the
relation, get connection details, and handle client credentials.
This module also provides helper methods for handling client credentials, such as
saving them to local files and retrieving them from the relation data.
You can use this charm library in your charm by adding it as a dependency in your
`charmcraft.yaml` file and then importing the relevant classes and functions.
Example usage:
```python
from charms.kubernetes_libs.v0.etcd import EtcdReactiveRequires
...
def __init__(self, *args):
self.etcd = EtcdReactiveRequires(self)
...
# Handle the events from the relation
self.framework.observe(self.etcd.on.connected, self._on_etcd_connected)
self.framework.observe(self.etcd.on.available, self._on_etcd_available)
self.framework.observe(self.etcd.on.tls_available, self._on_etcd_tls_available)
```
"""

import hashlib
import json
import logging
import os
from functools import cached_property
from typing import Optional

from ops.framework import EventBase, EventSource, Object, ObjectEvents, StoredState
from ops.model import Relation

# The unique Charmhub library identifier, never change it
LIBID = "2d422394fe044d61ad1dc044ed051d1b"

# Increment this major API version when introducing breaking changes
LIBAPI = 0

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 1

log = logging.getLogger(__name__)


class EtcdAvailable(EventBase):
"""Event emitted when the etcd relation data is available."""

pass


class EtcdConnected(EventBase):
"""Event emitted when the etcd relation is connected."""

pass


class EtcdTLSAvailable(EventBase):
"""Event emitted when the etcd relation TLS data is available."""

pass


class EtcdConsumerEvents(ObjectEvents):
"""Events emitted by the etcd translation interface."""

available = EventSource(EtcdAvailable)
connected = EventSource(EtcdConnected)
tls_available = EventSource(EtcdTLSAvailable)


class EtcdReactiveRequires(Object):
"""Requires side of the etcd interface.
This class is a translation interface that wraps the requires side
of the reactive etcd interface.
"""

state = StoredState()
on = EtcdConsumerEvents()

def __init__(self, charm, endpoint="etcd"):
super().__init__(charm, f"relation-{endpoint}")
self.charm = charm
self.endpoint = endpoint

self.state.set_default(
connected=False, available=False, tls_available=False, connection_string=""
)

for event in (
charm.on[endpoint].relation_created,
charm.on[endpoint].relation_joined,
charm.on[endpoint].relation_changed,
charm.on[endpoint].relation_departed,
charm.on[endpoint].relation_broken,
):
self.framework.observe(event, self._check_relation)

def _check_relation(self, _: EventBase):
"""Check if the relation is available and emit the appropriate event."""
if self.relation:
self.state.connected = True
self.on.connected.emit()
# etcd is available only if the connection string is available
if self.get_connection_string():
self.state.available = True
self.on.available.emit()
# etcd tls is available only if the tls data is available
# (i.e. client cert, client key, ca cert)
cert = self.get_client_credentials()
if cert["client_cert"] and cert["client_key"] and cert["client_ca"]:
self.state.tls_available = True
self.on.tls_available.emit()

def _get_dict_hash(self, data: dict) -> str:
"""Generate a SHA-256 hash for a dictionary.
This function converts the dictionary into a JSON string, ensuring it
is sorted in order. It then generates a SHA-256 hash of this string.
Args:
data(dict): The dictionary to be hashed.
Returns:
str: The hexadecimal representation of the hash of the dictionary.
"""
dump = json.dumps(data, sort_keys=True)
hash_obj = hashlib.sha256()
hash_obj.update(dump.encode())
return hash_obj.hexdigest()

@property
def is_ready(self):
"""Check if the relation is available and emit the appropriate event."""
if self.relation:
if self.get_connection_string():
cert = self.get_client_credentials()
if all(cert.get(key) for key in ["client_cert", "client_key", "client_ca"]):
return True
return False

def get_connection_string(self) -> str:
"""Return the connection string for etcd."""
remote_data = self._remote_data
if remote_data:
return remote_data.get("connection_string")
return ""

def get_client_credentials(self) -> dict:
"""Return the client credentials for etcd."""
remote_data = self._remote_data
return {
"client_cert": remote_data.get("client_cert"),
"client_key": remote_data.get("client_key"),
"client_ca": remote_data.get("client_ca"),
}

@cached_property
def relation(self) -> Optional[Relation]:
"""Return the relation object for this interface."""
return self.model.get_relation(self.endpoint)

@property
def _remote_data(self):
"""Return the remote relation data for this interface."""
if not (self.relation and self.relation.units):
return {}

first_unit = next(iter(self.relation.units), None)
data = self.relation.data[first_unit]
return data

def save_client_credentials(self, ca_path, cert_path, key_path):
"""Save all the client certificates for etcd to local files."""
credentials = {"client_key": key_path, "client_cert": cert_path, "client_ca": ca_path}
for key, path in credentials.items():
self._save_remote_data(key, path)

def _save_remote_data(self, key: str, path: str):
"""Save the remote data to a file."""
value = self._remote_data.get(key)
if value:
parent = os.path.dirname(path)
if not os.path.isdir(parent):
os.makedirs(parent)
with open(path, "w") as stream:
stream.write(value)
45 changes: 42 additions & 3 deletions charms/worker/k8s/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
UpdateClusterConfigRequest,
UserFacingClusterConfig,
)
from charms.kubernetes_libs.v0.etcd import EtcdReactiveRequires
from charms.node_base import LabelMaker
from charms.operator_libs_linux.v2.snap import SnapError, SnapState
from charms.operator_libs_linux.v2.snap import ensure as snap_ensure
Expand All @@ -60,6 +61,7 @@
ETC_KUBERNETES = Path("/etc/kubernetes")
KUBECTL_PATH = Path("/snap/k8s/current/bin/kubectl")
K8SD_PORT = 6400
SUPPORTED_DATASTORES = ["dqlite", "etcd"]


class K8sCharm(ops.CharmBase):
Expand Down Expand Up @@ -107,6 +109,7 @@ def __init__(self, *args):

self.framework.observe(self.on.update_status, self._on_update_status)
if self.is_control_plane:
self.etcd = EtcdReactiveRequires(self)
self.framework.observe(self.on.get_kubeconfig_action, self._get_external_kubeconfig)

@status.on_error(
Expand Down Expand Up @@ -205,17 +208,19 @@ def _check_k8sd_ready(self):
self.api_manager.check_k8sd_ready()

@on_error(
ops.WaitingStatus("Failed to bootstrap k8s snap"),
ops.WaitingStatus("Waiting to bootstrap k8s snap"),
AssertionError,
InvalidResponseError,
K8sdConnectionError,
)
def _bootstrap_k8s_snap(self):
"""Bootstrap the k8s snap package."""
"""Bootstrap k8s if it's not already bootstrapped."""
if self.api_manager.is_cluster_bootstrapped():
log.info("K8s cluster already bootstrapped")
return

bootstrap_config = BootstrapConfig()
self._configure_datastore(bootstrap_config)

status.add(ops.MaintenanceStatus("Bootstrapping Cluster"))

Expand All @@ -225,7 +230,6 @@ def _bootstrap_k8s_snap(self):
config_str = {
"bootstrapConfig": yaml.dump(bootstrap_config.dict(by_alias=True, exclude_none=True))
}

payload = CreateClusterRequest(
name=node_name, address=f"{address}:{K8SD_PORT}", config=config_str
)
Expand All @@ -248,6 +252,41 @@ def _configure_cos_integration(self):
if relation := self.model.get_relation("cos-tokens"):
self.collector.request(relation)

def _configure_datastore(self, config: BootstrapConfig):
"""Configure the datastore for the Kubernetes cluster.
Args:
config (BootstrapConfig): The bootstrap configuration object for
the Kubernetes cluster that is being configured. This object
will be modified in-place to include etcd's configuration details.
"""
datastore = self.config.get("datastore")

if datastore not in SUPPORTED_DATASTORES:
log.error(
"Invalid datastore: %s. Supported values: %s",
datastore,
", ".join(SUPPORTED_DATASTORES),
)
status.add(ops.BlockedStatus(f"Invalid datastore: {datastore}"))
assert datastore in SUPPORTED_DATASTORES # nosec

if datastore == "etcd":
log.info("Using etcd as external datastore")
etcd_relation = self.model.get_relation("etcd")

assert etcd_relation, "Missing etcd relation" # nosec
assert self.etcd.is_ready, "etcd is not ready" # nosec

config.datastore = "external"
etcd_config = self.etcd.get_client_credentials()
config.datastore_ca_cert = etcd_config.get("client_ca", "")
config.datastore_client_cert = etcd_config.get("client_cert", "")
config.datastore_client_key = etcd_config.get("client_key", "")
config.datastore_url = self.etcd.get_connection_string()
elif datastore == "dqlite":
log.info("Using dqlite as datastore")

def _revoke_cluster_tokens(self):
"""Revoke tokens for the units in the cluster and k8s-cluster relations.
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ namespace_packages = true
[tool.pylint]
# Ignore too-few-public-methods due to pydantic models
# Ignore no-self-argument due to pydantic validators
disable = "wrong-import-order,redefined-outer-name,too-few-public-methods,no-self-argument,fixme,parse-error"
disable = "wrong-import-order,redefined-outer-name,too-many-instance-attributes,too-few-public-methods,no-self-argument,fixme,parse-error"
# Ignore Pydantic check: https://github.com/pydantic/pydantic/issues/1961
extension-pkg-whitelist = "pydantic" # wokeignore:rule=whitelist

Expand All @@ -54,7 +54,6 @@ ignored-classes = "ProfileManager,InstanceManager,NetworkManager"

[tool.pytest.ini_options]
minversion = "6.0"
log_cli_level = "INFO"

# Linting tools configuration
[tool.ruff]
Expand Down
Loading

0 comments on commit e1e6fa8

Please sign in to comment.