dyff-operator
The dyff-operator manages deployments of Dyff CRDs.
Set a variable for huggingface_access_token to use models from Hugging Face:
variable "huggingface_access_token" {
type = string
}
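The token value should be supplied outside of the committed configuration, for example through a terraform.tfvars file or the TF_VAR_huggingface_access_token environment variable. A minimal tfvars sketch (the token shown is a placeholder, not a real credential):

# terraform.tfvars -- keep out of version control; the token value is a placeholder
huggingface_access_token = "hf_xxxxxxxxxxxxxxxxxxxxxxxx"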
Set up Terraform remote state for the storage outputs that dyff-operator needs:
data "terraform_remote_state" "storage" {
backend = "local"
config = {
path = "/state/file"
}
}
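The locals below assume that the storage configuration exposes a buckets output whose entries carry an s3_url attribute. A minimal sketch of such an output, with illustrative bucket URLs rather than the real storage configuration:

# In the storage configuration (sketch; bucket names and URLs are illustrative)
output "buckets" {
  value = {
    datasets = {
      s3_url = "s3://dyff-datasets"
    }
    # ... plus measurements, modules, outputs, reports, and safetycases entries
  }
}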
Add locals for dyff-operator:
locals {
  crd_path  = "${path.module}/crds/"
  crd_files = fileset(local.crd_path, "*.yaml")
  versions  = yamldecode(file("${path.module}/versions.yaml"))

  models = {
    huggingface_access_token = var.huggingface_access_token
  }

  storage = data.terraform_remote_state.storage.outputs

  storage_urls = {
    DYFF_RESOURCES__DATASETS__STORAGE__URL     = local.storage.buckets["datasets"].s3_url
    DYFF_RESOURCES__MEASUREMENTS__STORAGE__URL = local.storage.buckets["measurements"].s3_url
    DYFF_RESOURCES__MODULES__STORAGE__URL      = local.storage.buckets["modules"].s3_url
    DYFF_RESOURCES__OUTPUTS__STORAGE__URL      = local.storage.buckets["outputs"].s3_url
    DYFF_RESOURCES__REPORTS__STORAGE__URL      = local.storage.buckets["reports"].s3_url
    DYFF_RESOURCES__SAFETYCASES__STORAGE__URL  = local.storage.buckets["safetycases"].s3_url
  }
}
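versions.yaml is assumed to map component names to chart versions so that the Helm release below stays in sync with the bundled CRDs. A minimal sketch of the file (the version number is illustrative):

# versions.yaml (sketch; the version shown is illustrative)
dyff_operator: "0.8.0"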
Create a namespace for dyff-operator:
resource "kubernetes_namespace" "dyff_operator" {
metadata {
name = "dyff-operator"
labels = {
"pod-security.kubernetes.io/enforce" = "restricted"
}
}
}
Deploy the dyff-operator Helm chart:
# https://artifacthub.io/packages/helm/dyff-operator/dyff-operator
resource "helm_release" "dyff_operator" {
  name       = "dyff-operator"
  namespace  = kubernetes_namespace.dyff_operator.metadata[0].name
  repository = "oci://registry.gitlab.com/dyff/charts"
  chart      = "dyff-operator"
  # get dyff-operator version from versions.yaml to synchronize with CRDs
  version = local.versions.dyff_operator

  values = [yamlencode({
    extraEnvVarsConfigMap = merge(local.storage_urls, {
      # SECURITY: Production configurations MUST set this value:
      DYFF_WORKFLOWS__REPORTS__RUN__RUNTIME_CLASS_NAME = "gvisor"

      DYFF_WORKFLOWS__COMMON__STORAGE_TRANSFER__IMAGE       = "registry.gitlab.com/dyff/workflows/storage-transfer:0.2.0"
      DYFF_WORKFLOWS__EVALUATIONS__CLIENT__IMAGE            = "registry.gitlab.com/dyff/workflows/evaluation-client:0.1.2"
      DYFF_WORKFLOWS__EVALUATIONS__VERIFICATION__IMAGE      = "registry.gitlab.com/dyff/workflows/verify-evaluation-output:0.1.1"
      DYFF_WORKFLOWS__MODELS__FETCH__IMAGE                  = "registry.gitlab.com/dyff/workflows/fetch-model:0.1.2"
      DYFF_WORKFLOWS__REPORTS__DOWNLOAD__IMAGE              = "registry.gitlab.com/dyff/workflows/storage-transfer:0.2.0"
      DYFF_WORKFLOWS__REPORTS__RUN__IMAGE                   = "registry.gitlab.com/dyff/workflows/run-report:0.3.10"
      DYFF_WORKFLOWS__REPORTS__RUN__ACTIVE_DEADLINE_SECONDS = "86400"
      DYFF_WORKFLOWS__REPORTS__UPLOAD__IMAGE                = "registry.gitlab.com/dyff/workflows/storage-transfer:0.2.0"
    })
  })]

  depends_on = [kubernetes_manifest.crds]
}
resource "kubernetes_manifest" "crds" {
for_each = local.crd_files
manifest = yamldecode(file("${local.crd_path}${each.key}"))
}
resource "kubernetes_service_account" "evaluation_client" {
metadata {
namespace = "workflows"
name = "evaluation-client"
}
}
resource "kubernetes_service_account" "model_fetcher" {
metadata {
namespace = "workflows"
name = "model-fetcher"
}
}
resource "kubernetes_service_account" "report_runner" {
metadata {
namespace = "workflows"
name = "report-runner"
}
}
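These service accounts live in the workflows namespace, which is not defined in this section; it is assumed to be created elsewhere in the configuration, roughly like the following sketch:

resource "kubernetes_namespace" "workflows" {
  metadata {
    name = "workflows"
  }
}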
resource "kubernetes_storage_class" "dyff_model" {
metadata {
name = "dyff-model"
}
storage_provisioner = "pd.csi.storage.gke.io"
volume_binding_mode = "Immediate"
# Expansion seems to cause problems with read-only volumes
allow_volume_expansion = false
parameters = {
type = "pd-standard"
}
}
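The pd.csi.storage.gke.io provisioner and pd-standard type are specific to GKE persistent disks; on another cluster, substitute that environment's CSI driver and disk type. For example, a roughly equivalent class for the AWS EBS CSI driver might look like this sketch (an assumption for illustration, not part of the Dyff configuration):

resource "kubernetes_storage_class" "dyff_model" {
  metadata {
    name = "dyff-model"
  }

  storage_provisioner = "ebs.csi.aws.com"
  volume_binding_mode = "Immediate"

  # Expansion disabled for the same reason as above
  allow_volume_expansion = false

  parameters = {
    type = "gp3"
  }
}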