dyff-operator

dyff-operator manages deployments of Dyff CRDs.

Declare the huggingface_access_token variable, which is needed to use models from Hugging Face:

variable "huggingface_access_token" {
  description = "Hugging Face access token; exposed to the rest of the module as local.models.huggingface_access_token."
  type        = string

  # The token is a credential: redact it from plan and apply output.
  sensitive = true
}

Read the Terraform remote state of the storage service that dyff-operator depends on:

data "terraform_remote_state" "storage" {
  # State is read from a file on the local filesystem.
  backend = "local"

  config = {
    # Location of the state file whose outputs are surfaced as local.storage.
    path = "/state/file"
  }
}

Add locals for dyff-operator:

locals {
  # Directory holding the CRD manifests (the trailing slash is relied on when
  # file paths are built from it) and the set of YAML file names inside it.
  crd_path  = "${path.module}/crds/"
  crd_files = fileset(local.crd_path, "*.yaml")

  # Decoded contents of versions.yaml, stored next to this module.
  versions = yamldecode(file("${path.module}/versions.yaml"))

  # Model-related settings derived from input variables.
  models = {
    huggingface_access_token = var.huggingface_access_token
  }

  # Outputs published by the storage remote state.
  storage = data.terraform_remote_state.storage.outputs

  # One DYFF_RESOURCES__<KIND>__STORAGE__URL entry per bucket, each pointing at
  # that bucket's s3_url output.
  storage_urls = {
    for kind in ["datasets", "measurements", "modules", "outputs", "reports", "safetycases"] :
    "DYFF_RESOURCES__${upper(kind)}__STORAGE__URL" => local.storage.buckets[kind].s3_url
  }
}

Create namespace for dyff-operator:

resource "kubernetes_namespace" "dyff_operator" {
  metadata {
    # Enforce the "restricted" Pod Security Standard on every pod in the namespace.
    labels = {
      "pod-security.kubernetes.io/enforce" = "restricted"
    }

    name = "dyff-operator"
  }
}

Deploy the dyff-operator Helm chart:

# Chart reference: https://artifacthub.io/packages/helm/dyff-operator/dyff-operator
resource "helm_release" "dyff_operator" {
  # The CRD manifests are applied before the chart is installed.
  depends_on = [kubernetes_manifest.crds]

  name      = "dyff-operator"
  namespace = kubernetes_namespace.dyff_operator.metadata[0].name

  repository = "oci://registry.gitlab.com/dyff/charts"
  chart      = "dyff-operator"

  # The chart version is read from versions.yaml so that it stays synchronized
  # with the CRDs.
  version = local.versions.dyff_operator

  values = [yamlencode({
    # Bucket URLs from local.storage_urls combined with the workflow settings below.
    extraEnvVarsConfigMap = merge(local.storage_urls, {
      # Images shared across workflows
      DYFF_WORKFLOWS__COMMON__STORAGE_TRANSFER__IMAGE = "registry.gitlab.com/dyff/workflows/storage-transfer:0.2.0"

      # Evaluation workflow images
      DYFF_WORKFLOWS__EVALUATIONS__CLIENT__IMAGE       = "registry.gitlab.com/dyff/workflows/evaluation-client:0.1.2"
      DYFF_WORKFLOWS__EVALUATIONS__VERIFICATION__IMAGE = "registry.gitlab.com/dyff/workflows/verify-evaluation-output:0.1.1"

      # Model workflow images
      DYFF_WORKFLOWS__MODELS__FETCH__IMAGE = "registry.gitlab.com/dyff/workflows/fetch-model:0.1.2"

      # Report workflow images and limits
      DYFF_WORKFLOWS__REPORTS__DOWNLOAD__IMAGE              = "registry.gitlab.com/dyff/workflows/storage-transfer:0.2.0"
      DYFF_WORKFLOWS__REPORTS__RUN__IMAGE                   = "registry.gitlab.com/dyff/workflows/run-report:0.3.10"
      DYFF_WORKFLOWS__REPORTS__RUN__ACTIVE_DEADLINE_SECONDS = "86400"
      DYFF_WORKFLOWS__REPORTS__UPLOAD__IMAGE                = "registry.gitlab.com/dyff/workflows/storage-transfer:0.2.0"

      # SECURITY: Production configurations MUST set this value:
      DYFF_WORKFLOWS__REPORTS__RUN__RUNTIME_CLASS_NAME = "gvisor"
    })
  })]
}

resource "kubernetes_manifest" "crds" {
  # One manifest per YAML file name discovered in the CRD directory.
  for_each = local.crd_files

  # local.crd_path ends with "/", so plain concatenation yields the file path.
  manifest = yamldecode(file(join("", [local.crd_path, each.value])))
}

resource "kubernetes_service_account" "evaluation_client" {
  metadata {
    name = "evaluation-client"

    # NOTE(review): the "workflows" namespace is not created in the visible
    # configuration; confirm it exists before this is applied.
    namespace = "workflows"
  }
}

resource "kubernetes_service_account" "model_fetcher" {
  metadata {
    name = "model-fetcher"

    # NOTE(review): the "workflows" namespace is not created in the visible
    # configuration; confirm it exists before this is applied.
    namespace = "workflows"
  }
}

resource "kubernetes_service_account" "report_runner" {
  metadata {
    name = "report-runner"

    # NOTE(review): the "workflows" namespace is not created in the visible
    # configuration; confirm it exists before this is applied.
    namespace = "workflows"
  }
}

resource "kubernetes_storage_class" "dyff_model" {
  metadata {
    name = "dyff-model"
  }

  # Volumes are provisioned by the GKE persistent disk CSI driver as
  # "pd-standard" disks.
  storage_provisioner = "pd.csi.storage.gke.io"
  parameters = {
    type = "pd-standard"
  }

  # Bind and provision a volume as soon as its claim is created.
  volume_binding_mode = "Immediate"

  # Expansion seems to cause problems with read-only volumes, so it stays off.
  allow_volume_expansion = false
}