diff --git a/CHANGES.md b/CHANGES.md index bec4a07b..7ffe8b3c 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,12 @@ Notable changes between versions. ## Latest +#### Google Cloud + +* Add support for multi-controller clusters (i.e. multi-master) ([#54](https://github.com/poseidon/typhoon/issues/54), [#190](https://github.com/poseidon/typhoon/pull/190)) + * Switch from Google Cloud network load balancer to a TCP proxy load balancer. Avoid a [bug](https://issuetracker.google.com/issues/67366622) in Google network load balancers that limited clusters to only bootstrapping one controller node. + * Add TCP health check for apiserver pods on controllers. Replace kubelet check approximation. + #### Addons * Update kube-state-metrics from v1.3.0 to v1.3.1 diff --git a/docs/google-cloud.md b/docs/google-cloud.md index c36c577e..28d0a811 100644 --- a/docs/google-cloud.md +++ b/docs/google-cloud.md @@ -252,7 +252,7 @@ resource "google_dns_managed_zone" "zone-for-clusters" { | Name | Description | Default | Example | |:-----|:------------|:--------|:--------| -| controller_count | Number of controllers (i.e. masters) | 1 | 1 | +| controller_count | Number of controllers (i.e. masters) | 1 | 3 | | worker_count | Number of workers | 1 | 3 | | controller_type | Machine type for controllers | "n1-standard-1" | See below | | worker_type | Machine type for workers | "n1-standard-1" | See below | @@ -268,9 +268,6 @@ resource "google_dns_managed_zone" "zone-for-clusters" { Check the list of valid [machine types](https://cloud.google.com/compute/docs/machine-types). -!!! warning - Set controller_count to 1. A bug in Google Cloud network load balancer health checking prevents multiple controllers from bootstrapping. There are workarounds, but they all involve tradeoffs we're uncomfortable recommending. See [#54](https://github.com/poseidon/typhoon/issues/54). 
- #### Preemption Add `worker_preemeptible = "true"` to allow worker nodes to be [preempted](https://cloud.google.com/compute/docs/instances/preemptible) at random, but pay [significantly](https://cloud.google.com/compute/pricing) less. Clusters tolerate stopping instances fairly well (reschedules pods, but cannot drain) and preemption provides a nice reward for running fault-tolerant cluster systems.` diff --git a/docs/topics/performance.md b/docs/topics/performance.md index 68ff03d4..5b1fd647 100644 --- a/docs/topics/performance.md +++ b/docs/topics/performance.md @@ -9,7 +9,7 @@ Provisioning times vary based on the platform. Sampling the time to create (appl | AWS | 6 min | 5 min | | Bare-Metal | 10-14 min | NA | | Digital Ocean | 3 min 30 sec | 20 sec | -| Google Cloud | 4 min | 4 min 30 sec | +| Google Cloud | 7 min | 4 min 30 sec | Notes: diff --git a/google-cloud/container-linux/kubernetes/apiserver.tf b/google-cloud/container-linux/kubernetes/apiserver.tf index f7f41dba..573db376 100644 --- a/google-cloud/container-linux/kubernetes/apiserver.tf +++ b/google-cloud/container-linux/kubernetes/apiserver.tf @@ -1,10 +1,5 @@ -# Static IPv4 address for the Network Load Balancer -resource "google_compute_address" "controllers-ip" { - name = "${var.cluster_name}-controllers-ip" -} - -# DNS record for the Network Load Balancer -resource "google_dns_record_set" "controllers" { +# TCP Proxy load balancer DNS record +resource "google_dns_record_set" "apiserver" { # DNS Zone name where record should be created managed_zone = "${var.dns_zone_name}" @@ -13,44 +8,88 @@ resource "google_dns_record_set" "controllers" { type = "A" ttl = 300 - # IPv4 address of controllers' network load balancer - rrdatas = ["${google_compute_address.controllers-ip.address}"] + # IPv4 address of apiserver TCP Proxy load balancer + rrdatas = ["${google_compute_global_address.apiserver-ipv4.address}"] } -# Network Load Balancer for controllers -resource "google_compute_forwarding_rule" 
"controller-https-rule" { - name = "${var.cluster_name}-controller-https-rule" - ip_address = "${google_compute_address.controllers-ip.address}" +# Static IPv4 address for the TCP Proxy Load Balancer +resource "google_compute_global_address" "apiserver-ipv4" { + name = "${var.cluster_name}-apiserver-ip" + ip_version = "IPV4" +} + +# Forward IPv4 TCP traffic to the TCP proxy load balancer +resource "google_compute_global_forwarding_rule" "apiserver" { + name = "${var.cluster_name}-apiserver" + ip_address = "${google_compute_global_address.apiserver-ipv4.address}" + ip_protocol = "TCP" port_range = "443" - target = "${google_compute_target_pool.controllers.self_link}" + target = "${google_compute_target_tcp_proxy.apiserver.self_link}" } -# Target pool of instances for the controller(s) Network Load Balancer -resource "google_compute_target_pool" "controllers" { - name = "${var.cluster_name}-controller-pool" +# Global TCP Proxy Load Balancer for apiservers +resource "google_compute_target_tcp_proxy" "apiserver" { + name = "${var.cluster_name}-apiserver" + description = "Distribute TCP load across ${var.cluster_name} controllers" + backend_service = "${google_compute_backend_service.apiserver.self_link}" +} - instances = [ - "${google_compute_instance.controllers.*.self_link}", - ] - - health_checks = [ - "${google_compute_http_health_check.kubelet.name}", - ] +# Global backend service backed by unmanaged instance groups +resource "google_compute_backend_service" "apiserver" { + name = "${var.cluster_name}-apiserver" + description = "${var.cluster_name} apiserver service" + protocol = "TCP" + port_name = "apiserver" session_affinity = "NONE" + timeout_sec = "60" + + # controller(s) spread across zonal instance groups + backend { + group = "${google_compute_instance_group.controllers.0.self_link}" + } + backend { + group = "${google_compute_instance_group.controllers.1.self_link}" + } + backend { + group = "${google_compute_instance_group.controllers.2.self_link}" + } + 
+ health_checks = ["${google_compute_health_check.apiserver.self_link}"] } -# Kubelet HTTP Health Check -resource "google_compute_http_health_check" "kubelet" { - name = "${var.cluster_name}-kubelet-health" - description = "Health check Kubelet health host port" +# Instance group of heterogeneous (unmanaged) controller instances +resource "google_compute_instance_group" "controllers" { + count = "${length(local.zones)}" - timeout_sec = 5 + name = "${format("%s-controllers-%s", var.cluster_name, element(local.zones, count.index))}" + zone = "${element(local.zones, count.index)}" + + named_port { + name = "apiserver" + port = "443" + } + + # add instances in the zone into the instance group + instances = [ + "${matchkeys(google_compute_instance.controllers.*.self_link, + google_compute_instance.controllers.*.zone, + list(element(local.zones, count.index)))}" + ] +} + +# TCP health check for apiserver +resource "google_compute_health_check" "apiserver" { + name = "${var.cluster_name}-apiserver-tcp-health" + description = "TCP health check for kube-apiserver" + + timeout_sec = 5 check_interval_sec = 5 - healthy_threshold = 2 - unhealthy_threshold = 4 + healthy_threshold = 1 + unhealthy_threshold = 3 - port = 10255 - request_path = "/healthz" + tcp_health_check { + port = "443" + } } diff --git a/google-cloud/container-linux/kubernetes/controllers.tf b/google-cloud/container-linux/kubernetes/controllers.tf index 9acce2dc..37e780e0 100644 --- a/google-cloud/container-linux/kubernetes/controllers.tf +++ b/google-cloud/container-linux/kubernetes/controllers.tf @@ -19,12 +19,19 @@ data "google_compute_zones" "all" { region = "${var.region}" } +locals { + # TCP proxy load balancers require a fixed number of zonal backends. Spread + # controllers over up to 3 zones, since all GCP regions have at least 3. 
+ zones = "${slice(data.google_compute_zones.all.names, 0, 3)}" + controllers_ipv4_public = ["${google_compute_instance.controllers.*.network_interface.0.access_config.0.assigned_nat_ip}"] +} + # Controller instances resource "google_compute_instance" "controllers" { count = "${var.controller_count}" name = "${var.cluster_name}-controller-${count.index}" - zone = "${element(data.google_compute_zones.all.names, count.index)}" + zone = "${element(local.zones, count.index)}" machine_type = "${var.controller_type}" metadata { @@ -51,10 +58,6 @@ resource "google_compute_instance" "controllers" { tags = ["${var.cluster_name}-controller"] } -locals { - controllers_ipv4_public = ["${google_compute_instance.controllers.*.network_interface.0.access_config.0.assigned_nat_ip}"] -} - # Controller Container Linux Config data "template_file" "controller_config" { count = "${var.controller_count}" diff --git a/google-cloud/container-linux/kubernetes/ssh.tf b/google-cloud/container-linux/kubernetes/ssh.tf index 692fbb24..bba9b9fc 100644 --- a/google-cloud/container-linux/kubernetes/ssh.tf +++ b/google-cloud/container-linux/kubernetes/ssh.tf @@ -66,7 +66,7 @@ resource "null_resource" "bootkube-start" { depends_on = [ "module.bootkube", "module.workers", - "google_dns_record_set.controllers", + "google_dns_record_set.apiserver", "null_resource.copy-controller-secrets", ]