Nginx-Ingress-Controller端口映射原理解析

118

我们通过自定义的ingress部署文件部署了ingress之后,会被controller自动解析到对应的服务,然后用户访问就能直接访问到服务了。但是这里有个问题需要注意:ingress域名 -> controller这一段是如何实现的?原理是什么?我这里搞个案例给大家说明一下

前提

假如Nginx-Ingress-Controller的部署方式是DaemonSet,则每个节点上都会有一个controller的pod;假如部署方式是Deployment,则只有副本被调度到的节点上才有该pod,此时通常通过NodePort类型的Service来暴露端口。NodePort方式的原理比较简单,这里就不做讨论了,本文只讨论DaemonSet部署方式下(配合hostPort)controller的pod的端口映射原理。

示例

  1. 新建一个ingress的部署文件

apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  annotations:
    nginx.ingress.kubernetes.io/proxy-body-size: 1024m
    nginx.ingress.kubernetes.io/rewrite-target: /
  name: grafana7
  namespace: frontend-ingress
spec:
  ingressClassName: nginx
  rules:
  - host: grafana7.test.cn
    http:
      paths:
      - backend:
          service:
            name: grafana7
            port:
              number: 3000
        path: /
        pathType: Prefix
  2. ingress生效之后,我们可以进入到ingress-controller的pod里面,查看到自动生成并已生效的nginx配置

## start server grafana7.test.cn
server {
        server_name grafana7.test.cn ;

        listen 80  ;
        listen 443  ssl http2 ;

        set $proxy_upstream_name "-";

        ssl_certificate_by_lua_block {
                certificate.call()
        }

        location / {

                set $namespace      "frontend-ingress";
                set $ingress_name   "grafana7";
                set $service_name   "grafana7";
                set $service_port   "3000";
                set $location_path  "/";
                set $global_rate_limit_exceeding n;

                rewrite_by_lua_block {
                        lua_ingress.rewrite({
                                force_ssl_redirect = false,
                                ssl_redirect = true,
                                force_no_ssl_redirect = false,
                                preserve_trailing_slash = false,
                                use_port_in_redirects = false,
                                global_throttle = { namespace = "", limit = 0, window_size = 0, key = { }, ignored_cidrs = { } },
                        })
                        balancer.rewrite()
                        plugins.run()
                }

                # be careful with `access_by_lua_block` and `satisfy any` directives as satisfy any
                # will always succeed when there's `access_by_lua_block` that does not have any lua code doing `ngx.exit(ngx.DECLINED)`
                # other authentication method such as basic auth or external auth useless - all requests will be allowed.
                #access_by_lua_block {
                #}

                header_filter_by_lua_block {
                        lua_ingress.header()
                        plugins.run()
                }

                body_filter_by_lua_block {
                        plugins.run()
                }

                log_by_lua_block {
                        balancer.log()

                        monitor.call()

                        plugins.run()
                }

                port_in_redirect off;

                set $balancer_ewma_score -1;
                set $proxy_upstream_name "frontend-ingress-grafana7-3000";
                set $proxy_host          $proxy_upstream_name;
                set $pass_access_scheme  $scheme;

                set $pass_server_port    $server_port;

                set $best_http_host      $http_host;
                set $pass_port           $pass_server_port;

                set $proxy_alternative_upstream_name "";

                client_max_body_size                    1024m;

                proxy_set_header Host                   $best_http_host;

                # Pass the extracted client certificate to the backend

                # Allow websocket connections
                proxy_set_header                        Upgrade           $http_upgrade;

                proxy_set_header                        Connection        $connection_upgrade;

                proxy_set_header X-Request-ID           $req_id;
                proxy_set_header X-Real-IP              $remote_addr;

                proxy_set_header X-Forwarded-For        $remote_addr;

                proxy_set_header X-Forwarded-Host       $best_http_host;
                proxy_set_header X-Forwarded-Port       $pass_port;
                proxy_set_header X-Forwarded-Proto      $pass_access_scheme;
                proxy_set_header X-Forwarded-Scheme     $pass_access_scheme;

                proxy_set_header X-Scheme               $pass_access_scheme;

                # Pass the original X-Forwarded-For
                proxy_set_header X-Original-Forwarded-For $http_x_forwarded_for;

                # mitigate HTTPoxy Vulnerability
                # https://www.nginx.com/blog/mitigating-the-httpoxy-vulnerability-with-nginx/
                proxy_set_header Proxy                  "";

                # Custom headers to proxied server

                proxy_connect_timeout                   5s;
                proxy_send_timeout                      60s;
                proxy_read_timeout                      60s;

                proxy_buffering                         off;
                proxy_buffer_size                       4k;
                proxy_buffers                           4 4k;

                proxy_max_temp_file_size                1024m;

                proxy_request_buffering                 on;
                proxy_http_version                      1.1;

                proxy_cookie_domain                     off;
                proxy_cookie_path                       off;

                # In case of errors try the next upstream server before returning an error
                proxy_next_upstream                     error timeout;
                proxy_next_upstream_timeout             0;
                proxy_next_upstream_tries               3;

                proxy_pass http://upstream_balancer;

                proxy_redirect                          off;

        }

}
## end server grafana7.test.cn
  3. 接着通过前端Nginx反向代理,或者修改本地hosts文件的方式,把域名映射到K8S节点的80或443端口,即可访问

upstream k8s_80 {
    server 10.x.x.x:80;
    server 10.x.x.x:80;
    server 10.x.x.x:80;
    server 10.x.x.x:80;
}

原理剖析

我们通过查看controller的daemonset的详细文件可以看到,端口的映射这里使用了hostPort

ports:
  - containerPort: 80
    hostPort: 80
    name: http
    protocol: TCP
  - containerPort: 443
    hostPort: 443
    name: https
    protocol: TCP

经过查看官方文档,可以看到hostPort是由CNI的portmap插件通过iptables的DNAT规则实现的:凡是发往本机80和443端口的请求(不限来源),都会被转发到controller pod的IP地址的80、443端口

可以到任意节点上查看示例配置如下:

iptables -S -t nat | grep -E ':80|:443'
-A CNI-DN-6c162218cbbfdbd8f6c5d -p tcp -m tcp --dport 80 -j DNAT --to-destination 10.42.1.11:80
-A CNI-DN-6c162218cbbfdbd8f6c5d -p tcp -m tcp --dport 443 -j DNAT --to-destination 10.42.1.11:443


iptables -L -t nat | grep -E ':80|:443'
DNAT       tcp  --  anywhere             anywhere             tcp dpt:http to:10.42.1.11:80
DNAT       tcp  --  anywhere             anywhere             tcp dpt:https to:10.42.1.11:443

其中10.42.1.11的地址就是controller-pod的IP地址

我们再查看一下-S输出的规则中CNI规则链(CNI-DN-开头的chain)的详情:

iptables -L CNI-DN-6c162218cbbfdbd8f6c5d -t nat -v -n

Chain CNI-DN-6c162218cbbfdbd8f6c5d (1 references)
 pkts bytes target     prot opt in     out     source               destination         
    0     0 CNI-HOSTPORT-SETMARK  tcp  --  *      *       10.42.1.11           0.0.0.0/0            tcp dpt:80
    0     0 CNI-HOSTPORT-SETMARK  tcp  --  *      *       127.0.0.1            0.0.0.0/0            tcp dpt:80
 213K   13M DNAT       tcp  --  *      *       0.0.0.0/0            0.0.0.0/0            tcp dpt:80 to:10.42.1.11:80
    0     0 CNI-HOSTPORT-SETMARK  tcp  --  *      *       10.42.1.11           0.0.0.0/0            tcp dpt:443
    0     0 CNI-HOSTPORT-SETMARK  tcp  --  *      *       127.0.0.1            0.0.0.0/0            tcp dpt:443
  12M  739M DNAT       tcp  --  *      *       0.0.0.0/0            0.0.0.0/0            tcp dpt:443 to:10.42.1.11:443

接着查看一下该CNI规则链被引用的位置

iptables -L -t nat -v -n | grep CNI-DN-6c162218cbbfdbd8f6c5d

Chain CNI-DN-6c162218cbbfdbd8f6c5d (1 references)
  13M  752M CNI-DN-6c162218cbbfdbd8f6c5d  tcp  --  *      *       0.0.0.0/0            0.0.0.0/0            /* dnat name: "k8s-pod-network" id: "94b32bad2bc576900d5d716897c0adbdc0b3435a1684578730f06727e0cbc4fb" */ multiport dports 80,443

最后我们再附上hostPort和NodePort的区别

| 对比项 | hostPort | NodePort |
| --- | --- | --- |
| 绑定范围 | 仅绑定到 Pod 所在节点的端口 | 绑定到集群中所有节点的端口 |
| 实现方式 | 通过 iptables 的 DNAT 规则实现 | 通过 kube-proxy 的 iptables 或 IPVS 规则实现 |
| 端口冲突检查 | 仅在 Pod 所在节点上检查端口冲突 | 在所有节点上检查端口冲突 |
| 适用范围 | 较差,仅适用于单个节点 | 较好,适用于整个集群 |