src/event/quic/ngx_event_quic_bpf.c - nginx source code

Global variables defined

Data types defined

Functions defined

Macros defined

Source code


  1. /*
  2. * Copyright (C) Nginx, Inc.
  3. */


  4. #include <ngx_config.h>
  5. #include <ngx_core.h>


  /*
   * Environment variable used to hand BPF map descriptors over to a new
   * binary: ngx_quic_bpf_export_maps() builds it and
   * ngx_quic_bpf_import_maps() reads it back.  The value is a sequence of
   * "<map fd>#<address>;" entries, one per socket group.
   */
  6. #define NGX_QUIC_BPF_VARNAME  "NGINX_BPF_MAPS"
  7. #define NGX_QUIC_BPF_VARSEP    ';'    /* terminates each entry */
  8. #define NGX_QUIC_BPF_ADDRSEP   '#'    /* separates map fd from address */


  /*
   * Configuration accessors.
   *
   * Both the "cycle" argument and the whole expansion are parenthesized, so
   * the macros stay correct when used inside a larger expression (for
   * example compared against NULL) or when given an argument that is itself
   * an expression.
   */

  /* quic_bpf module configuration stored in the given cycle */
  #define ngx_quic_bpf_get_conf(cycle)                                          \
      ((ngx_quic_bpf_conf_t *) ngx_get_conf((cycle)->conf_ctx,                  \
                                            ngx_quic_bpf_module))

  /*
   * quic_bpf module configuration of the previous cycle,
   * or NULL when the previous cycle has no conf_ctx
   */
  #define ngx_quic_bpf_get_old_conf(cycle)                                      \
      ((cycle)->old_cycle->conf_ctx                                             \
           ? ngx_quic_bpf_get_conf((cycle)->old_cycle)                          \
           : NULL)

  /* core module configuration stored in the given cycle */
  #define ngx_core_get_conf(cycle)                                              \
      ((ngx_core_conf_t *) ngx_get_conf((cycle)->conf_ctx, ngx_core_module))


  /*
   * A group of QUIC reuseport listening sockets that share one address,
   * together with the BPF map descriptor associated with that address.
   */
  16. typedef struct {
  17.     ngx_queue_t           queue;      /* link in ngx_quic_bpf_conf_t.groups */
  18.     int                   map_fd;     /* descriptor of the group's BPF map */

  19.     struct sockaddr      *sockaddr;   /* address groups are matched by */
  20.     socklen_t             socklen;    /* length of sockaddr */
  /*
   * set to 1 for groups imported from the environment and reset to 0 when a
   * listening socket is added; groups still marked when the maps are
   * exported are closed and dropped
   */
  21.     ngx_uint_t            unused;     /* unsigned  unused:1; */
  22. } ngx_quic_sock_group_t;


  /* Per-cycle configuration and state of the quic_bpf module. */
  23. typedef struct {
  24.     ngx_flag_t            enabled;    /* set by the "quic_bpf" directive */
  /* size passed to ngx_bpf_map_create(); computed as worker_processes * 4 */
  25.     ngx_uint_t            map_size;
  26.     ngx_queue_t           groups;     /* of ngx_quic_sock_group_t */
  27. } ngx_quic_bpf_conf_t;


  /* module callbacks: configuration constructor and init-module handler */
  28. static void *ngx_quic_bpf_create_conf(ngx_cycle_t *cycle);
  29. static ngx_int_t ngx_quic_bpf_module_init(ngx_cycle_t *cycle);

  /* descriptor cleanup: pool cleanup handler and close() wrapper with logging */
  30. static void ngx_quic_bpf_cleanup(void *data);
  31. static ngx_inline void ngx_quic_bpf_close(ngx_log_t *log, int fd,
  32.     const char *name);

  /* socket group lookup, allocation and population */
  33. static ngx_quic_sock_group_t *ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf,
  34.     ngx_listening_t *ls);
  35. static ngx_quic_sock_group_t *ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle,
  36.     struct sockaddr *sa, socklen_t socklen);
  37. static ngx_quic_sock_group_t *ngx_quic_bpf_create_group(ngx_cycle_t *cycle,
  38.     ngx_listening_t *ls);
  39. static ngx_quic_sock_group_t *ngx_quic_bpf_get_group(ngx_cycle_t *cycle,
  40.     ngx_listening_t *ls);
  41. static ngx_int_t ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle,
  42.     ngx_listening_t *ls);
  43. static uint64_t ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log);

  /* passing map descriptors through the NGINX_BPF_MAPS environment variable */
  44. static ngx_int_t ngx_quic_bpf_export_maps(ngx_cycle_t *cycle);
  45. static ngx_int_t ngx_quic_bpf_import_maps(ngx_cycle_t *cycle);

  /* BPF program defined outside this file; loaded in ngx_quic_bpf_create_group() */
  46. extern ngx_bpf_program_t  ngx_quic_reuseport_helper;


  /*
   * Module directives: a single main-level flag, "quic_bpf", whose value is
   * stored in ngx_quic_bpf_conf_t.enabled by ngx_conf_set_flag_slot.
   */
  47. static ngx_command_t  ngx_quic_bpf_commands[] = {

  48.     { ngx_string("quic_bpf"),
  49.       NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_FLAG,
  50.       ngx_conf_set_flag_slot,
  51.       0,
  52.       offsetof(ngx_quic_bpf_conf_t, enabled),
  53.       NULL },

  54.       ngx_null_command
  55. };


  /*
   * Core module context: the module name and the configuration constructor;
   * the remaining slot is left NULL.
   */
  56. static ngx_core_module_t  ngx_quic_bpf_module_ctx = {
  57.     ngx_string("quic_bpf"),
  58.     ngx_quic_bpf_create_conf,
  59.     NULL
  60. };


  /*
   * Module definition: a core module whose only lifecycle hook is the
   * init-module handler ngx_quic_bpf_module_init().
   */
  61. ngx_module_t  ngx_quic_bpf_module = {
  62.     NGX_MODULE_V1,
  63.     &ngx_quic_bpf_module_ctx,              /* module context */
  64.     ngx_quic_bpf_commands,                 /* module directives */
  65.     NGX_CORE_MODULE,                       /* module type */
  66.     NULL,                                  /* init master */
  67.     ngx_quic_bpf_module_init,              /* init module */
  68.     NULL,                                  /* init process */
  69.     NULL,                                  /* init thread */
  70.     NULL,                                  /* exit thread */
  71.     NULL,                                  /* exit process */
  72.     NULL,                                  /* exit master */
  73.     NGX_MODULE_V1_PADDING
  74. };


  75. static void *
  76. ngx_quic_bpf_create_conf(ngx_cycle_t *cycle)
  77. {
  78.     ngx_quic_bpf_conf_t  *bcf;

  79.     bcf = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_bpf_conf_t));
  80.     if (bcf == NULL) {
  81.         return NULL;
  82.     }

  83.     bcf->enabled = NGX_CONF_UNSET;
  84.     bcf->map_size = NGX_CONF_UNSET_UINT;

  85.     ngx_queue_init(&bcf->groups);

  86.     return bcf;
  87. }


  88. static ngx_int_t
  89. ngx_quic_bpf_module_init(ngx_cycle_t *cycle)
  90. {
  91.     ngx_uint_t            i;
  92.     ngx_listening_t      *ls;
  93.     ngx_core_conf_t      *ccf;
  94.     ngx_pool_cleanup_t   *cln;
  95.     ngx_quic_bpf_conf_t  *bcf;

  96.     if (ngx_test_config) {
  97.         /*
  98.          * during config test, SO_REUSEPORT socket option is
  99.          * not set, thus making further processing meaningless
  100.          */
  101.         return NGX_OK;
  102.     }

  103.     ccf = ngx_core_get_conf(cycle);
  104.     bcf = ngx_quic_bpf_get_conf(cycle);

  105.     ngx_conf_init_value(bcf->enabled, 0);

  106.     bcf->map_size = ccf->worker_processes * 4;

  107.     cln = ngx_pool_cleanup_add(cycle->pool, 0);
  108.     if (cln == NULL) {
  109.         goto failed;
  110.     }

  111.     cln->data = bcf;
  112.     cln->handler = ngx_quic_bpf_cleanup;

  113.     if (ngx_inherited && ngx_is_init_cycle(cycle->old_cycle)) {
  114.         if (ngx_quic_bpf_import_maps(cycle) != NGX_OK) {
  115.             goto failed;
  116.         }
  117.     }

  118.     ls = cycle->listening.elts;

  119.     for (i = 0; i < cycle->listening.nelts; i++) {
  120.         if (ls[i].quic && ls[i].reuseport) {
  121.             if (ngx_quic_bpf_group_add_socket(cycle, &ls[i]) != NGX_OK) {
  122.                 goto failed;
  123.             }
  124.         }
  125.     }

  126.     if (ngx_quic_bpf_export_maps(cycle) != NGX_OK) {
  127.         goto failed;
  128.     }

  129.     return NGX_OK;

  130. failed:

  131.     if (ngx_is_init_cycle(cycle->old_cycle)) {
  132.         ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
  133.                       "ngx_quic_bpf_module failed to initialize, check limits");

  134.         /* refuse to start */
  135.         return NGX_ERROR;
  136.     }

  137.     /*
  138.      * returning error now will lead to master process exiting immediately
  139.      * leaving worker processes orphaned, what is really unexpected.
  140.      * Instead, just issue a not about failed initialization and try
  141.      * to cleanup a bit. Still program can be already loaded to kernel
  142.      * for some reuseport groups, and there is no way to revert, so
  143.      * behaviour may be inconsistent.
  144.      */

  145.     ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
  146.                   "ngx_quic_bpf_module failed to initialize properly, ignored."
  147.                   "please check limits and note that nginx state now "
  148.                   "can be inconsistent and restart may be required");

  149.     return NGX_OK;
  150. }


  151. static void
  152. ngx_quic_bpf_cleanup(void *data)
  153. {
  154.     ngx_quic_bpf_conf_t  *bcf = (ngx_quic_bpf_conf_t *) data;

  155.     ngx_queue_t            *q;
  156.     ngx_quic_sock_group_t  *grp;

  157.     for (q = ngx_queue_head(&bcf->groups);
  158.          q != ngx_queue_sentinel(&bcf->groups);
  159.          q = ngx_queue_next(q))
  160.     {
  161.         grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

  162.         ngx_quic_bpf_close(ngx_cycle->log, grp->map_fd, "map");
  163.     }
  164. }


  165. static ngx_inline void
  166. ngx_quic_bpf_close(ngx_log_t *log, int fd, const char *name)
  167. {
  168.     if (close(fd) != -1) {
  169.         return;
  170.     }

  171.     ngx_log_error(NGX_LOG_EMERG, log, ngx_errno,
  172.                   "quic bpf close %s fd:%d failed", name, fd);
  173. }


  174. static ngx_quic_sock_group_t *
  175. ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, ngx_listening_t *ls)
  176. {
  177.     ngx_queue_t            *q;
  178.     ngx_quic_sock_group_t  *grp;

  179.     for (q = ngx_queue_head(&bcf->groups);
  180.          q != ngx_queue_sentinel(&bcf->groups);
  181.          q = ngx_queue_next(q))
  182.     {
  183.         grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

  184.         if (ngx_cmp_sockaddr(ls->sockaddr, ls->socklen,
  185.                              grp->sockaddr, grp->socklen, 1)
  186.             == NGX_OK)
  187.         {
  188.             return grp;
  189.         }
  190.     }

  191.     return NULL;
  192. }


  193. static ngx_quic_sock_group_t *
  194. ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, struct sockaddr *sa,
  195.     socklen_t socklen)
  196. {
  197.     ngx_quic_bpf_conf_t    *bcf;
  198.     ngx_quic_sock_group_t  *grp;

  199.     bcf = ngx_quic_bpf_get_conf(cycle);

  200.     grp = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_sock_group_t));
  201.     if (grp == NULL) {
  202.         return NULL;
  203.     }

  204.     grp->socklen = socklen;
  205.     grp->sockaddr = ngx_palloc(cycle->pool, socklen);
  206.     if (grp->sockaddr == NULL) {
  207.         return NULL;
  208.     }
  209.     ngx_memcpy(grp->sockaddr, sa, socklen);

  210.     ngx_queue_insert_tail(&bcf->groups, &grp->queue);

  211.     return grp;
  212. }


  213. static ngx_quic_sock_group_t *
  214. ngx_quic_bpf_create_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
  215. {
  216.     int                     progfd, failed, flags, rc;
  217.     ngx_quic_bpf_conf_t    *bcf;
  218.     ngx_quic_sock_group_t  *grp;

  219.     bcf = ngx_quic_bpf_get_conf(cycle);

  220.     if (!bcf->enabled) {
  221.         return NULL;
  222.     }

  223.     grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
  224.     if (grp == NULL) {
  225.         return NULL;
  226.     }

  227.     grp->map_fd = ngx_bpf_map_create(cycle->log, BPF_MAP_TYPE_SOCKHASH,
  228.                                      sizeof(uint64_t), sizeof(uint64_t),
  229.                                      bcf->map_size, 0);
  230.     if (grp->map_fd == -1) {
  231.         goto failed;
  232.     }

  233.     flags = fcntl(grp->map_fd, F_GETFD);
  234.     if (flags == -1) {
  235.         ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
  236.                       "quic bpf getfd failed");
  237.         goto failed;
  238.     }

  239.     /* need to inherit map during binary upgrade after exec */
  240.     flags &= ~FD_CLOEXEC;

  241.     rc = fcntl(grp->map_fd, F_SETFD, flags);
  242.     if (rc == -1) {
  243.         ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
  244.                       "quic bpf setfd failed");
  245.         goto failed;
  246.     }

  247.     ngx_bpf_program_link(&ngx_quic_reuseport_helper,
  248.                          "ngx_quic_sockmap", grp->map_fd);

  249.     progfd = ngx_bpf_load_program(cycle->log, &ngx_quic_reuseport_helper);
  250.     if (progfd < 0) {
  251.         goto failed;
  252.     }

  253.     failed = 0;

  254.     if (setsockopt(ls->fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
  255.                    &progfd, sizeof(int))
  256.         == -1)
  257.     {
  258.         ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_socket_errno,
  259.                       "quic bpf setsockopt(SO_ATTACH_REUSEPORT_EBPF) failed");
  260.         failed = 1;
  261.     }

  262.     ngx_quic_bpf_close(cycle->log, progfd, "program");

  263.     if (failed) {
  264.         goto failed;
  265.     }

  266.     ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
  267.                    "quic bpf sockmap created fd:%d", grp->map_fd);
  268.     return grp;

  269. failed:

  270.     if (grp->map_fd != -1) {
  271.         ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
  272.     }

  273.     ngx_queue_remove(&grp->queue);

  274.     return NULL;
  275. }


  276. static ngx_quic_sock_group_t *
  277. ngx_quic_bpf_get_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
  278. {
  279.     ngx_quic_bpf_conf_t    *bcf, *old_bcf;
  280.     ngx_quic_sock_group_t  *grp, *ogrp;

  281.     bcf = ngx_quic_bpf_get_conf(cycle);

  282.     grp = ngx_quic_bpf_find_group(bcf, ls);
  283.     if (grp) {
  284.         return grp;
  285.     }

  286.     old_bcf = ngx_quic_bpf_get_old_conf(cycle);

  287.     if (old_bcf == NULL) {
  288.         return ngx_quic_bpf_create_group(cycle, ls);
  289.     }

  290.     ogrp = ngx_quic_bpf_find_group(old_bcf, ls);
  291.     if (ogrp == NULL) {
  292.         return ngx_quic_bpf_create_group(cycle, ls);
  293.     }

  294.     grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
  295.     if (grp == NULL) {
  296.         return NULL;
  297.     }

  298.     grp->map_fd = dup(ogrp->map_fd);
  299.     if (grp->map_fd == -1) {
  300.         ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
  301.                       "quic bpf failed to duplicate bpf map descriptor");

  302.         ngx_queue_remove(&grp->queue);

  303.         return NULL;
  304.     }

  305.     ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
  306.                    "quic bpf sockmap fd duplicated old:%d new:%d",
  307.                    ogrp->map_fd, grp->map_fd);

  308.     return grp;
  309. }


  310. static ngx_int_t
  311. ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle,  ngx_listening_t *ls)
  312. {
  313.     uint64_t                cookie;
  314.     ngx_quic_bpf_conf_t    *bcf;
  315.     ngx_quic_sock_group_t  *grp;

  316.     bcf = ngx_quic_bpf_get_conf(cycle);

  317.     grp = ngx_quic_bpf_get_group(cycle, ls);

  318.     if (grp == NULL) {
  319.         if (!bcf->enabled) {
  320.             return NGX_OK;
  321.         }

  322.         return NGX_ERROR;
  323.     }

  324.     grp->unused = 0;

  325.     cookie = ngx_quic_bpf_socket_key(ls->fd, cycle->log);
  326.     if (cookie == (uint64_t) NGX_ERROR) {
  327.         return NGX_ERROR;
  328.     }

  329.     /* map[cookie] = socket; for use in kernel helper */
  330.     if (ngx_bpf_map_update(grp->map_fd, &cookie, &ls->fd, BPF_ANY) == -1) {
  331.         ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
  332.                       "quic bpf failed to update socket map key=%xL", cookie);
  333.         return NGX_ERROR;
  334.     }

  335.     ngx_log_debug4(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
  336.                  "quic bpf sockmap fd:%d add socket:%d cookie:0x%xL worker:%ui",
  337.                  grp->map_fd, ls->fd, cookie, ls->worker);

  338.     /* do not inherit this socket */
  339.     ls->ignore = 1;

  340.     return NGX_OK;
  341. }


  342. static uint64_t
  343. ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log)
  344. {
  345.     uint64_t   cookie;
  346.     socklen_t  optlen;

  347.     optlen = sizeof(cookie);

  348.     if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) {
  349.         ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
  350.                       "quic bpf getsockopt(SO_COOKIE) failed");

  351.         return (ngx_uint_t) NGX_ERROR;
  352.     }

  353.     return cookie;
  354. }


  355. static ngx_int_t
  356. ngx_quic_bpf_export_maps(ngx_cycle_t *cycle)
  357. {
  358.     u_char                 *p, *buf;
  359.     size_t                  len;
  360.     ngx_str_t              *var;
  361.     ngx_queue_t            *q;
  362.     ngx_core_conf_t        *ccf;
  363.     ngx_quic_bpf_conf_t    *bcf;
  364.     ngx_quic_sock_group_t  *grp;

  365.     ccf = ngx_core_get_conf(cycle);
  366.     bcf = ngx_quic_bpf_get_conf(cycle);

  367.     len = sizeof(NGX_QUIC_BPF_VARNAME) + 1;

  368.     q = ngx_queue_head(&bcf->groups);

  369.     while (q != ngx_queue_sentinel(&bcf->groups)) {

  370.         grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

  371.         q = ngx_queue_next(q);

  372.         if (grp->unused) {
  373.             /*
  374.              * map was inherited, but it is not used in this configuration;
  375.              * do not pass such map further and drop the group to prevent
  376.              * interference with changes during reload
  377.              */

  378.             ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
  379.             ngx_queue_remove(&grp->queue);

  380.             continue;
  381.         }

  382.         len += NGX_INT32_LEN + 1 + NGX_SOCKADDR_STRLEN + 1;
  383.     }

  384.     len++;

  385.     buf = ngx_palloc(cycle->pool, len);
  386.     if (buf == NULL) {
  387.         return NGX_ERROR;
  388.     }

  389.     p = ngx_cpymem(buf, NGX_QUIC_BPF_VARNAME "=",
  390.                    sizeof(NGX_QUIC_BPF_VARNAME));

  391.     for (q = ngx_queue_head(&bcf->groups);
  392.          q != ngx_queue_sentinel(&bcf->groups);
  393.          q = ngx_queue_next(q))
  394.     {
  395.         grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);

  396.         p = ngx_sprintf(p, "%ud", grp->map_fd);

  397.         *p++ = NGX_QUIC_BPF_ADDRSEP;

  398.         p += ngx_sock_ntop(grp->sockaddr, grp->socklen, p,
  399.                            NGX_SOCKADDR_STRLEN, 1);

  400.         *p++ = NGX_QUIC_BPF_VARSEP;
  401.     }

  402.     *p = '\0';

  403.     var = ngx_array_push(&ccf->env);
  404.     if (var == NULL) {
  405.         return NGX_ERROR;
  406.     }

  407.     var->data = buf;
  408.     var->len = sizeof(NGX_QUIC_BPF_VARNAME) - 1;

  409.     return NGX_OK;
  410. }


  411. static ngx_int_t
  412. ngx_quic_bpf_import_maps(ngx_cycle_t *cycle)
  413. {
  414.     int                     s;
  415.     u_char                 *inherited, *p, *v;
  416.     ngx_uint_t              in_fd;
  417.     ngx_addr_t              tmp;
  418.     ngx_quic_bpf_conf_t    *bcf;
  419.     ngx_quic_sock_group_t  *grp;

  420.     inherited = (u_char *) getenv(NGX_QUIC_BPF_VARNAME);

  421.     if (inherited == NULL) {
  422.         return NGX_OK;
  423.     }

  424.     bcf = ngx_quic_bpf_get_conf(cycle);

  425. #if (NGX_SUPPRESS_WARN)
  426.     s = -1;
  427. #endif

  428.     in_fd = 1;

  429.     for (p = inherited, v = p; *p; p++) {

  430.         switch (*p) {

  431.         case NGX_QUIC_BPF_ADDRSEP:

  432.             if (!in_fd) {
  433.                 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
  434.                               "quic bpf failed to parse inherited env");
  435.                 return NGX_ERROR;
  436.             }
  437.             in_fd = 0;

  438.             s = ngx_atoi(v, p - v);
  439.             if (s == NGX_ERROR) {
  440.                 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
  441.                               "quic bpf failed to parse inherited map fd");
  442.                 return NGX_ERROR;
  443.             }

  444.             v = p + 1;
  445.             break;

  446.         case NGX_QUIC_BPF_VARSEP:

  447.             if (in_fd) {
  448.                 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
  449.                               "quic bpf failed to parse inherited env");
  450.                 return NGX_ERROR;
  451.             }
  452.             in_fd = 1;

  453.             grp = ngx_pcalloc(cycle->pool,
  454.                               sizeof(ngx_quic_sock_group_t));
  455.             if (grp == NULL) {
  456.                 return NGX_ERROR;
  457.             }

  458.             grp->map_fd = s;

  459.             if (ngx_parse_addr_port(cycle->pool, &tmp, v, p - v)
  460.                 != NGX_OK)
  461.             {
  462.                 ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
  463.                               "quic bpf failed to parse inherited"
  464.                               " address '%*s'", p - v , v);

  465.                 ngx_quic_bpf_close(cycle->log, s, "inherited map");

  466.                 return NGX_ERROR;
  467.             }

  468.             grp->sockaddr = tmp.sockaddr;
  469.             grp->socklen = tmp.socklen;

  470.             grp->unused = 1;

  471.             ngx_queue_insert_tail(&bcf->groups, &grp->queue);

  472.             ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
  473.                            "quic bpf sockmap inherited with "
  474.                            "fd:%d address:%*s",
  475.                            grp->map_fd, p - v, v);
  476.             v = p + 1;
  477.             break;

  478.         default:
  479.             break;
  480.         }
  481.     }

  482.     return NGX_OK;
  483. }