QUIC: ngx_quic_bpf module.

The quic kernel bpf helper inspects packet payload for DCID, extracts key
and routes the packet into socket matching the key.

Due to reuseport feature, each worker owns a personal socket, which is
identified by the same key, used to create DCID.

BPF objects are locked in RAM and are subject to RLIMIT_MEMLOCK.
The "ulimit -l" command may be used to setup proper limits, if maps
cannot be created with EPERM or updated with ETOOLONG.
This commit is contained in:
Vladimir Homutov 2020-12-25 15:01:15 +03:00
parent ff201f3fe3
commit 5a3c80e12a
12 changed files with 1145 additions and 0 deletions

View file

@ -1345,6 +1345,22 @@ if [ $USE_OPENSSL$USE_OPENSSL_QUIC = YESYES ]; then
ngx_module_order=
. auto/module
if [ $NGX_QUIC_BPF$BPF_FOUND$SO_COOKIE_FOUND = YESYESYES ]; then
ngx_module_type=CORE
ngx_module_name=ngx_quic_bpf_module
ngx_module_incs=
ngx_module_deps=
ngx_module_srcs="src/event/quic/ngx_event_quic_bpf.c \
src/event/quic/ngx_event_quic_bpf_code.c"
ngx_module_libs=
ngx_module_link=YES
ngx_module_order=
. auto/module
have=NGX_QUIC_BPF . auto/have
fi
fi

View file

@ -45,6 +45,8 @@ USE_THREADS=NO
NGX_FILE_AIO=NO
NGX_QUIC_BPF=YES
HTTP=YES
NGX_HTTP_LOG_PATH=
@ -170,6 +172,7 @@ NGX_GOOGLE_PERFTOOLS=NO
NGX_CPP_TEST=NO
BPF_FOUND=NO
SO_COOKIE_FOUND=NO
NGX_LIBATOMIC=NO
@ -216,6 +219,8 @@ do
--with-file-aio) NGX_FILE_AIO=YES ;;
--without-quic_bpf_module) NGX_QUIC_BPF=NO ;;
--with-ipv6)
NGX_POST_CONF_MSG="$NGX_POST_CONF_MSG
$0: warning: the \"--with-ipv6\" option is deprecated"
@ -450,6 +455,8 @@ cat << END
--with-file-aio enable file AIO support
--without-quic_bpf_module disable ngx_quic_bpf_module
--with-http_ssl_module enable ngx_http_ssl_module
--with-http_quic_module enable ngx_http_quic_module
--with-http_v2_module enable ngx_http_v2_module

View file

@ -234,3 +234,41 @@ if [ $ngx_found = yes ]; then
CORE_SRCS="$CORE_SRCS src/core/ngx_bpf.c"
CORE_DEPS="$CORE_DEPS src/core/ngx_bpf.h"
fi
# SO_COOKIE socket option
ngx_feature="SO_COOKIE"
ngx_feature_name="NGX_HAVE_SO_COOKIE"
ngx_feature_run=no
ngx_feature_incs="#include <sys/socket.h>
#include <stdint.h>"
ngx_feature_path=
ngx_feature_libs=
ngx_feature_test="socklen_t optlen = sizeof(uint64_t);
uint64_t cookie;
getsockopt(0, SOL_SOCKET, SO_COOKIE, &cookie, &optlen)"
. auto/feature
if [ $ngx_found = yes ]; then
SO_COOKIE_FOUND=YES
have=NGX_HAVE_SO_COOKIE . auto/have
fi
# ngx_quic_bpf module uses sockhash to select socket from reuseport group,
# support appeared in Linux-5.7:
#
# commit: 9fed9000c5c6cacfcaaa48aff74818072ae294cc
# bpf: Allow selecting reuseport socket from a SOCKMAP/SOCKHASH
#
if [ $NGX_QUIC_BPF$BPF_FOUND = YESYES ]; then
echo $ngx_n "checking for kernel with reuseport/BPF support...$ngx_c"
if [ $version -lt 329472 ]; then
echo " not found (at least 5.7 is required)"
NGX_QUIC_BPF=NO
else
echo " found"
fi
fi

View file

@ -680,6 +680,9 @@ ngx_exec_new_binary(ngx_cycle_t *cycle, char *const *argv)
ls = cycle->listening.elts;
for (i = 0; i < cycle->listening.nelts; i++) {
if (ls[i].ignore) {
continue;
}
p = ngx_sprintf(p, "%ud;", ls[i].fd);
}

View file

@ -0,0 +1,113 @@
#!/bin/bash
export LANG=C
set -e
if [ $# -lt 1 ]; then
echo "Usage: PROGNAME=foo LICENSE=bar $0 <bpf object file>"
exit 1
fi
self=$0
filename=$1
funcname=$PROGNAME
generate_head()
{
cat << END
/* AUTO-GENERATED, DO NOT EDIT. */
#include <stddef.h>
#include <stdint.h>
#include "ngx_bpf.h"
END
}
generate_tail()
{
cat << END
ngx_bpf_program_t $PROGNAME = {
.relocs = bpf_reloc_prog_$funcname,
.nrelocs = sizeof(bpf_reloc_prog_$funcname)
/ sizeof(bpf_reloc_prog_$funcname[0]),
.ins = bpf_insn_prog_$funcname,
.nins = sizeof(bpf_insn_prog_$funcname)
/ sizeof(bpf_insn_prog_$funcname[0]),
.license = "$LICENSE",
.type = BPF_PROG_TYPE_SK_REUSEPORT,
};
END
}
process_relocations()
{
echo "static ngx_bpf_reloc_t bpf_reloc_prog_$funcname[] = {"
objdump -r $filename | awk '{
if (enabled && $NF > 0) {
off = strtonum(sprintf("0x%s", $1));
name = $3;
printf(" { \"%s\", %d },\n", name, off/8);
}
if ($1 == "OFFSET") {
enabled=1;
}
}'
echo "};"
echo
}
process_section()
{
echo "static struct bpf_insn bpf_insn_prog_$funcname[] = {"
echo " /* opcode dst src offset imm */"
section_info=$(objdump -h $filename --section=$funcname | grep "1 $funcname")
# dd doesn't know hex
length=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f3))
offset=$(printf "%d" 0x$(echo $section_info | cut -d ' ' -f6))
for ins in $(dd if="$filename" bs=1 count=$length skip=$offset status=none | xxd -p -c 8)
do
opcode=0x${ins:0:2}
srcdst=0x${ins:2:2}
# bytes are dumped in LE order
offset=0x${ins:6:2}${ins:4:2} # short
immedi=0x${ins:14:2}${ins:12:2}${ins:10:2}${ins:8:2} # int
dst="$(($srcdst & 0xF))"
src="$(($srcdst & 0xF0))"
src="$(($src >> 4))"
opcode=$(printf "0x%x" $opcode)
dst=$(printf "BPF_REG_%d" $dst)
src=$(printf "BPF_REG_%d" $src)
offset=$(printf "%d" $offset)
immedi=$(printf "0x%x" $immedi)
printf " { %4s, %11s, %11s, (int16_t) %6s, %10s },\n" $opcode $dst $src $offset $immedi
done
cat << END
};
END
}
generate_head
process_relocations
process_section
generate_tail

View file

@ -0,0 +1,30 @@
CFLAGS=-O2 -Wall
LICENSE=BSD
PROGNAME=ngx_quic_reuseport_helper
RESULT=ngx_event_quic_bpf_code
DEST=../$(RESULT).c
all: $(RESULT)
$(RESULT): $(PROGNAME).o
LICENSE=$(LICENSE) PROGNAME=$(PROGNAME) bash ./bpfgen.sh $< > $@
DEFS=-DPROGNAME=\"$(PROGNAME)\" \
-DLICENSE_$(LICENSE) \
-DLICENSE=\"$(LICENSE)\" \
$(PROGNAME).o: $(PROGNAME).c
clang $(CFLAGS) $(DEFS) -target bpf -c $< -o $@
install: $(RESULT)
cp $(RESULT) $(DEST)
clean:
@rm -f $(RESULT) *.o
debug: $(PROGNAME).o
llvm-objdump -S -no-show-raw-insn $<
.DELETE_ON_ERROR:

View file

@ -0,0 +1,140 @@
#include <errno.h>
#include <linux/string.h>
#include <linux/udp.h>
#include <linux/bpf.h>
/*
* the bpf_helpers.h is not included into linux-headers, only available
* with kernel sources in "tools/lib/bpf/bpf_helpers.h" or in libbpf.
*/
#include <bpf/bpf_helpers.h>
#if !defined(SEC)
#define SEC(NAME) __attribute__((section(NAME), used))
#endif
#if defined(LICENSE_GPL)
/*
* To see debug:
*
* echo 1 > /sys/kernel/debug/tracing/events/bpf_trace/enable
* cat /sys/kernel/debug/tracing/trace_pipe
* echo 0 > /sys/kernel/debug/tracing/events/bpf_trace/enable
*/
#define debugmsg(fmt, ...) \
do { \
char __buf[] = fmt; \
bpf_trace_printk(__buf, sizeof(__buf), ##__VA_ARGS__); \
} while (0)
#else
#define debugmsg(fmt, ...)
#endif
char _license[] SEC("license") = LICENSE;
/*****************************************************************************/
#define NGX_QUIC_PKT_LONG 0x80 /* header form */
#define NGX_QUIC_SERVER_CID_LEN 20
#define advance_data(nbytes) \
offset += nbytes; \
if (start + offset > end) { \
debugmsg("cannot read %ld bytes at offset %ld", nbytes, offset); \
goto failed; \
} \
data = start + offset - 1;
#define ngx_quic_parse_uint64(p) \
(((__u64)(p)[0] << 56) | \
((__u64)(p)[1] << 48) | \
((__u64)(p)[2] << 40) | \
((__u64)(p)[3] << 32) | \
(p)[4] << 24 | \
(p)[5] << 16 | \
(p)[6] << 8 | \
(p)[7])
/*
* actual map object is created by the "bpf" system call,
* all pointers to this variable are replaced by the bpf loader
*/
struct bpf_map_def SEC("maps") ngx_quic_sockmap;
SEC(PROGNAME)
int ngx_quic_select_socket_by_dcid(struct sk_reuseport_md *ctx)
{
int rc;
__u64 key;
size_t len, offset;
unsigned char *start, *end, *data, *dcid;
start = ctx->data;
end = (unsigned char *) ctx->data_end;
offset = 0;
advance_data(sizeof(struct udphdr)); /* skip UDP header */
advance_data(1); /* QUIC flags */
if (data[0] & NGX_QUIC_PKT_LONG) {
advance_data(4); /* skip QUIC version */
len = data[0]; /* read DCID length */
if (len < 8) {
/* it's useless to search for key in such short DCID */
return SK_PASS;
}
advance_data(1); /* skip DCID len */
} else {
len = NGX_QUIC_SERVER_CID_LEN;
}
dcid = &data[1];
advance_data(len); /* we expect the packet to have full DCID */
/* make verifier happy */
if (dcid + sizeof(__u64) > end) {
goto failed;
}
key = ngx_quic_parse_uint64(dcid);
rc = bpf_sk_select_reuseport(ctx, &ngx_quic_sockmap, &key, 0);
switch (rc) {
case 0:
debugmsg("nginx quic socket selected by key 0x%x", key);
return SK_PASS;
/* kernel returns positive error numbers, errno.h defines positive */
case -ENOENT:
debugmsg("nginx quic default route for key 0x%x", key);
/* let the default reuseport logic decide which socket to choose */
return SK_PASS;
default:
debugmsg("nginx quic bpf_sk_select_reuseport err: %d key 0x%x",
rc, key);
goto failed;
}
failed:
/*
* SK_DROP will generate ICMP, but we may want to process "invalid" packet
* in userspace quic to investigate further and finally react properly
* (maybe ignore, maybe send something in response or close connection)
*/
return SK_PASS;
}

View file

@ -232,6 +232,9 @@ static ngx_int_t ngx_quic_process_stateless_reset(ngx_connection_t *c,
static ngx_int_t ngx_quic_negotiate_version(ngx_connection_t *c,
ngx_quic_header_t *inpkt);
static ngx_int_t ngx_quic_create_server_id(ngx_connection_t *c, u_char *id);
#if (NGX_QUIC_BPF)
static ngx_int_t ngx_quic_bpf_attach_id(ngx_connection_t *c, u_char *id);
#endif
static ngx_int_t ngx_quic_send_retry(ngx_connection_t *c);
static ngx_int_t ngx_quic_new_token(ngx_connection_t *c, ngx_str_t *token);
static ngx_int_t ngx_quic_validate_token(ngx_connection_t *c,
@ -1297,6 +1300,14 @@ ngx_quic_create_server_id(ngx_connection_t *c, u_char *id)
return NGX_ERROR;
}
#if (NGX_QUIC_BPF)
if (ngx_quic_bpf_attach_id(c, id) != NGX_OK) {
ngx_log_error(NGX_LOG_ERR, c->log, 0,
"quic bpf failed to generate socket key");
/* ignore error, things still may work */
}
#endif
ngx_log_debug2(NGX_LOG_DEBUG_EVENT, c->log, 0,
"quic create server id %*xs",
(size_t) NGX_QUIC_SERVER_CID_LEN, id);
@ -1304,6 +1315,34 @@ ngx_quic_create_server_id(ngx_connection_t *c, u_char *id)
}
#if (NGX_QUIC_BPF)
static ngx_int_t
ngx_quic_bpf_attach_id(ngx_connection_t *c, u_char *id)
{
int fd;
uint64_t cookie;
socklen_t optlen;
fd = c->listening->fd;
optlen = sizeof(cookie);
if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) {
ngx_log_error(NGX_LOG_ERR, c->log, ngx_socket_errno,
"quic getsockopt(SO_COOKIE) failed");
return NGX_ERROR;
}
ngx_quic_dcid_encode_key(id, cookie);
return NGX_OK;
}
#endif
static ngx_int_t
ngx_quic_send_retry(ngx_connection_t *c)
{

View file

@ -0,0 +1,649 @@
/*
* Copyright (C) Nginx, Inc.
*/
#include <ngx_config.h>
#include <ngx_core.h>
#define NGX_QUIC_BPF_VARNAME "NGINX_BPF_MAPS"
#define NGX_QUIC_BPF_VARSEP ';'
#define NGX_QUIC_BPF_ADDRSEP '#'
#define ngx_quic_bpf_get_conf(cycle) \
(ngx_quic_bpf_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_quic_bpf_module)
#define ngx_quic_bpf_get_old_conf(cycle) \
cycle->old_cycle->conf_ctx ? ngx_quic_bpf_get_conf(cycle->old_cycle) \
: NULL
#define ngx_core_get_conf(cycle) \
(ngx_core_conf_t *) ngx_get_conf(cycle->conf_ctx, ngx_core_module)
typedef struct {
ngx_queue_t queue;
int map_fd;
struct sockaddr *sockaddr;
socklen_t socklen;
ngx_uint_t unused; /* unsigned unused:1; */
} ngx_quic_sock_group_t;
typedef struct {
ngx_flag_t enabled;
ngx_uint_t map_size;
ngx_queue_t groups; /* of ngx_quic_sock_group_t */
} ngx_quic_bpf_conf_t;
static void *ngx_quic_bpf_create_conf(ngx_cycle_t *cycle);
static ngx_int_t ngx_quic_bpf_module_init(ngx_cycle_t *cycle);
static void ngx_quic_bpf_cleanup(void *data);
static ngx_inline void ngx_quic_bpf_close(ngx_log_t *log, int fd,
const char *name);
static ngx_quic_sock_group_t *ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf,
ngx_listening_t *ls);
static ngx_quic_sock_group_t *ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle,
struct sockaddr *sa, socklen_t socklen);
static ngx_quic_sock_group_t *ngx_quic_bpf_create_group(ngx_cycle_t *cycle,
ngx_listening_t *ls);
static ngx_quic_sock_group_t *ngx_quic_bpf_get_group(ngx_cycle_t *cycle,
ngx_listening_t *ls);
static ngx_int_t ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle,
ngx_listening_t *ls);
static uint64_t ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log);
static ngx_int_t ngx_quic_bpf_export_maps(ngx_cycle_t *cycle);
static ngx_int_t ngx_quic_bpf_import_maps(ngx_cycle_t *cycle);
extern ngx_bpf_program_t ngx_quic_reuseport_helper;
static ngx_command_t ngx_quic_bpf_commands[] = {
{ ngx_string("quic_bpf"),
NGX_MAIN_CONF|NGX_DIRECT_CONF|NGX_CONF_FLAG,
ngx_conf_set_flag_slot,
0,
offsetof(ngx_quic_bpf_conf_t, enabled),
NULL },
ngx_null_command
};
static ngx_core_module_t ngx_quic_bpf_module_ctx = {
ngx_string("quic_bpf"),
ngx_quic_bpf_create_conf,
NULL
};
ngx_module_t ngx_quic_bpf_module = {
NGX_MODULE_V1,
&ngx_quic_bpf_module_ctx, /* module context */
ngx_quic_bpf_commands, /* module directives */
NGX_CORE_MODULE, /* module type */
NULL, /* init master */
ngx_quic_bpf_module_init, /* init module */
NULL, /* init process */
NULL, /* init thread */
NULL, /* exit thread */
NULL, /* exit process */
NULL, /* exit master */
NGX_MODULE_V1_PADDING
};
static void *
ngx_quic_bpf_create_conf(ngx_cycle_t *cycle)
{
ngx_quic_bpf_conf_t *bcf;
bcf = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_bpf_conf_t));
if (bcf == NULL) {
return NULL;
}
bcf->enabled = NGX_CONF_UNSET;
bcf->map_size = NGX_CONF_UNSET_UINT;
ngx_queue_init(&bcf->groups);
return bcf;
}
static ngx_int_t
ngx_quic_bpf_module_init(ngx_cycle_t *cycle)
{
ngx_uint_t i;
ngx_listening_t *ls;
ngx_core_conf_t *ccf;
ngx_pool_cleanup_t *cln;
ngx_quic_bpf_conf_t *bcf;
ccf = ngx_core_get_conf(cycle);
bcf = ngx_quic_bpf_get_conf(cycle);
ngx_conf_init_value(bcf->enabled, 0);
bcf->map_size = ccf->worker_processes * 4;
cln = ngx_pool_cleanup_add(cycle->pool, 0);
if (cln == NULL) {
goto failed;
}
cln->data = bcf;
cln->handler = ngx_quic_bpf_cleanup;
if (ngx_inherited && ngx_is_init_cycle(cycle->old_cycle)) {
if (ngx_quic_bpf_import_maps(cycle) != NGX_OK) {
goto failed;
}
}
ls = cycle->listening.elts;
for (i = 0; i < cycle->listening.nelts; i++) {
if (ls[i].quic && ls[i].reuseport) {
if (ngx_quic_bpf_group_add_socket(cycle, &ls[i]) != NGX_OK) {
goto failed;
}
}
}
if (ngx_quic_bpf_export_maps(cycle) != NGX_OK) {
goto failed;
}
return NGX_OK;
failed:
if (ngx_is_init_cycle(cycle->old_cycle)) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
"ngx_quic_bpf_module failed to initialize, check limits");
/* refuse to start */
return NGX_ERROR;
}
/*
* returning error now will lead to master process exiting immediately
* leaving worker processes orphaned, what is really unexpected.
* Instead, just issue a not about failed initialization and try
* to cleanup a bit. Still program can be already loaded to kernel
* for some reuseport groups, and there is no way to revert, so
* behaviour may be inconsistent.
*/
ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
"ngx_quic_bpf_module failed to initialize properly, ignored."
"please check limits and note that nginx state now "
"can be inconsistent and restart may be required");
return NGX_OK;
}
static void
ngx_quic_bpf_cleanup(void *data)
{
ngx_quic_bpf_conf_t *bcf = (ngx_quic_bpf_conf_t *) data;
ngx_queue_t *q;
ngx_quic_sock_group_t *grp;
for (q = ngx_queue_head(&bcf->groups);
q != ngx_queue_sentinel(&bcf->groups);
q = ngx_queue_next(q))
{
grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
ngx_quic_bpf_close(ngx_cycle->log, grp->map_fd, "map");
}
}
static ngx_inline void
ngx_quic_bpf_close(ngx_log_t *log, int fd, const char *name)
{
if (close(fd) != -1) {
return;
}
ngx_log_error(NGX_LOG_EMERG, log, ngx_errno,
"quic bpf close %s fd:%i failed", name, fd);
}
static ngx_quic_sock_group_t *
ngx_quic_bpf_find_group(ngx_quic_bpf_conf_t *bcf, ngx_listening_t *ls)
{
ngx_queue_t *q;
ngx_quic_sock_group_t *grp;
for (q = ngx_queue_head(&bcf->groups);
q != ngx_queue_sentinel(&bcf->groups);
q = ngx_queue_next(q))
{
grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
if (ngx_cmp_sockaddr(ls->sockaddr, ls->socklen,
grp->sockaddr, grp->socklen, 1)
== NGX_OK)
{
return grp;
}
}
return NULL;
}
static ngx_quic_sock_group_t *
ngx_quic_bpf_alloc_group(ngx_cycle_t *cycle, struct sockaddr *sa,
socklen_t socklen)
{
ngx_quic_bpf_conf_t *bcf;
ngx_quic_sock_group_t *grp;
bcf = ngx_quic_bpf_get_conf(cycle);
grp = ngx_pcalloc(cycle->pool, sizeof(ngx_quic_sock_group_t));
if (grp == NULL) {
return NULL;
}
grp->socklen = socklen;
grp->sockaddr = ngx_palloc(cycle->pool, socklen);
if (grp->sockaddr == NULL) {
return NULL;
}
ngx_memcpy(grp->sockaddr, sa, socklen);
ngx_queue_insert_tail(&bcf->groups, &grp->queue);
return grp;
}
static ngx_quic_sock_group_t *
ngx_quic_bpf_create_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
{
int progfd, failed, flags, rc;
ngx_quic_bpf_conf_t *bcf;
ngx_quic_sock_group_t *grp;
bcf = ngx_quic_bpf_get_conf(cycle);
if (!bcf->enabled) {
return NULL;
}
grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
if (grp == NULL) {
return NULL;
}
grp->map_fd = ngx_bpf_map_create(cycle->log, BPF_MAP_TYPE_SOCKHASH,
sizeof(uint64_t), sizeof(uint64_t),
bcf->map_size, 0);
if (grp->map_fd == -1) {
goto failed;
}
flags = fcntl(grp->map_fd, F_GETFD);
if (flags == -1) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
"quic bpf getfd failed");
goto failed;
}
/* need to inherit map during binary upgrade after exec */
flags &= ~FD_CLOEXEC;
rc = fcntl(grp->map_fd, F_SETFD, flags);
if (rc == -1) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, errno,
"quic bpf setfd failed");
goto failed;
}
ngx_bpf_program_link(&ngx_quic_reuseport_helper,
"ngx_quic_sockmap", grp->map_fd);
progfd = ngx_bpf_load_program(cycle->log, &ngx_quic_reuseport_helper);
if (progfd < 0) {
goto failed;
}
failed = 0;
if (setsockopt(ls->fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
&progfd, sizeof(int))
== -1)
{
ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_socket_errno,
"quic bpf setsockopt(SO_ATTACH_REUSEPORT_EBPF) failed");
failed = 1;
}
ngx_quic_bpf_close(cycle->log, progfd, "program");
if (failed) {
goto failed;
}
ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
"quic bpf sockmap created fd:%i", grp->map_fd);
return grp;
failed:
if (grp->map_fd != -1) {
ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
}
ngx_queue_remove(&grp->queue);
return NULL;
}
static ngx_quic_sock_group_t *
ngx_quic_bpf_get_group(ngx_cycle_t *cycle, ngx_listening_t *ls)
{
ngx_quic_bpf_conf_t *bcf, *old_bcf;
ngx_quic_sock_group_t *grp, *ogrp;
bcf = ngx_quic_bpf_get_conf(cycle);
grp = ngx_quic_bpf_find_group(bcf, ls);
if (grp) {
return grp;
}
old_bcf = ngx_quic_bpf_get_old_conf(cycle);
if (old_bcf == NULL) {
return ngx_quic_bpf_create_group(cycle, ls);
}
ogrp = ngx_quic_bpf_find_group(old_bcf, ls);
if (ogrp == NULL) {
return ngx_quic_bpf_create_group(cycle, ls);
}
grp = ngx_quic_bpf_alloc_group(cycle, ls->sockaddr, ls->socklen);
if (grp == NULL) {
return NULL;
}
grp->map_fd = dup(ogrp->map_fd);
if (grp->map_fd == -1) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
"quic bpf failed to duplicate bpf map descriptor");
ngx_queue_remove(&grp->queue);
return NULL;
}
ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
"quic bpf sockmap fd duplicated old:%i new:%i",
ogrp->map_fd, grp->map_fd);
return grp;
}
static ngx_int_t
ngx_quic_bpf_group_add_socket(ngx_cycle_t *cycle, ngx_listening_t *ls)
{
uint64_t cookie;
ngx_quic_bpf_conf_t *bcf;
ngx_quic_sock_group_t *grp;
bcf = ngx_quic_bpf_get_conf(cycle);
grp = ngx_quic_bpf_get_group(cycle, ls);
if (grp == NULL) {
if (!bcf->enabled) {
return NGX_OK;
}
return NGX_ERROR;
}
grp->unused = 0;
cookie = ngx_quic_bpf_socket_key(ls->fd, cycle->log);
if (cookie == (uint64_t) NGX_ERROR) {
return NGX_ERROR;
}
/* map[cookie] = socket; for use in kernel helper */
if (ngx_bpf_map_update(grp->map_fd, &cookie, &ls->fd, BPF_ANY) == -1) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, ngx_errno,
"quic bpf failed to update socket map key=%xL", cookie);
return NGX_ERROR;
}
ngx_log_debug4(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
"quic bpf sockmap fd:%d add socket:%d cookie:0x%xL worker:%d",
grp->map_fd, ls->fd, cookie, ls->worker);
/* do not inherit this socket */
ls->ignore = 1;
return NGX_OK;
}
static uint64_t
ngx_quic_bpf_socket_key(ngx_fd_t fd, ngx_log_t *log)
{
uint64_t cookie;
socklen_t optlen;
optlen = sizeof(cookie);
if (getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &optlen) == -1) {
ngx_log_error(NGX_LOG_EMERG, log, ngx_socket_errno,
"quic bpf getsockopt(SO_COOKIE) failed");
return (ngx_uint_t) NGX_ERROR;
}
return cookie;
}
static ngx_int_t
ngx_quic_bpf_export_maps(ngx_cycle_t *cycle)
{
u_char *p, *buf;
size_t len;
ngx_str_t *var;
ngx_queue_t *q;
ngx_core_conf_t *ccf;
ngx_quic_bpf_conf_t *bcf;
ngx_quic_sock_group_t *grp;
ccf = ngx_core_get_conf(cycle);
bcf = ngx_quic_bpf_get_conf(cycle);
len = sizeof(NGX_QUIC_BPF_VARNAME) + 1;
q = ngx_queue_head(&bcf->groups);
while (q != ngx_queue_sentinel(&bcf->groups)) {
grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
q = ngx_queue_next(q);
if (grp->unused) {
/*
* map was inherited, but it is not used in this configuration;
* do not pass such map further and drop the group to prevent
* interference with changes during reload
*/
ngx_quic_bpf_close(cycle->log, grp->map_fd, "map");
ngx_queue_remove(&grp->queue);
continue;
}
len += NGX_INT32_LEN + 1 + NGX_SOCKADDR_STRLEN + 1;
}
len++;
buf = ngx_palloc(cycle->pool, len);
if (buf == NULL) {
return NGX_ERROR;
}
p = ngx_cpymem(buf, NGX_QUIC_BPF_VARNAME "=",
sizeof(NGX_QUIC_BPF_VARNAME));
for (q = ngx_queue_head(&bcf->groups);
q != ngx_queue_sentinel(&bcf->groups);
q = ngx_queue_next(q))
{
grp = ngx_queue_data(q, ngx_quic_sock_group_t, queue);
p = ngx_sprintf(p, "%ud", grp->map_fd);
*p++ = NGX_QUIC_BPF_ADDRSEP;
p += ngx_sock_ntop(grp->sockaddr, grp->socklen, p,
NGX_SOCKADDR_STRLEN, 1);
*p++ = NGX_QUIC_BPF_VARSEP;
}
*p = '\0';
var = ngx_array_push(&ccf->env);
if (var == NULL) {
return NGX_ERROR;
}
var->data = buf;
var->len = sizeof(NGX_QUIC_BPF_VARNAME) - 1;
return NGX_OK;
}
static ngx_int_t
ngx_quic_bpf_import_maps(ngx_cycle_t *cycle)
{
int s;
u_char *inherited, *p, *v;
ngx_uint_t in_fd;
ngx_addr_t tmp;
ngx_quic_bpf_conf_t *bcf;
ngx_quic_sock_group_t *grp;
inherited = (u_char *) getenv(NGX_QUIC_BPF_VARNAME);
if (inherited == NULL) {
return NGX_OK;
}
bcf = ngx_quic_bpf_get_conf(cycle);
#if (NGX_SUPPRESS_WARN)
s = -1;
#endif
in_fd = 1;
for (p = inherited, v = p; *p; p++) {
switch (*p) {
case NGX_QUIC_BPF_ADDRSEP:
if (!in_fd) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
"quic bpf failed to parse inherited env");
return NGX_ERROR;
}
in_fd = 0;
s = ngx_atoi(v, p - v);
if (s == NGX_ERROR) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
"quic bpf failed to parse inherited map fd");
return NGX_ERROR;
}
v = p + 1;
break;
case NGX_QUIC_BPF_VARSEP:
if (in_fd) {
ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
"quic bpf failed to parse inherited env");
return NGX_ERROR;
}
in_fd = 1;
grp = ngx_pcalloc(cycle->pool,
sizeof(ngx_quic_sock_group_t));
if (grp == NULL) {
return NGX_ERROR;
}
grp->map_fd = s;
if (ngx_parse_addr_port(cycle->pool, &tmp, v, p - v)
!= NGX_OK)
{
ngx_log_error(NGX_LOG_EMERG, cycle->log, 0,
"quic bpf failed to parse inherited"
" address '%*s'", p - v , v);
ngx_quic_bpf_close(cycle->log, s, "inherited map");
return NGX_ERROR;
}
grp->sockaddr = tmp.sockaddr;
grp->socklen = tmp.socklen;
grp->unused = 1;
ngx_queue_insert_tail(&bcf->groups, &grp->queue);
ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0,
"quic bpf sockmap inherited with "
"fd:%i address:%*s",
grp->map_fd, p - v, v);
v = p + 1;
break;
default:
break;
}
}
return NGX_OK;
}

View file

@ -0,0 +1,90 @@
/* AUTO-GENERATED, DO NOT EDIT. */
#include <stddef.h>
#include <stdint.h>
#include "ngx_bpf.h"
static ngx_bpf_reloc_t bpf_reloc_prog_ngx_quic_reuseport_helper[] = {
{ "ngx_quic_sockmap", 56 },
};
static struct bpf_insn bpf_insn_prog_ngx_quic_reuseport_helper[] = {
/* opcode dst src offset imm */
{ 0x79, BPF_REG_4, BPF_REG_1, (int16_t) 0, 0x0 },
{ 0x79, BPF_REG_3, BPF_REG_1, (int16_t) 8, 0x0 },
{ 0xbf, BPF_REG_2, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x7, BPF_REG_2, BPF_REG_0, (int16_t) 0, 0x8 },
{ 0x2d, BPF_REG_2, BPF_REG_3, (int16_t) 55, 0x0 },
{ 0xbf, BPF_REG_5, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x7, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0x9 },
{ 0x2d, BPF_REG_5, BPF_REG_3, (int16_t) 52, 0x0 },
{ 0xb7, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0x14 },
{ 0xb7, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x9 },
{ 0x71, BPF_REG_6, BPF_REG_2, (int16_t) 0, 0x0 },
{ 0x67, BPF_REG_6, BPF_REG_0, (int16_t) 0, 0x38 },
{ 0xc7, BPF_REG_6, BPF_REG_0, (int16_t) 0, 0x38 },
{ 0x65, BPF_REG_6, BPF_REG_0, (int16_t) 10, 0xffffffff },
{ 0xbf, BPF_REG_2, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x7, BPF_REG_2, BPF_REG_0, (int16_t) 0, 0xd },
{ 0x2d, BPF_REG_2, BPF_REG_3, (int16_t) 43, 0x0 },
{ 0xbf, BPF_REG_5, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x7, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0xe },
{ 0x2d, BPF_REG_5, BPF_REG_3, (int16_t) 40, 0x0 },
{ 0xb7, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0xe },
{ 0x71, BPF_REG_5, BPF_REG_4, (int16_t) 12, 0x0 },
{ 0xb7, BPF_REG_6, BPF_REG_0, (int16_t) 0, 0x8 },
{ 0x2d, BPF_REG_6, BPF_REG_5, (int16_t) 36, 0x0 },
{ 0xf, BPF_REG_5, BPF_REG_0, (int16_t) 0, 0x0 },
{ 0xf, BPF_REG_4, BPF_REG_5, (int16_t) 0, 0x0 },
{ 0x2d, BPF_REG_4, BPF_REG_3, (int16_t) 33, 0x0 },
{ 0xbf, BPF_REG_4, BPF_REG_2, (int16_t) 0, 0x0 },
{ 0x7, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x9 },
{ 0x2d, BPF_REG_4, BPF_REG_3, (int16_t) 30, 0x0 },
{ 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 1, 0x0 },
{ 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x38 },
{ 0x71, BPF_REG_3, BPF_REG_2, (int16_t) 2, 0x0 },
{ 0x67, BPF_REG_3, BPF_REG_0, (int16_t) 0, 0x30 },
{ 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 3, 0x0 },
{ 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x28 },
{ 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 4, 0x0 },
{ 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x20 },
{ 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 5, 0x0 },
{ 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x38 },
{ 0xc7, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x20 },
{ 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 6, 0x0 },
{ 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x10 },
{ 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x71, BPF_REG_4, BPF_REG_2, (int16_t) 7, 0x0 },
{ 0x67, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x8 },
{ 0x4f, BPF_REG_3, BPF_REG_4, (int16_t) 0, 0x0 },
{ 0x71, BPF_REG_2, BPF_REG_2, (int16_t) 8, 0x0 },
{ 0x4f, BPF_REG_3, BPF_REG_2, (int16_t) 0, 0x0 },
{ 0x7b, BPF_REG_10, BPF_REG_3, (int16_t) 65528, 0x0 },
{ 0xbf, BPF_REG_3, BPF_REG_10, (int16_t) 0, 0x0 },
{ 0x7, BPF_REG_3, BPF_REG_0, (int16_t) 0, 0xfffffff8 },
{ 0x18, BPF_REG_2, BPF_REG_0, (int16_t) 0, 0x0 },
{ 0x0, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x0 },
{ 0xb7, BPF_REG_4, BPF_REG_0, (int16_t) 0, 0x0 },
{ 0x85, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x52 },
{ 0xb7, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x1 },
{ 0x95, BPF_REG_0, BPF_REG_0, (int16_t) 0, 0x0 },
};
ngx_bpf_program_t ngx_quic_reuseport_helper = {
.relocs = bpf_reloc_prog_ngx_quic_reuseport_helper,
.nrelocs = sizeof(bpf_reloc_prog_ngx_quic_reuseport_helper)
/ sizeof(bpf_reloc_prog_ngx_quic_reuseport_helper[0]),
.ins = bpf_insn_prog_ngx_quic_reuseport_helper,
.nins = sizeof(bpf_insn_prog_ngx_quic_reuseport_helper)
/ sizeof(bpf_insn_prog_ngx_quic_reuseport_helper[0]),
.license = "BSD",
.type = BPF_PROG_TYPE_SK_REUSEPORT,
};

View file

@ -43,6 +43,17 @@
#endif
#define ngx_quic_write_uint64(p, s) \
((p)[0] = (u_char) ((s) >> 56), \
(p)[1] = (u_char) ((s) >> 48), \
(p)[2] = (u_char) ((s) >> 40), \
(p)[3] = (u_char) ((s) >> 32), \
(p)[4] = (u_char) ((s) >> 24), \
(p)[5] = (u_char) ((s) >> 16), \
(p)[6] = (u_char) ((s) >> 8), \
(p)[7] = (u_char) (s), \
(p) + sizeof(uint64_t))
#define ngx_quic_write_uint24(p, s) \
((p)[0] = (u_char) ((s) >> 16), \
(p)[1] = (u_char) ((s) >> 8), \
@ -1981,3 +1992,10 @@ ngx_quic_create_close(u_char *p, ngx_quic_close_frame_t *cl)
return p - start;
}
void
ngx_quic_dcid_encode_key(u_char *dcid, uint64_t key)
{
(void) ngx_quic_write_uint64(dcid, key);
}

View file

@ -353,4 +353,6 @@ ngx_int_t ngx_quic_parse_transport_params(u_char *p, u_char *end,
ssize_t ngx_quic_create_transport_params(u_char *p, u_char *end,
ngx_quic_tp_t *tp, size_t *clen);
void ngx_quic_dcid_encode_key(u_char *dcid, uint64_t key);
#endif /* _NGX_EVENT_QUIC_WIRE_H_INCLUDED_ */