diff --git a/include/haproxy/fd-t.h b/include/haproxy/fd-t.h index 27266dce9..97ae5002b 100644 --- a/include/haproxy/fd-t.h +++ b/include/haproxy/fd-t.h @@ -31,23 +31,18 @@ enum { DIR_WR=1, }; -/* Polling status flags returned in fdtab[].ev : - * FD_POLL_IN remains set as long as some data is pending for read. - * FD_POLL_OUT remains set as long as the fd accepts to write data. - * FD_POLL_ERR and FD_POLL_ERR remain set forever (until processed). - */ -#define FD_POLL_IN 0x00000100 -#define FD_POLL_PRI 0x00000200 -#define FD_POLL_OUT 0x00000400 -#define FD_POLL_ERR 0x00000800 -#define FD_POLL_HUP 0x00001000 -#define FD_POLL_UPDT_MASK (FD_POLL_IN | FD_POLL_PRI | FD_POLL_OUT) - -/* FD_EV_* are the values used in fdtab[].state to define the polling states in - * each direction. Most of them are manipulated using test-and-set operations - * which require the bit position in the mask, which is given in the _BIT - * variant. +/* fdtab[].state is a composite state describing what is known about the FD. + * For now, the following information is stored in it: + * - event configuration and status for each direction (R,W) split into + * active, ready, shutdown categories (FD_EV_*). These are known by their + * bit values as well so that test-and-set bit operations are possible. + * + * - last known polling status (FD_POLL_*). For ease of troubleshooting, + * avoid visually mixing these with the other flags above. Three of these + * flags are updated on each poll() report (FD_POLL_IN, FD_POLL_OUT, + * FD_POLL_PRI). FD_POLL_HUP and FD_POLL_ERR are "sticky" in that once they + * are reported, they will not be cleared until the FD is closed.
*/ /* bits positions for a few flags */ @@ -61,6 +56,13 @@ enum { #define FD_EV_SHUT_W_BIT 6 #define FD_EV_ERR_RW_BIT 7 +#define FD_POLL_IN_BIT 8 +#define FD_POLL_PRI_BIT 9 +#define FD_POLL_OUT_BIT 10 +#define FD_POLL_ERR_BIT 11 +#define FD_POLL_HUP_BIT 12 + + /* and flag values */ #define FD_EV_ACTIVE_R (1U << FD_EV_ACTIVE_R_BIT) #define FD_EV_ACTIVE_W (1U << FD_EV_ACTIVE_W_BIT) @@ -80,6 +82,18 @@ enum { */ #define FD_EV_ERR_RW (1U << FD_EV_ERR_RW_BIT) +/* mask covering all use cases above */ +#define FD_EV_ANY (FD_EV_ACTIVE_RW | FD_EV_READY_RW | FD_EV_SHUT_RW | FD_EV_ERR_RW) + +/* polling status */ +#define FD_POLL_IN (1U << FD_POLL_IN_BIT) +#define FD_POLL_PRI (1U << FD_POLL_PRI_BIT) +#define FD_POLL_OUT (1U << FD_POLL_OUT_BIT) +#define FD_POLL_ERR (1U << FD_POLL_ERR_BIT) +#define FD_POLL_HUP (1U << FD_POLL_HUP_BIT) +#define FD_POLL_UPDT_MASK (FD_POLL_IN | FD_POLL_PRI | FD_POLL_OUT) +#define FD_POLL_ANY_MASK (FD_POLL_IN | FD_POLL_PRI | FD_POLL_OUT | FD_POLL_ERR | FD_POLL_HUP) + /* This is the value used to mark a file descriptor as dead. This value is * negative, this is important so that tests on fd < 0 properly match. It @@ -127,8 +141,7 @@ struct fdtab { struct fdlist_entry update; /* Entry in the global update list */ void (*iocb)(int fd); /* I/O handler */ void *owner; /* the connection or listener associated with this fd, NULL if closed */ - unsigned char state; /* FD state for read and write directions (FD_EV_*) */ - unsigned int ev; /* event seen in return of poll() : FD_POLL_* */ + unsigned int state; /* FD state for read and write directions (FD_EV_*) + FD_POLL_* */ unsigned char linger_risk:1; /* 1 if we must kill lingering before closing */ unsigned char cloned:1; /* 1 if a cloned socket, requires EPOLL_CTL_DEL on close */ unsigned char initialized:1; /* 1 if init phase was done on this fd (e.g. 
set non-blocking) */ diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h index 4863c4aa2..6d6da6918 100644 --- a/include/haproxy/fd.h +++ b/include/haproxy/fd.h @@ -367,7 +367,7 @@ static inline void fd_update_events(int fd, uint evts) ((evts & FD_EV_ERR_RW) ? FD_POLL_ERR : 0); /* SHUTW reported while FD was active for writes is an error */ - if ((fdtab[fd].ev & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W)) + if ((fdtab[fd].state & FD_EV_ACTIVE_W) && (evts & FD_EV_SHUT_W)) new_flags |= FD_POLL_ERR; /* compute the inactive events reported late that must be stopped */ @@ -385,22 +385,22 @@ static inline void fd_update_events(int fd, uint evts) must_stop = FD_POLL_OUT; } - old = fdtab[fd].ev; + old = fdtab[fd].state; new = (old & ~FD_POLL_UPDT_MASK) | new_flags; if (unlikely(locked)) { /* Locked FDs (those with more than 2 threads) are atomically updated */ - while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].ev, &old, new))) + while (unlikely(new != old && !_HA_ATOMIC_CAS(&fdtab[fd].state, &old, new))) new = (old & ~FD_POLL_UPDT_MASK) | new_flags; } else { if (new != old) - fdtab[fd].ev = new; + fdtab[fd].state = new; } - if (fdtab[fd].ev & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR)) + if (fdtab[fd].state & (FD_POLL_IN | FD_POLL_HUP | FD_POLL_ERR)) fd_may_recv(fd); - if (fdtab[fd].ev & (FD_POLL_OUT | FD_POLL_ERR)) + if (fdtab[fd].state & (FD_POLL_OUT | FD_POLL_ERR)) fd_may_send(fd); if (fdtab[fd].iocb && fd_active(fd)) { @@ -432,7 +432,7 @@ static inline void fd_insert(int fd, void *owner, void (*iocb)(int fd), unsigned fdtab[fd].owner = owner; fdtab[fd].iocb = iocb; - fdtab[fd].ev = 0; + fdtab[fd].state = 0; fdtab[fd].linger_risk = 0; fdtab[fd].cloned = 0; fdtab[fd].et_possible = 0; diff --git a/src/cli.c b/src/cli.c index 1e4520353..76956e5c2 100644 --- a/src/cli.c +++ b/src/cli.c @@ -1190,19 +1190,18 @@ static int cli_io_handler_show_fd(struct appctx *appctx) suspicious = 1; chunk_printf(&trash, - " %5d : st=0x%02x(R:%c%c W:%c%c) ev=0x%02x(%c%c%c%c%c) [%c%c] 
tmask=0x%lx umask=0x%lx owner=%p iocb=%p(", + " %5d : st=0x%04x(R:%c%c W:%c%c %c%c%c%c%c) [%c%c] tmask=0x%lx umask=0x%lx owner=%p iocb=%p(", fd, fdt.state, (fdt.state & FD_EV_READY_R) ? 'R' : 'r', (fdt.state & FD_EV_ACTIVE_R) ? 'A' : 'a', (fdt.state & FD_EV_READY_W) ? 'R' : 'r', (fdt.state & FD_EV_ACTIVE_W) ? 'A' : 'a', - fdt.ev >> 8, - (fdt.ev & FD_POLL_HUP) ? 'H' : 'h', - (fdt.ev & FD_POLL_ERR) ? 'E' : 'e', - (fdt.ev & FD_POLL_OUT) ? 'O' : 'o', - (fdt.ev & FD_POLL_PRI) ? 'P' : 'p', - (fdt.ev & FD_POLL_IN) ? 'I' : 'i', + (fdt.state & FD_POLL_HUP) ? 'H' : 'h', + (fdt.state & FD_POLL_ERR) ? 'E' : 'e', + (fdt.state & FD_POLL_OUT) ? 'O' : 'o', + (fdt.state & FD_POLL_PRI) ? 'P' : 'p', + (fdt.state & FD_POLL_IN) ? 'I' : 'i', fdt.linger_risk ? 'L' : 'l', fdt.cloned ? 'C' : 'c', fdt.thread_mask, fdt.update_mask, diff --git a/src/dns.c b/src/dns.c index 56a19457e..19915cc23 100644 --- a/src/dns.c +++ b/src/dns.c @@ -251,7 +251,7 @@ static void dns_resolve_recv(struct dgram_conn *dgram) /* no need to go further if we can't retrieve the nameserver */ if ((ns = dgram->owner) == NULL) { - _HA_ATOMIC_AND(&fdtab[fd].ev, ~(FD_POLL_HUP|FD_POLL_ERR)); + _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR)); fd_stop_recv(fd); return; } @@ -277,7 +277,7 @@ static void dns_resolve_send(struct dgram_conn *dgram) /* no need to go further if we can't retrieve the nameserver */ if ((ns = dgram->owner) == NULL) { - _HA_ATOMIC_AND(&fdtab[fd].ev, ~(FD_POLL_HUP|FD_POLL_ERR)); + _HA_ATOMIC_AND(&fdtab[fd].state, ~(FD_POLL_HUP|FD_POLL_ERR)); fd_stop_send(fd); return; } diff --git a/src/log.c b/src/log.c index 2b4b8388d..21a75a750 100644 --- a/src/log.c +++ b/src/log.c @@ -3679,7 +3679,7 @@ void syslog_fd_handler(int fd) if(!l) ABORT_NOW(); - if (fdtab[fd].ev & FD_POLL_IN) { + if (fdtab[fd].state & FD_POLL_IN) { if (!fd_recv_ready(fd)) return; diff --git a/src/proto_sockpair.c b/src/proto_sockpair.c index a1e9d2540..48659c769 100644 --- a/src/proto_sockpair.c +++ b/src/proto_sockpair.c @@ 
-495,13 +495,13 @@ struct connection *sockpair_accept_conn(struct listener *l, int *status) switch (errno) { case EAGAIN: ret = CO_AC_DONE; /* nothing more to accept */ - if (fdtab[l->rx.fd].ev & (FD_POLL_HUP|FD_POLL_ERR)) { + if (fdtab[l->rx.fd].state & (FD_POLL_HUP|FD_POLL_ERR)) { /* the listening socket might have been disabled in a shared * process and we're a collateral victim. We'll just pause for * a while in case it comes back. In the mean time, we need to * clear this sticky flag. */ - _HA_ATOMIC_AND(&fdtab[l->rx.fd].ev, ~(FD_POLL_HUP|FD_POLL_ERR)); + _HA_ATOMIC_AND(&fdtab[l->rx.fd].state, ~(FD_POLL_HUP|FD_POLL_ERR)); ret = CO_AC_PAUSE; } fd_cant_recv(l->rx.fd); diff --git a/src/quic_sock.c b/src/quic_sock.c index 56e7ee220..8ded82890 100644 --- a/src/quic_sock.c +++ b/src/quic_sock.c @@ -204,7 +204,7 @@ void quic_sock_fd_iocb(int fd) if (!l) ABORT_NOW(); - if (!(fdtab[fd].ev & FD_POLL_IN) || !fd_recv_ready(fd)) + if (!(fdtab[fd].state & FD_POLL_IN) || !fd_recv_ready(fd)) return; buf = get_trash_chunk(); diff --git a/src/raw_sock.c b/src/raw_sock.c index 4fbc1a521..6d5c88551 100644 --- a/src/raw_sock.c +++ b/src/raw_sock.c @@ -71,13 +71,13 @@ int raw_sock_to_pipe(struct connection *conn, void *xprt_ctx, struct pipe *pipe, * Since older splice() implementations were buggy and returned * EAGAIN on end of read, let's bypass the call to splice() now. 
*/ - if (unlikely(!(fdtab[conn->handle.fd].ev & FD_POLL_IN))) { + if (unlikely(!(fdtab[conn->handle.fd].state & FD_POLL_IN))) { /* stop here if we reached the end of data */ - if ((fdtab[conn->handle.fd].ev & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP) + if ((fdtab[conn->handle.fd].state & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP) goto out_read0; /* report error on POLL_ERR before connection establishment */ - if ((fdtab[conn->handle.fd].ev & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) { + if ((fdtab[conn->handle.fd].state & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) { conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH; errno = 0; /* let the caller do a getsockopt() if it wants it */ goto leave; @@ -239,13 +239,13 @@ static size_t raw_sock_to_buf(struct connection *conn, void *xprt_ctx, struct bu conn->flags &= ~CO_FL_WAIT_ROOM; errno = 0; - if (unlikely(!(fdtab[conn->handle.fd].ev & FD_POLL_IN))) { + if (unlikely(!(fdtab[conn->handle.fd].state & FD_POLL_IN))) { /* stop here if we reached the end of data */ - if ((fdtab[conn->handle.fd].ev & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP) + if ((fdtab[conn->handle.fd].state & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP) goto read0; /* report error on POLL_ERR before connection establishment */ - if ((fdtab[conn->handle.fd].ev & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) { + if ((fdtab[conn->handle.fd].state & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) { conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH; goto leave; } @@ -282,7 +282,7 @@ static size_t raw_sock_to_buf(struct connection *conn, void *xprt_ctx, struct bu * to read an unlikely close from the client since we'll * close first anyway. 
*/ - if (fdtab[conn->handle.fd].ev & FD_POLL_HUP) + if (fdtab[conn->handle.fd].state & FD_POLL_HUP) goto read0; if ((!fdtab[conn->handle.fd].linger_risk) || @@ -326,7 +326,7 @@ static size_t raw_sock_to_buf(struct connection *conn, void *xprt_ctx, struct bu * of recv()'s return value 0, so we have no way to tell there was * an error without checking. */ - if (unlikely(fdtab[conn->handle.fd].ev & FD_POLL_ERR)) + if (unlikely(fdtab[conn->handle.fd].state & FD_POLL_ERR)) conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH; goto leave; } diff --git a/src/sock.c b/src/sock.c index 9ad5b67c9..19191c82e 100644 --- a/src/sock.c +++ b/src/sock.c @@ -103,13 +103,13 @@ struct connection *sock_accept_conn(struct listener *l, int *status) switch (errno) { case EAGAIN: ret = CO_AC_DONE; /* nothing more to accept */ - if (fdtab[l->rx.fd].ev & (FD_POLL_HUP|FD_POLL_ERR)) { + if (fdtab[l->rx.fd].state & (FD_POLL_HUP|FD_POLL_ERR)) { /* the listening socket might have been disabled in a shared * process and we're a collateral victim. We'll just pause for * a while in case it comes back. In the mean time, we need to * clear this sticky flag. 
*/ - _HA_ATOMIC_AND(&fdtab[l->rx.fd].ev, ~(FD_POLL_HUP|FD_POLL_ERR)); + _HA_ATOMIC_AND(&fdtab[l->rx.fd].state, ~(FD_POLL_HUP|FD_POLL_ERR)); ret = CO_AC_PAUSE; } fd_cant_recv(l->rx.fd); @@ -683,11 +683,11 @@ int sock_conn_check(struct connection *conn) */ if (cur_poller.flags & HAP_POLL_F_ERRHUP) { /* modern poller, able to report ERR/HUP */ - if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_IN) + if ((fdtab[fd].state & (FD_POLL_IN|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_IN) goto done; - if ((fdtab[fd].ev & (FD_POLL_OUT|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_OUT) + if ((fdtab[fd].state & (FD_POLL_OUT|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_OUT) goto done; - if (!(fdtab[fd].ev & (FD_POLL_ERR|FD_POLL_HUP))) + if (!(fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP))) goto wait; /* error present, fall through common error check path */ } @@ -832,7 +832,7 @@ int sock_drain(struct connection *conn) int fd = conn->handle.fd; int len; - if (fdtab[fd].ev & (FD_POLL_ERR|FD_POLL_HUP)) + if (fdtab[fd].state & (FD_POLL_ERR|FD_POLL_HUP)) goto shut; if (!fd_recv_ready(fd)) diff --git a/src/xprt_quic.c b/src/xprt_quic.c index f57506ffd..862499ccd 100644 --- a/src/xprt_quic.c +++ b/src/xprt_quic.c @@ -3890,13 +3890,13 @@ static size_t quic_conn_to_buf(struct connection *conn, void *xprt_ctx, struct b conn->flags &= ~CO_FL_WAIT_ROOM; errno = 0; - if (unlikely(!(fdtab[conn->handle.fd].ev & FD_POLL_IN))) { + if (unlikely(!(fdtab[conn->handle.fd].state & FD_POLL_IN))) { /* stop here if we reached the end of data */ - if ((fdtab[conn->handle.fd].ev & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP) + if ((fdtab[conn->handle.fd].state & (FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_HUP) goto read0; /* report error on POLL_ERR before connection establishment */ - if ((fdtab[conn->handle.fd].ev & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) { + if ((fdtab[conn->handle.fd].state & FD_POLL_ERR) && (conn->flags & CO_FL_WAIT_L4_CONN)) { conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | 
CO_FL_SOCK_WR_SH; goto leave; } @@ -3930,7 +3930,7 @@ static size_t quic_conn_to_buf(struct connection *conn, void *xprt_ctx, struct b * to read an unlikely close from the client since we'll * close first anyway. */ - if (fdtab[conn->handle.fd].ev & FD_POLL_HUP) + if (fdtab[conn->handle.fd].state & FD_POLL_HUP) goto read0; if ((!fdtab[conn->handle.fd].linger_risk) || @@ -3970,7 +3970,7 @@ static size_t quic_conn_to_buf(struct connection *conn, void *xprt_ctx, struct b * of recv()'s return value 0, so we have no way to tell there was * an error without checking. */ - if (unlikely(fdtab[conn->handle.fd].ev & FD_POLL_ERR)) + if (unlikely(fdtab[conn->handle.fd].state & FD_POLL_ERR)) conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH; goto leave; } @@ -4297,7 +4297,7 @@ static size_t quic_conn_handler(int fd, void *ctx, qpkt_read_func *func) */ void quic_fd_handler(int fd) { - if (fdtab[fd].ev & FD_POLL_IN) + if (fdtab[fd].state & FD_POLL_IN) quic_conn_handler(fd, fdtab[fd].owner, &qc_lstnr_pkt_rcv); } @@ -4306,7 +4306,7 @@ void quic_fd_handler(int fd) */ void quic_conn_fd_handler(int fd) { - if (fdtab[fd].ev & FD_POLL_IN) + if (fdtab[fd].state & FD_POLL_IN) quic_conn_handler(fd, fdtab[fd].owner, &qc_srv_pkt_rcv); }