From 4f60f16dd3ea2087f9e9759e00a28d101ecb0167 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sun, 8 Apr 2007 16:39:58 +0200 Subject: [PATCH] [MAJOR] modularize the polling mechanisms select, poll and epoll now have their dedicated functions and have been split into distinct files. Several FD manipulation primitives have been provided with each poller. The rest of the code needs to be cleaned to remove traces of StaticReadEvent/StaticWriteEvent. A trick involving a macro has temporarily been used right now. Some work needs to be done to factorize tests and sets everywhere. --- Makefile | 27 +- Makefile.bsd | 2 +- Makefile.osx | 2 +- include/proto/fd.h | 44 ++-- include/types/fd.h | 37 +++ include/types/polling.h | 4 +- src/ev_epoll.c | 355 ++++++++++++++++++++++++++ src/ev_poll.c | 264 ++++++++++++++++++++ src/ev_select.c | 264 ++++++++++++++++++++ src/fd.c | 533 +++++----------------------------------- src/haproxy.c | 95 +++---- 11 files changed, 1088 insertions(+), 539 deletions(-) create mode 100644 src/ev_epoll.c create mode 100644 src/ev_poll.c create mode 100644 src/ev_select.c diff --git a/Makefile b/Makefile index faa37c059..d3d5c9789 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,16 @@ TARGET = linux24 #TARGET = linux22 #TARGET = solaris +USE_POLL = 1 + +ifeq ($(TARGET),linux24e) +USE_EPOLL = 1 +endif + +ifeq ($(TARGET),linux26) +USE_EPOLL = 1 +endif + # pass CPU= to make to optimize for a particular CPU CPU = generic #CPU = i586 @@ -35,26 +45,26 @@ PCREDIR := $(shell pcre-config --prefix 2>/dev/null || :) TCPSPLICEDIR := # This is for standard Linux 2.6 with netfilter and epoll() -COPTS.linux26 = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL +COPTS.linux26 = -DNETFILTER LIBS.linux26 = # This is for enhanced Linux 2.4 with netfilter and epoll() patch. # Warning! If kernel is 2.4 with epoll-lt <= 0.21, then you must add # -DEPOLL_CTL_MOD_WORKAROUND to workaround a very rare bug. 
-#COPTS.linux24e = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL -DUSE_MY_EPOLL -DEPOLL_CTL_MOD_WORKAROUND -COPTS.linux24e = -DNETFILTER -DENABLE_POLL -DENABLE_EPOLL -DUSE_MY_EPOLL +#COPTS.linux24e = -DNETFILTER -DUSE_MY_EPOLL -DEPOLL_CTL_MOD_WORKAROUND +COPTS.linux24e = -DNETFILTER -DUSE_MY_EPOLL LIBS.linux24e = # This is for standard Linux 2.4 with netfilter but without epoll() -COPTS.linux24 = -DNETFILTER -DENABLE_POLL +COPTS.linux24 = -DNETFILTER LIBS.linux24 = # This is for Linux 2.2 -COPTS.linux22 = -DUSE_GETSOCKNAME -DENABLE_POLL +COPTS.linux22 = -DUSE_GETSOCKNAME LIBS.linux22 = # This is for Solaris 8 -COPTS.solaris = -fomit-frame-pointer -DENABLE_POLL -DFD_SETSIZE=65536 +COPTS.solaris = -fomit-frame-pointer -DFD_SETSIZE=65536 LIBS.solaris = -lnsl -lsocket # CPU dependant optimizations @@ -92,7 +102,6 @@ ADDINC = ADDLIB = # set some defines when needed. -# Known ones are -DENABLE_POLL, -DENABLE_EPOLL, and -DUSE_MY_EPOLL # - use -DTPROXY to compile with transparent proxy support. DEFINE = -DTPROXY @@ -136,10 +145,12 @@ endif ifneq ($(USE_POLL),) OPTIONS += -DENABLE_POLL +OPT_OBJS += src/ev_poll.o endif ifneq ($(USE_EPOLL),) OPTIONS += -DENABLE_EPOLL +OPT_OBJS += src/ev_epoll.o endif ifneq ($(USE_MY_EPOLL),) @@ -199,7 +210,7 @@ OBJS = src/haproxy.o src/list.o src/chtbl.o src/hashpjw.o src/base64.o \ src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \ src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ - src/session.o src/hdr_idx.o src/rbtree.o + src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o haproxy: $(OBJS) $(OPT_OBJS) $(LD) $(LDFLAGS) -o $@ $^ $(LIBS) diff --git a/Makefile.bsd b/Makefile.bsd index 67f51d9ca..043ff04dc 100644 --- a/Makefile.bsd +++ b/Makefile.bsd @@ -87,7 +87,7 @@ OBJS = src/haproxy.o src/list.o src/chtbl.o src/hashpjw.o src/base64.o \ src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \ src/checks.o src/queue.o src/capture.o 
src/client.o src/proxy.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ - src/session.o src/hdr_idx.o src/rbtree.o + src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o src/ev_poll.o all: haproxy diff --git a/Makefile.osx b/Makefile.osx index a4fda97ad..cb79d5779 100644 --- a/Makefile.osx +++ b/Makefile.osx @@ -87,7 +87,7 @@ OBJS = src/haproxy.o src/list.o src/chtbl.o src/hashpjw.o src/base64.o \ src/time.o src/fd.o src/regex.o src/cfgparse.o src/server.o \ src/checks.o src/queue.o src/capture.o src/client.o src/proxy.o \ src/proto_http.o src/stream_sock.o src/appsession.o src/backend.o \ - src/session.o src/hdr_idx.o src/rbtree.o + src/session.o src/hdr_idx.o src/rbtree.o src/ev_select.o src/ev_poll.o all: haproxy diff --git a/include/proto/fd.h b/include/proto/fd.h index e41fcd151..03a7add8e 100644 --- a/include/proto/fd.h +++ b/include/proto/fd.h @@ -2,7 +2,7 @@ include/proto/fd.h File descriptors states. - Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -34,29 +34,41 @@ */ void fd_delete(int fd); +/* registers all known pollers */ +void register_pollers(); + +/* disable the specified poller */ +void disable_poller(const char *poller_name); /* - * Benchmarks performed on a Pentium-M notebook show that using functions - * instead of the usual macros improve the FD_* performance by about 80%, - * and that marking them regparm(2) adds another 20%. + * Initialize the pollers till the best one is found. + * If none works, returns 0, otherwise 1. 
*/ -#if defined(CONFIG_HAP_INLINE_FD_SET) +int init_pollers(); -# define MY_FD_SET FD_SET -# define MY_FD_CLR FD_CLR -# define MY_FD_ISSET FD_ISSET +/* + * Runs the polling loop + */ +void run_poller(); -#else -# define MY_FD_SET my_fd_set -# define MY_FD_CLR my_fd_clr -# define MY_FD_ISSET my_fd_isset +/* FIXME: dirty hack during code transition */ +#define dir_StaticWriteEvent DIR_WR +#define dir_StaticReadEvent DIR_RD +#define dir_DIR_RD DIR_RD +#define dir_DIR_WR DIR_WR -REGPRM2 void my_fd_set(const int fd, fd_set *ev); -REGPRM2 void my_fd_clr(const int fd, fd_set *ev); -REGPRM2 int my_fd_isset(const int fd, const fd_set *ev); +#define MY_FD_SET(fd, ev) (cur_poller.set((fd), dir_##ev)) +#define MY_FD_CLR(fd, ev) (cur_poller.clr((fd), dir_##ev)) +#define MY_FD_ISSET(fd, ev) (cur_poller.isset((fd), dir_##ev)) -#endif +#define EV_FD_SET(fd, ev) (cur_poller.set((fd), dir_##ev)) +#define EV_FD_CLR(fd, ev) (cur_poller.clr((fd), dir_##ev)) +#define EV_FD_ISSET(fd, ev) (cur_poller.isset((fd), dir_##ev)) +#define EV_FD_COND_S(fd, ev) (cur_poller.cond_s((fd), dir_##ev)) +#define EV_FD_COND_C(fd, ev) (cur_poller.cond_c((fd), dir_##ev)) +#define EV_FD_REM(fd) (cur_poller.rem(fd)) +#define EV_FD_CLO(fd) (cur_poller.clo(fd)) /* recomputes the maxfd limit from the fd */ diff --git a/include/types/fd.h b/include/types/fd.h index ae8872bee..6d8b31fb7 100644 --- a/include/types/fd.h +++ b/include/types/fd.h @@ -53,6 +53,43 @@ struct fdtab { int state; /* the state of this fd */ }; +/* + * Poller descriptors. + * - <name> is initialized by the poller's register() function, and should not + * be allocated, just linked to. + * - <pref> is initialized by the poller's register() function. It is set to 0 + * by default, meaning the poller is disabled. init() should set it to 0 in + * case of failure. term() must set it to 0. A generic unoptimized select() + * poller should set it to 100. + * - <private> is initialized by the poller's init() function, and cleaned by + * the term() function.
+ * - cond_s() checks if fd was not set then sets it and returns 1. Otherwise 0. + * - cond_c() checks if fd was set then clears it and returns 1. Otherwise 0. + * - clo() should be used to indicate to the poller that fd will be closed. It + * may be the same as rem() on some pollers. + * - poll() calls the poller, waiting at most wait_time ms. + */ +struct poller { + void *private; /* any private data for the poller */ + REGPRM2 int (*isset)(const int fd, const int dir); /* check if <fd> is being polled for dir <dir> */ + REGPRM2 void (*set)(const int fd, const int dir); /* set polling on <fd> for <dir> */ + REGPRM2 void (*clr)(const int fd, const int dir); /* clear polling on <fd> for <dir> */ + REGPRM2 int (*cond_s)(const int fd, const int dir); /* set polling on <fd> for <dir> if unset */ + REGPRM2 int (*cond_c)(const int fd, const int dir); /* clear polling on <fd> for <dir> if set */ + REGPRM1 void (*rem)(const int fd); /* remove any polling on <fd> */ + REGPRM1 void (*clo)(const int fd); /* mark <fd> as closed */ + REGPRM2 void (*poll)(struct poller *p, int wait_time); /* the poller itself */ + REGPRM1 int (*init)(struct poller *p); /* poller initialization */ + REGPRM1 void (*term)(struct poller *p); /* termination of this poller */ + const char *name; /* poller name */ + int pref; /* try pollers with higher preference first */ +}; + +extern struct poller cur_poller; /* the current poller */ +extern int nbpollers; +#define MAX_POLLERS 10 +extern struct poller pollers[MAX_POLLERS]; /* all registered pollers */ + extern struct fdtab *fdtab; /* array of all the file descriptors */ extern int maxfd; /* # of the highest fd + 1 */ extern int totalconn; /* total # of terminated sessions */ diff --git a/include/types/polling.h b/include/types/polling.h index 821698ee7..ed3cf64dc 100644 --- a/include/types/polling.h +++ b/include/types/polling.h @@ -2,7 +2,7 @@ include/types/polling.h File descriptors and polling definitions.
- Copyright (C) 2000-2006 Willy Tarreau - w@1wt.eu + Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -53,8 +53,6 @@ #define POLL_USE_POLL (1<<1) #define POLL_USE_EPOLL (1<<2) -/* fd states */ -extern fd_set *StaticReadEvent, *StaticWriteEvent; extern int cfg_polling_mechanism; /* POLL_USE_{SELECT|POLL|EPOLL} */ diff --git a/src/ev_epoll.c b/src/ev_epoll.c new file mode 100644 index 000000000..ff49505e2 --- /dev/null +++ b/src/ev_epoll.c @@ -0,0 +1,355 @@ +/* + * FD polling functions for linux epoll() + * + * Copyright 2000-2007 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + +#if defined(USE_MY_EPOLL) +#include +#include +_syscall1 (int, epoll_create, int, size); +_syscall4 (int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event); +_syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, maxevents, int, timeout); +#endif + + +static fd_set *StaticReadEvent, *StaticWriteEvent; +static fd_set *PrevReadEvent, *PrevWriteEvent; + +/* private data */ +static struct epoll_event *epoll_events; +static int epoll_fd; + + +/* + * Benchmarks performed on a Pentium-M notebook show that using functions + * instead of the usual macros improve the FD_* performance by about 80%, + * and that marking them regparm(2) adds another 20%. 
+ */ +REGPRM2 static int __fd_isset(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + return FD_ISSET(fd, ev); +} + +REGPRM2 static void __fd_set(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_SET(fd, ev); +} + +REGPRM2 static void __fd_clr(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_CLR(fd, ev); +} + +REGPRM2 static int __fd_cond_s(const int fd, const int dir) +{ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = !FD_ISSET(fd, ev); + if (ret) + FD_SET(fd, ev); + return ret; +} + +REGPRM2 static int __fd_cond_c(const int fd, const int dir) +{ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = FD_ISSET(fd, ev); + if (ret) + FD_CLR(fd, ev); + return ret; +} + +REGPRM1 static void __fd_rem(const int fd) +{ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); +} + +REGPRM1 static void __fd_clo(const int fd) +{ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); + FD_CLR(fd, PrevReadEvent); + FD_CLR(fd, PrevWriteEvent); +} + + + +/* + * Initialization of the epoll() poller. + * Returns 0 in case of failure, non-zero in case of success. If it fails, it + * disables the poller by setting its pref to 0. 
+ */ +REGPRM1 static int epoll_init(struct poller *p) +{ + __label__ fail_pwevt, fail_prevt, fail_swevt, fail_srevt, fail_ee, fail_fd; + int fd_set_bytes; + + p->private = NULL; + fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE; + + epoll_fd = epoll_create(global.maxsock + 1); + if (epoll_fd < 0) + goto fail_fd; + + epoll_events = (struct epoll_event*) + calloc(1, sizeof(struct epoll_event) * global.maxsock); + + if (epoll_events == NULL) + goto fail_ee; + + if ((PrevReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_prevt; + + if ((PrevWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_pwevt; + + if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_srevt; + + if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_swevt; + + return 1; + + fail_swevt: + free(StaticReadEvent); + fail_srevt: + free(PrevWriteEvent); + fail_pwevt: + free(PrevReadEvent); + fail_prevt: + free(epoll_events); + fail_ee: + close(epoll_fd); + epoll_fd = 0; + fail_fd: + p->pref = 0; + return 0; +} + +/* + * Termination of the epoll() poller. + * Memory is released and the poller is marked as unselectable. 
+ */ +REGPRM1 static void epoll_term(struct poller *p) +{ + if (StaticWriteEvent) + free(StaticWriteEvent); + + if (StaticReadEvent) + free(StaticReadEvent); + + if (PrevWriteEvent) + free(PrevWriteEvent); + + if (PrevReadEvent) + free(PrevReadEvent); + + if (epoll_events) + free(epoll_events); + + close(epoll_fd); + epoll_fd = 0; + + p->private = NULL; + p->pref = 0; +} + +/* + * epoll() poller + */ +REGPRM2 static void epoll_poll(struct poller *p, int wait_time) +{ + int status; + int fd; + + int fds, count; + int pr, pw, sr, sw; + unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */ + struct epoll_event ev; + + for (fds = 0; (fds << INTBITS) < maxfd; fds++) { + + rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds]; + wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds]; + + if ((ro^rn) | (wo^wn)) { + for (count = 0, fd = fds << INTBITS; count < (1<> count) & 1; + pw = (wo >> count) & 1; + sr = (rn >> count) & 1; + sw = (wn >> count) & 1; +#else + pr = FD_ISSET(fd&((1<name = "epoll"; + p->pref = 300; + p->private = NULL; + + p->init = epoll_init; + p->term = epoll_term; + p->poll = epoll_poll; + p->isset = __fd_isset; + p->set = __fd_set; + p->clr = __fd_clr; + p->rem = __fd_rem; + p->clo = __fd_clo; + p->cond_s = __fd_cond_s; + p->cond_c = __fd_cond_c; + return 1; +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/ev_poll.c b/src/ev_poll.c new file mode 100644 index 000000000..2d6d98474 --- /dev/null +++ b/src/ev_poll.c @@ -0,0 +1,264 @@ +/* + * FD polling functions for generic poll() + * + * Copyright 2000-2007 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + + +static fd_set *StaticReadEvent, *StaticWriteEvent; + +/* private data */ +static struct pollfd *poll_events = NULL; + + +/* + * Benchmarks performed on a Pentium-M notebook show that using functions + * instead of the usual macros improve the FD_* performance by about 80%, + * and that marking them regparm(2) adds another 20%. + */ +REGPRM2 static int __fd_isset(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + return FD_ISSET(fd, ev); +} + +REGPRM2 static void __fd_set(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_SET(fd, ev); +} + +REGPRM2 static void __fd_clr(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_CLR(fd, ev); +} + +REGPRM2 static int __fd_cond_s(const int fd, const int dir) +{ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = !FD_ISSET(fd, ev); + if (ret) + FD_SET(fd, ev); + return ret; +} + +REGPRM2 static int __fd_cond_c(const int fd, const int dir) +{ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = FD_ISSET(fd, ev); + if (ret) + FD_CLR(fd, ev); + return ret; +} + +REGPRM1 static void __fd_rem(const int fd) +{ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); +} + + + +/* + * Initialization of the poll() poller. + * Returns 0 in case of failure, non-zero in case of success. If it fails, it + * disables the poller by setting its pref to 0. 
+ */ +REGPRM1 static int poll_init(struct poller *p) +{ + __label__ fail_swevt, fail_srevt, fail_pe; + int fd_set_bytes; + + p->private = NULL; + fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE; + + poll_events = (struct pollfd*) + calloc(1, sizeof(struct pollfd) * global.maxsock); + + if (poll_events == NULL) + goto fail_pe; + + if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_srevt; + + if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_swevt; + + return 1; + + fail_swevt: + free(StaticReadEvent); + fail_srevt: + free(poll_events); + fail_pe: + p->pref = 0; + return 0; +} + +/* + * Termination of the poll() poller. + * Memory is released and the poller is marked as unselectable. + */ +REGPRM1 static void poll_term(struct poller *p) +{ + if (StaticWriteEvent) + free(StaticWriteEvent); + if (StaticReadEvent) + free(StaticReadEvent); + if (poll_events) + free(poll_events); + p->private = NULL; + p->pref = 0; +} + +/* + * Poll() poller + */ +REGPRM2 static void poll_poll(struct poller *p, int wait_time) +{ + int status; + int fd, nbfd; + + int fds, count; + int sr, sw; + unsigned rn, wn; /* read new, write new */ + + nbfd = 0; + for (fds = 0; (fds << INTBITS) < maxfd; fds++) { + + rn = ((int*)StaticReadEvent)[fds]; + wn = ((int*)StaticWriteEvent)[fds]; + + if ((rn|wn)) { + for (count = 0, fd = fds << INTBITS; count < (1<> count) & 1; + sw = (wn >> count) & 1; +#else + sr = FD_ISSET(fd&((1< 0 && count < nbfd; count++) { + fd = poll_events[count].fd; + + if (!(poll_events[count].revents & ( POLLOUT | POLLIN | POLLERR | POLLHUP ))) + continue; + + /* ok, we found one active fd */ + status--; + + if (FD_ISSET(fd, StaticReadEvent)) { + if (fdtab[fd].state == FD_STCLOSE) + continue; + if (poll_events[count].revents & ( POLLIN | POLLERR | POLLHUP )) + fdtab[fd].cb[DIR_RD].f(fd); + } + + if (FD_ISSET(fd, StaticWriteEvent)) { + if (fdtab[fd].state == FD_STCLOSE) + continue; + if 
(poll_events[count].revents & ( POLLOUT | POLLERR | POLLHUP )) + fdtab[fd].cb[DIR_WR].f(fd); + } + } + +} + +/* + * The only exported function. Returns 1. + */ +int poll_register(struct poller *p) +{ + p->name = "poll"; + p->pref = 200; + p->private = NULL; + + p->init = poll_init; + p->term = poll_term; + p->poll = poll_poll; + p->isset = __fd_isset; + p->set = __fd_set; + p->clr = __fd_clr; + p->clo = p->rem = __fd_rem; + p->cond_s = __fd_cond_s; + p->cond_c = __fd_cond_c; + return 1; +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/ev_select.c b/src/ev_select.c new file mode 100644 index 000000000..b1cd44ef3 --- /dev/null +++ b/src/ev_select.c @@ -0,0 +1,264 @@ +/* + * FD polling functions for generic select() + * + * Copyright 2000-2007 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include + + +static fd_set *ReadEvent, *WriteEvent; +static fd_set *StaticReadEvent, *StaticWriteEvent; + + +/* + * Benchmarks performed on a Pentium-M notebook show that using functions + * instead of the usual macros improve the FD_* performance by about 80%, + * and that marking them regparm(2) adds another 20%. 
+ */ +REGPRM2 static int __fd_isset(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + return FD_ISSET(fd, ev); +} + +REGPRM2 static void __fd_set(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_SET(fd, ev); +} + +REGPRM2 static void __fd_clr(const int fd, const int dir) +{ + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + FD_CLR(fd, ev); +} + +REGPRM2 static int __fd_cond_s(const int fd, const int dir) +{ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = !FD_ISSET(fd, ev); + if (ret) + FD_SET(fd, ev); + return ret; +} + +REGPRM2 static int __fd_cond_c(const int fd, const int dir) +{ + int ret; + fd_set *ev; + if (dir == DIR_RD) + ev = StaticReadEvent; + else + ev = StaticWriteEvent; + + ret = FD_ISSET(fd, ev); + if (ret) + FD_CLR(fd, ev); + return ret; +} + +REGPRM1 static void __fd_rem(const int fd) +{ + FD_CLR(fd, StaticReadEvent); + FD_CLR(fd, StaticWriteEvent); +} + + +/* + * Initialization of the select() poller. + * Returns 0 in case of failure, non-zero in case of success. If it fails, it + * disables the poller by setting its pref to 0. 
+ */ +REGPRM1 static int select_init(struct poller *p) +{ + __label__ fail_swevt, fail_srevt, fail_wevt, fail_revt; + int fd_set_bytes; + + p->private = NULL; + fd_set_bytes = sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE; + + if ((ReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_revt; + + if ((WriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_wevt; + + if ((StaticReadEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_srevt; + + if ((StaticWriteEvent = (fd_set *)calloc(1, fd_set_bytes)) == NULL) + goto fail_swevt; + + return 1; + + fail_swevt: + free(StaticReadEvent); + fail_srevt: + free(WriteEvent); + fail_wevt: + free(ReadEvent); + fail_revt: + p->pref = 0; + return 0; +} + +/* + * Termination of the select() poller. + * Memory is released and the poller is marked as unselectable. + */ +REGPRM1 static void select_term(struct poller *p) +{ + if (StaticWriteEvent) + free(StaticWriteEvent); + if (StaticReadEvent) + free(StaticReadEvent); + if (WriteEvent) + free(WriteEvent); + if (ReadEvent) + free(ReadEvent); + p->private = NULL; + p->pref = 0; +} + +/* + * Select() poller + */ +REGPRM2 static void select_poll(struct poller *p, int wait_time) +{ + int status; + int fd, i; + struct timeval delta; + int readnotnull, writenotnull; + int fds; + char count; + + /* allow select to return immediately when needed */ + delta.tv_sec = delta.tv_usec = 0; + if (wait_time > 0) { /* FIXME */ + /* Convert to timeval */ + /* to avoid eventual select loops due to timer precision */ + wait_time += SCHEDULER_RESOLUTION; + delta.tv_sec = wait_time / 1000; + delta.tv_usec = (wait_time % 1000) * 1000; + } + + /* let's restore fdset state */ + + readnotnull = 0; writenotnull = 0; + for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) { + readnotnull |= (*(((int*)ReadEvent)+i) = *(((int*)StaticReadEvent)+i)) != 0; + writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0; + } + + // /* 
just a verification code, needs to be removed for performance */ + // for (i=0; i= 0) ? &delta : NULL); + + tv_now(&now); + + if (status <= 0) + return; + + for (fds = 0; (fds << INTBITS) < maxfd; fds++) { + if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) == 0) + continue; + + for (count = 1<name = "select"; + p->pref = 150; + p->private = NULL; + + p->init = select_init; + p->term = select_term; + p->poll = select_poll; + p->isset = __fd_isset; + p->set = __fd_set; + p->clr = __fd_clr; + p->clo = p->rem = __fd_rem; + p->cond_s = __fd_cond_s; + p->cond_c = __fd_cond_c; + return 1; +} + + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/fd.c b/src/fd.c index 5fd3c27c0..18ccb2456 100644 --- a/src/fd.c +++ b/src/fd.c @@ -1,7 +1,7 @@ /* * File descriptors management functions. * - * Copyright 2000-2006 Willy Tarreau + * Copyright 2000-2007 Willy Tarreau * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -10,501 +10,49 @@ * */ -/* - * FIXME: - * - we still use 'listeners' to check whether we want to stop or not. - * - the various pollers should be moved to other external files, possibly - * dynamic libs. 
- */ - +#include #include -#include #include #include #include -#include #include #include #include -#include -#include struct fdtab *fdtab = NULL; /* array of all the file descriptors */ int maxfd; /* # of the highest fd + 1 */ int totalconn; /* total # of terminated sessions */ int actconn; /* # of active sessions */ -fd_set *StaticReadEvent, *StaticWriteEvent; int cfg_polling_mechanism = 0; /* POLL_USE_{SELECT|POLL|EPOLL} */ - -/****************************** - * pollers - ******************************/ - - -#if !defined(CONFIG_HAP_INLINE_FD_SET) -/* - * Benchmarks performed on a Pentium-M notebook show that using functions - * instead of the usual macros improve the FD_* performance by about 80%, - * and that marking them regparm(2) adds another 20%. - */ -REGPRM2 void my_fd_set(const int fd, fd_set *ev) -{ - FD_SET(fd, ev); -} - -REGPRM2 void my_fd_clr(const int fd, fd_set *ev) -{ - FD_CLR(fd, ev); -} - -REGPRM2 int my_fd_isset(const int fd, const fd_set *ev) -{ - return FD_ISSET(fd, ev); -} -#endif - - -/* - * FIXME: this is dirty, but at the moment, there's no other solution to remove - * the old FDs from outside the loop. Perhaps we should export a global 'poll' - * structure with pointers to functions such as init_fd() and close_fd(), plus - * a private structure with several pointers to places such as below. - */ - -#if defined(ENABLE_EPOLL) -fd_set *PrevReadEvent = NULL, *PrevWriteEvent = NULL; - -#if defined(USE_MY_EPOLL) -#include -#include -_syscall1 (int, epoll_create, int, size); -_syscall4 (int, epoll_ctl, int, epfd, int, op, int, fd, struct epoll_event *, event); -_syscall4 (int, epoll_wait, int, epfd, struct epoll_event *, events, int, maxevents, int, timeout); -#endif - -/* - * Main epoll() loop. - * does 3 actions : - * 0 (POLL_LOOP_ACTION_INIT) : initializes necessary private structures - * 1 (POLL_LOOP_ACTION_RUN) : runs the loop - * 2 (POLL_LOOP_ACTION_CLEAN) : cleans up - * - * returns 0 if initialization failed, !0 otherwise. 
- */ - -int epoll_loop(int action) -{ - int next_time; - int status; - int fd; - - int fds, count; - int pr, pw, sr, sw; - unsigned rn, ro, wn, wo; /* read new, read old, write new, write old */ - struct epoll_event ev; - - /* private data */ - static struct epoll_event *epoll_events = NULL; - static int epoll_fd; - - if (action == POLL_LOOP_ACTION_INIT) { - epoll_fd = epoll_create(global.maxsock + 1); - if (epoll_fd < 0) - return 0; - else { - epoll_events = (struct epoll_event*) - calloc(1, sizeof(struct epoll_event) * global.maxsock); - PrevReadEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - PrevWriteEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - } - return 1; - } - else if (action == POLL_LOOP_ACTION_CLEAN) { - if (PrevWriteEvent) free(PrevWriteEvent); - if (PrevReadEvent) free(PrevReadEvent); - if (epoll_events) free(epoll_events); - close(epoll_fd); - epoll_fd = 0; - return 1; - } - - /* OK, it's POLL_LOOP_ACTION_RUN */ - - tv_now(&now); - - while (1) { - next_time = process_runnable_tasks(); - - /* stop when there's no connection left and we don't allow them anymore */ - if (!actconn && listeners == 0) - break; - - for (fds = 0; (fds << INTBITS) < maxfd; fds++) { - - rn = ((int*)StaticReadEvent)[fds]; ro = ((int*)PrevReadEvent)[fds]; - wn = ((int*)StaticWriteEvent)[fds]; wo = ((int*)PrevWriteEvent)[fds]; - - if ((ro^rn) | (wo^wn)) { - for (count = 0, fd = fds << INTBITS; count < (1<> count) & 1; - pw = (wo >> count) & 1; - sr = (rn >> count) & 1; - sw = (wn >> count) & 1; -#else - pr = FD_ISSET(fd&((1<> count) & 1; - sw = (wn >> count) & 1; -#else - sr = FD_ISSET(fd&((1< 0 && count < nbfd; count++) { - fd = poll_events[count].fd; - - if (!(poll_events[count].revents & ( POLLOUT | POLLIN | POLLERR | POLLHUP ))) - continue; - - /* ok, we found one active fd */ - status--; - - if (FD_ISSET(fd, StaticReadEvent)) { - if (fdtab[fd].state == FD_STCLOSE) - 
continue; - if (poll_events[count].revents & ( POLLIN | POLLERR | POLLHUP )) - fdtab[fd].cb[DIR_RD].f(fd); - } - - if (FD_ISSET(fd, StaticWriteEvent)) { - if (fdtab[fd].state == FD_STCLOSE) - continue; - if (poll_events[count].revents & ( POLLOUT | POLLERR | POLLHUP )) - fdtab[fd].cb[DIR_WR].f(fd); - } - } - } - return 1; -} -#endif - - - -/* - * Main select() loop. - * does 3 actions : - * 0 (POLL_LOOP_ACTION_INIT) : initializes necessary private structures - * 1 (POLL_LOOP_ACTION_RUN) : runs the loop - * 2 (POLL_LOOP_ACTION_CLEAN) : cleans up - * - * returns 0 if initialization failed, !0 otherwise. - */ - - -int select_loop(int action) -{ - int next_time; - int status; - int fd,i; - struct timeval delta; - int readnotnull, writenotnull; - static fd_set *ReadEvent = NULL, *WriteEvent = NULL; - - if (action == POLL_LOOP_ACTION_INIT) { - ReadEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - WriteEvent = (fd_set *) - calloc(1, sizeof(fd_set) * (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - return 1; - } - else if (action == POLL_LOOP_ACTION_CLEAN) { - if (WriteEvent) free(WriteEvent); - if (ReadEvent) free(ReadEvent); - return 1; - } - - /* OK, it's POLL_LOOP_ACTION_RUN */ - - tv_now(&now); - - while (1) { - next_time = process_runnable_tasks(); - - /* stop when there's no connection left and we don't allow them anymore */ - if (!actconn && listeners == 0) - break; - - if (next_time > 0) { /* FIXME */ - /* Convert to timeval */ - /* to avoid eventual select loops due to timer precision */ - next_time += SCHEDULER_RESOLUTION; - delta.tv_sec = next_time / 1000; - delta.tv_usec = (next_time % 1000) * 1000; - } - else if (next_time == 0) { /* allow select to return immediately when needed */ - delta.tv_sec = delta.tv_usec = 0; - } - - - /* let's restore fdset state */ - - readnotnull = 0; writenotnull = 0; - for (i = 0; i < (maxfd + FD_SETSIZE - 1)/(8*sizeof(int)); i++) { - readnotnull |= (*(((int*)ReadEvent)+i) = 
*(((int*)StaticReadEvent)+i)) != 0; - writenotnull |= (*(((int*)WriteEvent)+i) = *(((int*)StaticWriteEvent)+i)) != 0; - } - - // /* just a verification code, needs to be removed for performance */ - // for (i=0; i= 0) ? &delta : NULL); - - /* this is an experiment on the separation of the select work */ - // status = (readnotnull ? select(maxfd, ReadEvent, NULL, NULL, (next_time >= 0) ? &delta : NULL) : 0); - // status |= (writenotnull ? select(maxfd, NULL, WriteEvent, NULL, (next_time >= 0) ? &delta : NULL) : 0); - - tv_now(&now); - - if (status > 0) { /* must proceed with events */ - - int fds; - char count; - - for (fds = 0; (fds << INTBITS) < maxfd; fds++) - if ((((int *)(ReadEvent))[fds] | ((int *)(WriteEvent))[fds]) != 0) - for (count = 1< bp->pref)) + bp = &pollers[p]; + + if (!bp || bp->pref == 0) + break; + + if (bp->init(bp)) { + memcpy(&cur_poller, bp, sizeof(*bp)); + return 1; + } + } while (!bp || bp->pref == 0); + return 0; +} + /* * Local variables: * c-indent-level: 8 diff --git a/src/haproxy.c b/src/haproxy.c index 5b3ade2b2..7b23e2fa5 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -507,18 +507,34 @@ void init(int argc, char **argv) if (global.nbproc < 1) global.nbproc = 1; - StaticReadEvent = (fd_set *)calloc(1, - sizeof(fd_set) * - (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - StaticWriteEvent = (fd_set *)calloc(1, - sizeof(fd_set) * - (global.maxsock + FD_SETSIZE - 1) / FD_SETSIZE); - fdtab = (struct fdtab *)calloc(1, sizeof(struct fdtab) * (global.maxsock)); for (i = 0; i < global.maxsock; i++) { fdtab[i].state = FD_STCLOSE; } + + register_pollers(); + /* Note: we could register external pollers here */ + + if (!(cfg_polling_mechanism & POLL_USE_EPOLL)) + disable_poller("epoll"); + + if (!(cfg_polling_mechanism & POLL_USE_POLL)) + disable_poller("poll"); + + if (!(cfg_polling_mechanism & POLL_USE_SELECT)) + disable_poller("select"); + + /* Note: we could disable any poller by name here */ + + if (!init_pollers()) { + Alert("No polling 
mechanism available\n"); + exit(1); + } + if (global.mode & MODE_DEBUG) { + printf("Note: using %s() as the polling mechanism.\n", cur_poller.name); + } + } void deinit(void) @@ -603,8 +619,6 @@ void deinit(void) if (global.chroot) free(global.chroot); if (global.pidfile) free(global.pidfile); - if (StaticReadEvent) free(StaticReadEvent); - if (StaticWriteEvent) free(StaticWriteEvent); if (fdtab) free(fdtab); pool_destroy(pool_session); @@ -628,6 +642,30 @@ static void tell_old_pids(int sig) kill(oldpids[p], sig); } +/* + * Runs the polling loop + * + * FIXME: + * - we still use 'listeners' to check whether we want to stop or not. + * + */ +void run_poll_loop() +{ + int next_time; + tv_now(&now); + + while (1) { + next_time = process_runnable_tasks(); + + /* stop when there's no connection left and we don't allow them anymore */ + if (!actconn && listeners == 0) + break; + + cur_poller.poll(&cur_poller, next_time); + } +} + + int main(int argc, char **argv) { int err, retry; @@ -860,41 +898,10 @@ int main(int argc, char **argv) setsid(); } -#if defined(ENABLE_EPOLL) - if (cfg_polling_mechanism & POLL_USE_EPOLL) { - if (epoll_loop(POLL_LOOP_ACTION_INIT)) { - epoll_loop(POLL_LOOP_ACTION_RUN); - epoll_loop(POLL_LOOP_ACTION_CLEAN); - cfg_polling_mechanism &= POLL_USE_EPOLL; - } - else { - Warning("epoll() is not available. Using poll()/select() instead.\n"); - cfg_polling_mechanism &= ~POLL_USE_EPOLL; - } - } -#endif - -#if defined(ENABLE_POLL) - if (cfg_polling_mechanism & POLL_USE_POLL) { - if (poll_loop(POLL_LOOP_ACTION_INIT)) { - poll_loop(POLL_LOOP_ACTION_RUN); - poll_loop(POLL_LOOP_ACTION_CLEAN); - cfg_polling_mechanism &= POLL_USE_POLL; - } - else { - Warning("poll() is not available. 
Using select() instead.\n"); - cfg_polling_mechanism &= ~POLL_USE_POLL; - } - } -#endif - if (cfg_polling_mechanism & POLL_USE_SELECT) { - if (select_loop(POLL_LOOP_ACTION_INIT)) { - select_loop(POLL_LOOP_ACTION_RUN); - select_loop(POLL_LOOP_ACTION_CLEAN); - cfg_polling_mechanism &= POLL_USE_SELECT; - } - } - + /* + * That's it : the central polling loop. Run until we stop. + */ + run_poll_loop(); /* Free all Hash Keys and all Hash elements */ appsession_cleanup();