By default, pools of comparable sizes are merged together. However, the
current algorithm is dumb: it rounds the requested size up to the next
multiple of 16 and compares sizes on that basis alone. This results in
many entries which are already multiples of 16 not being merged; for
example 1024 and 1032 are separate, 65536 and 65540 are separate, and
48 and 56 are separate (though 56 merges with 64).
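To illustrate, a minimal sketch of that old rounding (the macro name is
illustrative, not the actual code):

    /* old behaviour: two requested sizes could only share a pool when
     * they rounded up to the same multiple of 16
     */
    #define ROUND16(sz) (((sz) + 15) & ~15)

    /* ROUND16(1024) = 1024 but ROUND16(1032) = 1040 => never merged;
     * ROUND16(56) = 64 merges with a 64-byte pool while ROUND16(48) = 48
     * stays alone.
     */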
This commit changes this to consider not just the entry size but also the
average entry size: it compares the average size of all objects sharing
the pool with the size of the object looking for a pool. If the object is
no more than 1% bigger or smaller than the current average size, or if it
is no more than 16 bytes smaller or larger, then it can be merged. Also,
exact matches are always respected, in order to avoid merging objects
into larger pools or, worse, extending existing ones for no reason; and
when there's a tie, extending an existing pool is always avoided.
Also, we now visit all existing pools in order to spot the best one; we
no longer stop at the smallest one that is large enough. Theoretically
this could cost a bit of CPU, but in practice it's O(N^2) with N quite
small (typically on the order of 100) and the cost of each step is very
low (comparing a few integer values). As a side effect, however, pools
are no longer sorted by size, so "show pools bysize" is needed to list
them that way.
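The selection can be sketched as follows. This is a simplified
illustration of the heuristic described above, not the actual
create_pool() code; it assumes haproxy's global <pools> list and the
sum_size/users fields of struct pool_head shown further down:

    static struct pool_head *find_merge_candidate(unsigned int size)
    {
            struct pool_head *pool, *best = NULL;

            list_for_each_entry(pool, &pools, list) { /* visit ALL pools */
                    /* users >= 1 for any registered pool */
                    unsigned int avg = pool->sum_size / pool->users;

                    if (pool->size == size)
                            return pool; /* exact match always preferred */

                    /* mergeable only when within 1% or within 16 bytes
                     * of the pool's current average entry size, in
                     * either direction
                     */
                    if (size > avg && size - avg > 16 && (size - avg) * 100 > avg)
                            continue;
                    if (size < avg && avg - size > 16 && (avg - size) * 100 > avg)
                            continue;

                    /* among candidates, prefer one that is already large
                     * enough so no existing pool has to be extended
                     * (ties keep the current best, i.e. no extension)
                     */
                    if (!best || (pool->size >= size && best->size < size))
                            best = pool;
            }
            return best;
    }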
This causes the objects to be much better grouped together, accepting to
use a little bit more memory sometimes to avoid fragmentation, without
causing everyone to be merged into the same pool. Thanks to this we're
now seeing 36 pools instead of 48 by default, with some very nice
examples of compact grouping:
- Pool qc_stream_r (80 bytes) : 13 users
> qc_stream_r : size=72 flags=0x1 align=0
> quic_cstrea : size=80 flags=0x1 align=0
> qc_stream_a : size=64 flags=0x1 align=0
> hlua_esub : size=64 flags=0x1 align=0
> stconn : size=80 flags=0x1 align=0
> dns_query : size=64 flags=0x1 align=0
> vars : size=80 flags=0x1 align=0
> filter : size=64 flags=0x1 align=0
> session pri : size=64 flags=0x1 align=0
> fcgi_hdr_ru : size=72 flags=0x1 align=0
> fcgi_param_ : size=72 flags=0x1 align=0
> pendconn : size=80 flags=0x1 align=0
> capture : size=64 flags=0x1 align=0
- Pool h3s (56 bytes) : 17 users
> h3s : size=56 flags=0x1 align=0
> qf_crypto : size=48 flags=0x1 align=0
> quic_tls_se : size=48 flags=0x1 align=0
> quic_arng : size=56 flags=0x1 align=0
> hlua_flt_ct : size=56 flags=0x1 align=0
> promex_metr : size=48 flags=0x1 align=0
> conn_hash_n : size=56 flags=0x1 align=0
> resolv_requ : size=48 flags=0x1 align=0
> mux_pt : size=40 flags=0x1 align=0
> comp_state : size=40 flags=0x1 align=0
> notificatio : size=48 flags=0x1 align=0
> tasklet : size=56 flags=0x1 align=0
> bwlim_state : size=48 flags=0x1 align=0
> xprt_handsh : size=48 flags=0x1 align=0
> email_alert : size=56 flags=0x1 align=0
> caphdr : size=41 flags=0x1 align=0
> caphdr : size=41 flags=0x1 align=0
- Pool quic_cids (32 bytes) : 13 users
> quic_cids : size=16 flags=0x1 align=0
> quic_tls_ke : size=32 flags=0x1 align=0
> quic_tls_iv : size=12 flags=0x1 align=0
> cbuf : size=32 flags=0x1 align=0
> hlua_queuew : size=24 flags=0x1 align=0
> hlua_queue : size=24 flags=0x1 align=0
> promex_modu : size=24 flags=0x1 align=0
> cache_st : size=24 flags=0x1 align=0
> spoe_appctx : size=32 flags=0x1 align=0
> ehdl_sub_tc : size=32 flags=0x1 align=0
> fcgi_flt_ct : size=16 flags=0x1 align=0
> sig_handler : size=32 flags=0x1 align=0
> pipe : size=24 flags=0x1 align=0
- Pool quic_crypto (1032 bytes) : 2 users
> quic_crypto : size=1032 flags=0x1 align=0
> requri : size=1024 flags=0x1 align=0
- Pool quic_conn_r (65544 bytes) : 2 users
> quic_conn_r : size=65536 flags=0x1 align=0
> dns_msg_buf : size=65540 flags=0x1 align=0
On a very unscientific test consisting of sending 1 million H1 requests
and 1 million H2 requests to the stats page, we're seeing ~6% lower
memory usage with the patch:
before the patch:
Total: 48 pools, 4120832 bytes allocated, 4120832 used (~3555680 by thread caches).
after the patch:
Total: 36 pools, 3880648 bytes allocated, 3880648 used (~3299064 by thread caches).
This should be taken with care however since pools allocate and release
in batches.
/*
 * include/haproxy/pool-t.h
 * Memory pools configuration and type definitions.
 *
 * Copyright (C) 2000-2020 Willy Tarreau - w@1wt.eu
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _HAPROXY_POOL_T_H
#define _HAPROXY_POOL_T_H

#include <haproxy/api-t.h>
#include <haproxy/list-t.h>

#define MEM_F_SHARED    0x1
#define MEM_F_EXACT     0x2

/* A special pointer for the pool's free_list that indicates someone is
 * currently manipulating it. Serves as a short-lived lock.
 */
#define POOL_BUSY ((void *)1)

#define POOL_AVG_SAMPLES 1024

/* possible flags for __pool_alloc() */
#define POOL_F_NO_POISON    0x00000001  // do not poison the area
#define POOL_F_MUST_ZERO    0x00000002  // zero the returned area
#define POOL_F_NO_FAIL      0x00000004  // do not randomly fail

/* pool debugging flags */
#define POOL_DBG_FAIL_ALLOC 0x00000001  // randomly fail memory allocations
#define POOL_DBG_DONT_MERGE 0x00000002  // do not merge same-size pools
#define POOL_DBG_COLD_FIRST 0x00000004  // pick cold objects first
#define POOL_DBG_INTEGRITY  0x00000008  // perform integrity checks on cache
#define POOL_DBG_NO_GLOBAL  0x00000010  // disable global pools
#define POOL_DBG_NO_CACHE   0x00000020  // disable thread-local pool caches
#define POOL_DBG_CALLER     0x00000040  // trace last caller's location
#define POOL_DBG_TAG        0x00000080  // place a tag at the end of the area
#define POOL_DBG_POISON     0x00000100  // poison memory area on pool_alloc()
#define POOL_DBG_UAF        0x00000200  // enable use-after-free protection


/* This is the head of a thread-local cache */
struct pool_cache_head {
        struct list list;       /* head of objects in this pool */
        unsigned int count;     /* number of objects in this pool */
        unsigned int tid;       /* thread id, for debugging only */
        struct pool_head *pool; /* assigned pool, for debugging only */
        ulong fill_pattern;     /* pattern used to fill the area on free */
} THREAD_ALIGNED(64);

/* This describes a pool registration, which is what was passed to
 * create_pool() and that might have been merged with an existing pool.
 */
struct pool_registration {
        struct list list;       /* link element */
        char name[12];          /* name of the pool */
        unsigned int size;      /* expected object size */
        unsigned int flags;     /* MEM_F_* */
        unsigned int align;     /* expected alignment; 0=unspecified */
};

/* This represents one item stored in the thread-local cache. <by_pool> links
 * the object to the list of objects in the pool, and <by_lru> links the object
 * to the local thread's list of hottest objects. This way it's possible to
 * allocate a fresh object from the cache, or to release cold objects from any
 * pool (no bookkeeping is needed since shared pools do not know how many
 * objects they store).
 */
struct pool_cache_item {
        struct list by_pool;    /* link to objects in this pool */
        struct list by_lru;     /* link to objects by LRU order */
};

/* This structure is used to represent an element in the pool's shared
 * free_list. An item may carry a series of other items allocated or released
 * as a same cluster. The storage then looks like this:
 *     +------+   +------+   +------+
 *  -->| next |-->| next |-->| NULL |
 *     +------+   +------+   +------+
 *     | NULL |   | down |   | down |
 *     +------+   +--|---+   +--|---+
 *                   V          V
 *                +------+   +------+
 *                | NULL |   | NULL |
 *                +------+   +------+
 *                | down |   | NULL |
 *                +--|---+   +------+
 *                   V
 *                +------+
 *                | NULL |
 *                +------+
 *                | NULL |
 *                +------+
 */
struct pool_item {
        struct pool_item *next;
        struct pool_item *down; // link to other items of the same cluster
};

/* This describes a complete pool, with its status, usage statistics and the
 * thread-local caches if any. Even if pools are disabled, these descriptors
 * are valid and are used at least to get names and sizes. For small builds
 * using neither threads nor pools, this structure might be reduced, and
 * alignment could be removed.
 */
struct pool_head {
        /* read-mostly part, purely configuration */
        unsigned int limit;     /* hard limit on the number of chunks */
        unsigned int minavail;  /* how many chunks are expected to be used */
        unsigned int size;      /* chunk size */
        unsigned int flags;     /* MEM_F_* */
        unsigned int users;     /* number of pools sharing this zone */
        unsigned int alloc_sz;  /* allocated size (includes hidden fields) */
        unsigned int sum_size;  /* sum of all registered users' size */
        struct list list;       /* list of all known pools */
        void *base_addr;        /* allocation address, for free() */
        char name[12];          /* name of the pool */
        struct list regs;       /* registrations: alt names for this pool */

        /* heavily read-write part */
        THREAD_ALIGN(64);

        /* these entries depend on the pointer value, they're used to reduce
         * the contention on fast-changing values. The alignment here is
         * important since the purpose is to lower the thread contention.
         * The free_list and used/allocated are not related, the array is
         * just meant to shard elements and there are no per-free_list stats.
         */
        struct {
                THREAD_ALIGN(64);
                struct pool_item *free_list; /* list of free shared objects */
                unsigned int allocated;      /* how many chunks have been allocated */
                unsigned int used;           /* how many chunks are currently in use */
                unsigned int needed_avg;     /* floating indicator between used and allocated */
                unsigned int failed;         /* failed allocations (indexed by hash of TID) */
        } buckets[CONFIG_HAP_POOL_BUCKETS];

        struct pool_cache_head cache[MAX_THREADS] THREAD_ALIGNED(64); /* pool caches */
} __attribute__((aligned(64)));

#endif /* _HAPROXY_POOL_T_H */

/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */
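For context, a hedged usage sketch of these types: the struct item type
and the helper functions below are hypothetical, while create_pool(),
pool_alloc() and pool_free() are the real entry points declared in
haproxy/pool.h. MEM_F_SHARED is what opts a pool into the merging
described above.

    #include <haproxy/pool.h>

    struct item {
            struct list list;
            unsigned int id;
    };

    static struct pool_head *pool_head_item;

    static int init_item_pool(void)
    {
            /* with MEM_F_SHARED this may return an existing pool whose
             * average entry size is close enough to sizeof(struct item)
             */
            pool_head_item = create_pool("item", sizeof(struct item), MEM_F_SHARED);
            return pool_head_item != NULL;
    }

    static struct item *new_item(void)
    {
            /* served from the thread-local cache when possible */
            return pool_alloc(pool_head_item);
    }

    static void free_item(struct item *itm)
    {
            pool_free(pool_head_item, itm);
    }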