diff --git a/doc/configuration.txt b/doc/configuration.txt
index 4b0074d08..9a064160b 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -4219,7 +4219,10 @@ tune.h2.be.glitches-threshold
   zero value here should probably be in the hundreds or thousands to be
   effective without affecting slightly bogus servers. It is also possible to
   only kill connections when the CPU usage crosses a certain level, by using
-  "tune.glitches.kill.cpu-usage".
+  "tune.glitches.kill.cpu-usage". Note that a graceful close is attempted at
+  75% of the configured threshold by advertising a GOAWAY for a future stream.
+  This ensures that a slightly faulty connection will stop being used after
+  some time, without the risk of interrupting ongoing transfers.
 
   See also: tune.h2.fe.glitches-threshold, bc_glitches, and
             tune.glitches.kill.cpu-usage
@@ -4276,7 +4279,11 @@ tune.h2.fe.glitches-threshold
   zero value here should probably be in the hundreds or thousands to be
   effective without affecting slightly bogus clients. It is also possible to
   only kill connections when the CPU usage crosses a certain level, by using
-  "tune.glitches.kill.cpu-usage".
+  "tune.glitches.kill.cpu-usage". Note that a graceful close is attempted at
+  75% of the configured threshold by advertising a GOAWAY for a future stream.
+  This ensures that a slightly non-compliant client will have the opportunity
+  to create a new connection and continue to work unaffected, without ever
+  triggering the hard close, which would risk interrupting ongoing transfers.
 
   See also: tune.h2.be.glitches-threshold, fc_glitches, and
             tune.glitches.kill.cpu-usage
diff --git a/src/mux_h2.c b/src/mux_h2.c
index 9c741b43b..9df9f2399 100644
--- a/src/mux_h2.c
+++ b/src/mux_h2.c
@@ -533,6 +533,7 @@ struct task *h2_timeout_task(struct task *t, void *context, unsigned int state);
 static int h2_send(struct h2c *h2c);
 static int h2_recv(struct h2c *h2c);
 static int h2_process(struct h2c *h2c);
+static int h2c_send_goaway_error(struct h2c *h2c, struct h2s *h2s);
 /* h2_io_cb is exported to see it resolved in "show fd" */
 struct task *h2_io_cb(struct task *t, void *ctx, unsigned int state);
 static inline struct h2s *h2c_st_by_id(struct h2c *h2c, int id);
@@ -1709,10 +1710,25 @@ static inline int _h2c_report_glitch(struct h2c *h2c, int increment)
 		h2_be_glitches_threshold : h2_fe_glitches_threshold;
 
 	h2c->glitches += increment;
-	if (thres && h2c->glitches >= thres &&
-	    (th_ctx->idle_pct <= global.tune.glitch_kill_maxidle)) {
-		h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM);
-		return 1;
+	if (unlikely(thres && h2c->glitches >= (thres * 3 + 1) / 4)) {
+		/* at 75% of the threshold, we switch to close mode
+		 * to force clients to periodically reconnect.
+		 */
+		if (h2c->last_sid <= 0 ||
+		    h2c->last_sid > h2c->max_id + 2 * h2c_max_concurrent_streams(h2c)) {
+			/* not set yet or was too high */
+			h2c->last_sid = h2c->max_id + 2 * h2c_max_concurrent_streams(h2c);
+			h2c_send_goaway_error(h2c, NULL);
+		}
+
+		/* at 100% of the threshold, and under excessive CPU usage,
+		 * we also actively kill the connection.
+		 */
+		if (h2c->glitches >= thres &&
+		    (th_ctx->idle_pct <= global.tune.glitch_kill_maxidle)) {
+			h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM);
+			return 1;
+		}
 	}
 	return 0;
 }
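
A note on the arithmetic above: (thres * 3 + 1) / 4 is an integer
approximation of 75% of the threshold, and the +1 guarantees a non-zero
trigger point even for a threshold of 1, so the graceful stage always arms
before the hard one. The following standalone sketch models the two-stage
decision flow of _h2c_report_glitch(); everything in it (struct conn_sketch,
report_glitch(), the values in main()) is hypothetical and for illustration
only, while the 75% expression, the last_sid/max_id handling and the
idle-percentage gate are taken from the patch itself.

  #include <stdio.h>

  /* hypothetical, stripped-down model of the h2c fields involved */
  struct conn_sketch {
          int glitches;  /* cumulative glitch counter */
          int last_sid;  /* stream ID advertised in GOAWAY, <= 0 when unset */
          int max_id;    /* highest stream ID seen on this connection */
  };

  /* integer approximation of 75%, never zero for thres >= 1 */
  static int graceful_point(int thres)
  {
          return (thres * 3 + 1) / 4;
  }

  /* returns non-zero when the caller should hard-kill the connection */
  static int report_glitch(struct conn_sketch *c, int thres, int max_streams,
                           int idle_pct, int kill_maxidle)
  {
          if (thres && c->glitches >= graceful_point(thres)) {
                  /* stage 1 (75%): advertise a GOAWAY for a future stream
                   * so in-flight streams finish and the peer reconnects;
                   * the real code sends it via h2c_send_goaway_error().
                   */
                  if (c->last_sid <= 0 ||
                      c->last_sid > c->max_id + 2 * max_streams)
                          c->last_sid = c->max_id + 2 * max_streams;

                  /* stage 2 (100%): kill only when the thread is busy
                   * enough, i.e. its idle percentage is at or below the
                   * configured cap.
                   */
                  if (c->glitches >= thres && idle_pct <= kill_maxidle)
                          return 1;
          }
          return 0;
  }

  int main(void)
  {
          struct conn_sketch c = { .glitches = 0, .last_sid = 0, .max_id = 9 };
          int g;

          for (g = 740; g <= 1000; g += 10) {
                  int kill;

                  c.glitches = g;
                  kill = report_glitch(&c, 1000, 100, 10, 20);
                  printf("glitches=%4d last_sid=%d kill=%d\n",
                         g, c.last_sid, kill);
          }
          return 0;
  }

With a threshold of 1000, the output shows last_sid jumping from 0 to 209
(max_id 9 plus twice 100 concurrent streams) at 750 glitches, i.e. 75% of
the threshold, while kill only turns to 1 at 1000 glitches. In an actual
setup the equivalent knobs would sit in the global section, e.g.
"tune.h2.fe.glitches-threshold 1000" together with
"tune.glitches.kill.cpu-usage 80" (both values illustrative).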