From d865e09d9d62073ff6d723a7a48058dd04784ae4 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Fri, 23 Jun 2023 19:16:05 +0200
Subject: [PATCH 01/22] core: Change filter input/output pending API to return
OK/AGAIN/DONE.
Rename the ap_filter_input_pending() and ap_filter_output_pending() core
hooks to ap_core_input_pending() and ap_core_output_pending() respectively.
Change them to return AGAIN when some data are pending.
Add ap_check_input_pending() and ap_check_output_pending() to run the hooks and
fix the return value (DECLINED -> OK, c->aborted -> DONE).
Adapt the callers to the new API (DECLINED -> OK, OK -> AGAIN, DONE).
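As a rough sketch of the new contract (not part of the change itself; my_write_completion and the my_next_step values are made up, only ap_check_*_pending() and OK/AGAIN/DONE come from this patch), an MPM-side caller now maps the return values like this:

    #include "httpd.h"
    #include "util_filter.h"

    enum my_next_step { MY_POLL_WRITE, MY_PROCESS, MY_KEEPALIVE, MY_LINGER };

    static enum my_next_step my_write_completion(conn_rec *c)
    {
        int rc = ap_check_output_pending(c);
        if (rc == AGAIN)       /* output still pending, poll for writability */
            return MY_POLL_WRITE;
        if (rc != OK)          /* DONE (c->aborted) or an error */
            return MY_LINGER;
        if (ap_check_input_pending(c) == AGAIN)
            return MY_PROCESS; /* pipelined input already buffered */
        return MY_KEEPALIVE;
    }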
---
include/ap_mmn.h | 4 ++-
include/httpd.h | 2 ++
include/mpm_common.h | 18 -------------
include/util_filter.h | 48 ++++++++++++++++++++++++++++-------
modules/http/http_request.c | 2 +-
modules/proxy/proxy_util.c | 10 ++++----
server/core.c | 4 +--
server/mpm/event/event.c | 12 ++++-----
server/mpm/motorz/motorz.c | 24 ++++++++++--------
server/mpm/simple/simple_io.c | 23 ++++++++++-------
server/util_filter.c | 41 ++++++++++++++++++++++--------
11 files changed, 117 insertions(+), 71 deletions(-)
diff --git a/include/ap_mmn.h b/include/ap_mmn.h
index ad4b77bd33a..acfa61e22b5 100644
--- a/include/ap_mmn.h
+++ b/include/ap_mmn.h
@@ -731,6 +731,8 @@
* and AP_REQUEST_TRUSTED_CT BNOTE.
* 20211221.24 (2.5.1-dev) Add ap_proxy_fixup_uds_filename()
* 20211221.25 (2.5.1-dev) AP_SLASHES and AP_IS_SLASH
+ * 20211221.26 (2.5.1-dev) Add AGAIN, ap_check_input_pending() and
+ * ap_check_output_pending()
*/
#define MODULE_MAGIC_COOKIE 0x41503235UL /* "AP25" */
@@ -738,7 +740,7 @@
#ifndef MODULE_MAGIC_NUMBER_MAJOR
#define MODULE_MAGIC_NUMBER_MAJOR 20211221
#endif
-#define MODULE_MAGIC_NUMBER_MINOR 25 /* 0...n */
+#define MODULE_MAGIC_NUMBER_MINOR 26 /* 0...n */
/**
* Determine if the server's current MODULE_MAGIC_NUMBER is at least a
diff --git a/include/httpd.h b/include/httpd.h
index c02b3b7849b..c3f72fceb7e 100644
--- a/include/httpd.h
+++ b/include/httpd.h
@@ -465,6 +465,8 @@ AP_DECLARE(const char *) ap_get_server_built(void);
*/
#define SUSPENDED -3 /**< Module will handle the remainder of the request.
* The core will never invoke the request again */
+#define AGAIN -4 /**< Module wants to be called again when
+ * more data is available */
/** Returned by the bottom-most filter if no data was written.
* @see ap_pass_brigade(). */
diff --git a/include/mpm_common.h b/include/mpm_common.h
index 334624ee065..34c61e2a6c2 100644
--- a/include/mpm_common.h
+++ b/include/mpm_common.h
@@ -450,24 +450,6 @@ AP_DECLARE_HOOK(apr_status_t, mpm_resume_suspended, (conn_rec*))
*/
AP_DECLARE_HOOK(const char *,mpm_get_name,(void))
-/**
- * Hook called to determine whether we should stay within the write completion
- * phase.
- * @param c The current connection
- * @return OK if write completion should continue, DECLINED if write completion
- * should end gracefully, or a positive error if we should begin to linger.
- * @ingroup hooks
- */
-AP_DECLARE_HOOK(int, output_pending, (conn_rec *c))
-
-/**
- * Hook called to determine whether any data is pending in the input filters.
- * @param c The current connection
- * @return OK if we can read without blocking, DECLINED if a read would block.
- * @ingroup hooks
- */
-AP_DECLARE_HOOK(int, input_pending, (conn_rec *c))
-
/**
* Notification that connection handling is suspending (disassociating from the
* current thread)
diff --git a/include/util_filter.h b/include/util_filter.h
index a03e81c16ca..6a21c486810 100644
--- a/include/util_filter.h
+++ b/include/util_filter.h
@@ -645,16 +645,15 @@ AP_DECLARE(void) ap_filter_adopt_brigade(ap_filter_t *f,
AP_DECLARE(int) ap_filter_should_yield(ap_filter_t *f);
/**
- * This function determines whether there is unwritten data in the output
+ * This function determines whether there is pending data in the output
* filters, and if so, attempts to make a single write to each filter
- * with unwritten data.
+ * with pending data.
*
* @param c The connection.
- * @return If no unwritten data remains, this function returns DECLINED.
- * If some unwritten data remains, this function returns OK. If any
- * attempt to write data failed, this functions returns a positive integer.
+ * @return OK if no pending data remain, AGAIN if some remain, DONE
+ * if the connection is aborted, anything else on error.
*/
-AP_DECLARE_NONSTD(int) ap_filter_output_pending(conn_rec *c);
+AP_DECLARE(int) ap_check_output_pending(conn_rec *c);
/**
* This function determines whether there is pending data in the input
@@ -662,10 +661,41 @@ AP_DECLARE_NONSTD(int) ap_filter_output_pending(conn_rec *c);
* socket but not yet returned to the application.
*
* @param c The connection.
- * @return If no pending data remains, this function returns DECLINED.
- * If some pending data remains, this function returns OK.
+ * @return OK if no pending data remain, AGAIN if some remain, DONE
+ * if the connection is aborted, anything else on error.
*/
-AP_DECLARE_NONSTD(int) ap_filter_input_pending(conn_rec *c);
+AP_DECLARE(int) ap_check_input_pending(conn_rec *c);
+
+/**
+ * Hook called to determine whether we should stay within the write completion
+ * phase.
+ * @param c The current connection
+ * @return OK if we can write without blocking, AGAIN if a write would block,
+ * DECLINED to let the next hook decide, DONE if the connection is aborted,
+ * anything else on error.
+ * @ingroup hooks
+ */
+AP_DECLARE_HOOK(int, output_pending, (conn_rec *c))
+
+/**
+ * Hook called to determine whether any data is pending in the input filters.
+ * @param c The current connection
+ * @return OK if we can read without blocking, AGAIN if a read would block,
+ * DECLINED to let the next hook decide, DONE if the connection is aborted,
+ * anything else on error.
+ * @ingroup hooks
+ */
+AP_DECLARE_HOOK(int, input_pending, (conn_rec *c))
+
+/**
+ * The core output_pending hook.
+ */
+AP_DECLARE_NONSTD(int) ap_core_output_pending(conn_rec *c);
+
+/**
+ * The core input_pending hook.
+ */
+AP_DECLARE_NONSTD(int) ap_core_input_pending(conn_rec *c);
/**
* Flush function for apr_brigade_* calls. This calls ap_pass_brigade
diff --git a/modules/http/http_request.c b/modules/http/http_request.c
index 65c389125a7..77bf63edc5f 100644
--- a/modules/http/http_request.c
+++ b/modules/http/http_request.c
@@ -484,7 +484,7 @@ AP_DECLARE(void) ap_process_request(request_rec *r)
ap_process_async_request(r);
- if (ap_run_input_pending(c) != OK) {
+ if (ap_check_input_pending(c) != AGAIN) {
bb = ap_acquire_brigade(c);
b = apr_bucket_flush_create(c->bucket_alloc);
APR_BRIGADE_INSERT_HEAD(bb, b);
diff --git a/modules/proxy/proxy_util.c b/modules/proxy/proxy_util.c
index cbc31104c37..88d174220d8 100644
--- a/modules/proxy/proxy_util.c
+++ b/modules/proxy/proxy_util.c
@@ -5888,12 +5888,12 @@ PROXY_DECLARE(int) ap_proxy_tunnel_run(proxy_tunnel_rec *tunnel)
"proxy: %s: %s output ready",
scheme, out->name);
- rc = ap_filter_output_pending(out->c);
- if (rc == OK) {
- /* Keep polling out (only) */
+ rc = ap_check_output_pending(out->c);
+ if (rc == AGAIN) {
+ /* Keep polling (OUT only) */
continue;
}
- if (rc != DECLINED) {
+ if (rc != OK) {
/* Real failure, bail out */
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10221)
"proxy: %s: %s flushing failed (%i)",
@@ -5923,7 +5923,7 @@ PROXY_DECLARE(int) ap_proxy_tunnel_run(proxy_tunnel_rec *tunnel)
/* Flush any pending input data now, we don't know when
* the next POLLIN will trigger and retaining data might
* deadlock the underlying protocol. We don't check for
- * pending data first with ap_filter_input_pending() since
+ * pending data first with ap_check_input_pending() since
* the read from proxy_tunnel_transfer() is nonblocking
* anyway and returns OK if there's no data.
*/
diff --git a/server/core.c b/server/core.c
index 9f92981ef0d..4d5d569d93b 100644
--- a/server/core.c
+++ b/server/core.c
@@ -6150,9 +6150,9 @@ static void register_hooks(apr_pool_t *p)
ap_hook_get_pollfd_from_conn(core_get_pollfd_from_conn, NULL, NULL,
APR_HOOK_REALLY_LAST);
- ap_hook_input_pending(ap_filter_input_pending, NULL, NULL,
+ ap_hook_input_pending(ap_core_input_pending, NULL, NULL,
APR_HOOK_MIDDLE);
- ap_hook_output_pending(ap_filter_output_pending, NULL, NULL,
+ ap_hook_output_pending(ap_core_output_pending, NULL, NULL,
APR_HOOK_MIDDLE);
/* register the core's insert_filter hook and register core-provided
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 403f9a3c939..ab49f34cf44 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -1233,7 +1233,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
}
if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
- int pending = DECLINED;
+ int pending = OK;
/* Flush all pending outputs before going to CONN_STATE_KEEPALIVE or
* straight to CONN_STATE_PROCESSING if inputs are pending already.
@@ -1243,12 +1243,12 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
if (from_wc_q) {
from_wc_q = 0; /* one shot */
- pending = ap_run_output_pending(c);
+ pending = ap_check_output_pending(c);
}
else if (ap_filter_should_yield(c->output_filters)) {
- pending = OK;
+ pending = AGAIN;
}
- if (pending == OK) {
+ if (pending == AGAIN) {
/* Let the event thread poll for write */
cs->queue_timestamp = apr_time_now();
notify_suspend(cs);
@@ -1274,11 +1274,11 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
}
return;
}
- if (pending != DECLINED || c->aborted || c->keepalive != AP_CONN_KEEPALIVE) {
+ if (pending != OK || c->aborted || c->keepalive != AP_CONN_KEEPALIVE) {
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
}
- if (ap_run_input_pending(c) == OK) {
+ if (ap_check_input_pending(c) == AGAIN) {
goto process_connection;
}
if (listener_may_exit) {
diff --git a/server/mpm/motorz/motorz.c b/server/mpm/motorz/motorz.c
index 8feff2965c2..7026d08cd6e 100644
--- a/server/mpm/motorz/motorz.c
+++ b/server/mpm/motorz/motorz.c
@@ -408,8 +408,8 @@ static apr_status_t motorz_io_process(motorz_conn_t *scon)
ap_update_child_status(scon->sbh, SERVER_BUSY_WRITE, NULL);
- pending = ap_run_output_pending(c);
- if (pending == OK) {
+ pending = ap_check_output_pending(c);
+ if (pending == AGAIN) {
/* Still in WRITE_COMPLETION_STATE:
* Set a write timeout for this connection, and let the
* event thread poll for writeability.
@@ -432,18 +432,22 @@ static apr_status_t motorz_io_process(motorz_conn_t *scon)
}
return APR_SUCCESS;
}
- if (pending != DECLINED
- || c->keepalive != AP_CONN_KEEPALIVE
- || c->aborted) {
- scon->cs.state = CONN_STATE_LINGER;
+ if (c->keepalive != AP_CONN_KEEPALIVE) {
+ pending = DONE;
}
- else if (ap_run_input_pending(c) == OK) {
- scon->cs.state = CONN_STATE_PROCESSING;
- goto read_request;
+ else if (pending == OK) {
+ pending = ap_check_input_pending(c);
+ if (pending == AGAIN) {
+ scon->cs.state = CONN_STATE_PROCESSING;
+ goto read_request;
+ }
}
- else {
+ if (pending == OK) {
scon->cs.state = CONN_STATE_KEEPALIVE;
}
+ else {
+ scon->cs.state = CONN_STATE_LINGER;
+ }
}
if (scon->cs.state == CONN_STATE_LINGER) {
diff --git a/server/mpm/simple/simple_io.c b/server/mpm/simple/simple_io.c
index fb509ed756a..36c5ad87956 100644
--- a/server/mpm/simple/simple_io.c
+++ b/server/mpm/simple/simple_io.c
@@ -96,8 +96,8 @@ static apr_status_t simple_io_process(simple_conn_t * scon)
int pending;
ap_update_child_status(c->sbh, SERVER_BUSY_WRITE, NULL);
- pending = ap_run_output_pending(c);
- if (pending == OK) {
+ pending = ap_check_output_pending(c);
+ if (pending == AGAIN) {
/* Still in WRITE_COMPLETION_STATE:
* Set a write timeout for this connection, and let the
* event thread poll for writeability.
@@ -126,17 +126,22 @@ static apr_status_t simple_io_process(simple_conn_t * scon)
}
return APR_SUCCESS;
}
- if (pending != DECLINED
- || c->keepalive != AP_CONN_KEEPALIVE
- || c->aborted) {
- scon->cs.state = CONN_STATE_LINGER;
+ if (c->keepalive != AP_CONN_KEEPALIVE) {
+ pending = DONE;
}
- else if (ap_run_input_pending(c) == OK) {
- scon->cs.state = CONN_STATE_PROCESSING;
+ else if (pending == OK) {
+ pending = ap_check_input_pending(c);
+ if (pending == AGAIN) {
+ scon->cs.state = CONN_STATE_PROCESSING;
+ continue;
+ }
}
- else {
+ if (pending == OK) {
scon->cs.state = CONN_STATE_KEEPALIVE;
}
+ else {
+ scon->cs.state = CONN_STATE_LINGER;
+ }
}
if (scon->cs.state == CONN_STATE_LINGER) {
diff --git a/server/util_filter.c b/server/util_filter.c
index 3b1e96fb447..d8dc103d80f 100644
--- a/server/util_filter.c
+++ b/server/util_filter.c
@@ -393,7 +393,7 @@ static apr_status_t request_filter_cleanup(void *arg)
/* A request filter is cleaned up with an EOR bucket, so possibly
* while it is handling/passing the EOR, and we want each filter or
- * ap_filter_output_pending() to be able to dereference f until they
+ * ap_check_output_pending() to be able to dereference f until they
* return. So request filters are recycled in dead_filters and will only
* be moved to spare_filters when recycle_dead_filters() is called, i.e.
* in ap_filter_{in,out}put_pending(). Set f->r to NULL still for any use
@@ -978,7 +978,7 @@ AP_DECLARE(apr_status_t) ap_filter_setaside_brigade(ap_filter_t *f,
e = next) {
next = APR_BUCKET_NEXT(e);
- /* WC buckets will be added back by ap_filter_output_pending()
+ /* WC buckets will be added back by ap_check_output_pending()
* at the tail.
*/
if (AP_BUCKET_IS_WC(e)) {
@@ -1267,7 +1267,7 @@ AP_DECLARE(int) ap_filter_should_yield(ap_filter_t *f)
return 0;
}
-AP_DECLARE_NONSTD(int) ap_filter_output_pending(conn_rec *c)
+AP_DECLARE_NONSTD(int) ap_core_output_pending(conn_rec *c)
{
struct ap_filter_conn_ctx *x = c->filter_conn_ctx;
struct ap_filter_private *fp, *prev;
@@ -1312,7 +1312,7 @@ AP_DECLARE_NONSTD(int) ap_filter_output_pending(conn_rec *c)
}
if (ap_filter_should_yield(f)) {
- rc = OK;
+ rc = AGAIN;
break;
}
}
@@ -1320,15 +1320,26 @@ AP_DECLARE_NONSTD(int) ap_filter_output_pending(conn_rec *c)
ap_release_brigade(c, bb);
cleanup:
- /* All filters have returned, time to recycle/unleak ap_filter_t-s
+ /* All filters have returned, time to recycle/unleak dead filters
* before leaving (i.e. make them reusable).
*/
recycle_dead_filters(c);
return rc;
}
+AP_DECLARE(int) ap_check_output_pending(conn_rec *c)
+{
+ int rc = ap_run_output_pending(c);
+ if (rc == DECLINED) {
+ rc = OK;
+ }
+ if (rc == OK && c->aborted) {
+ rc = DONE;
+ }
+ return rc;
+}
-AP_DECLARE_NONSTD(int) ap_filter_input_pending(conn_rec *c)
+AP_DECLARE_NONSTD(int) ap_core_input_pending(conn_rec *c)
{
struct ap_filter_conn_ctx *x = c->filter_conn_ctx;
struct ap_filter_private *fp;
@@ -1349,21 +1360,31 @@ AP_DECLARE_NONSTD(int) ap_filter_input_pending(conn_rec *c)
*/
AP_DEBUG_ASSERT(fp->bb);
e = APR_BRIGADE_FIRST(fp->bb);
- if (e != APR_BRIGADE_SENTINEL(fp->bb)
- && e->length != (apr_size_t)(-1)) {
- rc = OK;
+ if (e != APR_BRIGADE_SENTINEL(fp->bb) && e->length != (apr_size_t)-1) {
+ rc = AGAIN;
break;
}
}
cleanup:
- /* All filters have returned, time to recycle/unleak ap_filter_t-s
+ /* All filters have returned, time to recycle/unleak dead filters
* before leaving (i.e. make them reusable).
*/
recycle_dead_filters(c);
return rc;
}
+AP_DECLARE(int) ap_check_input_pending(conn_rec *c)
+{
+ int rc = ap_run_input_pending(c);
+ if (rc == DECLINED) {
+ rc = OK;
+ }
+ if (rc == OK && c->aborted) {
+ rc = DONE;
+ }
+ return rc;
+}
AP_DECLARE_NONSTD(apr_status_t) ap_filter_flush(apr_bucket_brigade *bb,
void *ctx)
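A usage note on the hooks declared in util_filter.h above: a module can now run its own pending check in front of the core one, returning AGAIN for its own buffered data and DECLINED to defer. A minimal sketch, where example_module and the buffered-data test are hypothetical:

    #include "httpd.h"
    #include "http_config.h"
    #include "util_filter.h"

    module AP_MODULE_DECLARE_DATA example_module;

    static int example_input_pending(conn_rec *c)
    {
        /* Hypothetical test: treat a non-NULL per-connection config as
         * "data buffered by this module". */
        if (ap_get_module_config(c->conn_config, &example_module))
            return AGAIN;
        return DECLINED; /* fall through to ap_core_input_pending() */
    }

    static void example_register_hooks(apr_pool_t *p)
    {
        /* The core hook is registered at APR_HOOK_MIDDLE (see core.c) */
        ap_hook_input_pending(example_input_pending, NULL, NULL,
                              APR_HOOK_FIRST);
    }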
From a92dd49afaeb3cbaec448c17cf8ab0eee18f8e65 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 11 Jul 2023 20:26:41 +0200
Subject: [PATCH 02/22] mpm_event,listen: Improve/fix children maintenance when
num_buckets > 1.
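The changes below spawn and reap children in whole rounds of num_buckets so that each listeners bucket keeps the same number of children. The rounding arithmetic used inline throughout the diff boils down to (helper names illustrative, not in the diff):

    /* Round to a multiple of n (n > 0), as done inline below for
     * max_spawn_rate, idle_spawn_rate and free_length. */
    static int align_up(int x, int n)   { return ((x + n - 1) / n) * n; }
    static int align_down(int x, int n) { return (x / n) * n; }
    /* e.g. align_up(32, 3) == 33   (MAX_SPAWN_RATE with 3 buckets)
     *      align_down(7, 3) == 6   (spawn in whole bucket rounds)  */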
---
server/listen.c | 47 +++++-----
server/mpm/event/event.c | 184 +++++++++++++++++++--------------------
2 files changed, 113 insertions(+), 118 deletions(-)
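With make_child() below deriving the bucket from the scoreboard slot, children are distributed round-robin across buckets. A standalone illustration of that mapping (plain C, not part of the diff):

    #include <stdio.h>

    int main(void)
    {
        int num_buckets = 3, slot;
        for (slot = 0; slot < 6; ++slot) {
            /* slots 0..5 map to buckets 0,1,2,0,1,2 */
            printf("slot %d -> bucket %d\n", slot, slot % num_buckets);
        }
        return 0;
    }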
diff --git a/server/listen.c b/server/listen.c
index 3aed907e21b..f32826e4774 100644
--- a/server/listen.c
+++ b/server/listen.c
@@ -333,9 +333,7 @@ static apr_status_t alloc_systemd_listener(process_rec * process,
si.type = SOCK_STREAM;
si.protocol = APR_PROTO_TCP;
- rec = apr_palloc(process->pool, sizeof(ap_listen_rec));
- rec->active = 0;
- rec->next = 0;
+ rec = apr_pcalloc(process->pool, sizeof(ap_listen_rec));
rv = apr_os_sock_make(&rec->sd, &si, process->pool);
if (rv != APR_SUCCESS) {
@@ -462,8 +460,8 @@ static const char *alloc_listener(process_rec *process, const char *addr,
apr_pool_t *temp_pool, apr_uint32_t flags)
{
ap_listen_rec *last;
+ apr_sockaddr_t *sa, *next_sa;
apr_status_t status;
- apr_sockaddr_t *sa;
/* see if we've got a listener for this address:port, which is an error */
if (find_listeners(&ap_listeners, NULL, addr, port, scope_id, temp_pool)) {
@@ -494,22 +492,23 @@ static const char *alloc_listener(process_rec *process, const char *addr,
last = last->next;
}
- while (sa) {
+ for (; sa; sa = next_sa) {
ap_listen_rec *new;
+ /* Each listener has its own (unlinked) address */
+ next_sa = sa->next;
+ sa->next = NULL;
+
/* this has to survive restarts */
new = apr_palloc(process->pool, sizeof(ap_listen_rec));
new->active = 0;
- new->next = 0;
+ new->next = NULL;
new->bind_addr = sa;
new->protocol = apr_pstrdup(process->pool, proto);
new->flags = flags;
- /* Go to the next sockaddr. */
- sa = sa->next;
-
status = apr_socket_create(&new->sd, new->bind_addr->family,
- SOCK_STREAM, 0, process->pool);
+ SOCK_STREAM, 0, process->pool);
#if APR_HAVE_IPV6
/* What could happen is that we got an IPv6 address, but this system
@@ -861,36 +860,36 @@ AP_DECLARE(apr_status_t) ap_duplicate_listeners(apr_pool_t *p, server_rec *s,
lr = ap_listeners;
while (lr) {
ap_listen_rec *duplr;
- char *hostname;
- apr_port_t port;
- apr_sockaddr_t *sa;
#ifdef HAVE_SYSTEMD
if (use_systemd) {
int thesock;
apr_os_sock_get(&thesock, lr->sd);
if ((stat = alloc_systemd_listener(s->process, thesock,
- lr->protocol, &duplr)) != APR_SUCCESS) {
+ lr->protocol, &duplr))) {
return stat;
}
}
else
#endif
{
- duplr = apr_palloc(p, sizeof(ap_listen_rec));
- duplr->slave = NULL;
+ duplr = apr_pcalloc(p, sizeof(ap_listen_rec));
duplr->protocol = apr_pstrdup(p, lr->protocol);
- hostname = apr_pstrdup(p, lr->bind_addr->hostname);
- port = lr->bind_addr->port;
- stat = apr_sockaddr_info_get(&sa, hostname, APR_UNSPEC, port, 0, p);
+ duplr->flags = lr->flags;
+#if APR_VERSION_AT_LEAST(1,6,0)
+ stat = apr_sockaddr_info_copy(&duplr->bind_addr,
+ lr->bind_addr, p);
+#else
+ stat = apr_sockaddr_info_get(&duplr->bind_addr,
+ lr->bind_addr->hostname,
+ lr->bind_addr->family,
+ lr->bind_addr->port, 0, p);
+#endif
if (stat != APR_SUCCESS) {
ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO(10397)
- "failure looking up %s to duplicate "
- "listening socket", hostname);
+ "failure duplicating address %pI for "
+ "listening socket", lr->bind_addr);
return stat;
}
- duplr->bind_addr = sa;
- duplr->next = NULL;
- duplr->flags = lr->flags;
stat = apr_socket_create(&duplr->sd, duplr->bind_addr->family,
SOCK_STREAM, 0, p);
if (stat != APR_SUCCESS) {
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index ab49f34cf44..29a7b2bd345 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -436,13 +436,15 @@ typedef struct event_retained_data {
* Workers that are still active, i.e. not shutting down gracefully.
*/
int active_daemons;
+
/*
* idle_spawn_rate is the number of children that will be spawned on the
* next maintenance cycle if there aren't enough idle servers. It is
- * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and
- * reset only when a cycle goes by without the need to spawn.
+ * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
+ * without the need to spawn.
*/
- int *idle_spawn_rate;
+ int idle_spawn_rate;
+ int max_spawn_rate, *free_slots;
int hold_off_on_exponential_spawning;
} event_retained_data;
static event_retained_data *retained;
@@ -450,7 +452,6 @@ static event_retained_data *retained;
#ifndef MAX_SPAWN_RATE
#define MAX_SPAWN_RATE 32
#endif
-static int max_spawn_rate_per_bucket = MAX_SPAWN_RATE / 1;
struct event_srv_cfg_s {
struct timeout_queue *io_q,
@@ -3144,9 +3145,9 @@ static void child_main(int child_num_arg, int child_bucket)
clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
}
-static int make_child(server_rec * s, int slot, int bucket)
+static int make_child(server_rec *s, int slot)
{
- int pid;
+ int pid, bucket = slot % retained->mpm->num_buckets;
if (slot + 1 > retained->max_daemon_used) {
retained->max_daemon_used = slot + 1;
@@ -3230,32 +3231,27 @@ static void startup_children(int number_to_start)
if (ap_scoreboard_image->parent[i].pid != 0) {
continue;
}
- if (make_child(ap_server_conf, i, i % retained->mpm->num_buckets) < 0) {
+ if (make_child(ap_server_conf, i) < 0) {
break;
}
--number_to_start;
}
}
-static void perform_idle_server_maintenance(int child_bucket,
- int *max_daemon_used)
+static void perform_idle_server_maintenance(void)
{
- int num_buckets = retained->mpm->num_buckets;
- int idle_thread_count = 0;
- process_score *ps;
- int free_length = 0;
- int free_slots[MAX_SPAWN_RATE];
+ volatile process_score *ps;
+ const int num_buckets = retained->mpm->num_buckets;
int last_non_dead = -1;
+ int free_length = 0, free_bucket = 0;
+ int max_daemon_used = 0;
+ int idle_thread_count = 0;
int active_thread_count = 0;
int i, j;
for (i = 0; i < server_limit; ++i) {
- if (num_buckets > 1 && (i % num_buckets) != child_bucket) {
- /* We only care about child_bucket in this call */
- continue;
- }
if (i >= retained->max_daemon_used &&
- free_length == retained->idle_spawn_rate[child_bucket]) {
+ free_length == retained->idle_spawn_rate) {
/* short cut if all active processes have been examined and
* enough empty scoreboard slots have been found
*/
@@ -3299,12 +3295,16 @@ static void perform_idle_server_maintenance(int child_bucket,
}
last_non_dead = i;
}
- else if (free_length < retained->idle_spawn_rate[child_bucket]) {
- free_slots[free_length++] = i;
+ else if (free_length < retained->idle_spawn_rate
+ && (i % num_buckets) == free_bucket) {
+ retained->free_slots[free_length++] = i;
+ if (++free_bucket == num_buckets) {
+ free_bucket = 0;
+ }
}
}
- if (*max_daemon_used < last_non_dead + 1) {
- *max_daemon_used = last_non_dead + 1;
+ if (max_daemon_used < last_non_dead + 1) {
+ max_daemon_used = last_non_dead + 1;
}
if (retained->sick_child_detected) {
@@ -3315,10 +3315,6 @@ static void perform_idle_server_maintenance(int child_bucket,
*/
retained->sick_child_detected = 0;
}
- else if (child_bucket < num_buckets - 1) {
- /* check for had_healthy_child up to the last child bucket */
- return;
- }
else {
/* looks like a basket case, as no child ever fully initialized; give up.
*/
@@ -3338,7 +3334,7 @@ static void perform_idle_server_maintenance(int child_bucket,
&& retained->total_daemons <= retained->max_daemon_used
&& retained->max_daemon_used <= server_limit);
- if (idle_thread_count > max_spare_threads / num_buckets) {
+ if (idle_thread_count > max_spare_threads) {
/*
* Child processes that we ask to shut down won't die immediately
* but may stay around for a long time when they finish their
@@ -3367,17 +3363,19 @@ static void perform_idle_server_maintenance(int child_bucket,
retained->total_daemons, retained->max_daemon_used,
server_limit, idle_thread_count, max_workers);
if (do_kill) {
- ap_mpm_podx_signal(retained->buckets[child_bucket].pod,
- AP_MPM_PODX_GRACEFUL);
+ for (i = 0; i < num_buckets; ++i) {
+ ap_mpm_podx_signal(retained->buckets[i].pod,
+ AP_MPM_PODX_GRACEFUL);
+ }
}
else {
/* Wait for dying daemon(s) to exit */
}
- retained->idle_spawn_rate[child_bucket] = 1;
+ retained->idle_spawn_rate = num_buckets;
}
- else if (idle_thread_count < min_spare_threads / num_buckets) {
- if (active_thread_count >= max_workers / num_buckets) {
- if (0 == idle_thread_count) {
+ else if (idle_thread_count < min_spare_threads) {
+ if (active_thread_count >= max_workers) {
+ if (0 == idle_thread_count) {
if (!retained->maxclients_reported) {
ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
"server reached MaxRequestWorkers setting, "
@@ -3395,18 +3393,15 @@ static void perform_idle_server_maintenance(int child_bucket,
retained->near_maxclients_reported = 1;
}
}
- retained->idle_spawn_rate[child_bucket] = 1;
+ retained->idle_spawn_rate = num_buckets;
}
else if (free_length == 0) { /* scoreboard is full, can't fork */
ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03490)
"scoreboard is full, not at MaxRequestWorkers."
"Increase ServerLimit.");
- retained->idle_spawn_rate[child_bucket] = 1;
+ retained->idle_spawn_rate = num_buckets;
}
else {
- if (free_length > retained->idle_spawn_rate[child_bucket]) {
- free_length = retained->idle_spawn_rate[child_bucket];
- }
if (free_length + retained->active_daemons > active_daemons_limit) {
if (retained->active_daemons < active_daemons_limit) {
free_length = active_daemons_limit - retained->active_daemons;
@@ -3418,14 +3413,13 @@ static void perform_idle_server_maintenance(int child_bucket,
"total %d/%d/%d, rate %d", free_length,
retained->active_daemons, active_daemons_limit,
retained->total_daemons, retained->max_daemon_used,
- server_limit, retained->idle_spawn_rate[child_bucket]);
+ server_limit, retained->idle_spawn_rate);
/* reset the spawning rate and prevent its growth below */
- retained->idle_spawn_rate[child_bucket] = 1;
- ++retained->hold_off_on_exponential_spawning;
+ retained->idle_spawn_rate = num_buckets;
free_length = 0;
}
}
- if (retained->idle_spawn_rate[child_bucket] >= 8) {
+ if (retained->idle_spawn_rate >= retained->max_spawn_rate / 4) {
ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
"server seems busy, (you may need "
"to increase StartServers, ThreadsPerChild "
@@ -3436,13 +3430,14 @@ static void perform_idle_server_maintenance(int child_bucket,
idle_thread_count, retained->active_daemons,
retained->total_daemons);
}
+ free_length = (free_length / num_buckets) * num_buckets;
for (i = 0; i < free_length; ++i) {
- int slot = free_slots[i];
- if (make_child(ap_server_conf, slot, child_bucket) < 0) {
+ int slot = retained->free_slots[i];
+ if (make_child(ap_server_conf, slot) < 0) {
continue;
}
- if (*max_daemon_used < slot + 1) {
- *max_daemon_used = slot + 1;
+ if (max_daemon_used < slot + 1) {
+ max_daemon_used = slot + 1;
}
}
/* the next time around we want to spawn twice as many if this
@@ -3451,31 +3446,41 @@ static void perform_idle_server_maintenance(int child_bucket,
if (retained->hold_off_on_exponential_spawning) {
--retained->hold_off_on_exponential_spawning;
}
- else if (retained->idle_spawn_rate[child_bucket]
- < max_spawn_rate_per_bucket) {
- int new_rate = retained->idle_spawn_rate[child_bucket] * 2;
- if (new_rate > max_spawn_rate_per_bucket) {
- new_rate = max_spawn_rate_per_bucket;
+ else if (free_length && retained->idle_spawn_rate < retained->max_spawn_rate) {
+ int new_rate = retained->idle_spawn_rate * 2;
+ new_rate = ((new_rate + num_buckets - 1) / num_buckets) * num_buckets;
+ if (new_rate > retained->max_spawn_rate) {
+ new_rate = retained->max_spawn_rate;
}
- retained->idle_spawn_rate[child_bucket] = new_rate;
+ retained->idle_spawn_rate = new_rate;
}
}
}
else {
- retained->idle_spawn_rate[child_bucket] = 1;
+ retained->idle_spawn_rate = num_buckets;
+ }
+
+ retained->max_daemon_used = max_daemon_used;
+ if (APLOGdebug(ap_server_conf)) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
+ "score: idlers:%d, "
+ "threads active:%d/%d max:%d, "
+ "daemons active:%d/%d max:%d used:%d/%d/%d",
+ idle_thread_count,
+ active_thread_count, retained->active_daemons * threads_per_child,
+ max_workers, retained->active_daemons, retained->total_daemons,
+ active_daemons_limit, max_daemon_used, retained->max_daemon_used,
+ server_limit);
}
}
static void server_main_loop(int remaining_children_to_start)
{
- int num_buckets = retained->mpm->num_buckets;
- int max_daemon_used = 0;
int successive_kills = 0;
int child_slot;
apr_exit_why_e exitwhy;
int status, processed_status;
apr_proc_t pid;
- int i;
while (!retained->mpm->restart_pending && !retained->mpm->shutdown_pending) {
ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
@@ -3520,14 +3525,13 @@ static void server_main_loop(int remaining_children_to_start)
if (processed_status == APEXIT_CHILDSICK) {
/* resource shortage, minimize the fork rate */
- retained->idle_spawn_rate[child_slot % num_buckets] = 1;
+ retained->idle_spawn_rate = retained->mpm->num_buckets;
}
else if (remaining_children_to_start) {
/* we're still doing a 1-for-1 replacement of dead
* children with new children
*/
- make_child(ap_server_conf, child_slot,
- child_slot % num_buckets);
+ make_child(ap_server_conf, child_slot);
--remaining_children_to_start;
}
}
@@ -3586,11 +3590,7 @@ static void server_main_loop(int remaining_children_to_start)
continue;
}
- max_daemon_used = 0;
- for (i = 0; i < num_buckets; i++) {
- perform_idle_server_maintenance(i, &max_daemon_used);
- }
- retained->max_daemon_used = max_daemon_used;
+ perform_idle_server_maintenance();
}
}
@@ -3680,35 +3680,36 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
retained->buckets[i].listeners = listen_buckets[i];
}
- if (retained->mpm->max_buckets < num_buckets) {
- int new_max, *new_ptr;
- new_max = retained->mpm->max_buckets * 2;
- if (new_max < num_buckets) {
- new_max = num_buckets;
+ /* If num_buckets changed, adjust max_spawn_rate and the free_slots buffer */
+ if (retained->mpm->num_buckets != num_buckets) {
+ if (retained->mpm->max_buckets < num_buckets) {
+ int new_max, new_slots;
+ new_max = retained->mpm->max_buckets * 2;
+ if (new_max < num_buckets) {
+ new_max = num_buckets;
+ }
+ else {
+ new_max = ((new_max + num_buckets - 1) / num_buckets) * num_buckets;
+ }
+ new_slots = ((MAX_SPAWN_RATE + new_max - 1) / new_max) * new_max;
+ retained->free_slots = apr_palloc(ap_pglobal, new_slots * sizeof(int));
+ retained->mpm->max_buckets = new_max;
}
- new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
- if (retained->mpm->num_buckets) /* idle_spawn_rate NULL at startup */
- memcpy(new_ptr, retained->idle_spawn_rate,
- retained->mpm->num_buckets * sizeof(int));
- retained->idle_spawn_rate = new_ptr;
- retained->mpm->max_buckets = new_max;
- }
- if (retained->mpm->num_buckets < num_buckets) {
- int rate_max = 1;
- /* If new buckets are added, set their idle spawn rate to
- * the highest so far, so that they get filled as quickly
- * as the existing ones.
+ /* We always spawn/kill children in a multiple of num_buckets (as needed),
+ * so align (round up) max_spawn_rate and idle_spawn_rate to num_buckets.
*/
- for (i = 0; i < retained->mpm->num_buckets; i++) {
- if (rate_max < retained->idle_spawn_rate[i]) {
- rate_max = retained->idle_spawn_rate[i];
- }
+ retained->max_spawn_rate = (((MAX_SPAWN_RATE + num_buckets - 1)
+ / num_buckets) * num_buckets);
+ retained->idle_spawn_rate = (((retained->idle_spawn_rate + num_buckets - 1)
+ / num_buckets) * num_buckets);
+ if (retained->idle_spawn_rate < num_buckets) {
+ retained->idle_spawn_rate = num_buckets;
}
- for (/* up to date i */; i < num_buckets; i++) {
- retained->idle_spawn_rate[i] = rate_max;
+ else if (retained->idle_spawn_rate > retained->max_spawn_rate) {
+ retained->idle_spawn_rate = retained->max_spawn_rate;
}
+ retained->mpm->num_buckets = num_buckets;
}
- retained->mpm->num_buckets = num_buckets;
/* Don't thrash since num_buckets depends on the
* system and the number of online CPU cores...
@@ -3728,11 +3729,6 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
if (max_spare_threads < min_spare_threads + (threads_per_child + 1) * num_buckets)
max_spare_threads = min_spare_threads + (threads_per_child + 1) * num_buckets;
- max_spawn_rate_per_bucket = (MAX_SPAWN_RATE + num_buckets - 1) / num_buckets;
- if (max_spawn_rate_per_bucket < 1) {
- max_spawn_rate_per_bucket = 1;
- }
-
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
* below (because we just sent them AP_SIG_GRACEFUL). This happens pretty
From b85d7387f59eff51261744997abf81b3cc7c9d55 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Mon, 3 Jun 2024 15:35:47 +0200
Subject: [PATCH 03/22] mpm_event: Add helpers, simplify code and improve
logging before functional changes.
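Among the helpers added below are the CS_FMT/CS_ARG logging macros (CS_FMT expands to "pp:%s:%i", i.e. the cs pointer, state name and fd, with CS_ARG() supplying the matching arguments). Their usage pattern, taken from the diff, is simply:

    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
                  "processing connection %" CS_FMT " (aborted %d)",
                  CS_ARG(cs), cs->c->aborted);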
---
modules/http/http_request.c | 3 +-
server/mpm/event/event.c | 1338 +++++++++++++++++++--------------
server/mpm/motorz/motorz.c | 14 +-
server/mpm/simple/simple_io.c | 10 +-
4 files changed, 787 insertions(+), 578 deletions(-)
diff --git a/modules/http/http_request.c b/modules/http/http_request.c
index 77bf63edc5f..c8f157eca80 100644
--- a/modules/http/http_request.c
+++ b/modules/http/http_request.c
@@ -431,7 +431,8 @@ void ap_process_async_request(request_rec *r)
const apr_array_header_t *t_h = apr_table_elts(r->headers_in);
const apr_table_entry_t *t_elt = (apr_table_entry_t *)t_h->elts;
ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r,
- "Headers received from client:");
+ "Header received from client:");
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r, " %s", r->the_request);
for (i = 0; i < t_h->nelts; i++, t_elt++) {
ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r, " %s: %s",
ap_escape_logitem(r->pool, t_elt->key),
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 29a7b2bd345..64ff1e30ead 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -149,7 +149,7 @@
#define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
#if !APR_VERSION_AT_LEAST(1,4,0)
-#define apr_time_from_msec(x) (x * 1000)
+#define apr_time_from_msec(x) ((x) * 1000)
#endif
#define CONN_STATE_IS_LINGERING_CLOSE(s) ((s) >= CONN_STATE_LINGER && \
@@ -159,6 +159,21 @@
#endif
#define SECONDS_TO_LINGER 2
+/* Don't wait more time in poll() if APR_POLLSET_WAKEABLE is not implemented */
+#define NON_WAKEABLE_TIMEOUT apr_time_from_msec(100)
+
+/* Prevent extra poll/wakeup calls for timeouts close in the future (queues
+ * have the granularity of a second anyway).
+ * XXX: Wouldn't 0.5s (instead of 0.1s) be "enough"?
+ */
+#define QUEUES_FUDGE_TIMEOUT apr_time_from_msec(100)
+
+/* Same goal as for QUEUES_FUDGE_TIMEOUT, but applied to timers.
+ * XXX: Since their timeouts are custom (user defined), we can't be too
+ * approximate here (using 5ms).
+ */
+#define TIMERS_FUDGE_TIMEOUT apr_time_from_msec(5)
+
/*
* Actual definitions of config globals
*/
@@ -178,26 +193,27 @@ static int active_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChi
static int max_workers = 0; /* MaxRequestWorkers */
static int server_limit = 0; /* ServerLimit */
static int thread_limit = 0; /* ThreadLimit */
-static int had_healthy_child = 0;
+static int conns_this_child = 0; /* MaxConnectionsPerChild, only accessed
+                                    in listener thread */
static volatile int dying = 0;
static volatile int workers_may_exit = 0;
static volatile int start_thread_may_exit = 0;
static volatile int listener_may_exit = 0;
-static int listener_is_wakeable = 0; /* Pollset supports APR_POLLSET_WAKEABLE */
-static int num_listensocks = 0;
-static apr_int32_t conns_this_child; /* MaxConnectionsPerChild, only access
-                                        in listener thread */
static apr_uint32_t connection_count = 0; /* Number of open connections */
static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */
static apr_uint32_t suspended_count = 0; /* Number of suspended connections */
static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */
static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown
early during graceful termination */
+static int had_healthy_child = 0;
static int resource_shortage = 0;
+
static fd_queue_t *worker_queue;
static fd_queue_info_t *worker_queue_info;
-static apr_thread_mutex_t *timeout_mutex;
+static int num_listensocks = 0;
+static int listener_is_wakeable = 0; /* Pollset supports APR_POLLSET_WAKEABLE */
+static apr_pollfd_t *listener_pollfd;
module AP_MODULE_DECLARE_DATA mpm_event_module;
@@ -205,7 +221,8 @@ module AP_MODULE_DECLARE_DATA mpm_event_module;
struct event_srv_cfg_s;
typedef struct event_srv_cfg_s event_srv_cfg;
-static apr_pollfd_t *listener_pollfd;
+struct timeout_queue;
+static apr_thread_mutex_t *timeout_mutex;
/*
* The pollset for sockets that are in any of the timeout queues. Currently
@@ -219,18 +236,13 @@ static apr_pollfd_t *listener_pollfd;
static apr_pollset_t *event_pollset;
typedef struct event_conn_state_t event_conn_state_t;
-
-/*
- * The chain of connections to be shutdown by a worker thread (deferred),
- * linked list updated atomically.
- */
-static event_conn_state_t *volatile defer_linger_chain;
-
struct event_conn_state_t {
/** APR_RING of expiration timeouts */
APR_RING_ENTRY(event_conn_state_t) timeout_list;
- /** the time when the entry was queued */
- apr_time_t queue_timestamp;
+ /** public parts of the connection state */
+ conn_state_t pub;
+ /** memory pool allocated on and to allocate from (ptrans) */
+ apr_pool_t *p;
/** connection record this struct refers to */
conn_rec *c;
/** request record (if any) this struct refers to */
@@ -239,37 +251,101 @@ struct event_conn_state_t {
event_srv_cfg *sc;
/** scoreboard handle for the conn_rec */
ap_sb_handle_t *sbh;
- /** is the current conn_rec suspended? (disassociated with
- * a particular MPM thread; for suspend_/resume_connection
- * hooks)
- */
- int suspended;
- /** memory pool to allocate from */
- apr_pool_t *p;
/** bucket allocator */
apr_bucket_alloc_t *bucket_alloc;
+
+ /*
+ * when queued to the listener
+ */
/** poll file descriptor information */
apr_pollfd_t pfd;
- /** public parts of the connection state */
- conn_state_t pub;
+ /** the time when the entry was queued */
+ apr_time_t queue_timestamp;
+ /** the timeout queue for this entry */
+ struct timeout_queue *q;
+
+ /*
+ * when queued to workers
+ */
/** chaining in defer_linger_chain */
struct event_conn_state_t *chain;
- unsigned int
+
+ /*
+ * bools as bits
+ */
+ unsigned int
+ /** Is the current conn_rec suspended? (disassociated with
+ * a particular MPM thread; for suspend_/resume_connection
+ * hooks)
+ */
+ suspended :1,
/** Is lingering close from defer_lingering_close()? */
deferred_linger :1,
/** Has ap_start_lingering_close() been called? */
linger_started :1;
};
-APR_RING_HEAD(timeout_head_t, event_conn_state_t);
+static APR_INLINE apr_socket_t *cs_sd(event_conn_state_t *cs)
+{
+ ap_assert(cs != NULL);
+ return cs->pfd.desc.s;
+}
+static APR_INLINE int cs_fd(event_conn_state_t *cs)
+{
+ apr_os_sock_t fd = -1;
+ apr_os_sock_get(&fd, cs_sd(cs));
+ return fd;
+}
+static APR_INLINE apr_sockaddr_t *cs_raddr(event_conn_state_t *cs)
+{
+ apr_sockaddr_t *addr = NULL;
+ apr_socket_addr_get(&addr, APR_REMOTE, cs_sd(cs));
+ return addr;
+}
+static APR_INLINE const char *cs_state_str(event_conn_state_t *cs)
+{
+ switch (cs->pub.state) {
+ case CONN_STATE_PROCESSING:
+ return "STATE_PROCESSING";
+ case CONN_STATE_HANDLER:
+ return "STATE_HANDLER";
+ case CONN_STATE_ASYNC_WAITIO:
+ return "STATE_ASYNC_WAITIO";
+ case CONN_STATE_WRITE_COMPLETION:
+ return "STATE_WRITE_COMPLETION";
+ case CONN_STATE_KEEPALIVE:
+ return "STATE_KEEPALIVE";
+ case CONN_STATE_LINGER:
+ case CONN_STATE_LINGER_NORMAL:
+ case CONN_STATE_LINGER_SHORT:
+ return "STATE_LINGER";
+ case CONN_STATE_SUSPENDED:
+ return "STATE_SUSPENDED";
+ default:
+ return "STATE_UNKNOWN";
+ }
+}
+#define CS_FMT "pp:%s:%i"
+#define CS_ARG(cs) (cs), cs_state_str(cs), cs_fd(cs)
+#define CS_FMT_TO CS_FMT " to [%pI]"
+#define CS_ARG_TO(cs) CS_ARG(cs), cs_raddr(cs)
+/*
+ * The chain of connections to be shutdown by a worker thread (deferred),
+ * linked list updated atomically.
+ */
+static event_conn_state_t *volatile defer_linger_chain;
+
+APR_RING_HEAD(timeout_head_t, event_conn_state_t);
struct timeout_queue {
struct timeout_head_t head;
apr_interval_time_t timeout;
apr_uint32_t count; /* for this queue */
apr_uint32_t *total; /* for all chained/related queues */
+ const char *name; /* for logging */
struct timeout_queue *next; /* chaining */
};
+
/*
* Several timeout queues that use different timeouts, so that we always can
* simply append to the end.
@@ -279,39 +355,38 @@ struct timeout_queue {
* linger_q uses MAX_SECS_TO_LINGER
* short_linger_q uses SECONDS_TO_LINGER
*/
-static struct timeout_queue *waitio_q,
- *write_completion_q,
- *keepalive_q,
- *linger_q,
- *short_linger_q;
-static volatile apr_time_t queues_next_expiry;
+static struct timeout_queue *waitio_q, /* wait for I/O to happen */
+ *write_completion_q, /* completion or user async poll */
+ *keepalive_q, /* in between requests */
+ *linger_q, /* lingering (read) before close */
+ *short_linger_q; /* lingering (read) before close (short timeout) */
-/* Prevent extra poll/wakeup calls for timeouts close in the future (queues
- * have the granularity of a second anyway).
- * XXX: Wouldn't 0.5s (instead of 0.1s) be "enough"?
- */
-#define TIMEOUT_FUDGE_FACTOR apr_time_from_msec(100)
+static volatile apr_time_t queues_next_expiry; /* next expiry time across all queues */
/*
* Macros for accessing struct timeout_queue.
* For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
*/
-static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *el)
+static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *cs)
{
apr_time_t elem_expiry;
apr_time_t next_expiry;
- APR_RING_INSERT_TAIL(&q->head, el, event_conn_state_t, timeout_list);
+ ap_assert(q && !cs->q);
+
+ cs->q = q;
+ cs->queue_timestamp = apr_time_now();
+ APR_RING_INSERT_TAIL(&q->head, cs, event_conn_state_t, timeout_list);
++*q->total;
++q->count;
/* Cheaply update the global queues_next_expiry with the one of the
* first entry of this queue (oldest) if it expires before.
*/
- el = APR_RING_FIRST(&q->head);
- elem_expiry = el->queue_timestamp + q->timeout;
+ cs = APR_RING_FIRST(&q->head);
+ elem_expiry = cs->queue_timestamp + q->timeout;
next_expiry = queues_next_expiry;
- if (!next_expiry || next_expiry > elem_expiry + TIMEOUT_FUDGE_FACTOR) {
+ if (!next_expiry || next_expiry > elem_expiry + QUEUES_FUDGE_TIMEOUT) {
queues_next_expiry = elem_expiry;
/* Unblock the poll()ing listener for it to update its timeout. */
if (listener_is_wakeable) {
@@ -320,29 +395,51 @@ static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *el)
}
}
-static void TO_QUEUE_REMOVE(struct timeout_queue *q, event_conn_state_t *el)
+static void TO_QUEUE_REMOVE(struct timeout_queue *q, event_conn_state_t *cs)
{
- APR_RING_REMOVE(el, timeout_list);
- APR_RING_ELEM_INIT(el, timeout_list);
+ ap_assert(q && cs->q == q);
+ cs->q = NULL;
+
+ APR_RING_REMOVE(cs, timeout_list);
+ APR_RING_ELEM_INIT(cs, timeout_list);
--*q->total;
--q->count;
}
-static struct timeout_queue *TO_QUEUE_MAKE(apr_pool_t *p, apr_time_t t,
+static struct timeout_queue *TO_QUEUE_MAKE(apr_pool_t *p,
+ const char *name,
+ apr_interval_time_t t,
struct timeout_queue *ref)
{
struct timeout_queue *q;
-
+
q = apr_pcalloc(p, sizeof *q);
APR_RING_INIT(&q->head, event_conn_state_t, timeout_list);
q->total = (ref) ? ref->total : apr_pcalloc(p, sizeof *q->total);
q->timeout = t;
+ q->name = name;
return q;
}
-#define TO_QUEUE_ELEM_INIT(el) \
- APR_RING_ELEM_INIT((el), timeout_list)
+static struct timeout_queue *TO_QUEUE_CHAIN(apr_pool_t *p,
+ const char *name,
+ apr_interval_time_t t,
+ struct timeout_queue **ref,
+ apr_hash_t *ht, apr_pool_t *hp)
+{
+ struct timeout_queue *q = apr_hash_get(ht, &t, sizeof t);
+
+ if (!q) {
+ q = TO_QUEUE_MAKE(p, name, t, *ref);
+ q->next = *ref;
+ *ref = q;
+
+ apr_hash_set(ht, apr_pmemdup(hp, &t, sizeof t), sizeof t, q);
+ }
+
+ return q;
+}
#if HAVE_SERF
typedef struct {
@@ -454,6 +551,7 @@ static event_retained_data *retained;
#endif
struct event_srv_cfg_s {
+ /* Per server timeout queues */
struct timeout_queue *io_q,
*wc_q,
*ka_q;
@@ -512,37 +610,59 @@ static void disable_listensocks(void)
if (apr_atomic_cas32(&listensocks_disabled, 1, 0) != 0) {
return;
}
- if (event_pollset) {
- for (i = 0; i < num_listensocks; i++) {
- apr_pollset_remove(event_pollset, &listener_pollfd[i]);
- }
- }
+
+ ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(10381)
+ "Suspend listening sockets: idlers:%i conns:%u "
+ "waitio:%u write:%u keepalive:%u linger:%u/%u "
+ "suspended:%u clogged:%u",
+ ap_queue_info_num_idlers(worker_queue_info),
+ apr_atomic_read32(&connection_count),
+ apr_atomic_read32(waitio_q->total),
+ apr_atomic_read32(write_completion_q->total),
+ apr_atomic_read32(keepalive_q->total),
+ apr_atomic_read32(linger_q->total),
+ apr_atomic_read32(short_linger_q->total),
+ apr_atomic_read32(&suspended_count),
+ apr_atomic_read32(&clogged_count));
+
ap_scoreboard_image->parent[ap_child_slot].not_accepting = 1;
+
+ for (i = 0; i < num_listensocks; i++) {
+ apr_pollset_remove(event_pollset, &listener_pollfd[i]);
+ }
}
static void enable_listensocks(void)
{
int i;
if (listener_may_exit
- || apr_atomic_cas32(&listensocks_disabled, 0, 1) != 1) {
+ || apr_atomic_cas32(&listensocks_disabled, 0, 1) != 1) {
return;
}
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
- "Accepting new connections again: "
- "%u active conns (%u lingering/%u clogged/%u suspended), "
- "%u idle workers",
+
+ ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(00457)
+ "Resume listening sockets: idlers:%i conns:%u "
+ "waitio:%u write:%u keepalive:%u linger:%u/%u "
+ "suspended:%u clogged:%u",
+ ap_queue_info_num_idlers(worker_queue_info),
apr_atomic_read32(&connection_count),
- apr_atomic_read32(&lingering_count),
- apr_atomic_read32(&clogged_count),
+ apr_atomic_read32(waitio_q->total),
+ apr_atomic_read32(write_completion_q->total),
+ apr_atomic_read32(keepalive_q->total),
+ apr_atomic_read32(linger_q->total),
+ apr_atomic_read32(short_linger_q->total),
apr_atomic_read32(&suspended_count),
- ap_queue_info_num_idlers(worker_queue_info));
- for (i = 0; i < num_listensocks; i++)
- apr_pollset_add(event_pollset, &listener_pollfd[i]);
+ apr_atomic_read32(&clogged_count));
+
/*
* XXX: This is not yet optimal. If many workers suddenly become available,
* XXX: the parent may kill some processes off too soon.
*/
ap_scoreboard_image->parent[ap_child_slot].not_accepting = 0;
+
+ for (i = 0; i < num_listensocks; i++) {
+ apr_pollset_add(event_pollset, &listener_pollfd[i]);
+ }
}
static APR_INLINE apr_uint32_t listeners_disabled(void)
@@ -575,21 +695,23 @@ static APR_INLINE int should_enable_listensocks(void)
return !dying && listeners_disabled() && !connections_above_limit(NULL);
}
-static void close_socket_nonblocking_(apr_socket_t *csd,
- const char *from, int line)
+static void close_socket_at(apr_socket_t *csd,
+ const char *at, int line)
{
- apr_status_t rv;
apr_os_sock_t fd = -1;
+ apr_status_t rv = apr_os_sock_get(&fd, csd);
/* close_worker_sockets() may have closed it already */
- rv = apr_os_sock_get(&fd, csd);
- ap_log_error(APLOG_MARK, APLOG_TRACE8, 0, ap_server_conf,
- "closing socket %i/%pp from %s:%i", (int)fd, csd, from, line);
if (rv == APR_SUCCESS && fd == -1) {
+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+ "dead socket %pp at %s:%i", csd, at, line);
return;
}
- apr_socket_timeout_set(csd, 0);
+ ap_log_error(APLOG_MARK, APLOG_TRACE7, rv, ap_server_conf,
+ "closing socket %pp:%i at %s:%i", csd, (int)fd, at, line);
+
+ apr_socket_opt_set(csd, APR_SO_NONBLOCK, 1);
rv = apr_socket_close(csd);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468)
@@ -597,8 +719,8 @@ static void close_socket_nonblocking_(apr_socket_t *csd,
AP_DEBUG_ASSERT(0);
}
}
-#define close_socket_nonblocking(csd) \
- close_socket_nonblocking_(csd, __FUNCTION__, __LINE__)
+#define close_socket(csd) \
+ close_socket_at(csd, __FUNCTION__, __LINE__)
static void close_worker_sockets(void)
{
@@ -607,15 +729,16 @@ static void close_worker_sockets(void)
apr_socket_t *csd = worker_sockets[i];
if (csd) {
worker_sockets[i] = NULL;
- close_socket_nonblocking(csd);
+ close_socket(csd);
}
}
}
-static void wakeup_listener(void)
+static void shutdown_listener(void)
{
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
- "wake up listener%s", listener_may_exit ? " again" : "");
+ "shutting down listener%s",
+ listener_may_exit ? " again" : "");
listener_may_exit = 1;
disable_listensocks();
@@ -667,7 +790,7 @@ static void signal_threads(int mode)
/* in case we weren't called from the listener thread, wake up the
* listener thread
*/
- wakeup_listener();
+ shutdown_listener();
/* for ungraceful termination, let the workers exit now;
* for graceful termination, the listener thread will notify the
@@ -841,8 +964,10 @@ static apr_status_t decrement_connection_count(void *cs_)
{
int is_last_connection;
event_conn_state_t *cs = cs_;
- ap_log_cerror(APLOG_MARK, APLOG_TRACE8, 0, cs->c,
- "cleanup connection from state %i", (int)cs->pub.state);
+ ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
+ "connection %" CS_FMT_TO " cleaned up",
+ CS_ARG_TO(cs));
+
switch (cs->pub.state) {
case CONN_STATE_LINGER:
case CONN_STATE_LINGER_NORMAL:
@@ -861,8 +986,8 @@ static apr_status_t decrement_connection_count(void *cs_)
*/
is_last_connection = !apr_atomic_dec32(&connection_count);
if (listener_is_wakeable
- && ((is_last_connection && listener_may_exit)
- || should_enable_listensocks())) {
+ && ((is_last_connection && listener_may_exit)
+ || should_enable_listensocks())) {
apr_pollset_wakeup(event_pollset);
}
if (dying) {
@@ -895,7 +1020,7 @@ static void notify_resume(event_conn_state_t *cs, int cleanup)
static int defer_lingering_close(event_conn_state_t *cs)
{
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
- "deferring close from state %i", (int)cs->pub.state);
+ "deferring close for connection %" CS_FMT, CS_ARG(cs));
/* The connection is not shutdown() yet strictly speaking, but it's not
* in any queue nor handled by a worker either (will be very soon), so
@@ -922,14 +1047,28 @@ static int defer_lingering_close(event_conn_state_t *cs)
* Pre-condition: nonblocking, can be called from anywhere provided cs is not
* in any timeout queue or in the pollset.
*/
-static void close_connection(event_conn_state_t *cs)
+static void close_connection_at(event_conn_state_t *cs,
+ const char *at, int line)
{
- ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
- "closing connection from state %i", (int)cs->pub.state);
+ if (cs->c) {
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "closing connection %" CS_FMT " at %s:%i",
+ CS_ARG(cs), at, line);
+ }
+ else {
+ ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
+ "closing connection %" CS_FMT_TO " at %s:%i",
+ CS_ARG_TO(cs), at, line);
+ }
- close_socket_nonblocking(cs->pfd.desc.s);
+ close_socket_at(cs_sd(cs), at, line);
ap_queue_info_push_pool(worker_queue_info, cs->p);
}
+#define close_connection(cs) \
+ close_connection_at((cs), __FUNCTION__, __LINE__)
+
+/* forward declare */
+static void set_conn_state_sense(event_conn_state_t *cs, int sense);
/* Shutdown the connection in case of timeout, error or resources shortage.
* This starts short lingering close if not already there, or directly closes
@@ -1015,11 +1154,145 @@ static int event_post_read_request(request_rec *r)
return OK;
}
+static int pollset_add_at(event_conn_state_t *cs, int sense,
+ struct timeout_queue *q,
+ const char *at, int line)
+{
+ apr_status_t rv;
+
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE7, 0, cs->c,
+ "pollset: add %s=%" APR_TIME_T_FMT " events=%x"
+ " for connection %" CS_FMT " at %s:%i",
+ (q) ? "q" : "t",
+ (q) ? q->timeout : -1,
+ (int)cs->pfd.reqevents,
+ CS_ARG(cs), at, line);
+
+ ap_assert(cs->q == NULL && q != NULL);
+
+ set_conn_state_sense(cs, sense);
+
+ if (q) {
+ apr_thread_mutex_lock(timeout_mutex);
+ TO_QUEUE_APPEND(q, cs);
+ }
+ rv = apr_pollset_add(event_pollset, &cs->pfd);
+ if (rv != APR_SUCCESS) {
+ if (q) {
+ TO_QUEUE_REMOVE(q, cs);
+ apr_thread_mutex_unlock(timeout_mutex);
+ }
+
+ /* close_worker_sockets() may have closed it already */
+ if (workers_may_exit) {
+ AP_DEBUG_ASSERT(APR_STATUS_IS_EBADF(rv));
+ }
+ else {
+ ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(03093)
+ "pollset add failed for connection %" CS_FMT " at %s:%i",
+ CS_ARG(cs), at, line);
+ AP_DEBUG_ASSERT(0);
+ signal_threads(ST_GRACEFUL);
+ }
+ return 0;
+ }
+ if (q) {
+ apr_thread_mutex_unlock(timeout_mutex);
+ }
+ return 1;
+}
+#define pollset_add(cs, sense, q) \
+ pollset_add_at((cs), (sense), (q), __FUNCTION__, __LINE__)
+
+static int pollset_del_at(event_conn_state_t *cs, int locked,
+ const char *at, int line)
+{
+ apr_status_t rv;
+
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE7, 0, cs->c,
+ "pollset: del %s=%" APR_TIME_T_FMT " events=%x"
+ " for connection %" CS_FMT " at %s:%i",
+ (cs->q) ? "q" : "t",
+ (cs->q) ? cs->q->timeout : -1,
+ (int)cs->pfd.reqevents,
+ CS_ARG(cs), at, line);
+
+ ap_assert(cs->q != NULL);
+
+ if (cs->q) {
+ if (!locked) {
+ apr_thread_mutex_lock(timeout_mutex);
+ }
+ TO_QUEUE_REMOVE(cs->q, cs);
+ if (!locked) {
+ apr_thread_mutex_unlock(timeout_mutex);
+ }
+ }
+
+ /*
+ * Some of the pollset backends, like KQueue or Epoll
+ * automagically remove the FD if the socket is closed,
+ * therefore, we can accept _SUCCESS or _NOTFOUND,
+ * and we still want to keep going
+ */
+ rv = apr_pollset_remove(event_pollset, &cs->pfd);
+ if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
+ ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(03094)
+ "pollset remove failed for connection %" CS_FMT " at %s:%i",
+ CS_ARG(cs), at, line);
+ AP_DEBUG_ASSERT(0);
+ signal_threads(ST_GRACEFUL);
+ return 0;
+ }
+
+ return 1;
+}
+#define pollset_del(cs, locked) \
+ pollset_del_at((cs), (locked), __FUNCTION__, __LINE__)
+
/* Forward declare */
static void process_lingering_close(event_conn_state_t *cs);
-static void update_reqevents_from_sense(event_conn_state_t *cs,
- int default_sense)
+static event_conn_state_t *make_conn_state(apr_pool_t *p, apr_socket_t *csd)
+{
+ event_conn_state_t *cs = apr_pcalloc(p, sizeof(*cs));
+ listener_poll_type *pt;
+
+ cs->p = p;
+ cs->pfd.desc.s = csd;
+ cs->pfd.desc_type = APR_POLL_SOCKET;
+ cs->pfd.client_data = pt = apr_pcalloc(p, sizeof(*pt));
+ pt->type = PT_CSD;
+ pt->baton = cs;
+
+ APR_RING_ELEM_INIT(cs, timeout_list);
+
+ cs->sc = ap_get_module_config(ap_server_conf->module_config,
+ &mpm_event_module);
+
+ /**
+ * XXX If the platform does not have a usable way of bundling
+ * accept() with a socket readability check, like Win32,
+ * and there are measurable delays before the
+ * socket is readable due to the first data packet arriving,
+ * it might be better to create the cs on the listener thread
+ * with the state set to CONN_STATE_KEEPALIVE
+ *
+ * FreeBSD users will want to enable the HTTP accept filter
+ * module in their kernel for the highest performance
+ * When the accept filter is active, sockets are kept in the
+ * kernel until a HTTP request is received.
+ */
+ cs->pub.state = CONN_STATE_PROCESSING;
+ cs->pub.sense = CONN_SENSE_DEFAULT;
+
+ apr_atomic_inc32(&connection_count);
+ apr_pool_cleanup_register(p, cs, decrement_connection_count,
+ apr_pool_cleanup_null);
+ return cs;
+}
+
+static void set_conn_state_sense(event_conn_state_t *cs, int default_sense)
{
int sense = default_sense;
@@ -1046,80 +1319,51 @@ static void update_reqevents_from_sense(event_conn_state_t *cs,
/*
* process one connection in the worker
*/
-static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
- event_conn_state_t * cs, int my_child_num,
- int my_thread_num)
+static void process_socket(apr_thread_t *thd, apr_pool_t *p,
+ apr_socket_t *sock, event_conn_state_t *cs,
+ int my_child_num, int my_thread_num)
{
- conn_rec *c;
+ conn_rec *c = cs->c;
long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
- int clogging = 0, from_wc_q = 0;
- apr_status_t rv;
- int rc = OK;
+ int rc = OK, processed = 0, clogging;
- if (cs == NULL) { /* This is a new connection */
- listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
- cs = apr_pcalloc(p, sizeof(event_conn_state_t));
+ if (!c) { /* This is a new connection */
cs->bucket_alloc = apr_bucket_alloc_create(p);
ap_create_sb_handle(&cs->sbh, p, my_child_num, my_thread_num);
- c = ap_run_create_connection(p, ap_server_conf, sock,
- conn_id, cs->sbh, cs->bucket_alloc);
+ cs->c = c = ap_run_create_connection(p, ap_server_conf, sock, conn_id,
+ cs->sbh, cs->bucket_alloc);
if (!c) {
ap_queue_info_push_pool(worker_queue_info, p);
return;
}
- apr_atomic_inc32(&connection_count);
- apr_pool_cleanup_register(c->pool, cs, decrement_connection_count,
- apr_pool_cleanup_null);
+ apr_pool_pre_cleanup_register(p, cs, ptrans_pre_cleanup);
ap_set_module_config(c->conn_config, &mpm_event_module, cs);
c->current_thread = thd;
c->cs = &cs->pub;
- cs->c = c;
- cs->p = p;
- cs->sc = ap_get_module_config(ap_server_conf->module_config,
- &mpm_event_module);
- cs->pfd.desc_type = APR_POLL_SOCKET;
- cs->pfd.desc.s = sock;
- pt->type = PT_CSD;
- pt->baton = cs;
- cs->pfd.client_data = pt;
- apr_pool_pre_cleanup_register(p, cs, ptrans_pre_cleanup);
- TO_QUEUE_ELEM_INIT(cs);
ap_update_vhost_given_ip(c);
-
rc = ap_pre_connection(c, sock);
if (rc != OK && rc != DONE) {
ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(00469)
- "process_socket: connection aborted");
+ "process_socket: connection aborted (%d)", rc);
close_connection(cs);
return;
}
- /**
- * XXX If the platform does not have a usable way of bundling
- * accept() with a socket readability check, like Win32,
- * and there are measurable delays before the
- * socket is readable due to the first data packet arriving,
- * it might be better to create the cs on the listener thread
- * with the state set to CONN_STATE_KEEPALIVE
- *
- * FreeBSD users will want to enable the HTTP accept filter
- * module in their kernel for the highest performance
- * When the accept filter is active, sockets are kept in the
- * kernel until a HTTP request is received.
- */
- cs->pub.state = CONN_STATE_PROCESSING;
cs->pub.sense = CONN_SENSE_DEFAULT;
}
- else {
+ else { /* The connection is scheduled back */
c = cs->c;
+ c->current_thread = thd;
+ c->id = conn_id; /* thread number is part of ID */
ap_update_sb_handle(cs->sbh, my_child_num, my_thread_num);
notify_resume(cs, 0);
- c->current_thread = thd;
- /* Subsequent request on a conn, and thread number is part of ID */
- c->id = conn_id;
}
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "processing connection %" CS_FMT " (aborted %d, clogging %d)",
+ CS_ARG(cs), c->aborted, c->clogging_input_filters);
+
if (CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
goto lingering_close;
}
@@ -1133,8 +1377,8 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
*/
|| c->clogging_input_filters) {
process_connection:
+ processed = 1;
cs->pub.state = CONN_STATE_PROCESSING;
-
clogging = c->clogging_input_filters;
if (clogging) {
apr_atomic_inc32(&clogged_count);
@@ -1197,40 +1441,24 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
goto lingering_close;
}
}
- else if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
- from_wc_q = 1;
- }
if (cs->pub.state == CONN_STATE_ASYNC_WAITIO) {
/* Set a read/write timeout for this connection, and let the
* event thread poll for read/writeability.
*/
- cs->queue_timestamp = apr_time_now();
- notify_suspend(cs);
-
ap_update_child_status(cs->sbh, SERVER_BUSY_READ, NULL);
+ notify_suspend(cs);
/* Modules might set c->cs->sense to CONN_SENSE_WANT_WRITE,
* the default is CONN_SENSE_WANT_READ still.
*/
- update_reqevents_from_sense(cs, CONN_SENSE_WANT_READ);
- apr_thread_mutex_lock(timeout_mutex);
- TO_QUEUE_APPEND(cs->sc->io_q, cs);
- rv = apr_pollset_add(event_pollset, &cs->pfd);
- if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
- AP_DEBUG_ASSERT(0);
- TO_QUEUE_REMOVE(cs->sc->io_q, cs);
- apr_thread_mutex_unlock(timeout_mutex);
- ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(10503)
- "process_socket: apr_pollset_add failure in "
- "CONN_STATE_ASYNC_WAITIO");
- close_connection(cs);
- signal_threads(ST_GRACEFUL);
- }
- else {
- apr_thread_mutex_unlock(timeout_mutex);
+ if (pollset_add(cs, CONN_SENSE_WANT_READ, cs->sc->io_q)) {
+ apr_table_setn(cs->c->notes, "short-lingering-close", "1");
+ cs->pub.state = CONN_STATE_LINGER;
+ goto lingering_close;
}
- return;
+
+ return; /* queued */
}
if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
@@ -1239,11 +1467,9 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
/* Flush all pending outputs before going to CONN_STATE_KEEPALIVE or
* straight to CONN_STATE_PROCESSING if inputs are pending already.
*/
-
ap_update_child_status(cs->sbh, SERVER_BUSY_WRITE, NULL);
- if (from_wc_q) {
- from_wc_q = 0; /* one shot */
+ if (!processed) {
pending = ap_check_output_pending(c);
}
else if (ap_filter_should_yield(c->output_filters)) {
@@ -1251,38 +1477,24 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
}
if (pending == AGAIN) {
/* Let the event thread poll for write */
- cs->queue_timestamp = apr_time_now();
notify_suspend(cs);
-
- /* Add work to pollset. */
cs->pub.sense = CONN_SENSE_DEFAULT;
- update_reqevents_from_sense(cs, CONN_SENSE_WANT_WRITE);
- apr_thread_mutex_lock(timeout_mutex);
- TO_QUEUE_APPEND(cs->sc->wc_q, cs);
- rv = apr_pollset_add(event_pollset, &cs->pfd);
- if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
- AP_DEBUG_ASSERT(0);
- TO_QUEUE_REMOVE(cs->sc->wc_q, cs);
- apr_thread_mutex_unlock(timeout_mutex);
- ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03465)
- "process_socket: apr_pollset_add failure in "
- "CONN_STATE_WRITE_COMPLETION");
- close_connection(cs);
- signal_threads(ST_GRACEFUL);
- }
- else {
- apr_thread_mutex_unlock(timeout_mutex);
+ if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q)) {
+ return; /* queued */
}
- return;
+ /* Fall through lingering close */
+ apr_table_setn(cs->c->notes, "short-lingering-close", "1");
}
- if (pending != OK || c->aborted || c->keepalive != AP_CONN_KEEPALIVE) {
- cs->pub.state = CONN_STATE_LINGER;
- goto lingering_close;
- }
- if (ap_check_input_pending(c) == AGAIN) {
- goto process_connection;
+ else if (pending == OK) {
+ /* Some data to process immediately? */
+ pending = (c->keepalive == AP_CONN_KEEPALIVE
+ ? ap_check_input_pending(c)
+ : DONE);
+ if (pending == AGAIN) {
+ goto process_connection;
+ }
}
- if (listener_may_exit) {
+ if (pending != OK || listener_may_exit) {
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
}
@@ -1302,40 +1514,25 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
* timeout today. With a normal client, the socket will be readable in
* a few milliseconds anyway.
*/
- cs->queue_timestamp = apr_time_now();
notify_suspend(cs);
- /* Add work to pollset. */
- cs->pub.sense = CONN_SENSE_DEFAULT;
- update_reqevents_from_sense(cs, CONN_SENSE_WANT_READ);
- apr_thread_mutex_lock(timeout_mutex);
- TO_QUEUE_APPEND(cs->sc->ka_q, cs);
- rv = apr_pollset_add(event_pollset, &cs->pfd);
- if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
- AP_DEBUG_ASSERT(0);
- TO_QUEUE_REMOVE(cs->sc->ka_q, cs);
- apr_thread_mutex_unlock(timeout_mutex);
- ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03093)
- "process_socket: apr_pollset_add failure for "
- "keep alive");
- close_connection(cs);
- signal_threads(ST_GRACEFUL);
- }
- else {
- apr_thread_mutex_unlock(timeout_mutex);
+ if (!pollset_add(cs, CONN_SENSE_WANT_READ, cs->sc->ka_q)) {
+ apr_table_setn(cs->c->notes, "short-lingering-close", "1");
+ cs->pub.state = CONN_STATE_LINGER;
+ goto lingering_close;
}
- return;
+
+ return; /* queued */
}
if (cs->pub.state == CONN_STATE_SUSPENDED) {
cs->c->suspended_baton = cs;
apr_atomic_inc32(&suspended_count);
notify_suspend(cs);
- return;
+ return; /* done */
}
lingering_close:
- /* CONN_STATE_LINGER[_*] fall through process_lingering_close() */
process_lingering_close(cs);
}
@@ -1347,31 +1544,29 @@ static apr_status_t event_resume_suspended (conn_rec *c)
ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02615)
"event_resume_suspended: suspended_baton is NULL");
return APR_EGENERAL;
- } else if (!cs->suspended) {
+ }
+ if (!cs->suspended) {
ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02616)
"event_resume_suspended: Thread isn't suspended");
return APR_EGENERAL;
}
+
apr_atomic_dec32(&suspended_count);
c->suspended_baton = NULL;
+ cs->pub.sense = CONN_SENSE_DEFAULT;
if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
- cs->queue_timestamp = apr_time_now();
- notify_suspend(cs);
-
- cs->pub.sense = CONN_SENSE_DEFAULT;
cs->pub.state = CONN_STATE_WRITE_COMPLETION;
- update_reqevents_from_sense(cs, CONN_SENSE_WANT_WRITE);
- apr_thread_mutex_lock(timeout_mutex);
- TO_QUEUE_APPEND(cs->sc->wc_q, cs);
- apr_pollset_add(event_pollset, &cs->pfd);
- apr_thread_mutex_unlock(timeout_mutex);
- }
- else {
- process_lingering_close(cs);
- }
+ if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q)) {
+ return APR_SUCCESS; /* queued */
+ }
- return OK;
+ /* fall through lingering close on error */
+ apr_table_setn(cs->c->notes, "short-lingering-close", "1");
+ }
+ cs->pub.state = CONN_STATE_LINGER;
+ process_lingering_close(cs);
+ return APR_SUCCESS;
}
/* conns_this_child has gone to zero or below. See if the admin coded
@@ -1388,36 +1583,31 @@ static void check_infinite_requests(void)
conns_this_child = APR_INT32_MAX;
}
-static int close_listeners(int *closed)
+static void set_child_dying(void)
{
- if (!*closed) {
- int i;
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, "quiescing");
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
- "closing listeners (connection_count=%u)",
- apr_atomic_read32(&connection_count));
- ap_close_listeners_ex(my_bucket->listeners);
+ dying = 1;
+ ap_scoreboard_image->parent[ap_child_slot].quiescing = 1;
+ ap_close_listeners_ex(my_bucket->listeners);
- dying = 1;
- ap_scoreboard_image->parent[ap_child_slot].quiescing = 1;
+#if 0
+ {
+ int i;
for (i = 0; i < threads_per_child; ++i) {
ap_update_child_status_from_indexes(ap_child_slot, i,
SERVER_GRACEFUL, NULL);
}
- /* wake up the main thread */
- kill(ap_my_pid, SIGTERM);
-
- ap_queue_info_free_idle_pools(worker_queue_info);
- ap_queue_interrupt_all(worker_queue);
-
- *closed = 1; /* once */
- return 1;
}
+#endif
- ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
- "closed listeners (connection_count=%u)",
- apr_atomic_read32(&connection_count));
- return 0;
+ /* wake up idle worker threads */
+ ap_queue_interrupt_all(worker_queue);
+ /* wake up the main thread */
+ kill(ap_my_pid, SIGTERM);
+
+ /* No new connections will use the idle pools */
+ ap_queue_info_free_idle_pools(worker_queue_info);
}
static void unblock_signal(int sig)
@@ -1500,9 +1690,10 @@ static apr_status_t push2worker(event_conn_state_t *cs, apr_socket_t *csd,
apr_status_t rc;
if (cs) {
- csd = cs->pfd.desc.s;
ptrans = cs->p;
+ csd = cs_sd(cs);
}
+
rc = ap_queue_push_socket(worker_queue, csd, cs, ptrans);
if (rc != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf, APLOGNO(00471)
@@ -1515,7 +1706,7 @@ static apr_status_t push2worker(event_conn_state_t *cs, apr_socket_t *csd,
}
else {
if (csd) {
- close_socket_nonblocking(csd);
+ close_socket(csd);
}
if (ptrans) {
ap_queue_info_push_pool(worker_queue_info, ptrans);
@@ -1572,43 +1763,30 @@ static timer_event_t timer_free_ring;
static apr_skiplist *timer_skiplist;
static volatile apr_time_t timers_next_expiry;
-/* Same goal as for TIMEOUT_FUDGE_FACTOR (avoid extra poll calls), but applied
- * to timers. Since their timeouts are custom (user defined), we can't be too
- * approximative here (hence using 0.01s).
- */
-#define EVENT_FUDGE_FACTOR apr_time_from_msec(10)
-
-/* The following compare function is used by apr_skiplist_insert() to keep the
- * elements (timers) sorted and provide O(log n) complexity (this is also true
- * for apr_skiplist_{find,remove}(), but those are not used in MPM event where
- * inserted timers are not searched nor removed, but with apr_skiplist_pop()
- * which does use any compare function). It is meant to return 0 when a == b,
- * <0 when a < b, and >0 when a > b. However apr_skiplist_insert() will not
- * add duplicates (i.e. a == b), and apr_skiplist_add() is only available in
- * APR 1.6, yet multiple timers could possibly be created in the same micro-
- * second (duplicates with regard to apr_time_t); therefore we implement the
- * compare function to return +1 instead of 0 when compared timers are equal,
- * thus duplicates are still added after each other (in order of insertion).
+/* The timer_comp() function is used by apr_skiplist_insert() to keep the
+ * elements/timers sorted, but it must never return 0 because
+ * apr_skiplist_insert() drops duplicates (apr_skiplist_add() would keep
+ * them but it's only available from APR 1.6). Thus equal timers compare
+ * as greater and are inserted in order of arrival; they are never equal
+ * for the skiplist (not an issue because MPM event only uses
+ * apr_skiplist_pop(), not apr_skiplist_{find,remove}()).
*/
static int timer_comp(void *a, void *b)
{
- apr_time_t t1 = (apr_time_t) ((timer_event_t *)a)->when;
- apr_time_t t2 = (apr_time_t) ((timer_event_t *)b)->when;
- AP_DEBUG_ASSERT(t1);
- AP_DEBUG_ASSERT(t2);
- return ((t1 < t2) ? -1 : 1);
+ const timer_event_t *ta = a, *tb = b;
+ return (ta->when < tb->when) ? -1 : 1;
}
static apr_thread_mutex_t *g_timer_skiplist_mtx;
-static timer_event_t * event_get_timer_event(apr_time_t t,
- ap_mpm_callback_fn_t *cbfn,
- void *baton,
- int insert,
- apr_array_header_t *pfds)
+static timer_event_t *get_timer_event(apr_time_t timeout,
+ ap_mpm_callback_fn_t *cbfn,
+ void *baton,
+ int insert,
+ apr_array_header_t *pfds)
{
timer_event_t *te;
- apr_time_t now = (t < 0) ? 0 : apr_time_now();
+ apr_time_t now = (timeout < 0) ? 0 : apr_time_now();
/* oh yeah, and make locking smarter/fine grained. */
@@ -1620,16 +1798,16 @@ static timer_event_t * event_get_timer_event(apr_time_t t,
}
else {
te = apr_skiplist_alloc(timer_skiplist, sizeof(timer_event_t));
- APR_RING_ELEM_INIT(te, link);
+ memset(te, 0, sizeof(*te));
}
+ APR_RING_ELEM_INIT(te, link);
te->cbfunc = cbfn;
te->baton = baton;
- te->canceled = 0;
- te->when = now + t;
+ te->when = now + timeout;
te->pfds = pfds;
- if (insert) {
+ if (insert) {
apr_time_t next_expiry;
/* Okay, add sorted by when.. */
@@ -1639,33 +1817,51 @@ static timer_event_t * event_get_timer_event(apr_time_t t,
* if it expires before.
*/
next_expiry = timers_next_expiry;
- if (!next_expiry || next_expiry > te->when + EVENT_FUDGE_FACTOR) {
+ if (!next_expiry || next_expiry > te->when + TIMERS_FUDGE_TIMEOUT) {
timers_next_expiry = te->when;
- /* Unblock the poll()ing listener for it to update its timeout. */
+ /* Wake up the listener to eventually update its poll()ing timeout. */
if (listener_is_wakeable) {
apr_pollset_wakeup(event_pollset);
}
}
}
+
apr_thread_mutex_unlock(g_timer_skiplist_mtx);
return te;
}
-static apr_status_t event_register_timed_callback_ex(apr_time_t t,
+static void put_timer_event(timer_event_t *te, int locked)
+{
+ if (!locked) {
+ apr_thread_mutex_lock(g_timer_skiplist_mtx);
+ }
+
+ memset(te, 0, sizeof(*te));
+ APR_RING_INSERT_TAIL(&timer_free_ring.link, te, timer_event_t, link);
+
+ if (!locked) {
+ apr_thread_mutex_unlock(g_timer_skiplist_mtx);
+ }
+}
+
+static apr_status_t event_register_timed_callback_ex(apr_time_t timeout,
ap_mpm_callback_fn_t *cbfn,
- void *baton,
+ void *baton,
apr_array_header_t *pfds)
{
- event_get_timer_event(t, cbfn, baton, 1, pfds);
+ if (!cbfn) {
+ return APR_EINVAL;
+ }
+ get_timer_event(timeout, cbfn, baton, 1, pfds);
return APR_SUCCESS;
}
-static apr_status_t event_register_timed_callback(apr_time_t t,
+static apr_status_t event_register_timed_callback(apr_time_t timeout,
ap_mpm_callback_fn_t *cbfn,
void *baton)
{
- event_register_timed_callback_ex(t, cbfn, baton, NULL);
+ event_register_timed_callback_ex(timeout, cbfn, baton, NULL);
return APR_SUCCESS;
}
@@ -1687,6 +1883,10 @@ static apr_status_t event_cleanup_poll_callback(void *data)
}
}
+ if (final_rc) {
+ AP_DEBUG_ASSERT(0);
+ signal_threads(ST_GRACEFUL);
+ }
return final_rc;
}
@@ -1697,18 +1897,24 @@ static apr_status_t event_register_poll_callback_ex(apr_pool_t *p,
void *baton,
apr_time_t timeout)
{
- socket_callback_baton_t *scb = apr_pcalloc(p, sizeof(*scb));
- listener_poll_type *pt = apr_palloc(p, sizeof(*pt));
+ listener_poll_type *pt;
+ socket_callback_baton_t *scb;
apr_status_t rc, final_rc = APR_SUCCESS;
int i;
- pt->type = PT_USER;
- pt->baton = scb;
+ if (!cbfn || !tofn) {
+ return APR_EINVAL;
+ }
+ scb = apr_pcalloc(p, sizeof(*scb));
scb->cbfunc = cbfn;
scb->user_baton = baton;
scb->pfds = apr_array_copy(p, pfds);
+ pt = apr_palloc(p, sizeof(*pt));
+ pt->type = PT_USER;
+ pt->baton = scb;
+
apr_pool_pre_cleanup_register(p, scb->pfds, event_cleanup_poll_callback);
for (i = 0; i < scb->pfds->nelts; i++) {
@@ -1725,9 +1931,12 @@ static apr_status_t event_register_poll_callback_ex(apr_pool_t *p,
}
}
- if (timeout > 0) {
- /* XXX: This cancel timer event can fire before the pollset is updated */
- scb->cancel_event = event_get_timer_event(timeout, tofn, baton, 1, scb->pfds);
+ if (timeout > 0) {
+ /* Prevent the timer from firing before the pollset is updated */
+ if (timeout < TIMERS_FUDGE_TIMEOUT) {
+ timeout = TIMERS_FUDGE_TIMEOUT;
+ }
+ scb->cancel_event = get_timer_event(timeout, tofn, baton, 1, scb->pfds);
}
for (i = 0; i < scb->pfds->nelts; i++) {
apr_pollfd_t *pfd = (apr_pollfd_t *)scb->pfds->elts + i;
@@ -1766,14 +1975,13 @@ static apr_status_t event_register_poll_callback(apr_pool_t *p,
#define LINGERING_BUF_SIZE (32 * 1024)
static void process_lingering_close(event_conn_state_t *cs)
{
- apr_socket_t *csd = ap_get_conn_socket(cs->c);
char dummybuf[LINGERING_BUF_SIZE];
- apr_size_t nbytes;
+ apr_socket_t *csd = cs_sd(cs);
apr_status_t rv;
- struct timeout_queue *q;
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
- "lingering close from state %i", (int)cs->pub.state);
+ "lingering close for connection %" CS_FMT,
+ CS_ARG(cs));
AP_DEBUG_ASSERT(CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state));
if (!cs->linger_started) {
@@ -1791,7 +1999,9 @@ static void process_lingering_close(event_conn_state_t *cs)
close_connection(cs);
return;
}
-
+
+ notify_suspend(cs);
+
/* All nonblocking from now, no need for APR_INCOMPLETE_READ either */
apr_socket_timeout_set(csd, 0);
apr_socket_opt_set(csd, APR_INCOMPLETE_READ, 0);
@@ -1808,7 +2018,6 @@ static void process_lingering_close(event_conn_state_t *cs)
cs->pub.state = CONN_STATE_LINGER_NORMAL;
}
cs->pub.sense = CONN_SENSE_DEFAULT;
- notify_suspend(cs);
/* One timestamp/duration for the whole lingering close time.
* XXX: This makes the (short_)linger_q not sorted/ordered by expiring
@@ -1821,32 +2030,18 @@ static void process_lingering_close(event_conn_state_t *cs)
}
do {
- nbytes = sizeof(dummybuf);
+ apr_size_t nbytes = sizeof(dummybuf);
rv = apr_socket_recv(csd, dummybuf, &nbytes);
} while (rv == APR_SUCCESS);
-
- if (!APR_STATUS_IS_EAGAIN(rv)) {
- close_connection(cs);
- return;
- }
-
- /* (Re)queue the connection to come back when readable */
- update_reqevents_from_sense(cs, CONN_SENSE_WANT_READ);
- q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
- apr_thread_mutex_lock(timeout_mutex);
- TO_QUEUE_APPEND(q, cs);
- rv = apr_pollset_add(event_pollset, &cs->pfd);
- if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
- AP_DEBUG_ASSERT(0);
- TO_QUEUE_REMOVE(q, cs);
- apr_thread_mutex_unlock(timeout_mutex);
- ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03092)
- "process_lingering_close: apr_pollset_add failure");
- close_connection(cs);
- signal_threads(ST_GRACEFUL);
- return;
+ if (APR_STATUS_IS_EAGAIN(rv)) {
+ struct timeout_queue *q;
+ /* (Re)queue the connection to come back when readable */
+ q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
+ if (pollset_add(cs, CONN_SENSE_WANT_READ, q)) {
+ return; /* queued */
+ }
}
- apr_thread_mutex_unlock(timeout_mutex);
+ close_connection(cs);
}
/* call 'func' for all elements of 'q' above 'expiry'.
@@ -1860,7 +2055,6 @@ static void process_timeout_queue(struct timeout_queue *q, apr_time_t expiry,
event_conn_state_t *first, *cs, *last;
struct event_conn_state_t trash;
struct timeout_queue *qp;
- apr_status_t rv;
if (!*q->total) {
return;
@@ -1891,19 +2085,29 @@ static void process_timeout_queue(struct timeout_queue *q, apr_time_t expiry,
apr_time_t elem_expiry = cs->queue_timestamp + qp->timeout;
apr_time_t next_expiry = queues_next_expiry;
if (!next_expiry
- || next_expiry > elem_expiry + TIMEOUT_FUDGE_FACTOR) {
+ || next_expiry > elem_expiry + QUEUES_FUDGE_TIMEOUT) {
queues_next_expiry = elem_expiry;
}
break;
}
- last = cs;
- rv = apr_pollset_remove(event_pollset, &cs->pfd);
- if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
- AP_DEBUG_ASSERT(0);
- ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(00473)
- "apr_pollset_remove failed");
+ TO_QUEUE_REMOVE(qp, cs);
+ if (!pollset_del(cs, 1)) {
+ shutdown_connection(cs);
+ continue;
}
+
+ if (cs == first) {
+ APR_RING_INSERT_HEAD(&qp->head, cs, event_conn_state_t,
+ timeout_list);
+ }
+ else {
+ APR_RING_INSERT_AFTER(last, cs, timeout_list);
+ }
+ ++*qp->total;
+ ++qp->count;
+
+ last = cs;
cs = APR_RING_NEXT(cs, timeout_list);
count++;
}
@@ -1925,7 +2129,7 @@ static void process_timeout_queue(struct timeout_queue *q, apr_time_t expiry,
first = APR_RING_FIRST(&trash.timeout_list);
do {
cs = APR_RING_NEXT(first, timeout_list);
- TO_QUEUE_ELEM_INIT(first);
+ APR_RING_ELEM_INIT(cs, timeout_list);
func(first);
first = cs;
} while (--total);
@@ -1950,8 +2154,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
apr_status_t rc;
proc_info *ti = dummy;
int process_slot = ti->pslot;
- struct process_score *ps = ap_get_scoreboard_process(process_slot);
- int closed = 0;
+ process_score *ps = ap_get_scoreboard_process(process_slot);
int have_idle_worker = 0;
apr_time_t last_log;
@@ -1969,31 +2172,37 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
unblock_signal(LISTENER_SIGNAL);
for (;;) {
- timer_event_t *te;
- const apr_pollfd_t *out_pfd;
apr_int32_t num = 0;
- apr_interval_time_t timeout;
- socket_callback_baton_t *user_chain;
- apr_time_t now, expiry = -1;
+ apr_time_t next_expiry = -1;
+ apr_interval_time_t timeout = -1;
int workers_were_busy = 0;
+ socket_callback_baton_t *user_chain;
+ const apr_pollfd_t *out_pfd;
+ apr_time_t now;
+ event_conn_state_t *cs;
+ timer_event_t *te;
- if (conns_this_child <= 0)
+ if (conns_this_child <= 0) {
+ /* Gracefully stop (eventually) and keep going */
check_infinite_requests();
+ }
if (listener_may_exit) {
- int first_close = close_listeners(&closed);
+ int once = !dying;
+ if (once) {
+ set_child_dying();
+ }
if (terminate_mode == ST_UNGRACEFUL
|| apr_atomic_read32(&connection_count) == 0)
break;
- /* Don't wait in poll() for the first close (i.e. dying now), we
- * want to maintain the queues and schedule defer_linger_chain ASAP
- * to kill kept-alive connection and shutdown the workers and child
- * faster.
- */
- if (first_close) {
- goto do_maintenance; /* with expiry == -1 */
+ if (once) {
+ /* Don't wait in poll() the first time (i.e. dying now), we
+ * want to maintain the queues ASAP to shut down the workers
+ * and exit the child faster.
+ */
+ goto do_maintenance; /* with next_expiry == -1 */
}
}
@@ -2002,8 +2211,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
/* trace log status every second */
if (now - last_log > apr_time_from_sec(1)) {
ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
- "connections: %u (waitio:%u write-completion:%u"
- "keep-alive:%u lingering:%u suspended:%u clogged:%u), "
+ "connections: %u (waitio:%u write:%u keepalive:%u "
+ "lingering:%u suspended:%u clogged:%u), "
"workers: %u/%u shutdown",
apr_atomic_read32(&connection_count),
apr_atomic_read32(waitio_q->total),
@@ -2034,11 +2243,11 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
now = apr_time_now();
timeout = -1;
- /* Push expired timers to a worker, the first remaining one determines
- * the maximum time to poll() below, if any.
+ /* Push expired timers to a worker, the first remaining one (if any)
+ * determines the maximum time to poll() below.
*/
- expiry = timers_next_expiry;
- if (expiry && expiry < now) {
+ next_expiry = timers_next_expiry;
+ if (next_expiry && next_expiry <= now) {
apr_thread_mutex_lock(g_timer_skiplist_mtx);
while ((te = apr_skiplist_peek(timer_skiplist))) {
if (te->when > now) {
@@ -2047,56 +2256,67 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
break;
}
apr_skiplist_pop(timer_skiplist, NULL);
- if (!te->canceled) {
- if (te->pfds) {
- /* remove all sockets from the pollset */
- apr_pool_cleanup_run(te->pfds->pool, te->pfds,
- event_cleanup_poll_callback);
- }
- push_timer2worker(te);
+
+ if (te->canceled) {
+ put_timer_event(te, 1);
+ continue;
}
- else {
- APR_RING_INSERT_TAIL(&timer_free_ring.link, te,
- timer_event_t, link);
+
+ if (te->pfds) {
+ /* remove all sockets from the pollset */
+ apr_pool_cleanup_run(te->pfds->pool, te->pfds,
+ event_cleanup_poll_callback);
}
+ push_timer2worker(te);
+ }
+ if (te) {
+ next_expiry = te->when;
}
- if (!te) {
- timers_next_expiry = 0;
+ else {
+ next_expiry = 0;
}
+ timers_next_expiry = next_expiry;
apr_thread_mutex_unlock(g_timer_skiplist_mtx);
}
+ if (next_expiry) {
+ timeout = next_expiry > now ? next_expiry - now : 0;
+ }
/* Same for queues, use their next expiry, if any. */
- expiry = queues_next_expiry;
- if (expiry
- && (timeout < 0
- || expiry <= now
- || timeout > expiry - now)) {
- timeout = expiry > now ? expiry - now : 0;
+ next_expiry = queues_next_expiry;
+ if (next_expiry && (timeout < 0 || next_expiry - now < timeout)) {
+ timeout = next_expiry > now ? next_expiry - now : 0;
}
/* When non-wakeable, don't wait more than 100 ms, in any case. */
-#define NON_WAKEABLE_POLL_TIMEOUT apr_time_from_msec(100)
- if (!listener_is_wakeable
- && (timeout < 0
- || timeout > NON_WAKEABLE_POLL_TIMEOUT)) {
- timeout = NON_WAKEABLE_POLL_TIMEOUT;
+ if (!listener_is_wakeable && (timeout < 0 || timeout > NON_WAKEABLE_TIMEOUT)) {
+ timeout = NON_WAKEABLE_TIMEOUT;
}
else if (timeout > 0) {
- /* apr_pollset_poll() might round down the timeout to milliseconds,
- * let's forcibly round up here to never return before the timeout.
+ /* apr_pollset_poll() might round down the timeout to
+ * milliseconds, let's forcibly round up here to never
+ * return before the timeout.
*/
timeout = apr_time_from_msec(
apr_time_as_msec(timeout + apr_time_from_msec(1) - 1)
);
}
+ /* Unpause listening sockets before poll()ing if possible */
+ if (should_enable_listensocks()) {
+ enable_listensocks();
+ }
+
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
- "polling with timeout=%" APR_TIME_T_FMT
+ "pollset: wait for timeout=%" APR_TIME_T_FMT
" queues_timeout=%" APR_TIME_T_FMT
- " timers_timeout=%" APR_TIME_T_FMT,
- timeout, queues_next_expiry - now,
- timers_next_expiry - now);
+ " timers_timeout=%" APR_TIME_T_FMT
+ " conns=%d exit=%d/%d",
+ timeout,
+ queues_next_expiry ? queues_next_expiry - now : -1,
+ timers_next_expiry ? timers_next_expiry - now : -1,
+ apr_atomic_read32(&connection_count),
+ listener_may_exit, dying);
rc = apr_pollset_poll(event_pollset, timeout, &num, &out_pfd);
if (rc != APR_SUCCESS) {
@@ -2105,59 +2325,55 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
APLOGNO(03267)
"apr_pollset_poll failed. Attempting to "
"shutdown process gracefully");
+ AP_DEBUG_ASSERT(0);
signal_threads(ST_GRACEFUL);
}
num = 0;
}
if (APLOGtrace7(ap_server_conf)) {
+ apr_time_t old_now = now;
now = apr_time_now();
+
ap_log_error(APLOG_MARK, APLOG_TRACE7, rc, ap_server_conf,
- "polled with num=%u exit=%d/%d conns=%d"
+ "pollset: have #%i time=%" APR_TIME_T_FMT "/%" APR_TIME_T_FMT
" queues_timeout=%" APR_TIME_T_FMT
- " timers_timeout=%" APR_TIME_T_FMT,
- num, listener_may_exit, dying,
+ " timers_timeout=%" APR_TIME_T_FMT
+ " conns=%d exit=%d/%d",
+ (int)num, now - old_now, timeout,
+ queues_next_expiry ? queues_next_expiry - now : -1,
+ timers_next_expiry ? timers_next_expiry - now : -1,
apr_atomic_read32(&connection_count),
- queues_next_expiry - now, timers_next_expiry - now);
+ listener_may_exit, dying);
}
/* XXX possible optimization: stash the current time for use as
* r->request_time for new requests or queues maintenance
*/
- for (user_chain = NULL; num; --num, ++out_pfd) {
- listener_poll_type *pt = (listener_poll_type *) out_pfd->client_data;
+ for (user_chain = NULL; num > 0; --num, ++out_pfd) {
+ listener_poll_type *pt = out_pfd->client_data;
+
if (pt->type == PT_CSD) {
/* one of the sockets is readable */
- event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
- struct timeout_queue *remove_from_q = NULL;
- /* don't wait for a worker for a keepalive request or
- * lingering close processing. */
- int blocking = 0;
-
- switch (cs->pub.state) {
- case CONN_STATE_WRITE_COMPLETION:
- remove_from_q = cs->sc->wc_q;
- blocking = 1;
- break;
+ int blocking = 1;
- case CONN_STATE_ASYNC_WAITIO:
- cs->pub.state = CONN_STATE_PROCESSING;
- remove_from_q = cs->sc->io_q;
- blocking = 1;
- break;
+ cs = (event_conn_state_t *) pt->baton;
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "polled connection %" CS_FMT,
+ CS_ARG(cs));
+ switch (cs->pub.state) {
case CONN_STATE_KEEPALIVE:
+ case CONN_STATE_ASYNC_WAITIO:
cs->pub.state = CONN_STATE_PROCESSING;
- remove_from_q = cs->sc->ka_q;
+ case CONN_STATE_WRITE_COMPLETION:
break;
case CONN_STATE_LINGER_NORMAL:
- remove_from_q = linger_q;
- break;
-
case CONN_STATE_LINGER_SHORT:
- remove_from_q = short_linger_q;
+ /* don't wait for a worker for lingering close processing. */
+ blocking = 0;
break;
default:
@@ -2168,26 +2384,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ap_assert(0);
}
- if (remove_from_q) {
- apr_thread_mutex_lock(timeout_mutex);
- TO_QUEUE_REMOVE(remove_from_q, cs);
- rc = apr_pollset_remove(event_pollset, &cs->pfd);
- apr_thread_mutex_unlock(timeout_mutex);
- /*
- * Some of the pollset backends, like KQueue or Epoll
- * automagically remove the FD if the socket is closed,
- * therefore, we can accept _SUCCESS or _NOTFOUND,
- * and we still want to keep going
- */
- if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
- AP_DEBUG_ASSERT(0);
- ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
- APLOGNO(03094) "pollset remove failed");
- close_connection(cs);
- signal_threads(ST_GRACEFUL);
- break;
- }
+ if (!pollset_del(cs, 0)) {
+ shutdown_connection(cs);
+ continue;
+ }
+ {
/* If we don't get a worker immediately (nonblocking), we
* close the connection; the client can re-connect to a
* different process for keepalive, and for lingering close
@@ -2269,14 +2471,21 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
resource_shortage = 1;
signal_threads(ST_GRACEFUL);
}
- else if (ap_accept_error_is_nonfatal(rc)) {
- ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, ap_server_conf,
+ else if (ap_accept_error_is_nonfatal(rc)) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, ap_server_conf,
"accept() on client socket failed");
}
if (csd != NULL) {
conns_this_child--;
- if (push2worker(NULL, csd, ptrans) == APR_SUCCESS) {
+
+ /* Create and account for the connection from here, otherwise
+ * a graceful shutdown happening before it's processed would
+ * consider that it does not exist and could exit the child
+ * too early.
+ */
+ cs = make_conn_state(ptrans, csd);
+ if (push2worker(cs, NULL, NULL) == APR_SUCCESS) {
have_idle_worker = 0;
}
}
@@ -2304,7 +2513,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
* with the user callback being called while we handle
* the same baton multiple times here.
*/
- if (!baton->signaled) {
+ if (!baton->signaled) {
baton->signaled = 1;
baton->next = user_chain;
user_chain = baton;
@@ -2312,7 +2521,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
} /* for processing poll */
- /* Time to handle user callbacks chained above */
+ /* Time to queue user callbacks chained above */
while (user_chain) {
socket_callback_baton_t *baton = user_chain;
user_chain = user_chain->next;
@@ -2323,30 +2532,31 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
event_cleanup_poll_callback);
/* masquerade as a timer event that is firing */
- te = event_get_timer_event(-1 /* fake timer */,
- baton->cbfunc,
- baton->user_baton,
- 0, /* don't insert it */
- NULL /* no associated socket callback */);
+ te = get_timer_event(-1 /* fake timer */,
+ baton->cbfunc,
+ baton->user_baton,
+ 0, /* don't insert it */
+ NULL /* no associated socket callback */);
push_timer2worker(te);
}
/* We process the timeout queues here only when the global
- * queues_next_expiry is passed. This happens accurately since
+ * queues_next_expiry has passed. This happens accurately since
* adding to the queues (in workers) can only decrease this expiry,
* while latest ones are only taken into account here (in listener)
* during queues' processing, with the lock held. This works both
* with and without wake-ability.
*/
- expiry = queues_next_expiry;
+ next_expiry = queues_next_expiry;
do_maintenance:
- if (expiry && expiry < (now = apr_time_now())) {
+ if (next_expiry && next_expiry <= (now = apr_time_now())) {
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
- "queues maintenance with timeout=%" APR_TIME_T_FMT,
- expiry > 0 ? expiry - now : -1);
+ "queues maintenance: expired=%" APR_TIME_T_FMT,
+ next_expiry > 0 ? now - next_expiry : -1);
+
apr_thread_mutex_lock(timeout_mutex);
- /* Steps below will recompute this. */
+ /* Recompute this by walking the timeout queues (under the lock) */
queues_next_expiry = 0;
/* Step 1: keepalive queue timeouts are closed */
@@ -2373,11 +2583,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
/* Step 5: short lingering close queue timeouts are closed */
process_timeout_queue(short_linger_q, now, shutdown_connection);
+ next_expiry = queues_next_expiry;
apr_thread_mutex_unlock(timeout_mutex);
+
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
- "queues maintained with timeout=%" APR_TIME_T_FMT,
- queues_next_expiry > now ? queues_next_expiry - now
- : -1);
+ "queues maintained: next timeout=%" APR_TIME_T_FMT,
+ next_expiry ? next_expiry - now : -1);
ps->wait_io = apr_atomic_read32(waitio_q->total);
ps->write_completion = apr_atomic_read32(write_completion_q->total);
@@ -2411,12 +2622,11 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
have_idle_worker = 0;
}
}
-
- if (!workers_were_busy && should_enable_listensocks()) {
- enable_listensocks();
- }
} /* listener main loop */
+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+ "listener thread exiting");
+
ap_queue_term(worker_queue);
apr_thread_exit(thd, APR_SUCCESS);
@@ -2429,23 +2639,25 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
*
* return 1 if thread should exit, 0 if it should continue running.
*/
-static int worker_thread_should_exit_early(void)
+static int worker_thread_should_exit_early(int slot)
{
+ const apr_uint32_t max = threads_per_child;
for (;;) {
apr_uint32_t conns = apr_atomic_read32(&connection_count);
- apr_uint32_t dead = apr_atomic_read32(&threads_shutdown);
- apr_uint32_t newdead;
+ apr_uint32_t deads = apr_atomic_read32(&threads_shutdown);
- AP_DEBUG_ASSERT(dead <= threads_per_child);
- if (conns >= threads_per_child - dead)
+ AP_DEBUG_ASSERT(deads < max);
+ if (conns >= max - deads)
return 0;
- newdead = dead + 1;
- if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) {
+ if (apr_atomic_cas32(&threads_shutdown, deads + 1, deads) == deads) {
/*
* No other thread has exited in the mean time, safe to exit
* this one.
*/
+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+ "worker thread %i/%i-%i should exit (%i conns)",
+ slot, threads_per_child, deads + 1, conns);
return 1;
}
}
@@ -2463,20 +2675,21 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
proc_info *ti = dummy;
int process_slot = ti->pslot;
int thread_slot = ti->tslot;
+ worker_score *ws = &ap_scoreboard_image->servers[process_slot][thread_slot];
apr_status_t rv;
int is_idle = 0;
free(ti);
- ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
- ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
- ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->mpm->my_generation;
+ ws->pid = ap_my_pid;
+ ws->tid = apr_os_thread_current();
+ ws->generation = retained->mpm->my_generation;
ap_update_child_status_from_indexes(process_slot, thread_slot,
SERVER_STARTING, NULL);
for (;;) {
apr_socket_t *csd = NULL;
- event_conn_state_t *cs;
+ event_conn_state_t *cs = NULL;
timer_event_t *te = NULL;
apr_pool_t *ptrans; /* Pool for per-transaction stuff */
@@ -2490,23 +2703,33 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
signal_threads(ST_GRACEFUL);
break;
}
- /* A new idler may have changed connections_above_limit(),
- * let the listener know and decide.
+ ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
+ "worker thread %i/%i idle (idlers %i)",
+ thread_slot, threads_per_child,
+ ap_queue_info_num_idlers(worker_queue_info));
+ is_idle = 1;
+
+ /* If the listening sockets are paused and this new idler switches
+ * connections_above_limit() back, let the listener know and poll
+ * them again.
*/
if (listener_is_wakeable && should_enable_listensocks()) {
apr_pollset_wakeup(event_pollset);
}
- is_idle = 1;
}
ap_update_child_status_from_indexes(process_slot, thread_slot,
dying ? SERVER_GRACEFUL
- : SERVER_READY, NULL);
- worker_pop:
+ : SERVER_READY,
+ NULL);
+
if (workers_may_exit) {
+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+ "worker thread %i/%i may exit",
+ thread_slot, threads_per_child);
break;
}
- if (dying && worker_thread_should_exit_early()) {
+ if (dying && worker_thread_should_exit_early(thread_slot)) {
break;
}
@@ -2518,8 +2741,12 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
* connections accepted by this server process have been handled.
*/
if (APR_STATUS_IS_EOF(rv)) {
+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+ "worker thread %i/%i queue terminated",
+ thread_slot, threads_per_child);
break;
}
+
/* We get APR_EINTR whenever ap_queue_pop_*() has been interrupted
* from an explicit call to ap_queue_interrupt_all(). This allows
* us to unblock threads stuck in ap_queue_pop_*() when a shutdown
@@ -2531,26 +2758,29 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
* may have already been cleaned up. Don't log the "error" if
* workers_may_exit is set.
*/
- else if (APR_STATUS_IS_EINTR(rv)) {
- goto worker_pop;
- }
- /* We got some other error. */
- else if (!workers_may_exit) {
+ if (!APR_STATUS_IS_EINTR(rv) && !workers_may_exit) {
ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
- APLOGNO(03099) "ap_queue_pop_socket failed");
+ APLOGNO(03099) "ap_queue_pop_something failed");
+ AP_DEBUG_ASSERT(0);
+ signal_threads(ST_GRACEFUL);
}
continue;
}
+
+ ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
+ "worker thread %i/%i busy (idlers %i)",
+ thread_slot, threads_per_child,
+ ap_queue_info_num_idlers(worker_queue_info));
+
if (te != NULL) {
- te->cbfunc(te->baton);
- {
- apr_thread_mutex_lock(g_timer_skiplist_mtx);
- APR_RING_INSERT_TAIL(&timer_free_ring.link, te, timer_event_t, link);
- apr_thread_mutex_unlock(g_timer_skiplist_mtx);
- }
+ void *baton = te->baton;
+ ap_mpm_callback_fn_t *cbfunc = te->cbfunc;
+ /* first recycle the timer event */
+ put_timer_event(te, 0);
+ cbfunc(baton);
}
else {
- is_idle = 0;
+ is_idle = 0; /* consumed */
if (csd != NULL) {
worker_sockets[thread_slot] = csd;
process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
@@ -2572,15 +2802,23 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
cs->chain = NULL;
AP_DEBUG_ASSERT(cs->pub.state == CONN_STATE_LINGER);
- worker_sockets[thread_slot] = csd = cs->pfd.desc.s;
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "deferred close for connection %" CS_FMT, CS_ARG(cs));
+
+ worker_sockets[thread_slot] = csd = cs_sd(cs);
process_socket(thd, cs->p, csd, cs, process_slot, thread_slot);
worker_sockets[thread_slot] = NULL;
}
}
+ if (is_idle) {
+ /* Not idling anymore */
+ ap_queue_info_wait_for_idler(worker_queue_info, NULL);
+ }
ap_update_child_status_from_indexes(process_slot, thread_slot,
dying ? SERVER_DEAD
- : SERVER_GRACEFUL, NULL);
+ : SERVER_GRACEFUL,
+ NULL);
apr_thread_exit(thd, APR_SUCCESS);
return NULL;
@@ -2623,14 +2861,14 @@ static void setup_threads_runtime(void)
ap_listen_rec *lr;
apr_pool_t *pskip = NULL;
int max_recycled_pools = -1, i;
- const int good_methods[] = { APR_POLLSET_KQUEUE,
- APR_POLLSET_PORT,
+ const int good_methods[] = { APR_POLLSET_PORT,
+ APR_POLLSET_KQUEUE,
APR_POLLSET_EPOLL };
/* XXX: K-A or lingering close connection included in the async factor */
- const apr_uint32_t async_factor = worker_factor / WORKER_FACTOR_SCALE;
- const apr_uint32_t pollset_size = (apr_uint32_t)num_listensocks +
- (apr_uint32_t)threads_per_child *
- (async_factor > 2 ? async_factor : 2);
+ const unsigned int threads_factor = worker_factor / WORKER_FACTOR_SCALE;
+ const apr_size_t pollset_size = ((unsigned int)num_listensocks +
+ (unsigned int)threads_per_child *
+ (threads_factor > 2 ? threads_factor : 2));
int pollset_flags;
/* Event's skiplist operations will happen concurrently with other modules'
@@ -2730,13 +2968,13 @@ static void setup_threads_runtime(void)
}
/* Add listeners to the main pollset */
- listener_pollfd = apr_pcalloc(pruntime, num_listensocks *
- sizeof(apr_pollfd_t));
+ listener_pollfd = apr_pcalloc(pruntime,
+ num_listensocks * sizeof(apr_pollfd_t));
for (i = 0, lr = my_bucket->listeners; lr; lr = lr->next, i++) {
apr_pollfd_t *pfd;
listener_poll_type *pt;
- AP_DEBUG_ASSERT(i < num_listensocks);
+ ap_assert(i < num_listensocks);
pfd = &listener_pollfd[i];
pfd->reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
@@ -2758,7 +2996,12 @@ static void setup_threads_runtime(void)
pt->baton = lr;
apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
- apr_pollset_add(event_pollset, pfd);
+ rv = apr_pollset_add(event_pollset, pfd);
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(10473)
+ "apr_pollset_add for listener failed.");
+ clean_child_exit(APEXIT_CHILDFATAL);
+ }
lr->accept_func = ap_unixd_accept;
}
@@ -2906,7 +3149,7 @@ static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
/* listener has not stopped accepting yet */
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
"listener has not stopped accepting yet (%d iter)", iter);
- wakeup_listener();
+ shutdown_listener();
}
if (iter > 10) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00475)
@@ -2922,6 +3165,9 @@ static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
}
for (i = 0; i < threads_per_child; i++) {
+ ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+ "apr_thread_join: joining thread %pp (%i/%i)",
+ threads[i], i, threads_per_child);
if (threads[i]) { /* if we ever created this thread */
rv = apr_thread_join(&thread_rv, threads[i]);
if (rv != APR_SUCCESS) {
@@ -3043,7 +3289,7 @@ static void child_main(int child_num_arg, int child_bucket)
if (rv != APR_SUCCESS && rv != APR_ENOTIMPL) {
ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(02436)
"WARNING: ThreadStackSize of %" APR_SIZE_T_FMT " is "
- "inappropriate, using default",
+ "inappropriate, using default",
ap_thread_stacksize);
}
}
@@ -3384,7 +3630,7 @@ static void perform_idle_server_maintenance(void)
retained->maxclients_reported = 1;
}
}
- else {
+ else {
if (!retained->near_maxclients_reported) {
ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(10159)
"server is within MinSpareThreads of "
@@ -3490,7 +3736,7 @@ static void server_main_loop(int remaining_children_to_start)
child_slot = ap_find_child_by_pid(&pid);
if (processed_status == APEXIT_CHILDFATAL) {
/* fix race condition found in PR 39311
- * A child created at the same time as a graceful happens
+ * A child created at the same time as a graceful happens
* can find the lock missing and create a fatal error.
* It is not fatal for the last generation to be in this state.
*/
@@ -3866,25 +4112,23 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
return OK;
}
-static void setup_slave_conn(conn_rec *c, void *csd)
+static void setup_slave_conn(conn_rec *c, void *csd)
{
event_conn_state_t *mcs;
event_conn_state_t *cs;
-
+
mcs = ap_get_module_config(c->master->conn_config, &mpm_event_module);
-
- cs = apr_pcalloc(c->pool, sizeof(*cs));
+
+ cs = make_conn_state(c->pool, csd);
cs->c = c;
- cs->r = NULL;
cs->sc = mcs->sc;
cs->suspended = 0;
- cs->p = c->pool;
cs->bucket_alloc = c->bucket_alloc;
cs->pfd = mcs->pfd;
cs->pub = mcs->pub;
cs->pub.state = CONN_STATE_PROCESSING;
cs->pub.sense = CONN_SENSE_DEFAULT;
-
+
c->cs = &(cs->pub);
ap_set_module_config(c->conn_config, &mpm_event_module, cs);
}
@@ -3908,7 +4152,7 @@ static int event_protocol_switch(conn_rec *c, request_rec *r, server_rec *s,
* other than http/1.1, this might never happen.
*/
event_conn_state_t *cs;
-
+
cs = ap_get_module_config(c->conn_config, &mpm_event_module);
cs->sc = ap_get_module_config(s->module_config, &mpm_event_module);
}
@@ -3932,7 +4176,11 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
level_flags |= APLOG_STARTUP;
}
- if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
+ /* This sets up new listeners or reuses existing ones, as well as cleaning
+ * up unused ones from the previous generation.
+ */
+ num_listensocks = ap_setup_listeners(ap_server_conf);
+ if (num_listensocks < 1) {
ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
(startup ? NULL : s), APLOGNO(03272)
"no listening sockets available, shutting down");
@@ -4045,74 +4293,34 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
apr_pool_t *ptemp, server_rec *s)
{
- struct {
- struct timeout_queue *tail, *q;
- apr_hash_t *hash;
- } io, wc, ka;
+ apr_hash_t *io_h, *wc_h, *ka_h;
/* Not needed in pre_config stage */
if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
return OK;
}
- io.hash = apr_hash_make(ptemp);
- wc.hash = apr_hash_make(ptemp);
- ka.hash = apr_hash_make(ptemp);
- io.tail = wc.tail = ka.tail = NULL;
+ io_h = apr_hash_make(ptemp);
+ wc_h = apr_hash_make(ptemp);
+ ka_h = apr_hash_make(ptemp);
- linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(MAX_SECS_TO_LINGER),
- NULL);
- short_linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(SECONDS_TO_LINGER),
- NULL);
+ linger_q = TO_QUEUE_MAKE(pconf, "linger",
+ apr_time_from_sec(MAX_SECS_TO_LINGER), NULL);
+ short_linger_q = TO_QUEUE_MAKE(pconf, "short_linger",
+ apr_time_from_sec(SECONDS_TO_LINGER), NULL);
for (; s; s = s->next) {
event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
-
ap_set_module_config(s->module_config, &mpm_event_module, sc);
- if (!io.tail) {
- /* The main server uses the global queues */
- io.q = TO_QUEUE_MAKE(pconf, s->timeout, NULL);
- apr_hash_set(io.hash, &s->timeout, sizeof s->timeout, io.q);
- io.tail = waitio_q = io.q;
-
- wc.q = TO_QUEUE_MAKE(pconf, s->timeout, NULL);
- apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
- wc.tail = write_completion_q = wc.q;
-
- ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, NULL);
- apr_hash_set(ka.hash, &s->keep_alive_timeout,
- sizeof s->keep_alive_timeout, ka.q);
- ka.tail = keepalive_q = ka.q;
- }
- else {
- /* The vhosts use any existing queue with the same timeout,
- * or their own queue(s) if there isn't */
- io.q = apr_hash_get(io.hash, &s->timeout, sizeof s->timeout);
- if (!io.q) {
- io.q = TO_QUEUE_MAKE(pconf, s->timeout, io.tail);
- apr_hash_set(io.hash, &s->timeout, sizeof s->timeout, io.q);
- io.tail = io.tail->next = io.q;
- }
- wc.q = apr_hash_get(wc.hash, &s->timeout, sizeof s->timeout);
- if (!wc.q) {
- wc.q = TO_QUEUE_MAKE(pconf, s->timeout, wc.tail);
- apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
- wc.tail = wc.tail->next = wc.q;
- }
+ sc->io_q = TO_QUEUE_CHAIN(pconf, "waitio", s->timeout,
+ &waitio_q, io_h, ptemp);
- ka.q = apr_hash_get(ka.hash, &s->keep_alive_timeout,
- sizeof s->keep_alive_timeout);
- if (!ka.q) {
- ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, ka.tail);
- apr_hash_set(ka.hash, &s->keep_alive_timeout,
- sizeof s->keep_alive_timeout, ka.q);
- ka.tail = ka.tail->next = ka.q;
- }
- }
- sc->io_q = io.q;
- sc->wc_q = wc.q;
- sc->ka_q = ka.q;
+ sc->wc_q = TO_QUEUE_CHAIN(pconf, "write_completion", s->timeout,
+ &write_completion_q, wc_h, ptemp);
+
+ sc->ka_q = TO_QUEUE_CHAIN(pconf, "keepalive", s->keep_alive_timeout,
+ &keepalive_q, ka_h, ptemp);
}
return OK;
@@ -4430,7 +4638,7 @@ static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
threads_per_child = atoi(arg);
return NULL;
}
-static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
+static const char *set_server_limit(cmd_parms *cmd, void *dummy, const char *arg)
{
const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
if (err != NULL) {
diff --git a/server/mpm/motorz/motorz.c b/server/mpm/motorz/motorz.c
index 7026d08cd6e..e06aeab573b 100644
--- a/server/mpm/motorz/motorz.c
+++ b/server/mpm/motorz/motorz.c
@@ -380,8 +380,8 @@ static apr_status_t motorz_io_process(motorz_conn_t *scon)
scon->cs.state = CONN_STATE_PROCESSING;
}
-read_request:
if (scon->cs.state == CONN_STATE_PROCESSING) {
+ process_connection:
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(03328)
"motorz_io_process(): CONN_STATE_PROCESSING");
if (!c->aborted) {
@@ -432,14 +432,14 @@ static apr_status_t motorz_io_process(motorz_conn_t *scon)
}
return APR_SUCCESS;
}
- if (c->keepalive != AP_CONN_KEEPALIVE) {
- pending = DONE;
- }
- else if (pending == OK) {
- pending = ap_check_input_pending(c);
+ if (pending == OK) {
+ /* Some data to process immediately? */
+ pending = (c->keepalive == AP_CONN_KEEPALIVE
+ ? ap_check_input_pending(c)
+ : DONE);
if (pending == AGAIN) {
scon->cs.state = CONN_STATE_PROCESSING;
- goto read_request;
+ goto process_connection;
}
}
if (pending == OK) {
diff --git a/server/mpm/simple/simple_io.c b/server/mpm/simple/simple_io.c
index 36c5ad87956..154c9a2c1d3 100644
--- a/server/mpm/simple/simple_io.c
+++ b/server/mpm/simple/simple_io.c
@@ -126,11 +126,11 @@ static apr_status_t simple_io_process(simple_conn_t * scon)
}
return APR_SUCCESS;
}
- if (c->keepalive != AP_CONN_KEEPALIVE) {
- pending = DONE;
- }
- else if (pending == OK) {
- pending = ap_check_input_pending(c);
+ if (pending == OK) {
+ /* Some data to process immediately? */
+ pending = (c->keepalive == AP_CONN_KEEPALIVE
+ ? ap_check_input_pending(c)
+ : DONE);
if (pending == AGAIN) {
scon->cs.state = CONN_STATE_PROCESSING;
continue;
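
The event, motorz and simple hunks above all converge on the same
post-request dispatch under the new OK/AGAIN/DONE pending API. A minimal
standalone sketch of that pattern follows; the helper and its enum are
illustrative only and not part of the patches:

#include "httpd.h"        /* OK, DONE, AGAIN, conn_rec */
#include "util_filter.h"  /* ap_check_{input,output}_pending() */

/* Illustrative condensation (not in the patches) of the dispatch above,
 * turned into explicit outcomes for a caller to act on.
 */
enum post_request_action {
    POLL_FOR_WRITE,   /* output pending: queue for write completion */
    PROCESS_INPUT,    /* pipelined data readable: process it now */
    WAIT_KEEPALIVE,   /* nothing pending: queue for keep-alive */
    LINGERING_CLOSE   /* no keep-alive (or error/abort): close */
};

static enum post_request_action dispatch_after_request(conn_rec *c)
{
    int pending = ap_check_output_pending(c);
    if (pending == AGAIN) {
        return POLL_FOR_WRITE;
    }
    if (pending == OK) {
        /* Some data to process immediately? */
        pending = (c->keepalive == AP_CONN_KEEPALIVE
                   ? ap_check_input_pending(c)
                   : DONE);
        if (pending == AGAIN) {
            return PROCESS_INPUT;
        }
        if (pending == OK) {
            return WAIT_KEEPALIVE;
        }
    }
    return LINGERING_CLOSE;
}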
From db8ec1e53750901d0acf44a59e6346f9bd9c7b90 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 1 Feb 2022 22:47:38 +0100
Subject: [PATCH 04/22] mpm_event: Use monotonic timestamps if available.
If clock_gettime() and CLOCK_MONOTONIC are defined (i.e. most/all? unixes),
use them to provide a timestamp that never goes back in time (even if the
admin changes the system time). This avoids entries potentially expiring
centuries in the future on a bad clock skew.
* configure.in():
Provide HAVE_TIME_H, HAVE_CLOCK_GETTIME and HAVE_CLOCK_GETRES.
* server/mpm/event/event.c(event_time_now):
New helper to get a monotonic timestamp from clock_gettime() if it's
available, or apr_time_now() (i.e. gettimeofday()) otherwise.
* server/mpm/event/event.c(process_socket, event_resume_suspended,
event_get_timer_event, process_lingering_close,
listener_thread, event_run):
Use event_time_now().
---
configure.in | 5 ++
server/mpm/event/event.c | 112 +++++++++++++++++++++++++++++++++++----
2 files changed, 107 insertions(+), 10 deletions(-)
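
For reference, the conversion that the event_time_now() helper below
performs on POSIX systems, as a self-contained sketch (apr_time_t counts
microseconds, a timespec counts seconds plus nanoseconds):

#include <time.h>       /* clock_gettime(), CLOCK_MONOTONIC */
#include <apr_time.h>   /* apr_time_t, apr_time_from_sec() */

/* Monotonic "now" in apr_time_t microseconds, as in the patch below. */
static apr_time_t monotonic_now(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return apr_time_from_sec(ts.tv_sec) + ts.tv_nsec / 1000;
}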
diff --git a/configure.in b/configure.in
index c56c8972afd..4b2098d8034 100644
--- a/configure.in
+++ b/configure.in
@@ -471,6 +471,8 @@ AC_CHECK_HEADERS( \
string.h \
limits.h \
unistd.h \
+time.h \
+mach/mach_time.h \
sys/socket.h \
pwd.h \
grp.h \
@@ -534,6 +536,9 @@ getpwnam \
getgrnam \
initgroups \
bindprocessor \
+clock_getres \
+clock_gettime \
+clock_gettime_nsec_np \
prctl \
procctl \
pthread_getthreadid_np \
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 64ff1e30ead..795f4b1f37c 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -73,6 +73,9 @@
#ifdef HAVE_SYS_PROCESSOR_H
#include <sys/processor.h> /* for bindprocessor() */
#endif
+#ifdef HAVE_TIME_H
+#include <time.h> /* for clock_gettime() */
+#endif
#if !APR_HAS_THREADS
#error The Event MPM requires APR threads, but they are unavailable.
@@ -336,6 +339,93 @@ static APR_INLINE const char *cs_state_str(event_conn_state_t *cs)
*/
static event_conn_state_t *volatile defer_linger_chain;
+#define USE_CLOCK_COARSE 0 /* not for now */
+#if HAVE_CLOCK_GETTIME && defined(CLOCK_MONOTONIC) /* POSIX */
+static clockid_t event_clockid;
+#elif HAVE_CLOCK_GETTIME_NSEC_NP && defined(CLOCK_UPTIME_RAW) /* Newer OSX */
+/* All #include'd by <time.h> already */
+#elif HAVE_MACH_MACH_TIME_H /* Older OSX */
+#include <mach/mach_time.h>
+#endif
+
+static void event_time_init(void)
+{
+#if HAVE_CLOCK_GETTIME && defined(CLOCK_MONOTONIC)
+ event_clockid = (clockid_t)-1;
+
+#if HAVE_CLOCK_GETRES && defined(CLOCK_MONOTONIC_COARSE) && USE_CLOCK_COARSE
+ if (event_clockid == (clockid_t)-1) {
+ struct timespec ts;
+ if (clock_getres(CLOCK_MONOTONIC_COARSE, &ts) == 0) {
+ apr_time_t res = apr_time_from_sec(ts.tv_sec) + ts.tv_nsec / 1000;
+ if (res <= TIMERS_FUDGE_TIMEOUT) {
+ event_clockid = CLOCK_MONOTONIC_COARSE;
+ }
+ }
+ }
+#endif /* CLOCK_MONOTONIC_COARSE */
+
+#if HAVE_CLOCK_GETRES && defined(CLOCK_MONOTONIC_FAST) && USE_CLOCK_COARSE
+ if (event_clockid == (clockid_t)-1) {
+ struct timespec ts;
+ if (clock_getres(CLOCK_MONOTONIC_FAST, &ts) == 0) {
+ apr_time_t res = apr_time_from_sec(ts.tv_sec) + ts.tv_nsec / 1000;
+ if (res <= TIMERS_FUDGE_TIMEOUT) {
+ event_clockid = CLOCK_MONOTONIC_FAST;
+ }
+ }
+ }
+#endif /* CLOCK_MONOTONIC_FAST */
+
+#if HAVE_CLOCK_GETRES && defined(CLOCK_MONOTONIC_RAW_APPROX) && USE_CLOCK_COARSE
+ if (event_clockid == (clockid_t)-1) {
+ struct timespec ts;
+ if (clock_getres(CLOCK_MONOTONIC_RAW_APPROX, &ts) == 0) {
+ apr_time_t res = apr_time_from_sec(ts.tv_sec) + ts.tv_nsec / 1000;
+ if (res <= TIMERS_FUDGE_TIMEOUT) {
+ event_clockid = CLOCK_MONOTONIC_RAW_APPROX;
+ }
+ }
+ }
+#endif /* CLOCK_MONOTONIC_RAW_APPROX */
+
+ if (event_clockid == (clockid_t)-1) {
+#if defined(CLOCK_MONOTONIC_RAW)
+ event_clockid = CLOCK_MONOTONIC_RAW;
+#else
+ event_clockid = CLOCK_MONOTONIC;
+#endif
+ }
+
+#endif /* HAVE_CLOCK_GETTIME */
+}
+
+static apr_time_t event_time_now(void)
+{
+#if HAVE_CLOCK_GETTIME && defined(CLOCK_MONOTONIC)
+
+ struct timespec ts;
+ clock_gettime(event_clockid, &ts);
+ return apr_time_from_sec(ts.tv_sec) + ts.tv_nsec / 1000;
+
+#elif HAVE_CLOCK_GETTIME_NSEC_NP && defined(CLOCK_UPTIME_RAW)
+
+ return clock_gettime_nsec_np(CLOCK_UPTIME_RAW) / 1000;
+
+#elif HAVE_MACH_MACH_TIME_H
+
+ mach_timebase_info_data_t ti;
+ mach_timebase_info(&ti);
+ return mach_continuous_time() * ti.numer / ti.denom / 1000;
+
+#else
+
+ /* XXX: not monotonic, still some platform to care about? */
+ return apr_time_now();
+
+#endif
+}
+
APR_RING_HEAD(timeout_head_t, event_conn_state_t);
struct timeout_queue {
struct timeout_head_t head;
@@ -375,7 +465,7 @@ static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *cs)
ap_assert(q && !cs->q);
cs->q = q;
- cs->queue_timestamp = apr_time_now();
+ cs->queue_timestamp = event_time_now();
APR_RING_INSERT_TAIL(&q->head, cs, event_conn_state_t, timeout_list);
++*q->total;
++q->count;
@@ -1786,7 +1876,7 @@ static timer_event_t *get_timer_event(apr_time_t timeout,
apr_array_header_t *pfds)
{
timer_event_t *te;
- apr_time_t now = (timeout < 0) ? 0 : apr_time_now();
+ apr_time_t now = (timeout < 0) ? 0 : event_time_now();
/* oh yeah, and make locking smarter/fine grained. */
@@ -2158,7 +2248,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
int have_idle_worker = 0;
apr_time_t last_log;
- last_log = apr_time_now();
+ last_log = event_time_now();
free(ti);
#if HAVE_SERF
@@ -2207,7 +2297,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
if (APLOGtrace6(ap_server_conf)) {
- now = apr_time_now();
+ now = event_time_now();
/* trace log status every second */
if (now - last_log > apr_time_from_sec(1)) {
ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
@@ -2240,7 +2330,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
* up occurs, otherwise periodic checks (maintenance, shutdown, ...)
* must be performed.
*/
- now = apr_time_now();
+ now = event_time_now();
timeout = -1;
/* Push expired timers to a worker, the first remaining one (if any)
@@ -2333,7 +2423,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
if (APLOGtrace7(ap_server_conf)) {
apr_time_t old_now = now;
- now = apr_time_now();
+ now = event_time_now();
ap_log_error(APLOG_MARK, APLOG_TRACE7, rc, ap_server_conf,
"pollset: have #%i time=%" APR_TIME_T_FMT "/%" APR_TIME_T_FMT
@@ -2549,7 +2639,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
*/
next_expiry = queues_next_expiry;
do_maintenance:
- if (next_expiry && next_expiry <= (now = apr_time_now())) {
+ if (next_expiry && next_expiry <= (now = event_time_now())) {
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
"queues maintenance: expired=%" APR_TIME_T_FMT,
next_expiry > 0 ? now - next_expiry : -1);
@@ -3257,7 +3347,7 @@ static void child_main(int child_num_arg, int child_bucket)
}
/* For rand() users (e.g. skiplist). */
- srand((unsigned int)apr_time_now());
+ srand((unsigned int)event_time_now());
ap_run_child_init(pchild, ap_server_conf);
@@ -4057,7 +4147,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
}
if (ap_graceful_shutdown_timeout) {
- cutoff = apr_time_now() +
+ cutoff = event_time_now() +
apr_time_from_sec(ap_graceful_shutdown_timeout);
}
@@ -4079,7 +4169,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
}
}
} while (!retained->mpm->shutdown_pending && active_children &&
- (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
+ (!ap_graceful_shutdown_timeout || event_time_now() < cutoff));
/* We might be here because we received SIGTERM, either
* way, try and make sure that all of our processes are
@@ -4210,6 +4300,8 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
foreground = ap_exists_config_define("FOREGROUND");
}
+ event_time_init();
+
retained = ap_retained_data_get(userdata_key);
if (!retained) {
retained = ap_retained_data_create(userdata_key, sizeof(*retained));
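
As a side note on the clock selection above, here is a minimal standalone
sketch (illustrative only, not part of the patch) of the same idea, assuming
a Linux-style clock_gettime()/clock_getres() and a hypothetical 50000us
threshold standing in for TIMERS_FUDGE_TIMEOUT:

    #include <stdio.h>
    #include <stdint.h>
    #include <time.h>

    #define FUDGE_USEC 50000 /* hypothetical stand-in for TIMERS_FUDGE_TIMEOUT */

    static int64_t timespec_to_usec(const struct timespec *ts)
    {
        return (int64_t)ts->tv_sec * 1000000 + ts->tv_nsec / 1000;
    }

    int main(void)
    {
        clockid_t clock = CLOCK_MONOTONIC; /* portable fallback */
        struct timespec ts;

    #ifdef CLOCK_MONOTONIC_COARSE
        /* Prefer the cheaper coarse clock, but only when its resolution
         * is fine enough for the MPM timers (same check as above). */
        if (clock_getres(CLOCK_MONOTONIC_COARSE, &ts) == 0
            && timespec_to_usec(&ts) <= FUDGE_USEC) {
            clock = CLOCK_MONOTONIC_COARSE;
        }
    #endif

        clock_gettime(clock, &ts);
        printf("monotonic now: %lld usec\n", (long long)timespec_to_usec(&ts));
        return 0;
    }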
From 8f3ed4cc7a3d20b864ee898930e189e39cce55fa Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 9 Jul 2024 15:53:33 +0200
Subject: [PATCH 05/22] mpm_event: No need/use of "clogged" connections count,
axe.
---
server/mpm/event/event.c | 26 ++++++--------------------
1 file changed, 6 insertions(+), 20 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 795f4b1f37c..4e544ccdec9 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -205,7 +205,6 @@ static volatile int listener_may_exit = 0;
static apr_uint32_t connection_count = 0; /* Number of open connections */
static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */
static apr_uint32_t suspended_count = 0; /* Number of suspended connections */
-static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */
static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown
early during graceful termination */
static int had_healthy_child = 0;
@@ -703,8 +702,7 @@ static void disable_listensocks(void)
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(10381)
"Suspend listening sockets: idlers:%i conns:%u "
- "waitio:%u write:%u keepalive:%u linger:%u/%u "
- "suspended:%u clogged:%u",
+ "waitio:%u write:%u keepalive:%u linger:%u/%u suspended:%u",
ap_queue_info_num_idlers(worker_queue_info),
apr_atomic_read32(&connection_count),
apr_atomic_read32(waitio_q->total),
@@ -712,8 +710,7 @@ static void disable_listensocks(void)
apr_atomic_read32(keepalive_q->total),
apr_atomic_read32(linger_q->total),
apr_atomic_read32(short_linger_q->total),
- apr_atomic_read32(&suspended_count),
- apr_atomic_read32(&clogged_count));
+ apr_atomic_read32(&suspended_count));
ap_scoreboard_image->parent[ap_child_slot].not_accepting = 1;
@@ -732,8 +729,7 @@ static void enable_listensocks(void)
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(00457)
"Resume listening sockets: idlers:%i conns:%u "
- "waitio:%u write:%u keepalive:%u linger:%u/%u "
- "suspended:%u clogged:%u",
+ "waitio:%u write:%u keepalive:%u linger:%u/%u suspended:%u",
ap_queue_info_num_idlers(worker_queue_info),
apr_atomic_read32(&connection_count),
apr_atomic_read32(waitio_q->total),
@@ -741,8 +737,7 @@ static void enable_listensocks(void)
apr_atomic_read32(keepalive_q->total),
apr_atomic_read32(linger_q->total),
apr_atomic_read32(short_linger_q->total),
- apr_atomic_read32(&suspended_count),
- apr_atomic_read32(&clogged_count));
+ apr_atomic_read32(&suspended_count));
/*
* XXX: This is not yet optimal. If many workers suddenly become available,
@@ -1415,7 +1410,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
{
conn_rec *c = cs->c;
long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
- int rc = OK, processed = 0, clogging;
+ int rc = OK, processed = 0;
if (!c) { /* This is a new connection */
cs->bucket_alloc = apr_bucket_alloc_create(p);
@@ -1469,14 +1464,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
process_connection:
processed = 1;
cs->pub.state = CONN_STATE_PROCESSING;
- clogging = c->clogging_input_filters;
- if (clogging) {
- apr_atomic_inc32(&clogged_count);
- }
rc = ap_run_process_connection(c);
- if (clogging) {
- apr_atomic_dec32(&clogged_count);
- }
/*
* The process_connection hooks should set the appropriate connection
* state upon return, for event MPM to either:
@@ -2302,15 +2290,13 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
if (now - last_log > apr_time_from_sec(1)) {
ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
"connections: %u (waitio:%u write:%u keepalive:%u "
- "lingering:%u suspended:%u clogged:%u), "
- "workers: %u/%u shutdown",
+ "lingering:%u suspended:%u), workers: %u/%u shutdown",
apr_atomic_read32(&connection_count),
apr_atomic_read32(waitio_q->total),
apr_atomic_read32(write_completion_q->total),
apr_atomic_read32(keepalive_q->total),
apr_atomic_read32(&lingering_count),
apr_atomic_read32(&suspended_count),
- apr_atomic_read32(&clogged_count),
apr_atomic_read32(&threads_shutdown),
threads_per_child);
last_log = now;
From f1367ba03edeaaa1ddd451b2561b69e20c976c13 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 1 Feb 2022 22:24:15 +0100
Subject: [PATCH 06/22] mpm_event: Use r->server's Timeout after the
post_read_request hook.
Regardless of keep_alive_timeout_set, which anyway only concerns the
KeepAliveTimeout to apply _after_ the current request, always use the
request server's Timeout during its processing (i.e. CONN_STATE_HEAR
and CONN_STATE_COMPLETION).
To save the next KeepAliveTimeout to use later, add a new event_srv_cfg
pointer to the conn_state which points at the appropriate server
(either r->server or c->base_server, depending on keep_alive_timeout_set
as before).
* server/mpm/event/event.c(struct event_conn_state_t):
Add event_srv_cfg *ka_sc as the server config to apply for kept alive
connections.
* server/mpm/event/event.c(event_post_read_request):
Always set cs->sc to the event_srv_cfg of the request's server, and
point cs->ka_sc to the appropriate one according to keep_alive_timeout_set.
* server/mpm/event/event.c(make_conn_state):
Initialize cs->ka_sc to the ap_server_conf's event_srv_cfg, like cs->sc.
* server/mpm/event/event.c(process_socket):
Use cs->ka_sc->ka_q for CONN_STATE_KEEPALIVE.
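
A rough sketch of the resulting selection logic, using hypothetical types
in place of the httpd structures (illustrative only):

    /* Timeout always comes from the request's server; the KeepAliveTimeout
     * source depends on whether this vhost set one explicitly. */
    struct srv_cfg { long timeout_usec; long keepalive_timeout_usec; };

    static void pick_configs(const struct srv_cfg *request_server,
                             const struct srv_cfg *base_server,
                             int keep_alive_timeout_set,
                             const struct srv_cfg **sc_out,     /* cs->sc */
                             const struct srv_cfg **ka_sc_out)  /* cs->ka_sc */
    {
        *sc_out = request_server;
        *ka_sc_out = keep_alive_timeout_set ? request_server : base_server;
    }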
---
server/mpm/event/event.c | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 4e544ccdec9..601a23dd9f6 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -251,6 +251,8 @@ struct event_conn_state_t {
request_rec *r;
/** server config this struct refers to */
event_srv_cfg *sc;
+ /** server config this struct refers to during keepalive */
+ event_srv_cfg *ka_sc;
/** scoreboard handle for the conn_rec */
ap_sb_handle_t *sbh;
/** bucket allocator */
@@ -1224,18 +1226,23 @@ static int event_post_read_request(request_rec *r)
event_conn_state_t *cs = ap_get_module_config(c->conn_config,
&mpm_event_module);
+ /* Use Timeout from the request's server. */
+ cs->sc = ap_get_module_config(r->server->module_config,
+ &mpm_event_module);
+
/* To preserve legacy behaviour (consistent with other MPMs), use
- * the keepalive timeout from the base server (first on this IP:port)
- * when none is explicitly configured on this server.
+     * KeepAliveTimeout from the base server (first on this IP:port)
+ * when none is explicitly configured on this server. Otherwise
+ * use the one from the request's server.
*/
- if (r->server->keep_alive_timeout_set) {
- cs->sc = ap_get_module_config(r->server->module_config,
- &mpm_event_module);
+ if (!r->server->keep_alive_timeout_set) {
+ cs->ka_sc = ap_get_module_config(c->base_server->module_config,
+ &mpm_event_module);
}
else {
- cs->sc = ap_get_module_config(c->base_server->module_config,
- &mpm_event_module);
+ cs->ka_sc = cs->sc;
}
+
return OK;
}
@@ -1352,8 +1359,8 @@ static event_conn_state_t *make_conn_state(apr_pool_t *p, apr_socket_t *csd)
APR_RING_ELEM_INIT(cs, timeout_list);
- cs->sc = ap_get_module_config(ap_server_conf->module_config,
- &mpm_event_module);
+ cs->sc = cs->ka_sc = ap_get_module_config(ap_server_conf->module_config,
+ &mpm_event_module);
/**
* XXX If the platform does not have a usable way of bundling
@@ -1594,7 +1601,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
*/
notify_suspend(cs);
- if (!pollset_add(cs, CONN_SENSE_WANT_READ, cs->sc->ka_q)) {
+ if (!pollset_add(cs, CONN_SENSE_WANT_READ, cs->ka_sc->ka_q)) {
apr_table_setn(cs->c->notes, "short-lingering-close", "1");
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
From 589d21a0cacf822d905f3c37632be102b243921f Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 27 Jun 2023 03:26:56 +0200
Subject: [PATCH 07/22] mpm_event: Add kill_connection() to log (APLOG_INFO)
interrupted connections.
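
For context, kill_connection() relies on the usual call-site capture idiom;
a minimal, hypothetical sketch (names invented here, not the httpd API):

    #include <stdio.h>

    /* The helper takes the call site as arguments and the macro fills them
     * in, so the log points at where the kill was decided, not at the
     * helper itself. */
    static void kill_conn_at(const char *why, const char *at, int line)
    {
        fprintf(stderr, "killing connection (%s) at %s:%d\n", why, at, line);
    }
    #define kill_conn(why) kill_conn_at((why), __FUNCTION__, __LINE__)

    int main(void)
    {
        kill_conn("timeout"); /* prints "... at main:<line>" */
        return 0;
    }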
---
server/mpm/event/event.c | 26 +++++++++++++++++++++++---
1 file changed, 23 insertions(+), 3 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 601a23dd9f6..b58fc50bd94 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -1154,6 +1154,25 @@ static void close_connection_at(event_conn_state_t *cs,
#define close_connection(cs) \
close_connection_at((cs), __FUNCTION__, __LINE__)
+static void kill_connection_at(event_conn_state_t *cs, apr_status_t status,
+ const char *at, int line)
+{
+ if (cs->c) {
+ ap_log_cerror(APLOG_MARK, APLOG_INFO, status, cs->c, APLOGNO(10382)
+ "killing connection in %s at %s:%i",
+ cs_state_str(cs), at, line);
+ }
+ else {
+ ap_log_error(APLOG_MARK, APLOG_INFO, status, ap_server_conf, APLOGNO(10383)
+ "killing unprocessed connection from %pI in %s at %s:%i",
+ cs_raddr(cs), cs_state_str(cs), at, line);
+ }
+
+ close_connection_at(cs, at, line);
+}
+#define kill_connection(cs, status) \
+ kill_connection_at((cs), (status), __FUNCTION__, __LINE__)
+
/* forward declare */
static void set_conn_state_sense(event_conn_state_t *cs, int sense);
@@ -1787,7 +1806,7 @@ static apr_status_t push2worker(event_conn_state_t *cs, apr_socket_t *csd,
* socket to a worker
*/
if (cs) {
- shutdown_connection(cs);
+ kill_connection(cs, rc);
}
else {
if (csd) {
@@ -2178,7 +2197,7 @@ static void process_timeout_queue(struct timeout_queue *q, apr_time_t expiry,
TO_QUEUE_REMOVE(qp, cs);
if (!pollset_del(cs, 1)) {
- shutdown_connection(cs);
+ kill_connection(cs, APR_EGENERAL);
continue;
}
@@ -2468,7 +2487,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
if (!pollset_del(cs, 0)) {
- shutdown_connection(cs);
+ /* Can't go anywhere, kill (and log) and next. */
+ kill_connection(cs, APR_EGENERAL);
continue;
}
From 0ea6ae6162fbcdf4e659cacd16d07f93a61dbef7 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 1 Feb 2022 17:17:11 +0100
Subject: [PATCH 08/22] core,mod_reqtimeout: Add ap_get_connection_timeout().
Provide a new min_connection_timeout hook that modules enforcing a
dynamic connection timeout (e.g. mod_reqtimeout) should use to inform
ap_get_connection_timeout() users about the current timeout being
applied.
Expose the current timeout enforced by mod_reqtimeout by implementing
the min_connection_timeout hook.
* include/ap_mmn.h():
Minor bump for min_connection_timeout and ap_get_connection_timeout().
* include/http_connection.h():
Declare min_connection_timeout and ap_get_connection_timeout().
* server/connection.c():
Implement min_connection_timeout and ap_get_connection_timeout().
* modules/filters/mod_reqtimeout.c(struct reqtimeout_stage_t):
Add server_timeout as the timeout defined for the server at the current
stage.
* modules/filters/mod_reqtimeout.c(struct reqtimeout_con_cfg):
Add time_left as the dynamic timeout enforced by mod_reqtimeout at the
current stage.
* modules/filters/mod_reqtimeout.c(check_time_left):
Store the computed time_left in the reqtimeout_con_cfg, and set the
socket timeout there (returning an error which will be caught if that
fails).
* modules/filters/mod_reqtimeout.c(extend_timeout):
Update time_left in the reqtimeout_con_cfg by the extension granted for
the last read (see the sketch after this patch's diff).
* modules/filters/mod_reqtimeout.c(reqtimeout_filter):
Remove the special path for APR_NONBLOCK_READ or AP_MODE_EATCRLF, it
does the exact same thing than the !(AP_MODE_GETLINE && APR_BLOCK_READ)
one.
* modules/filters/mod_reqtimeout.c(reqtimeout_init, reqtimeout_before_header,
reqtimeout_before_body, INIT_STAGE):
Set the server_timeout in the current stage.
* modules/filters/mod_reqtimeout.c(reqtimeout_min_timeout):
The new hook implementation.
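
A simplified model of the aggregation (hypothetical types and names, with
the RUN_ALL hook mechanics deliberately reduced to a plain loop):

    #include <stdio.h>

    typedef long interval_t;  /* usecs; -1 means "unset" */
    typedef int (*min_timeout_fn)(interval_t *min_timeout);

    /* A mod_reqtimeout-like implementation: it may only lower the timeout. */
    static int stage_time_left_hook(interval_t *min_timeout)
    {
        interval_t time_left = 3 * 1000000L; /* pretend 3s left in this stage */
        if (*min_timeout < 0 || *min_timeout > time_left)
            *min_timeout = time_left;
        return 0; /* OK */
    }

    static interval_t get_connection_timeout(min_timeout_fn *hooks, int n,
                                             interval_t server_timeout)
    {
        interval_t timeout = -1; /* unset until some hook applies */
        int i;
        for (i = 0; i < n; ++i)
            (void)hooks[i](&timeout); /* a declining hook leaves it untouched */
        return (timeout < 0) ? server_timeout : timeout; /* fallback: Timeout */
    }

    int main(void)
    {
        min_timeout_fn hooks[] = { stage_time_left_hook };
        printf("timeout = %ld usec\n",
               get_connection_timeout(hooks, 1, 60 * 1000000L));
        return 0;
    }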
---
include/ap_mmn.h | 4 +-
include/http_connection.h | 5 ++
modules/filters/mod_reqtimeout.c | 127 +++++++++++++++++++------------
server/connection.c | 16 ++++
4 files changed, 101 insertions(+), 51 deletions(-)
diff --git a/include/ap_mmn.h b/include/ap_mmn.h
index acfa61e22b5..fb8f4512d47 100644
--- a/include/ap_mmn.h
+++ b/include/ap_mmn.h
@@ -733,6 +733,8 @@
* 20211221.25 (2.5.1-dev) AP_SLASHES and AP_IS_SLASH
* 20211221.26 (2.5.1-dev) Add AGAIN, ap_check_input_pending() and
* ap_check_output_pending()
+ * 20211221.27 (2.5.1-dev) Add min_connection_timeout hook and
+ * ap_get_connection_timeout()
*/
#define MODULE_MAGIC_COOKIE 0x41503235UL /* "AP25" */
@@ -740,7 +742,7 @@
#ifndef MODULE_MAGIC_NUMBER_MAJOR
#define MODULE_MAGIC_NUMBER_MAJOR 20211221
#endif
-#define MODULE_MAGIC_NUMBER_MINOR 26 /* 0...n */
+#define MODULE_MAGIC_NUMBER_MINOR 27 /* 0...n */
/**
* Determine if the server's current MODULE_MAGIC_NUMBER is at least a
diff --git a/include/http_connection.h b/include/http_connection.h
index a89113bcb3b..601a4769109 100644
--- a/include/http_connection.h
+++ b/include/http_connection.h
@@ -196,6 +196,11 @@ AP_DECLARE(conn_rec *) ap_create_secondary_connection(apr_pool_t *pool,
conn_rec *master,
apr_bucket_alloc_t *alloc);
+AP_DECLARE_HOOK(int, min_connection_timeout,
+ (conn_rec *c, server_rec *s, apr_interval_time_t *min_timeout))
+
+AP_DECLARE(apr_interval_time_t) ap_get_connection_timeout(conn_rec *c,
+ server_rec *s);
/** End Of Connection (EOC) bucket */
AP_DECLARE_DATA extern const apr_bucket_type_t ap_bucket_type_eoc;
diff --git a/modules/filters/mod_reqtimeout.c b/modules/filters/mod_reqtimeout.c
index 0e5afca57e4..693351e1280 100644
--- a/modules/filters/mod_reqtimeout.c
+++ b/modules/filters/mod_reqtimeout.c
@@ -45,6 +45,7 @@ typedef struct
int max_timeout; /* max timeout in secs */
int min_rate; /* min rate in bytes/s */
apr_time_t rate_factor; /* scale factor (#usecs per min_rate) */
+ apr_interval_time_t server_timeout; /* server timeout at this stage */
} reqtimeout_stage_t;
typedef struct
@@ -59,6 +60,7 @@ typedef struct
{
apr_time_t timeout_at;
apr_time_t max_timeout_at;
+ apr_interval_time_t time_left;
reqtimeout_stage_t cur_stage;
int in_keep_alive;
char *type;
@@ -74,34 +76,45 @@ static int default_body_rate_factor;
static void extend_timeout(reqtimeout_con_cfg *ccfg, apr_bucket_brigade *bb)
{
apr_off_t len;
+ apr_time_t old_timeout_at;
apr_time_t new_timeout_at;
if (apr_brigade_length(bb, 0, &len) != APR_SUCCESS || len <= 0)
return;
- new_timeout_at = ccfg->timeout_at + len * ccfg->cur_stage.rate_factor;
+ old_timeout_at = ccfg->timeout_at;
+ new_timeout_at = old_timeout_at + len * ccfg->cur_stage.rate_factor;
if (ccfg->max_timeout_at > 0 && new_timeout_at > ccfg->max_timeout_at) {
ccfg->timeout_at = ccfg->max_timeout_at;
}
else {
ccfg->timeout_at = new_timeout_at;
}
+
+ ccfg->time_left += new_timeout_at - old_timeout_at;
+ if (ccfg->time_left > ccfg->cur_stage.server_timeout) {
+ ccfg->time_left = ccfg->cur_stage.server_timeout;
+ }
}
-static apr_status_t check_time_left(reqtimeout_con_cfg *ccfg,
- apr_time_t *time_left_p,
- apr_time_t now)
+static apr_status_t check_and_update_time_left(reqtimeout_con_cfg *ccfg,
+ apr_time_t now)
{
if (!now)
now = apr_time_now();
- *time_left_p = ccfg->timeout_at - now;
- if (*time_left_p <= 0)
+
+ ccfg->time_left = ccfg->timeout_at - now;
+ if (ccfg->time_left <= 0)
return APR_TIMEUP;
- if (*time_left_p < apr_time_from_sec(1)) {
- *time_left_p = apr_time_from_sec(1);
+ if (ccfg->time_left < apr_time_from_sec(1)) {
+ ccfg->time_left = apr_time_from_sec(1);
}
- return APR_SUCCESS;
+ else if (ccfg->time_left > ccfg->cur_stage.server_timeout) {
+ ccfg->time_left = ccfg->cur_stage.server_timeout;
+ }
+
+ return apr_socket_timeout_set(ccfg->socket, ccfg->time_left);
}
static apr_status_t have_lf_or_eos(apr_bucket_brigade *bb)
@@ -168,16 +181,14 @@ static apr_status_t brigade_append(apr_bucket_brigade *bbOut, apr_bucket_brigade
}
-#define MIN(x,y) ((x) < (y) ? (x) : (y))
static apr_status_t reqtimeout_filter(ap_filter_t *f,
apr_bucket_brigade *bb,
ap_input_mode_t mode,
apr_read_type_e block,
apr_off_t readbytes)
{
- apr_time_t time_left;
- apr_time_t now = 0;
apr_status_t rv;
+ apr_time_t now = 0;
apr_interval_time_t saved_sock_timeout = UNSET;
reqtimeout_con_cfg *ccfg = f->ctx;
@@ -198,11 +209,11 @@ static apr_status_t reqtimeout_filter(ap_filter_t *f,
/* set new timeout */
now = apr_time_now();
ccfg->timeout_at = now + apr_time_from_sec(ccfg->cur_stage.timeout);
- ccfg->cur_stage.timeout = 0;
if (ccfg->cur_stage.max_timeout > 0) {
ccfg->max_timeout_at = now + apr_time_from_sec(ccfg->cur_stage.max_timeout);
ccfg->cur_stage.max_timeout = 0;
}
+ ccfg->cur_stage.timeout = 0;
}
else if (ccfg->timeout_at == 0) {
/* no timeout set, or in between requests */
@@ -213,39 +224,30 @@ static apr_status_t reqtimeout_filter(ap_filter_t *f,
ccfg->socket = ap_get_conn_socket(f->c);
}
- rv = check_time_left(ccfg, &time_left, now);
- if (rv != APR_SUCCESS)
- goto out;
-
- if (block == APR_NONBLOCK_READ || mode == AP_MODE_EATCRLF) {
- rv = ap_get_brigade(f->next, bb, mode, block, readbytes);
- if (ccfg->cur_stage.rate_factor && rv == APR_SUCCESS) {
- extend_timeout(ccfg, bb);
- }
- return rv;
- }
-
rv = apr_socket_timeout_get(ccfg->socket, &saved_sock_timeout);
AP_DEBUG_ASSERT(rv == APR_SUCCESS);
- rv = apr_socket_timeout_set(ccfg->socket, MIN(time_left, saved_sock_timeout));
- AP_DEBUG_ASSERT(rv == APR_SUCCESS);
+ rv = check_and_update_time_left(ccfg, now);
+ if (rv != APR_SUCCESS)
+ goto cleanup;
+
+ if (mode == AP_MODE_GETLINE && block == APR_BLOCK_READ) {
+ apr_off_t remaining = HUGE_STRING_LEN;
+#if APR_MAJOR_VERSION < 2
+ apr_int32_t nsds;
+ apr_interval_time_t poll_timeout;
+ apr_pollfd_t pollset;
+ pollset.p = NULL;
+#endif
- if (mode == AP_MODE_GETLINE) {
/*
* For a blocking AP_MODE_GETLINE read, apr_brigade_split_line()
* would loop until a whole line has been read. As this would make it
* impossible to enforce a total timeout, we only do non-blocking
* reads.
*/
- apr_off_t remaining = HUGE_STRING_LEN;
do {
apr_off_t bblen;
-#if APR_MAJOR_VERSION < 2
- apr_int32_t nsds;
- apr_interval_time_t poll_timeout;
- apr_pollfd_t pollset;
-#endif
rv = ap_get_brigade(f->next, bb, AP_MODE_GETLINE, APR_NONBLOCK_READ, remaining);
if (rv != APR_SUCCESS && !APR_STATUS_IS_EAGAIN(rv)) {
@@ -282,10 +284,12 @@ static apr_status_t reqtimeout_filter(ap_filter_t *f,
/* ... and wait for more */
#if APR_MAJOR_VERSION < 2
- pollset.p = f->c->pool;
- pollset.desc_type = APR_POLL_SOCKET;
- pollset.reqevents = APR_POLLIN|APR_POLLHUP;
- pollset.desc.s = ccfg->socket;
+ if (pollset.p == NULL) {
+ pollset.p = f->c->pool;
+ pollset.desc_type = APR_POLL_SOCKET;
+ pollset.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
+ pollset.desc.s = ccfg->socket;
+ }
apr_socket_timeout_get(ccfg->socket, &poll_timeout);
rv = apr_poll(&pollset, 1, &nsds, poll_timeout);
#else
@@ -294,14 +298,10 @@ static apr_status_t reqtimeout_filter(ap_filter_t *f,
if (rv != APR_SUCCESS)
break;
- rv = check_time_left(ccfg, &time_left, 0);
+ rv = check_and_update_time_left(ccfg, 0);
if (rv != APR_SUCCESS)
break;
- rv = apr_socket_timeout_set(ccfg->socket,
- MIN(time_left, saved_sock_timeout));
- AP_DEBUG_ASSERT(rv == APR_SUCCESS);
-
} while (1);
if (ccfg->tmpbb)
@@ -310,19 +310,21 @@ static apr_status_t reqtimeout_filter(ap_filter_t *f,
}
else { /* mode != AP_MODE_GETLINE */
rv = ap_get_brigade(f->next, bb, mode, block, readbytes);
+
/* Don't extend the timeout in speculative mode, wait for
* the real (relevant) bytes to be asked later, within the
* currently allotted time.
*/
- if (ccfg->cur_stage.rate_factor && rv == APR_SUCCESS
- && mode != AP_MODE_SPECULATIVE) {
+ if (rv == APR_SUCCESS
+ && mode != AP_MODE_SPECULATIVE
+ && ccfg->cur_stage.rate_factor) {
extend_timeout(ccfg, bb);
}
}
+cleanup:
apr_socket_timeout_set(ccfg->socket, saved_sock_timeout);
-out:
if (APR_STATUS_IS_TIMEUP(rv)) {
ap_log_cerror(APLOG_MARK, APLOG_INFO, 0, f->c, APLOGNO(01382)
"Request %s read timeout", ccfg->type);
@@ -353,7 +355,7 @@ static apr_status_t reqtimeout_eor(ap_filter_t *f, apr_bucket_brigade *bb)
return ap_pass_brigade(f->next, bb);
}
-#define INIT_STAGE(cfg, ccfg, stage) do { \
+#define INIT_STAGE(cfg, ccfg, stage, s_timeout) do { \
if (cfg->stage.timeout != UNSET) { \
ccfg->cur_stage.timeout = cfg->stage.timeout; \
ccfg->cur_stage.max_timeout = cfg->stage.max_timeout; \
@@ -364,6 +366,8 @@ static apr_status_t reqtimeout_eor(ap_filter_t *f, apr_bucket_brigade *bb)
ccfg->cur_stage.max_timeout = MRT_DEFAULT_##stage##_MAX_TIMEOUT; \
ccfg->cur_stage.rate_factor = default_##stage##_rate_factor; \
} \
+ ccfg->cur_stage.server_timeout = s_timeout; \
+ ccfg->time_left = ccfg->cur_stage.timeout; \
} while (0)
static int reqtimeout_init(conn_rec *c)
@@ -392,7 +396,7 @@ static int reqtimeout_init(conn_rec *c)
ccfg->type = "handshake";
if (cfg->handshake.timeout > 0) {
- INIT_STAGE(cfg, ccfg, handshake);
+ INIT_STAGE(cfg, ccfg, handshake, c->base_server->timeout);
}
}
@@ -422,7 +426,7 @@ static void reqtimeout_before_header(request_rec *r, conn_rec *c)
ccfg->timeout_at = 0;
ccfg->max_timeout_at = 0;
ccfg->in_keep_alive = (c->keepalives > 0);
- INIT_STAGE(cfg, ccfg, header);
+ INIT_STAGE(cfg, ccfg, header, c->base_server->timeout);
}
static int reqtimeout_before_body(request_rec *r)
@@ -447,11 +451,31 @@ static int reqtimeout_before_body(request_rec *r)
ccfg->cur_stage.timeout = 0;
}
else {
- INIT_STAGE(cfg, ccfg, body);
+ INIT_STAGE(cfg, ccfg, body, r->server->timeout);
}
return OK;
}
+static int reqtimeout_min_timeout(conn_rec *c, server_rec *s/*unused*/,
+ apr_interval_time_t *min_timeout)
+{
+ reqtimeout_con_cfg *ccfg = ap_get_module_config(c->conn_config,
+ &reqtimeout_module);
+ reqtimeout_stage_t *stage = &ccfg->cur_stage;
+
+ if (stage->timeout > 0 || ccfg->timeout_at) {
+ if (ccfg->time_left <= 0) {
+ *min_timeout = 0;
+ }
+ else if (*min_timeout < 0 || *min_timeout > ccfg->time_left) {
+ *min_timeout = ccfg->time_left;
+ }
+ return OK;
+ }
+
+ return DECLINED;
+}
+
#define UNSET_STAGE(cfg, stage) do { \
cfg->stage.timeout = UNSET; \
cfg->stage.max_timeout = UNSET; \
@@ -637,6 +661,9 @@ static void reqtimeout_hooks(apr_pool_t *pool)
ap_hook_post_read_request(reqtimeout_before_body, NULL, NULL,
APR_HOOK_MIDDLE);
+ ap_hook_min_connection_timeout(reqtimeout_min_timeout, NULL, NULL,
+ APR_HOOK_MIDDLE);
+
#if MRT_DEFAULT_handshake_MIN_RATE
default_handshake_rate_factor = apr_time_from_sec(1) /
MRT_DEFAULT_handshake_MIN_RATE;
diff --git a/server/connection.c b/server/connection.c
index f32a1f3712c..a1c4c1860f0 100644
--- a/server/connection.c
+++ b/server/connection.c
@@ -36,6 +36,7 @@ APR_HOOK_STRUCT(
APR_HOOK_LINK(pre_connection)
APR_HOOK_LINK(pre_close_connection)
APR_HOOK_LINK(create_secondary_connection)
+ APR_HOOK_LINK(min_connection_timeout)
)
AP_IMPLEMENT_HOOK_RUN_FIRST(conn_rec *,create_connection,
(apr_pool_t *p, server_rec *server, apr_socket_t *csd, long conn_id, void *sbh, apr_bucket_alloc_t *alloc),
@@ -46,6 +47,9 @@ AP_IMPLEMENT_HOOK_RUN_ALL(int,pre_close_connection,(conn_rec *c),(c),OK,DECLINED
AP_IMPLEMENT_HOOK_RUN_FIRST(conn_rec *,create_secondary_connection,
(apr_pool_t *p, conn_rec *master, apr_bucket_alloc_t *alloc),
(p, master, alloc), NULL)
+AP_IMPLEMENT_HOOK_RUN_ALL(int,min_connection_timeout,
+ (conn_rec *c, server_rec *s, apr_interval_time_t *min_timeout),
+ (c, s, min_timeout),OK,DECLINED)
AP_DECLARE(conn_rec *) ap_create_connection(apr_pool_t *p,
server_rec *server,
@@ -251,3 +255,15 @@ AP_CORE_DECLARE(void) ap_process_connection(conn_rec *c, void *csd)
ap_run_process_connection(c);
}
}
+
+AP_DECLARE(apr_interval_time_t) ap_get_connection_timeout(conn_rec *c,
+ server_rec *s)
+{
+ apr_interval_time_t timeout = -1;
+
+ if (ap_run_min_connection_timeout(c, s, &timeout) != OK || timeout < 0) {
+ timeout = (s) ? s->timeout : c->base_server->timeout;
+ }
+
+ return timeout;
+}
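
To make the extend_timeout() arithmetic above concrete, a standalone sketch
(hypothetical names, simplified from the patch): each byte read pushes the
deadline by rate_factor usecs, never past max_timeout_at, while the next
socket timeout (time_left) stays capped by the stage's server timeout:

    #include <stdio.h>

    typedef long long usec_t;

    struct state {
        usec_t timeout_at, max_timeout_at; /* absolute deadlines */
        usec_t time_left;                  /* next socket timeout */
        usec_t rate_factor;                /* usecs granted per byte read */
        usec_t server_timeout;             /* upper bound for this stage */
    };

    static void extend_timeout(struct state *st, usec_t bytes_read)
    {
        usec_t old_at = st->timeout_at;
        usec_t new_at = old_at + bytes_read * st->rate_factor;

        if (st->max_timeout_at > 0 && new_at > st->max_timeout_at)
            st->timeout_at = st->max_timeout_at;
        else
            st->timeout_at = new_at;

        st->time_left += new_at - old_at;
        if (st->time_left > st->server_timeout)
            st->time_left = st->server_timeout;
    }

    int main(void)
    {
        struct state st = { 5000000, 8000000, 5000000, 1000, 60000000 };
        extend_timeout(&st, 4096); /* a 4 KiB read grants ~4.1s more */
        printf("timeout_at=%lld time_left=%lld\n", st.timeout_at, st.time_left);
        return 0;
    }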
From 8bddc079c906fb556f6507026d32a6c3b1dcaae7 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 1 Feb 2022 17:25:48 +0100
Subject: [PATCH 09/22] mpm_event: Use ap_get_connection_timeout() for
CONN_STATE_ASYNC_WAITIO.
If ap_run_process_connection() returns CONN_STATE_ASYNC_WAITIO and the
connection timeout as returned by ap_get_connection_timeout() is different
from the waitio_q timeout, use a timer event rather than the waitio_q to keep
track of the idle connection.
* server/mpm_fdqueue.h(struct timer_event_t):
Add the "timeout" field to store the timeout of the timer; recomputing
it from "when" would otherwise require calling apr_time_now().
* server/mpm/event/event.c():
Use TIMERS_FUDGE_TIMEOUT as the minimal timer event timeout, to
prevent timers from firing before the sockets are added to the
pollset. Currently set to 50ms (an arbitrary value).
* server/mpm/event/event.c(struct event_conn_state_t):
Add the timer_event_t *te field as an alternative to the q.
* server/mpm/event/event.c(struct event_srv_cfg_s):
Add the server_rec *s field to backref the server_rec and easily pass
cs->sc->s to ap_get_connection_timeout().
* server/mpm/event/event.c(pollset_add_at, pollset_del_at):
If the connection is attached to a timer event, log a "t" instead of
a "q" and the timer's timeout instead of the q's.
* server/mpm/event/event.c(process_socket):
If ap_get_connection_timeout() is different from the waitio_q timeout,
acquire a timer event and associate it with the conn_state (see the
sketch below). A timer event associated with a conn_state has a NULL
callback (cbfn).
* server/mpm/event/event.c(event_get_timer_event):
Set the given timeout to the ->timeout field.
* server/mpm/event/event.c(event_register_timed_callback_ex,
event_register_poll_callback_ex):
Return APR_EINVAL if the given callbacks are NULL; this is now reserved
for conn_state timers. Since passing NULL callbacks would have crashed
at some point before, it's not really an API change.
* server/mpm/event/event.c(listener_thread):
Fix the poll() timeout computed from timers_next_expiry, which should
be taken into account whether or not it has already expired.
When a conn_state timer fires/expires, remove the connection from the
pollset and abort it (logged at APLOG_INFO).
When the socket of a conn_state with a timer is polled, cancel the timer.
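
Condensed, the decision made in process_socket() looks like the following
sketch (hypothetical helper types; assumes the 50ms TIMERS_FUDGE_TIMEOUT
mentioned above):

    #include <stddef.h>

    #define TIMERS_FUDGE_TIMEOUT_US 50000 /* assumed value for this sketch */

    typedef long long interval_t;
    struct timeout_queue_s; /* opaque here */
    struct timer_event_s;   /* opaque here */

    /* Sets exactly one of *q_out / *te_out; returns 0 on failure. */
    static int choose_tracking(interval_t conn_timeout, interval_t q_timeout,
                               struct timeout_queue_s *io_q,
                               struct timer_event_s *(*get_timer)(interval_t),
                               struct timeout_queue_s **q_out,
                               struct timer_event_s **te_out)
    {
        *q_out = NULL;
        *te_out = NULL;
        if (conn_timeout >= 0 && conn_timeout != q_timeout) {
            /* Clamp so the timer cannot fire before the pollset is updated. */
            if (conn_timeout < TIMERS_FUDGE_TIMEOUT_US)
                conn_timeout = TIMERS_FUDGE_TIMEOUT_US;
            *te_out = get_timer(conn_timeout);
            return *te_out != NULL;
        }
        *q_out = io_q; /* timeouts match: the per-server waitio queue is enough */
        return 1;
    }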
---
server/mpm/event/event.c | 82 ++++++++++++++++++++++++++++++----------
server/mpm_fdqueue.h | 1 +
2 files changed, 64 insertions(+), 19 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index b58fc50bd94..8ea061140c3 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -267,6 +267,8 @@ struct event_conn_state_t {
apr_time_t queue_timestamp;
/** the timeout queue for this entry */
struct timeout_queue *q;
+ /** the timer event for this entry */
+ timer_event_t *te;
/*
* when queued to workers
@@ -646,6 +648,7 @@ struct event_srv_cfg_s {
struct timeout_queue *io_q,
*wc_q,
*ka_q;
+ server_rec *s; /* backref */
};
#define ID_FROM_CHILD_THREAD(c, t) ((c * thread_limit) + t)
@@ -1266,7 +1269,7 @@ static int event_post_read_request(request_rec *r)
}
static int pollset_add_at(event_conn_state_t *cs, int sense,
- struct timeout_queue *q,
+ struct timeout_queue *q, timer_event_t *te,
const char *at, int line)
{
apr_status_t rv;
@@ -1275,11 +1278,11 @@ static int pollset_add_at(event_conn_state_t *cs, int sense,
"pollset: add %s=%" APR_TIME_T_FMT " events=%x"
" for connection %" CS_FMT " at %s:%i",
(q) ? "q" : "t",
- (q) ? q->timeout : -1,
+ (q) ? q->timeout : (te) ? te->timeout : -1,
(int)cs->pfd.reqevents,
CS_ARG(cs), at, line);
- ap_assert(cs->q == NULL && q != NULL);
+ ap_assert(cs->q == NULL && cs->te == NULL && ((q != NULL) ^ (te != NULL)));
set_conn_state_sense(cs, sense);
@@ -1287,12 +1290,20 @@ static int pollset_add_at(event_conn_state_t *cs, int sense,
apr_thread_mutex_lock(timeout_mutex);
TO_QUEUE_APPEND(q, cs);
}
+ else {
+ cs->te = te;
+ }
+
rv = apr_pollset_add(event_pollset, &cs->pfd);
if (rv != APR_SUCCESS) {
if (q) {
TO_QUEUE_REMOVE(q, cs);
apr_thread_mutex_unlock(timeout_mutex);
}
+ else {
+ te->canceled = 1;
+ cs->te = NULL;
+ }
/* close_worker_sockets() may have closed it already */
if (workers_may_exit) {
@@ -1312,8 +1323,8 @@ static int pollset_add_at(event_conn_state_t *cs, int sense,
}
return 1;
}
-#define pollset_add(cs, sense, q) \
- pollset_add_at((cs), (sense), (q), __FUNCTION__, __LINE__)
+#define pollset_add(cs, sense, q, te) \
+ pollset_add_at((cs), (sense), (q), (te), __FUNCTION__, __LINE__)
static int pollset_del_at(event_conn_state_t *cs, int locked,
const char *at, int line)
@@ -1324,11 +1335,11 @@ static int pollset_del_at(event_conn_state_t *cs, int locked,
"pollset: del %s=%" APR_TIME_T_FMT " events=%x"
" for connection %" CS_FMT " at %s:%i",
(cs->q) ? "q" : "t",
- (cs->q) ? cs->q->timeout : -1,
+ (cs->q) ? cs->q->timeout : (cs->te ? cs->te->timeout : -1),
(int)cs->pfd.reqevents,
CS_ARG(cs), at, line);
- ap_assert(cs->q != NULL);
+ ap_assert((cs->q != NULL) ^ (cs->te != NULL));
if (cs->q) {
if (!locked) {
@@ -1339,6 +1350,10 @@ static int pollset_del_at(event_conn_state_t *cs, int locked,
apr_thread_mutex_unlock(timeout_mutex);
}
}
+ else {
+ cs->te->canceled = 1;
+ cs->te = NULL;
+ }
/*
* Some of the pollset backends, like KQueue or Epoll
@@ -1362,6 +1377,10 @@ static int pollset_del_at(event_conn_state_t *cs, int locked,
pollset_del_at((cs), (locked), __FUNCTION__, __LINE__)
/* Forward declare */
+static timer_event_t *get_timer_event(apr_time_t timeout,
+ ap_mpm_callback_fn_t *cbfn, void *baton,
+ int insert,
+ apr_array_header_t *pfds);
static void process_lingering_close(event_conn_state_t *cs);
static event_conn_state_t *make_conn_state(apr_pool_t *p, apr_socket_t *csd)
@@ -1547,16 +1566,32 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
}
if (cs->pub.state == CONN_STATE_ASYNC_WAITIO) {
+ apr_interval_time_t timeout;
+ struct timeout_queue *q = NULL;
+ timer_event_t *te = NULL;
+
/* Set a read/write timeout for this connection, and let the
* event thread poll for read/writeability.
*/
ap_update_child_status(cs->sbh, SERVER_BUSY_READ, NULL);
notify_suspend(cs);
- /* Modules might set c->cs->sense to CONN_SENSE_WANT_WRITE,
- * the default is CONN_SENSE_WANT_READ still.
+        /* If the connection timeout is actually different from the waitio_q's,
+         * use a timer event to honor it (e.g. mod_reqtimeout may enforce its
+         * own timeouts per request stage).
*/
- if (pollset_add(cs, CONN_SENSE_WANT_READ, cs->sc->io_q)) {
+ timeout = ap_get_connection_timeout(c, cs->sc->s);
+ if (timeout >= 0 && timeout != cs->sc->io_q->timeout) {
+ /* Prevent the timer from firing before the pollset is updated */
+ if (timeout < TIMERS_FUDGE_TIMEOUT) {
+ timeout = TIMERS_FUDGE_TIMEOUT;
+ }
+ te = get_timer_event(timeout, NULL, cs, 1, NULL);
+ }
+ else {
+ q = cs->sc->io_q;
+ }
+ if (!pollset_add(cs, CONN_SENSE_WANT_READ, q, te)) {
apr_table_setn(cs->c->notes, "short-lingering-close", "1");
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
@@ -1583,7 +1618,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
/* Let the event thread poll for write */
notify_suspend(cs);
cs->pub.sense = CONN_SENSE_DEFAULT;
- if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q)) {
+ if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q, NULL)) {
return; /* queued */
}
/* Fall through lingering close */
@@ -1620,7 +1655,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
*/
notify_suspend(cs);
- if (!pollset_add(cs, CONN_SENSE_WANT_READ, cs->ka_sc->ka_q)) {
+ if (!pollset_add(cs, CONN_SENSE_WANT_READ, cs->ka_sc->ka_q, NULL)) {
apr_table_setn(cs->c->notes, "short-lingering-close", "1");
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
@@ -1661,7 +1696,7 @@ static apr_status_t event_resume_suspended (conn_rec *c)
cs->pub.sense = CONN_SENSE_DEFAULT;
if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
cs->pub.state = CONN_STATE_WRITE_COMPLETION;
- if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q)) {
+ if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q, NULL)) {
return APR_SUCCESS; /* queued */
}
@@ -1884,8 +1919,7 @@ static int timer_comp(void *a, void *b)
static apr_thread_mutex_t *g_timer_skiplist_mtx;
static timer_event_t *get_timer_event(apr_time_t timeout,
- ap_mpm_callback_fn_t *cbfn,
- void *baton,
+ ap_mpm_callback_fn_t *cbfn, void *baton,
int insert,
apr_array_header_t *pfds)
{
@@ -1909,6 +1943,7 @@ static timer_event_t *get_timer_event(apr_time_t timeout,
te->cbfunc = cbfn;
te->baton = baton;
te->when = now + timeout;
+ te->timeout = timeout;
te->pfds = pfds;
if (insert) {
@@ -2141,7 +2176,7 @@ static void process_lingering_close(event_conn_state_t *cs)
struct timeout_queue *q;
/* (Re)queue the connection to come back when readable */
q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
- if (pollset_add(cs, CONN_SENSE_WANT_READ, q)) {
+ if (pollset_add(cs, CONN_SENSE_WANT_READ, q, NULL)) {
return; /* queued */
}
}
@@ -2195,7 +2230,6 @@ static void process_timeout_queue(struct timeout_queue *q, apr_time_t expiry,
break;
}
- TO_QUEUE_REMOVE(qp, cs);
if (!pollset_del(cs, 1)) {
kill_connection(cs, APR_EGENERAL);
continue;
@@ -2353,8 +2387,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
apr_thread_mutex_lock(g_timer_skiplist_mtx);
while ((te = apr_skiplist_peek(timer_skiplist))) {
if (te->when > now) {
- timers_next_expiry = te->when;
- timeout = te->when - now;
break;
}
apr_skiplist_pop(timer_skiplist, NULL);
@@ -2364,6 +2396,17 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
continue;
}
+ if (!te->cbfunc) {
+ cs = te->baton;
+ put_timer_event(te, 1);
+ ap_assert(cs && cs->te == te);
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "timed out connection %" CS_FMT, CS_ARG(cs));
+ (void)pollset_del(cs, 0);
+ kill_connection(cs, APR_TIMEUP);
+ continue;
+ }
+
if (te->pfds) {
/* remove all sockets from the pollset */
apr_pool_cleanup_run(te->pfds->pool, te->pfds,
@@ -4417,6 +4460,7 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
for (; s; s = s->next) {
event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
ap_set_module_config(s->module_config, &mpm_event_module, sc);
+ sc->s = s; /* backref */
sc->io_q = TO_QUEUE_CHAIN(pconf, "waitio", s->timeout,
&waitio_q, io_h, ptemp);
diff --git a/server/mpm_fdqueue.h b/server/mpm_fdqueue.h
index 0dd558b938a..260e22ab80e 100644
--- a/server/mpm_fdqueue.h
+++ b/server/mpm_fdqueue.h
@@ -70,6 +70,7 @@ struct timer_event_t
void *baton;
int canceled;
apr_array_header_t *pfds;
+ apr_interval_time_t timeout;
};
typedef struct timer_event_t timer_event_t;
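
The q/te accounting asserted in pollset_add()/pollset_del() can be
illustrated with a small standalone check (hypothetical struct, not the
httpd one): while in the pollset, a connection is tracked by exactly one of
a timeout queue entry or a timer event.

    #include <assert.h>
    #include <stddef.h>

    struct conn_state { void *q; void *te; };

    static void assert_tracked(const struct conn_state *cs)
    {
        /* exactly one of q/te must be set, never both, never neither */
        assert((cs->q != NULL) ^ (cs->te != NULL));
    }

    int main(void)
    {
        static int a_queue, a_timer;
        struct conn_state queued = { &a_queue, NULL };
        struct conn_state timed  = { NULL, &a_timer };
        assert_tracked(&queued);
        assert_tracked(&timed);
        return 0;
    }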
From ae9a3b90f96939e38f2f92a73e34180b83e41c8a Mon Sep 17 00:00:00 2001
From: ylavic
Date: Mon, 3 Jun 2024 16:42:51 +0200
Subject: [PATCH 10/22] mpm_fdqueue: Allow queuing any event (socket, timer,
opaque), and use that for mpm_event's backlog queue.
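
For orientation, a much-reduced, hypothetical model of the unified queue
event introduced here: sockets, timers and opaque batons travel through one
queue as a tagged union, and an optional callback keeps a side queue (such
as mpm_event's backlog_q) in sync under the same lock:

    #include <stddef.h>

    typedef enum { EVT_SOCK, EVT_TIMER, EVT_OPAQUE } evt_type;

    typedef struct queue_event {
        evt_type type;
        void *data;                          /* socket/timer/opaque payload */
        void (*cb)(void *baton, int pushed); /* non-NULL while in a side queue */
        void *cb_baton;
        struct queue_event *next;
    } queue_event;

    typedef struct { queue_event *head, **tail; } event_queue;

    static void queue_init(event_queue *q)
    {
        q->head = NULL;
        q->tail = &q->head;
    }

    static void queue_push(event_queue *q, queue_event *qe)
    {
        qe->next = NULL;
        *q->tail = qe;
        q->tail = &qe->next;
        if (qe->cb)
            qe->cb(qe->cb_baton, 1); /* e.g. append to backlog_q */
    }

    static queue_event *queue_pop(event_queue *q)
    {
        queue_event *qe = q->head;
        if (qe) {
            q->head = qe->next;
            if (!q->head)
                q->tail = &q->head;
            if (qe->cb)
                qe->cb(qe->cb_baton, 0); /* e.g. remove from backlog_q */
        }
        return qe;
    }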
---
include/scoreboard.h | 1 +
modules/lua/lua_request.c | 4 +
server/mpm/event/event.c | 891 +++++++++++++++++++------------------
server/mpm/worker/worker.c | 9 +-
server/mpm_fdqueue.c | 580 +++++++++++++++---------
server/mpm_fdqueue.h | 92 ++--
6 files changed, 884 insertions(+), 693 deletions(-)
diff --git a/include/scoreboard.h b/include/scoreboard.h
index 25d19f03538..e83e52fdb16 100644
--- a/include/scoreboard.h
+++ b/include/scoreboard.h
@@ -149,6 +149,7 @@ struct process_score {
apr_uint32_t keep_alive; /* async connections in keep alive */
apr_uint32_t suspended; /* connections suspended by some module */
apr_uint32_t wait_io; /* async connections waiting an IO in the MPM */
+ apr_uint32_t backlog; /* async connections waiting for a worker */
};
/* Scoreboard is now in 'local' memory, since it isn't updated once created,
diff --git a/modules/lua/lua_request.c b/modules/lua/lua_request.c
index 6787bbfaf7f..5fa3a968c6b 100644
--- a/modules/lua/lua_request.c
+++ b/modules/lua/lua_request.c
@@ -1248,6 +1248,10 @@ static int lua_ap_scoreboard_process(lua_State *L)
lua_pushnumber(L, ps_record->connections);
lua_settable(L, -3);
+ lua_pushstring(L, "backlog");
+ lua_pushnumber(L, ps_record->backlog);
+ lua_settable(L, -3);
+
lua_pushstring(L, "keepalive");
lua_pushnumber(L, ps_record->keep_alive);
lua_settable(L, -3);
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 8ea061140c3..5a9f4b676b4 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -203,6 +203,7 @@ static volatile int workers_may_exit = 0;
static volatile int start_thread_may_exit = 0;
static volatile int listener_may_exit = 0;
static apr_uint32_t connection_count = 0; /* Number of open connections */
+static apr_uint32_t timers_count = 0; /* Number of queued timers */
static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */
static apr_uint32_t suspended_count = 0; /* Number of suspended connections */
static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown
@@ -236,6 +237,14 @@ static apr_thread_mutex_t *timeout_mutex;
* XXX: cases.
*/
static apr_pollset_t *event_pollset;
+#define POLLSET_RESERVE_SIZE 10000
+
+struct backlog_timer_event {
+ timer_event_t te;
+ ap_queue_event_t qe;
+};
+#define te_qe(te) (&((struct backlog_timer_event *)(te))->qe)
+#define te_in_backlog(te) (te_qe(te)->cb != NULL)
typedef struct event_conn_state_t event_conn_state_t;
struct event_conn_state_t {
@@ -273,8 +282,12 @@ struct event_conn_state_t {
/*
* when queued to workers
*/
- /** chaining in defer_linger_chain */
- struct event_conn_state_t *chain;
+ /** the backlog event for this entry */
+ struct backlog_socket_event {
+ sock_event_t se;
+ ap_queue_event_t qe;
+ struct timeout_queue *q;
+ } bse;
/*
* bools as bits
@@ -290,6 +303,9 @@ struct event_conn_state_t {
/** Has ap_start_lingering_close() been called? */
linger_started :1;
};
+#define cs_se(cs) (&(cs)->bse.se)
+#define cs_qe(cs) (&(cs)->bse.qe)
+#define cs_in_backlog(cs) (cs_qe(cs)->cb != NULL)
static APR_INLINE apr_socket_t *cs_sd(event_conn_state_t *cs)
{
@@ -336,12 +352,6 @@ static APR_INLINE const char *cs_state_str(event_conn_state_t *cs)
#define CS_FMT_TO CS_FMT " to [%pI]"
#define CS_ARG_TO(cs) CS_ARG(cs), cs_raddr(cs)
-/*
- * The chain of connections to be shutdown by a worker thread (deferred),
- * linked list updated atomically.
- */
-static event_conn_state_t *volatile defer_linger_chain;
-
#define USE_CLOCK_COARSE 0 /* not for now */
#if HAVE_CLOCK_GETTIME && defined(CLOCK_MONOTONIC) /* POSIX */
static clockid_t event_clockid;
@@ -447,14 +457,15 @@ struct timeout_queue {
* keepalive_q uses vhost's KeepAliveTimeOut
* linger_q uses MAX_SECS_TO_LINGER
* short_linger_q uses SECONDS_TO_LINGER
+ * backlog_q uses vhost's TimeOut
*/
static struct timeout_queue *waitio_q, /* wait for I/O to happen */
*write_completion_q, /* completion or user async poll */
*keepalive_q, /* in between requests */
*linger_q, /* lingering (read) before close */
- *short_linger_q; /* lingering (read) before close (short timeout) */
-
-static volatile apr_time_t queues_next_expiry; /* next expiry time accross all queues */
+ *short_linger_q, /* lingering (read) before close (short timeout) */
+ *backlog_q; /* waiting for a worker */
+static volatile apr_time_t queues_next_expiry; /* next expiry time across all queues */
/*
* Macros for accessing struct timeout_queue.
@@ -584,7 +595,6 @@ typedef struct socket_callback_baton
apr_array_header_t *pfds;
timer_event_t *cancel_event; /* If a timeout was requested, a pointer to the timer event */
struct socket_callback_baton *next;
- unsigned int signaled :1;
} socket_callback_baton_t;
typedef struct event_child_bucket {
@@ -647,7 +657,8 @@ struct event_srv_cfg_s {
/* Per server timeout queues */
struct timeout_queue *io_q,
*wc_q,
- *ka_q;
+ *ka_q,
+ *bl_q;
server_rec *s; /* backref */
};
@@ -696,25 +707,34 @@ static int ap_child_slot; /* Current child process slot in scoreboard */
*/
static apr_socket_t **worker_sockets;
-static volatile apr_uint32_t listensocks_disabled;
+/* Disabling / enabling listening sockets can only happen in the listener
+ * thread, which is the only one to set 'dying' to 1 too, so it's all thread
+ * safe. 'listensocks_off' is still changed atomically because it's read
+ * concurrently in listensocks_disabled().
+ */
+static /*atomic*/ apr_uint32_t listensocks_off = 0;
-static void disable_listensocks(void)
+static int disable_listensocks(void)
{
int i;
- if (apr_atomic_cas32(&listensocks_disabled, 1, 0) != 0) {
- return;
+
+ if (apr_atomic_cas32(&listensocks_off, 1, 0) != 0) {
+ return 0;
}
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(10381)
- "Suspend listening sockets: idlers:%i conns:%u "
- "waitio:%u write:%u keepalive:%u linger:%u/%u suspended:%u",
- ap_queue_info_num_idlers(worker_queue_info),
+ "Suspend listening sockets: idlers:%i conns:%u backlog:%u "
+ "waitio:%u write:%u keepalive:%u linger:%u/%u "
+ "timers:%u suspended:%u",
+ ap_queue_info_idlers_count(worker_queue_info),
apr_atomic_read32(&connection_count),
+ apr_atomic_read32(backlog_q->total),
apr_atomic_read32(waitio_q->total),
apr_atomic_read32(write_completion_q->total),
apr_atomic_read32(keepalive_q->total),
apr_atomic_read32(linger_q->total),
apr_atomic_read32(short_linger_q->total),
+ apr_atomic_read32(&timers_count),
apr_atomic_read32(&suspended_count));
ap_scoreboard_image->parent[ap_child_slot].not_accepting = 1;
@@ -722,26 +742,31 @@ static void disable_listensocks(void)
for (i = 0; i < num_listensocks; i++) {
apr_pollset_remove(event_pollset, &listener_pollfd[i]);
}
+ return 1;
}
-static void enable_listensocks(void)
+static int enable_listensocks(void)
{
int i;
+
if (listener_may_exit
- || apr_atomic_cas32(&listensocks_disabled, 0, 1) != 1) {
- return;
+ || apr_atomic_cas32(&listensocks_off, 0, 1) != 1) {
+ return 0;
}
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(00457)
- "Resume listening sockets: idlers:%i conns:%u "
- "waitio:%u write:%u keepalive:%u linger:%u/%u suspended:%u",
- ap_queue_info_num_idlers(worker_queue_info),
+ "Resume listening sockets: idlers:%i conns:%u backlog:%u "
+ "waitio:%u write:%u keepalive:%u linger:%u/%u "
+ "timers:%u suspended:%u",
+ ap_queue_info_idlers_count(worker_queue_info),
apr_atomic_read32(&connection_count),
+ apr_atomic_read32(backlog_q->total),
apr_atomic_read32(waitio_q->total),
apr_atomic_read32(write_completion_q->total),
apr_atomic_read32(keepalive_q->total),
apr_atomic_read32(linger_q->total),
apr_atomic_read32(short_linger_q->total),
+ apr_atomic_read32(&timers_count),
apr_atomic_read32(&suspended_count));
/*
@@ -753,23 +778,24 @@ static void enable_listensocks(void)
for (i = 0; i < num_listensocks; i++) {
apr_pollset_add(event_pollset, &listener_pollfd[i]);
}
+ return 1;
}
-static APR_INLINE apr_uint32_t listeners_disabled(void)
+static APR_INLINE int listensocks_disabled(void)
{
- return apr_atomic_read32(&listensocks_disabled);
+ return apr_atomic_read32(&listensocks_off) != 0;
}
static APR_INLINE int connections_above_limit(int *busy)
{
- apr_uint32_t i_count = ap_queue_info_num_idlers(worker_queue_info);
+ apr_int32_t i_count = ap_queue_info_idlers_count(worker_queue_info);
if (i_count > 0) {
apr_uint32_t c_count = apr_atomic_read32(&connection_count);
apr_uint32_t l_count = apr_atomic_read32(&lingering_count);
if (c_count <= l_count
- /* Off by 'listeners_disabled()' to avoid flip flop */
+ /* Off by 'listensocks_disabled()' to avoid flip flop */
|| c_count - l_count < (apr_uint32_t)threads_per_child +
- (i_count - listeners_disabled()) *
+ (i_count - listensocks_disabled()) *
(worker_factor / WORKER_FACTOR_SCALE)) {
return 0;
}
@@ -782,7 +808,7 @@ static APR_INLINE int connections_above_limit(int *busy)
static APR_INLINE int should_enable_listensocks(void)
{
- return !dying && listeners_disabled() && !connections_above_limit(NULL);
+ return !dying && listensocks_disabled() && !connections_above_limit(NULL);
}
static void close_socket_at(apr_socket_t *csd,
@@ -1101,36 +1127,6 @@ static void notify_resume(event_conn_state_t *cs, int cleanup)
ap_run_resume_connection(cs->c, cs->r);
}
-/*
- * Defer flush and close of the connection by adding it to defer_linger_chain,
- * for a worker to grab it and do the job (should that be blocking).
- * Pre-condition: nonblocking, can be called from anywhere provided cs is not
- * in any timeout queue or in the pollset.
- */
-static int defer_lingering_close(event_conn_state_t *cs)
-{
- ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
- "deferring close for connection %" CS_FMT, CS_ARG(cs));
-
- /* The connection is not shutdown() yet strictly speaking, but it's not
- * in any queue nor handled by a worker either (will be very soon), so
- * to account for it somewhere we bump lingering_count now (and set
- * deferred_linger for process_lingering_close() to know).
- */
- cs->pub.state = CONN_STATE_LINGER;
- apr_atomic_inc32(&lingering_count);
- cs->deferred_linger = 1;
- for (;;) {
- event_conn_state_t *chain = cs->chain = defer_linger_chain;
- if (apr_atomic_casptr((void *)&defer_linger_chain, cs,
- chain) != chain) {
- /* Race lost, try again */
- continue;
- }
- return 1;
- }
-}
-
/* Close the connection and release its resources (ptrans), either because an
* unrecoverable error occured (queues or pollset add/remove) or more usually
* if lingering close timed out.
@@ -1178,23 +1174,53 @@ static void kill_connection_at(event_conn_state_t *cs, apr_status_t status,
/* forward declare */
static void set_conn_state_sense(event_conn_state_t *cs, int sense);
+static void push2worker(event_conn_state_t *cs, timer_event_t *te,
+ apr_time_t now, int *busy);
/* Shutdown the connection in case of timeout, error or resources shortage.
* This starts short lingering close if not already there, or directly closes
* the connection otherwise.
* Pre-condition: nonblocking, can be called from anywhere provided cs is not
- * in any timeout queue or in the pollset.
+ * in the pollset nor any non-backlog timeout queue.
*/
-static int shutdown_connection(event_conn_state_t *cs)
+static void shutdown_connection(event_conn_state_t *cs, apr_time_t now,
+ int in_backlog)
{
- if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
- apr_table_setn(cs->c->notes, "short-lingering-close", "1");
- defer_lingering_close(cs);
+ ap_assert(!cs->q && !cs->te);
+
+ if (cs->c) {
+ int log_level = APLOG_INFO;
+ switch (cs->pub.state) {
+ case CONN_STATE_LINGER:
+ case CONN_STATE_LINGER_NORMAL:
+ case CONN_STATE_LINGER_SHORT:
+ case CONN_STATE_KEEPALIVE:
+ log_level = APLOG_TRACE2;
+ default:
+ break;
+ }
+ ap_log_cerror(APLOG_MARK, log_level, 0, cs->c, APLOGNO(10380)
+ "shutting down %s connection in %s",
+ in_backlog ? "backlog" : "timed out",
+ cs_state_str(cs));
+
+ /* Don't re-schedule connections in lingering close, they had
+ * their chance already so just close them now.
+ */
+ if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
+ apr_table_setn(cs->c->notes, "short-lingering-close", "1");
+ cs->pub.state = CONN_STATE_LINGER;
+ push2worker(cs, NULL, now, NULL);
+ }
+ else {
+ close_connection(cs);
+ }
}
else {
- close_connection(cs);
+ /* Never been scheduled/processed, kill it. */
+ ap_assert(in_backlog);
+ kill_connection(cs, APR_EBUSY);
}
- return 1;
}
/*
@@ -1388,10 +1414,13 @@ static event_conn_state_t *make_conn_state(apr_pool_t *p, apr_socket_t *csd)
event_conn_state_t *cs = apr_pcalloc(p, sizeof(*cs));
listener_poll_type *pt;
- cs->p = p;
- cs->pfd.desc.s = csd;
cs->pfd.desc_type = APR_POLL_SOCKET;
+ cs->pfd.desc.s = cs_se(cs)->sd = csd;
cs->pfd.client_data = pt = apr_pcalloc(p, sizeof(*pt));
+ cs_qe(cs)->cb_baton = cs_se(cs)->baton = cs;
+ cs_qe(cs)->type = AP_QUEUE_EVENT_SOCK;
+ cs_qe(cs)->data.se = cs_se(cs);
+ cs->p = cs_se(cs)->p = p;
pt->type = PT_CSD;
pt->baton = cs;
@@ -1814,85 +1843,128 @@ static void init_serf(apr_pool_t *p)
}
#endif
-static apr_status_t push_timer2worker(timer_event_t* te)
+/* A backlog connection is both in the worker_queue (for a worker to pull
+ * it ASAP) and in the backlog_q (for the listener to enforce a timeout).
+ * The worker_queue can do the queuing on both queues for us, that is,
+ * consistently and safely push/pop to/from both queues under its lock,
+ * thanks to a callback invoked whenever an event is pushed or popped.
+ */
+static void conn_state_backlog_cb(void *baton, int pushed)
{
- return ap_queue_push_timer(worker_queue, te);
+ event_conn_state_t *cs = baton;
+
+ if (pushed) {
+ TO_QUEUE_APPEND(cs->sc->bl_q, cs);
+ }
+ else { /* popped */
+ TO_QUEUE_REMOVE(cs->sc->bl_q, cs);
+
+ /* not in backlog anymore */
+ cs_qe(cs)->cb = NULL;
+ }
}
-/*
- * Pre-condition: cs is neither in event_pollset nor a timeout queue
- * this function may only be called by the listener
- */
-static apr_status_t push2worker(event_conn_state_t *cs, apr_socket_t *csd,
- apr_pool_t *ptrans)
+static void timer_event_backlog_cb(void *baton, int pushed)
{
- apr_status_t rc;
+ timer_event_t *te = baton;
+ ap_assert(te && te_qe(te));
- if (cs) {
- ptrans = cs->p;
- csd = cs_sd(cs);
+ if (pushed) {
+ apr_atomic_inc32(&timers_count);
}
+ else { /* popped */
+ apr_atomic_dec32(&timers_count);
- rc = ap_queue_push_socket(worker_queue, csd, cs, ptrans);
- if (rc != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf, APLOGNO(00471)
- "push2worker: ap_queue_push_socket failed");
- /* trash the connection; we couldn't queue the connected
- * socket to a worker
- */
- if (cs) {
- kill_connection(cs, rc);
- }
- else {
- if (csd) {
- close_socket(csd);
- }
- if (ptrans) {
- ap_queue_info_push_pool(worker_queue_info, ptrans);
- }
- }
- signal_threads(ST_GRACEFUL);
+ /* not in backlog anymore */
+ te_qe(te)->cb = NULL;
}
-
- return rc;
}
-/* get_worker:
- * If *have_idle_worker_p == 0, reserve a worker thread, and set
- * *have_idle_worker_p = 1.
- * If *have_idle_worker_p is already 1, will do nothing.
- * If blocking == 1, block if all workers are currently busy.
- * If no worker was available immediately, will set *all_busy to 1.
- * XXX: If there are no workers, we should not block immediately but
- * XXX: close all keep-alive connections first.
+/*
+ * Pre-condition: cs is neither in event_pollset nor a queue
+ * this function may only be called by the listener
*/
-static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
+static void push2worker(event_conn_state_t *cs, timer_event_t *te,
+ apr_time_t now, int *above_limit)
{
+ ap_queue_event_t *qe;
apr_status_t rc;
+ int busy;
+
+ ap_assert((cs != NULL) ^ (te != NULL));
- if (*have_idle_worker_p) {
- /* already reserved a worker thread - must have hit a
- * transient error on a previous pass
+ busy = (ap_queue_info_idlers_dec(worker_queue_info) < 0);
+ if (busy) {
+        /* Might need to stop feeding the fire by not accepting new connections until
+ * the situation settles down. The listener and new idling workers will
+ * test for should_enable_listensocks() to recover (when suitable).
*/
- return;
+ if (connections_above_limit(NULL)) {
+ disable_listensocks();
+ if (above_limit) {
+ *above_limit = 1;
+ }
+ }
}
- if (blocking)
- rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
- else
- rc = ap_queue_info_try_get_idler(worker_queue_info);
+ if (te) {
+ ap_assert(!te_in_backlog(te));
- if (rc == APR_SUCCESS || APR_STATUS_IS_EOF(rc)) {
- *have_idle_worker_p = 1;
- }
- else if (!blocking && rc == APR_EAGAIN) {
- *all_busy = 1;
+ qe = te_qe(te);
+ qe->cb = timer_event_backlog_cb;
}
else {
- ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(00472)
- "ap_queue_info_wait_for_idler failed. "
- "Attempting to shutdown process gracefully");
- signal_threads(ST_GRACEFUL);
+ ap_assert(!cs_in_backlog(cs));
+ ap_assert(!cs->q);
+
+ if (busy && cs->pub.state == CONN_STATE_LINGER && cs->linger_started) {
+ /* Not worth lingering more on this connection if we are short of
+ * workers and everything is flushed+shutdown already, back out
+ * and close.
+ */
+ ap_queue_info_idlers_inc(worker_queue_info);
+ close_connection(cs);
+ return;
+ }
+
+ if (cs->c) {
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "pushing connection %" CS_FMT,
+ CS_ARG(cs));
+ }
+ else {
+ ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
+ "pushing connection %" CS_FMT_TO,
+ CS_ARG_TO(cs));
+ }
+
+ qe = cs_qe(cs);
+ qe->cb = conn_state_backlog_cb;
+ }
+
+ rc = ap_queue_push_event(worker_queue, qe);
+ if (rc != APR_SUCCESS) {
+ int mode = ST_GRACEFUL;
+
+ ap_queue_info_idlers_inc(worker_queue_info);
+
+ ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf, APLOGNO(00471)
+ "push2worker: queuing %s failed", cs ? "socket" : "timer");
+
+ if (cs) {
+ /* Can't go anywhere, kill (and log). */
+ kill_connection(cs, rc);
+ }
+ else {
+            /* Can't call te->cbfunc() and potentially block there; someone
+             * would miss this event and thus never release their connection(s),
+             * so a graceful stop could never complete.
+ */
+ mode = ST_UNGRACEFUL;
+ }
+
+ AP_DEBUG_ASSERT(0);
+ signal_threads(mode);
}
}
@@ -1935,8 +2007,13 @@ static timer_event_t *get_timer_event(apr_time_t timeout,
APR_RING_REMOVE(te, link);
}
else {
- te = apr_skiplist_alloc(timer_skiplist, sizeof(timer_event_t));
- memset(te, 0, sizeof(*te));
+ struct backlog_timer_event *bte;
+ /* invariant: (te == &bte->te) => (te_qe(te) == &bte->qe) */
+ bte = apr_skiplist_alloc(timer_skiplist, sizeof(*bte));
+ memset(bte, 0, sizeof(*bte));
+ bte->qe.type = AP_QUEUE_EVENT_TIMER;
+ bte->qe.data.te = bte->qe.cb_baton = &bte->te;
+ te = &bte->te;
}
APR_RING_ELEM_INIT(te, link);
@@ -2123,14 +2200,11 @@ static void process_lingering_close(event_conn_state_t *cs)
CS_ARG(cs));
AP_DEBUG_ASSERT(CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state));
+ /* Flush and shutdown first */
if (!cs->linger_started) {
+ cs->linger_started = 1; /* once! */
+ apr_atomic_inc32(&lingering_count);
cs->pub.state = CONN_STATE_LINGER;
- cs->linger_started = 1;
-
- /* defer_lingering_close() may have bumped lingering_count already */
- if (!cs->deferred_linger) {
- apr_atomic_inc32(&lingering_count);
- }
apr_socket_timeout_set(csd, apr_time_from_sec(SECONDS_TO_LINGER));
if (ap_start_lingering_close(cs->c)) {
@@ -2157,24 +2231,17 @@ static void process_lingering_close(event_conn_state_t *cs)
cs->pub.state = CONN_STATE_LINGER_NORMAL;
}
cs->pub.sense = CONN_SENSE_DEFAULT;
-
- /* One timestamp/duration for the whole lingering close time.
- * XXX: This makes the (short_)linger_q not sorted/ordered by expiring
- * timeouts whenever multiple schedules are necessary (EAGAIN below),
- * but we probabaly don't care since these connections do not count
- * for connections_above_limit() and all of them will be killed when
- * busy or gracefully stopping anyway.
- */
- cs->queue_timestamp = apr_time_now();
}
+ /* Drain until EAGAIN or EOF/error, in the former case requeue and
+ * come back when readable again, otherwise the connection is over.
+ */
do {
apr_size_t nbytes = sizeof(dummybuf);
rv = apr_socket_recv(csd, dummybuf, &nbytes);
} while (rv == APR_SUCCESS);
if (APR_STATUS_IS_EAGAIN(rv)) {
struct timeout_queue *q;
- /* (Re)queue the connection to come back when readable */
q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
if (pollset_add(cs, CONN_SENSE_WANT_READ, q, NULL)) {
return; /* queued */
@@ -2183,108 +2250,92 @@ static void process_lingering_close(event_conn_state_t *cs)
close_connection(cs);
}
-/* call 'func' for all elements of 'q' above 'expiry'.
+/* Call shutdown_connection() for the elements of 'q' that timed out, or
+ * for all if 'shrink' is set.
* Pre-condition: timeout_mutex must already be locked
- * Post-condition: timeout_mutex will be locked again
*/
-static void process_timeout_queue(struct timeout_queue *q, apr_time_t expiry,
- int (*func)(event_conn_state_t *))
+static unsigned int process_timeout_queue_ex(struct timeout_queue *queue,
+ apr_time_t now,
+ int shrink)
{
- apr_uint32_t total = 0, count;
- event_conn_state_t *first, *cs, *last;
- struct event_conn_state_t trash;
- struct timeout_queue *qp;
+ unsigned int count = 0;
+ struct timeout_queue *q;
- if (!*q->total) {
- return;
+ if (!*queue->total) {
+ return 0;
}
- APR_RING_INIT(&trash.timeout_list, event_conn_state_t, timeout_list);
- for (qp = q; qp; qp = qp->next) {
- count = 0;
- cs = first = last = APR_RING_FIRST(&qp->head);
- while (cs != APR_RING_SENTINEL(&qp->head, event_conn_state_t,
- timeout_list)) {
- /* Trash the entry if:
- * - no expiry was given (zero means all), or
- * - it expired (according to the queue timeout), or
- * - the system clock skewed in the past: no entry should be
- * registered above the given expiry (~now) + the queue
- * timeout, we won't keep any here (eg. for centuries).
- *
- * Otherwise stop, no following entry will match thanks to the
- * single timeout per queue (entries are added to the end!).
- * This allows maintenance in O(1).
- */
- if (expiry && cs->queue_timestamp + qp->timeout > expiry
- && cs->queue_timestamp < expiry + qp->timeout) {
- /* Since this is the next expiring entry of this queue, update
- * the global queues_next_expiry if it's later than this one.
+ for (q = queue; q; q = q->next) {
+ while (!APR_RING_EMPTY(&q->head, event_conn_state_t, timeout_list)) {
+ event_conn_state_t *cs = APR_RING_FIRST(&q->head);
+
+ ap_assert(cs->q == q);
+
+ if (!shrink) {
+ /* Stop if this entry did not expire: no following one will,
+ * thanks to the single timeout per queue (latest entries are
+ * added to the tail).
*/
- apr_time_t elem_expiry = cs->queue_timestamp + qp->timeout;
- apr_time_t next_expiry = queues_next_expiry;
- if (!next_expiry
+ apr_time_t elem_expiry = cs->queue_timestamp + q->timeout;
+ if (elem_expiry > now) {
+ /* This is the next expiring entry of this queue, update
+ * the global queues_next_expiry if it expires after
+ * this one.
+ */
+ apr_time_t next_expiry = queues_next_expiry;
+ if (!next_expiry
|| next_expiry > elem_expiry + QUEUES_FUDGE_TIMEOUT) {
- queues_next_expiry = elem_expiry;
+ queues_next_expiry = elem_expiry;
+ }
+ break;
}
- break;
}
- if (!pollset_del(cs, 1)) {
- kill_connection(cs, APR_EGENERAL);
- continue;
+ if (cs_in_backlog(cs)) {
+ /* Remove the backlog connection from worker_queue (note that
+ * the listener already holds the lock when maintaining
+ * the backlog_q), and give back the reserved worker/idler
+ * since none will handle the event.
+ */
+ ap_assert(cs_qe(cs)->cb_baton == cs);
+ ap_assert(cs->q == cs->sc->bl_q);
+ ap_queue_info_idlers_inc(worker_queue_info);
+ ap_queue_kill_event_locked(worker_queue, cs_qe(cs));
+ shutdown_connection(cs, now, 1);
}
-
- if (cs == first) {
- APR_RING_INSERT_HEAD(&qp->head, cs, event_conn_state_t,
- timeout_list);
+ else if (pollset_del(cs, 1)) {
+ /* Removed from the pollset and timeout queue. */
+ shutdown_connection(cs, now, 0);
}
else {
- APR_RING_INSERT_AFTER(last, cs, timeout_list);
+ /* Can't go anywhere, kill (and log). */
+ kill_connection(cs, APR_EGENERAL);
}
- ++*qp->total;
- ++qp->count;
- last = cs;
- cs = APR_RING_NEXT(cs, timeout_list);
count++;
}
- if (!count)
- continue;
-
- APR_RING_UNSPLICE(first, last, timeout_list);
- APR_RING_SPLICE_TAIL(&trash.timeout_list, first, last, event_conn_state_t,
- timeout_list);
- AP_DEBUG_ASSERT(*q->total >= count && qp->count >= count);
- *q->total -= count;
- qp->count -= count;
- total += count;
}
- if (!total)
- return;
- apr_thread_mutex_unlock(timeout_mutex);
- first = APR_RING_FIRST(&trash.timeout_list);
- do {
- cs = APR_RING_NEXT(first, timeout_list);
- APR_RING_ELEM_INIT(cs, timeout_list);
- func(first);
- first = cs;
- } while (--total);
- apr_thread_mutex_lock(timeout_mutex);
+ return count;
}
-static void process_keepalive_queue(apr_time_t expiry)
+static APR_INLINE void process_timeout_queue(struct timeout_queue *queue,
+ apr_time_t now)
{
- /* If all workers are busy, we kill older keep-alive connections so
- * that they may connect to another process.
- */
- if (!expiry && *keepalive_q->total) {
+ (void)process_timeout_queue_ex(queue, now, 0);
+}
+
+/* When all workers are busy or dying, kill'em all \m/ */
+static APR_INLINE void shrink_timeout_queue(struct timeout_queue *queue,
+ apr_time_t now)
+{
+ unsigned int count = process_timeout_queue_ex(queue, now, 1);
+ if (count) {
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
- "All workers are busy or dying, will shutdown %u "
- "keep-alive connections", *keepalive_q->total);
+ "All workers are %s, %s queue shrinked (%u done, %u left)",
+ dying ? "dying" : "busy", queue->name,
+ count, apr_atomic_read32(queue->total));
}
- process_timeout_queue(keepalive_q, expiry, shutdown_connection);
}
static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
@@ -2293,7 +2344,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
proc_info *ti = dummy;
int process_slot = ti->pslot;
process_score *ps = ap_get_scoreboard_process(process_slot);
- int have_idle_worker = 0;
apr_time_t last_log;
last_log = event_time_now();
@@ -2316,7 +2366,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
int workers_were_busy = 0;
socket_callback_baton_t *user_chain;
const apr_pollfd_t *out_pfd;
- apr_time_t now;
+ apr_time_t now, poll_time;
event_conn_state_t *cs;
timer_event_t *te;
@@ -2325,6 +2375,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
check_infinite_requests();
}
+ now = poll_time = event_time_now();
+
if (listener_may_exit) {
int once = !dying;
if (once) {
@@ -2332,7 +2384,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
if (terminate_mode == ST_UNGRACEFUL
- || apr_atomic_read32(&connection_count) == 0)
+ || (apr_atomic_read32(&connection_count) == 0
+ && apr_atomic_read32(&timers_count) == 0))
break;
if (once) {
@@ -2345,7 +2398,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
if (APLOGtrace6(ap_server_conf)) {
- now = event_time_now();
/* trace log status every second */
if (now - last_log > apr_time_from_sec(1)) {
ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
@@ -2376,7 +2428,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
* up occurs, otherwise periodic checks (maintenance, shutdown, ...)
* must be performed.
*/
- now = event_time_now();
timeout = -1;
/* Push expired timers to a worker, the first remaining one (if any)
@@ -2401,7 +2452,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
put_timer_event(te, 1);
ap_assert(cs && cs->te == te);
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
- "timed out connection %" CS_FMT, CS_ARG(cs));
+ "timed out connection %" CS_FMT,
+ CS_ARG(cs));
(void)pollset_del(cs, 0);
kill_connection(cs, APR_TIMEUP);
continue;
@@ -2412,7 +2464,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
apr_pool_cleanup_run(te->pfds->pool, te->pfds,
event_cleanup_poll_callback);
}
- push_timer2worker(te);
+ push2worker(NULL, te, now, &workers_were_busy);
}
if (te) {
next_expiry = te->when;
@@ -2453,13 +2505,14 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
- "pollset: wait for timeout=%" APR_TIME_T_FMT
+ "pollset: wait timeout=%" APR_TIME_T_FMT
" queues_timeout=%" APR_TIME_T_FMT
" timers_timeout=%" APR_TIME_T_FMT
- " conns=%d exit=%d/%d",
+ " listen=%s conns=%d exit=%d/%d",
timeout,
- queues_next_expiry ? queues_next_expiry - now : -1,
- timers_next_expiry ? timers_next_expiry - now : -1,
+ queues_next_expiry ? queues_next_expiry - now : 0,
+ timers_next_expiry ? timers_next_expiry - now : 0,
+ listensocks_disabled() ? "no" : "yes",
apr_atomic_read32(&connection_count),
listener_may_exit, dying);
@@ -2476,34 +2529,36 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
num = 0;
}
- if (APLOGtrace7(ap_server_conf)) {
- apr_time_t old_now = now;
- now = event_time_now();
-
- ap_log_error(APLOG_MARK, APLOG_TRACE7, rc, ap_server_conf,
- "pollset: have #%i time=%" APR_TIME_T_FMT "/%" APR_TIME_T_FMT
- " queues_timeout=%" APR_TIME_T_FMT
- " timers_timeout=%" APR_TIME_T_FMT
- " conns=%d exit=%d/%d",
- (int)num, now - old_now, timeout,
- queues_next_expiry ? queues_next_expiry - now : -1,
- timers_next_expiry ? timers_next_expiry - now : -1,
- apr_atomic_read32(&connection_count),
- listener_may_exit, dying);
- }
-
- /* XXX possible optimization: stash the current time for use as
- * r->request_time for new requests or queues maintenance
+ /* Update "now" after polling and use it for everything below (all
+ * non-(indefinitely-)blocking code). "now - poll_time" is then the
+ * time spent in poll().
+ *
+ * XXX possible optimization: stash this time for use as
+ * r->request_time for new requests.
*/
+ now = event_time_now();
+
+ ap_log_error(APLOG_MARK, APLOG_TRACE7, rc, ap_server_conf,
+ "pollset: have num=%i"
+ " elapsed=%" APR_TIME_T_FMT "/%" APR_TIME_T_FMT
+ " queues_timeout=%" APR_TIME_T_FMT
+ " timers_timeout=%" APR_TIME_T_FMT
+ " listen=%s conns=%d exit=%d/%d",
+ (int)num, now - poll_time, timeout,
+ queues_next_expiry ? queues_next_expiry - now : 0,
+ timers_next_expiry ? timers_next_expiry - now : 0,
+ listensocks_disabled() ? "no" : "yes",
+ apr_atomic_read32(&connection_count),
+ listener_may_exit, dying);
for (user_chain = NULL; num > 0; --num, ++out_pfd) {
listener_poll_type *pt = out_pfd->client_data;
+ socket_callback_baton_t *baton;
- if (pt->type == PT_CSD) {
- /* one of the sockets is readable */
- int blocking = 1;
-
- cs = (event_conn_state_t *) pt->baton;
+ switch (pt->type) {
+ case PT_CSD:
+ /* one of the sockets is ready */
+ cs = (event_conn_state_t *)pt->baton;
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
"polled connection %" CS_FMT,
CS_ARG(cs));
@@ -2513,12 +2568,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
case CONN_STATE_ASYNC_WAITIO:
cs->pub.state = CONN_STATE_PROCESSING;
case CONN_STATE_WRITE_COMPLETION:
- break;
-
case CONN_STATE_LINGER_NORMAL:
case CONN_STATE_LINGER_SHORT:
- /* don't wait for a worker for lingering close processing. */
- blocking = 0;
break;
default:
@@ -2529,53 +2580,29 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ap_assert(0);
}
- if (!pollset_del(cs, 0)) {
+ if (pollset_del(cs, 0)) {
+ push2worker(cs, NULL, now, &workers_were_busy);
+ }
+ else {
/* Can't go anywhere, kill (and log) and next. */
kill_connection(cs, APR_EGENERAL);
- continue;
}
+ break;
- {
- /* If we don't get a worker immediately (nonblocking), we
- * close the connection; the client can re-connect to a
- * different process for keepalive, and for lingering close
- * the connection will be shutdown so the choice is to favor
- * incoming/alive connections.
- */
- get_worker(&have_idle_worker, blocking,
- &workers_were_busy);
- if (!have_idle_worker) {
- shutdown_connection(cs);
- }
- else if (push2worker(cs, NULL, NULL) == APR_SUCCESS) {
- have_idle_worker = 0;
- }
- }
- }
- else if (pt->type == PT_ACCEPT && !listeners_disabled()) {
+ case PT_ACCEPT:
/* A Listener Socket is ready for an accept() */
if (workers_were_busy) {
- disable_listensocks();
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
- APLOGNO(03268)
- "All workers busy, not accepting new conns "
- "in this process");
- }
- else if (connections_above_limit(&workers_were_busy)) {
- disable_listensocks();
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
- APLOGNO(03269)
- "Too many open connections (%u, idlers %u), "
- "not accepting new conns in this process",
- apr_atomic_read32(&connection_count),
- ap_queue_info_num_idlers(worker_queue_info));
+ /* Listeners are disabled for now; leave the new connection in
+ * the kernel's socket backlog until we start listening again.
+ */
+ continue;
}
- else if (!listener_may_exit) {
+ if (!dying) {
void *csd = NULL;
ap_listen_rec *lr = (ap_listen_rec *) pt->baton;
apr_pool_t *ptrans; /* Pool for per-transaction stuff */
- ap_queue_info_pop_pool(worker_queue_info, &ptrans);
+ ptrans = ap_queue_info_pop_pool(worker_queue_info);
if (ptrans == NULL) {
/* create a new transaction pool for each accepted socket */
apr_allocator_t *allocator = NULL;
@@ -2604,25 +2631,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
}
- get_worker(&have_idle_worker, 1, &workers_were_busy);
rc = lr->accept_func(&csd, lr, ptrans);
-
- /* later we trash rv and rely on csd to indicate
- * success/failure
- */
- AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
-
- if (rc == APR_EGENERAL) {
- /* E[NM]FILE, ENOMEM, etc */
- resource_shortage = 1;
- signal_threads(ST_GRACEFUL);
- }
- else if (ap_accept_error_is_nonfatal(rc)) {
- ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, ap_server_conf,
- "accept() on client socket failed");
- }
-
- if (csd != NULL) {
+ if (rc == APR_SUCCESS) {
conns_this_child--;
/* Create and account for the connection from here, or
@@ -2630,40 +2640,45 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
* would consider it does not exist and could exit the
* child too early.
*/
+ ap_assert(csd != NULL);
cs = make_conn_state(ptrans, csd);
- if (push2worker(cs, NULL, NULL) == APR_SUCCESS) {
- have_idle_worker = 0;
- }
+ push2worker(cs, NULL, now, &workers_were_busy);
}
else {
+ if (rc == APR_EGENERAL) {
+ /* E[NM]FILE, ENOMEM, etc */
+ resource_shortage = 1;
+ signal_threads(ST_GRACEFUL);
+ }
+ else if (ap_accept_error_is_nonfatal(rc)) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, ap_server_conf,
+ "accept() on client socket failed");
+ }
ap_queue_info_push_pool(worker_queue_info, ptrans);
}
}
- } /* if:else on pt->type */
+ break;
+
#if HAVE_SERF
- else if (pt->type == PT_SERF) {
+ case PT_SERF:
/* send socket to serf. */
- /* XXXX: this doesn't require get_worker() */
+ /* XXXX: this doesn't require a worker thread */
serf_event_trigger(g_serf, pt->baton, out_pfd);
- }
-
+ break;
#endif
- else if (pt->type == PT_USER) {
- socket_callback_baton_t *baton = pt->baton;
- if (baton->cancel_event) {
- baton->cancel_event->canceled = 1;
- }
- /* We only signal once per N sockets with this baton,
- * and after this loop to avoid any race/lifetime issue
- * with the user callback being called while we handle
- * the same baton multiple times here.
+ case PT_USER:
+ /* Multiple pfds of the same baton might trigger in this pass
+ * so chain once here and run the cleanup only after this loop
+ * to avoid lifetime issues (i.e. pfds->pool cleared while some
+ * of its pfd->client_data are still to be dereferenced here).
*/
- if (!baton->signaled) {
- baton->signaled = 1;
+ baton = pt->baton;
+ if (baton != user_chain && !baton->next) {
baton->next = user_chain;
user_chain = baton;
}
+ break;
}
} /* for processing poll */
@@ -2673,6 +2688,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
user_chain = user_chain->next;
baton->next = NULL;
+ /* Not expirable anymore */
+ if (baton->cancel_event) {
+ baton->cancel_event->canceled = 1;
+ baton->cancel_event = NULL;
+ }
+
/* remove all sockets from the pollset */
apr_pool_cleanup_run(baton->pfds->pool, baton->pfds,
event_cleanup_poll_callback);
@@ -2683,7 +2704,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
baton->user_baton,
0, /* don't insert it */
NULL /* no associated socket callback */);
- push_timer2worker(te);
+ push2worker(NULL, te, now, &workers_were_busy);
}
/* We process the timeout queues here only when the global
@@ -2692,10 +2713,13 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
* while latest ones are only taken into account here (in listener)
* during queues' processing, with the lock held. This works both
* with and without wake-ability.
+ * Even if "now" drifted a bit since it was fetched and the real
+ * "now" went below "expiry" in the meantime, the next poll() will
+ * return immediately so the maintenance will happen then.
*/
next_expiry = queues_next_expiry;
+ if (next_expiry && next_expiry <= now) {
do_maintenance:
- if (next_expiry && next_expiry <= (now = event_time_now())) {
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
"queues maintenance: expired=%" APR_TIME_T_FMT,
next_expiry > 0 ? now - next_expiry : -1);
@@ -2705,29 +2729,39 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
/* Recompute this by walking the timeout queues (under the lock) */
queues_next_expiry = 0;
- /* Step 1: keepalive queue timeouts are closed */
+ /* Step 1: keepalive queue timeouts */
if (workers_were_busy || dying) {
- process_keepalive_queue(0); /* kill'em all \m/ */
+ shrink_timeout_queue(keepalive_q, now);
}
else {
- process_keepalive_queue(now);
+ process_timeout_queue(keepalive_q, now);
}
- /* Step 2: waitio queue timeouts are flushed */
- process_timeout_queue(waitio_q, now, defer_lingering_close);
+ /* Step 2: waitio queue timeouts */
+ process_timeout_queue(waitio_q, now);
- /* Step 3: write completion queue timeouts are flushed */
- process_timeout_queue(write_completion_q, now, defer_lingering_close);
+ /* Step 3: write completion queue timeouts */
+ process_timeout_queue(write_completion_q, now);
- /* Step 4: normal lingering close queue timeouts are closed */
+ /* Step 4: normal lingering close queue timeouts */
if (dying && linger_q->timeout > short_linger_q->timeout) {
/* Dying, force short timeout for normal lingering close */
linger_q->timeout = short_linger_q->timeout;
}
- process_timeout_queue(linger_q, now, shutdown_connection);
+ process_timeout_queue(linger_q, now);
- /* Step 5: short lingering close queue timeouts are closed */
- process_timeout_queue(short_linger_q, now, shutdown_connection);
+ /* Step 5: short lingering close queue timeouts */
+ process_timeout_queue(short_linger_q, now);
+
+ /* Step 6: backlog queue timeouts
+ * Connections in the backlog race with the workers dequeuing them,
+ * hence the processing under the worker_queue mutex.
+ */
+ if (apr_atomic_read32(backlog_q->total)) {
+ ap_queue_lock(worker_queue);
+ process_timeout_queue(backlog_q, now);
+ ap_queue_unlock(worker_queue);
+ }
next_expiry = queues_next_expiry;
apr_thread_mutex_unlock(timeout_mutex);
@@ -2740,34 +2774,17 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ps->write_completion = apr_atomic_read32(write_completion_q->total);
ps->keep_alive = apr_atomic_read32(keepalive_q->total);
ps->lingering_close = apr_atomic_read32(&lingering_count);
+ ps->backlog = apr_atomic_read32(backlog_q->total);
ps->suspended = apr_atomic_read32(&suspended_count);
ps->connections = apr_atomic_read32(&connection_count);
}
else if ((workers_were_busy || dying)
&& apr_atomic_read32(keepalive_q->total)) {
apr_thread_mutex_lock(timeout_mutex);
- process_keepalive_queue(0); /* kill'em all \m/ */
+ shrink_timeout_queue(keepalive_q, now);
apr_thread_mutex_unlock(timeout_mutex);
ps->keep_alive = 0;
}
-
- /* If there are some lingering closes to defer (to a worker), schedule
- * them now. We might wakeup a worker spuriously if another one empties
- * defer_linger_chain in the meantime, but there also may be no active
- * or all busy workers for an undefined time. In any case a deferred
- * lingering close can't starve if we do that here since the chain is
- * filled only above in the listener and it's emptied only in the
- * worker(s); thus a NULL here means it will stay so while the listener
- * waits (possibly indefinitely) in poll().
- */
- if (defer_linger_chain) {
- get_worker(&have_idle_worker, 0, &workers_were_busy);
- if (have_idle_worker
- && defer_linger_chain /* re-test */
- && push2worker(NULL, NULL, NULL) == APR_SUCCESS) {
- have_idle_worker = 0;
- }
- }
} /* listener main loop */
ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
@@ -2822,8 +2839,8 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
int process_slot = ti->pslot;
int thread_slot = ti->tslot;
worker_score *ws = &ap_scoreboard_image->servers[process_slot][thread_slot];
+ int is_idler = 0;
apr_status_t rv;
- int is_idle = 0;
free(ti);
@@ -2834,26 +2851,14 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
SERVER_STARTING, NULL);
for (;;) {
- apr_socket_t *csd = NULL;
- event_conn_state_t *cs = NULL;
- timer_event_t *te = NULL;
- apr_pool_t *ptrans; /* Pool for per-transaction stuff */
+ ap_queue_event_t *qe;
- if (!is_idle) {
- rv = ap_queue_info_set_idle(worker_queue_info, NULL);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
- APLOGNO(03270)
- "ap_queue_info_set_idle failed. Attempting to "
- "shutdown process gracefully.");
- signal_threads(ST_GRACEFUL);
- break;
- }
+ if (!is_idler) {
+ int idlers = ap_queue_info_idlers_inc(worker_queue_info);
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
"worker thread %i/%i idle (idlers %i)",
- thread_slot, threads_per_child,
- ap_queue_info_num_idlers(worker_queue_info));
- is_idle = 1;
+ thread_slot, threads_per_child, idlers);
+ is_idler = 1;
/* If the listening sockets are paused and this new idler switches
* connections_above_limit() back, let the listener know and poll
@@ -2879,9 +2884,7 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
break;
}
- rv = ap_queue_pop_something(worker_queue, &csd, (void **)&cs,
- &ptrans, &te);
-
+ rv = ap_queue_pop_event(worker_queue, &qe);
if (rv != APR_SUCCESS) {
/* We get APR_EOF during a graceful shutdown once all the
* connections accepted by this server process have been handled.
@@ -2893,12 +2896,12 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
break;
}
- /* We get APR_EINTR whenever ap_queue_pop_*() has been interrupted
- * from an explicit call to ap_queue_interrupt_all(). This allows
- * us to unblock threads stuck in ap_queue_pop_*() when a shutdown
- * is pending.
+ /* We get APR_EINTR whenever ap_queue_pop_event() has been
+ * interrupted from an explicit call to ap_queue_interrupt_*().
+ * This allows us to unblock threads stuck in ap_queue_pop_event()
+ * when a shutdown is pending.
*
- * If workers_may_exit is set and this is ungraceful termination/
+ * If workers_may_exit is set and this is ungraceful stop or
* restart, we are bound to get an error on some systems (e.g.,
* AIX, which sanity-checks mutex operations) since the queue
* may have already been cleaned up. Don't log the "error" if
@@ -2906,59 +2909,60 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
*/
if (!APR_STATUS_IS_EINTR(rv) && !workers_may_exit) {
ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
- APLOGNO(03099) "ap_queue_pop_something failed");
+ APLOGNO(03099) "ap_queue_pop_event failed");
AP_DEBUG_ASSERT(0);
signal_threads(ST_GRACEFUL);
}
continue;
}
+ is_idler = 0; /* event consumed */
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
"worker thread %i/%i busy (idlers %i)",
thread_slot, threads_per_child,
- ap_queue_info_num_idlers(worker_queue_info));
+ ap_queue_info_idlers_count(worker_queue_info));
+
+ if (qe->type == AP_QUEUE_EVENT_SOCK) {
+ apr_pool_t *p;
+ apr_socket_t *csd;
+ event_conn_state_t *cs;
+
+ ap_assert(qe->data.se);
+ p = qe->data.se->p;
+ csd = qe->data.se->sd;
+ cs = qe->data.se->baton;
+ ap_assert(p && csd && cs && qe == cs_qe(cs));
+
+ worker_sockets[thread_slot] = csd;
+ process_socket(thd, p, csd, cs, process_slot, thread_slot);
+ worker_sockets[thread_slot] = NULL;
+ }
+ else if (qe->type == AP_QUEUE_EVENT_TIMER) {
+ timer_event_t *te;
+ ap_mpm_callback_fn_t *cbfunc;
+ void *baton;
+
+ te = qe->data.te;
+ ap_assert(te && qe == te_qe(te));
+
+ cbfunc = te->cbfunc;
+ baton = te->baton;
- if (te != NULL) {
- void *baton = te->baton;
- ap_mpm_callback_fn_t *cbfunc = te->cbfunc;
/* first recycle the timer event */
put_timer_event(te, 0);
+
+ ap_update_child_status_from_indexes(process_slot, thread_slot,
+ SERVER_BUSY_WRITE, NULL);
+ ap_assert(cbfunc != NULL);
cbfunc(baton);
}
else {
- is_idle = 0; /* consumed */
- if (csd != NULL) {
- worker_sockets[thread_slot] = csd;
- process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
- worker_sockets[thread_slot] = NULL;
- }
- }
-
- /* If there are deferred lingering closes, handle them now. */
- while (!workers_may_exit) {
- cs = defer_linger_chain;
- if (!cs) {
- break;
- }
- if (apr_atomic_casptr((void *)&defer_linger_chain, cs->chain,
- cs) != cs) {
- /* Race lost, try again */
- continue;
- }
- cs->chain = NULL;
- AP_DEBUG_ASSERT(cs->pub.state == CONN_STATE_LINGER);
-
- ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
- "deferred close for connection %" CS_FMT, CS_ARG(cs));
-
- worker_sockets[thread_slot] = csd = cs_sd(cs);
- process_socket(thd, cs->p, csd, cs, process_slot, thread_slot);
- worker_sockets[thread_slot] = NULL;
+ ap_assert(0);
}
}
- if (is_idle) {
+ if (is_idler) {
/* Not idling anymore */
- ap_queue_info_wait_for_idler(worker_queue_info, NULL);
+ ap_queue_info_idlers_dec(worker_queue_info);
}
ap_update_child_status_from_indexes(process_slot, thread_slot,
@@ -3011,10 +3015,10 @@ static void setup_threads_runtime(void)
APR_POLLSET_KQUEUE,
APR_POLLSET_EPOLL };
/* XXX: K-A or lingering close connection included in the async factor */
- const unsigned int threads_factor = worker_factor / WORKER_FACTOR_SCALE;
- const apr_size_t pollset_size = ((unsigned int)num_listensocks +
- (unsigned int)threads_per_child *
- (threads_factor > 2 ? threads_factor : 2));
+ unsigned int async_factor = (worker_factor < WORKER_FACTOR_SCALE * 2
+ ? WORKER_FACTOR_SCALE * 2 : worker_factor);
+ unsigned int async_threads = (threads_per_child * async_factor / WORKER_FACTOR_SCALE);
+ const apr_size_t pollset_size = (num_listensocks + async_threads + POLLSET_RESERVE_SIZE);
int pollset_flags;
/* Event's skiplist operations will happen concurrently with other modules'
@@ -3046,8 +3050,8 @@ static void setup_threads_runtime(void)
apr_pool_tag(pruntime, "mpm_runtime");
/* We must create the fd queues before we start up the listener
- * and worker threads. */
- rv = ap_queue_create(&worker_queue, threads_per_child, pruntime);
+ * and worker threads; the queue size is implicitly bounded by connections_above_limit(). */
+ rv = ap_queue_create(&worker_queue, -1, pruntime);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03100)
"ap_queue_create() failed");
@@ -3061,8 +3065,7 @@ static void setup_threads_runtime(void)
*/
max_recycled_pools = threads_per_child * 3 / 4 ;
}
- rv = ap_queue_info_create(&worker_queue_info, pruntime,
- threads_per_child, max_recycled_pools);
+ rv = ap_queue_info_create(&worker_queue_info, pruntime, max_recycled_pools);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03101)
"ap_queue_info_create() failed");
@@ -3639,6 +3642,7 @@ static void perform_idle_server_maintenance(void)
int max_daemon_used = 0;
int idle_thread_count = 0;
int active_thread_count = 0;
+ int backlog_count = 0;
int i, j;
for (i = 0; i < server_limit; ++i) {
@@ -3682,6 +3686,7 @@ static void perform_idle_server_maintenance(void)
}
}
active_thread_count += child_threads_active;
+ backlog_count += apr_atomic_read32(&ps->backlog);
if (child_threads_active == threads_per_child) {
had_healthy_child = 1;
}
@@ -3855,10 +3860,10 @@ static void perform_idle_server_maintenance(void)
retained->max_daemon_used = max_daemon_used;
if (APLOGdebug(ap_server_conf)) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
- "score: idlers:%d, "
+ "score: idlers:%d backlog:%d, "
"threads active:%d/%d max:%d, "
"daemons active:%d/%d max:%d used:%d/%d/%d",
- idle_thread_count,
+ idle_thread_count, backlog_count,
active_thread_count, retained->active_daemons * threads_per_child,
max_workers, retained->active_daemons, retained->total_daemons,
active_daemons_limit, max_daemon_used, retained->max_daemon_used,
@@ -4425,14 +4430,12 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
active_daemons_limit = server_limit;
threads_per_child = DEFAULT_THREADS_PER_CHILD;
max_workers = active_daemons_limit * threads_per_child;
- defer_linger_chain = NULL;
had_healthy_child = 0;
ap_extended_status = 0;
event_pollset = NULL;
worker_queue_info = NULL;
listener_os_thread = NULL;
- listensocks_disabled = 0;
listener_is_wakeable = 0;
return OK;
@@ -4441,7 +4444,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
apr_pool_t *ptemp, server_rec *s)
{
- apr_hash_t *io_h, *wc_h, *ka_h;
+ apr_hash_t *io_h, *wc_h, *ka_h, *bl_h;
/* Not needed in pre_config stage */
if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
@@ -4451,6 +4454,7 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
io_h = apr_hash_make(ptemp);
wc_h = apr_hash_make(ptemp);
ka_h = apr_hash_make(ptemp);
+ bl_h = apr_hash_make(ptemp);
linger_q = TO_QUEUE_MAKE(pconf, "linger",
apr_time_from_sec(MAX_SECS_TO_LINGER), NULL);
@@ -4470,6 +4474,9 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
sc->ka_q = TO_QUEUE_CHAIN(pconf, "keepalive", s->keep_alive_timeout,
&keepalive_q, ka_h, ptemp);
+
+ sc->bl_q = TO_QUEUE_CHAIN(pconf, "backlog", s->timeout,
+ &backlog_q, bl_h, ptemp);
}
return OK;
diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c
index 42b81a8ed1b..1fff5b085e6 100644
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c
@@ -583,7 +583,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
if (listener_may_exit) break;
if (!have_idle_worker) {
- rv = ap_queue_info_wait_for_idler(worker_queue_info, NULL);
+ rv = ap_queue_info_wait_for_idler(worker_queue_info);
if (APR_STATUS_IS_EOF(rv)) {
break; /* we've been signaled to die now */
}
@@ -662,7 +662,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
if (!listener_may_exit) {
/* the following pops a recycled ptrans pool off a stack */
- ap_queue_info_pop_pool(worker_queue_info, &ptrans);
+ ptrans = ap_queue_info_pop_pool(worker_queue_info);
if (ptrans == NULL) {
/* we can't use a recycled transaction pool this time.
* create a new transaction pool */
@@ -696,7 +696,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
accept_mutex_error("unlock", rv, process_slot);
}
if (csd != NULL) {
- rv = ap_queue_push_socket(worker_queue, csd, NULL, ptrans);
+ rv = ap_queue_push_socket(worker_queue, csd, ptrans);
if (rv) {
/* trash the connection; we couldn't queue the connected
* socket to a worker
@@ -901,8 +901,7 @@ static void setup_threads_runtime(void)
clean_child_exit(APEXIT_CHILDFATAL);
}
- rv = ap_queue_info_create(&worker_queue_info, pruntime,
- threads_per_child, -1);
+ rv = ap_queue_info_create(&worker_queue_info, pruntime, -1);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03141)
"ap_queue_info_create() failed");
diff --git a/server/mpm_fdqueue.c b/server/mpm_fdqueue.c
index 3697ca722f6..7871597d910 100644
--- a/server/mpm_fdqueue.c
+++ b/server/mpm_fdqueue.c
@@ -20,7 +20,23 @@
#include
-static const apr_uint32_t zero_pt = APR_UINT32_MAX/2;
+#define ZERO_PT (APR_UINT32_MAX / 2)
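+/* (the midpoint of the unsigned range lets the idlers counter below go
+ * effectively "negative" relative to ZERO_PT, i.e. count a backlog) */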
+
+APR_RING_HEAD(fd_queue_ring, fd_queue_elem_t);
+
+struct fd_queue_t
+{
+ struct fd_queue_ring elts;
+ apr_uint32_t nelts;
+ apr_uint32_t bounds;
+ apr_pool_t *spare_pool;
+ fd_queue_elem_t *spare_elems;
+ apr_thread_mutex_t *one_big_mutex;
+ apr_thread_cond_t *not_empty;
+ apr_uint32_t num_waiters;
+ apr_uint32_t interrupted;
+ apr_uint32_t terminated;
+};
struct recycled_pool
{
@@ -30,59 +46,43 @@ struct recycled_pool
struct fd_queue_info_t
{
- apr_uint32_t volatile idlers; /**
- * >= zero_pt: number of idle worker threads
- * < zero_pt: number of threads blocked,
- * waiting for an idle worker
- */
+ apr_uint32_t volatile idlers; /* >= ZERO_PT: number of idle worker threads
+ * < ZERO_PT: number of events in backlog
+ * (waiting for an idle thread) */
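+ /* (e.g. ZERO_PT + 3 means 3 idle threads, ZERO_PT - 2 means
+ * 2 backlogged events waiting for a thread) */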
apr_thread_mutex_t *idlers_mutex;
apr_thread_cond_t *wait_for_idler;
- int terminated;
- int max_idlers;
- int max_recycled_pools;
- apr_uint32_t recycled_pools_count;
+ apr_uint32_t max_idlers;
+ apr_uint32_t terminated;
struct recycled_pool *volatile recycled_pools;
+ apr_uint32_t recycled_pools_count;
+ apr_uint32_t max_recycled_pools;
};
struct fd_queue_elem_t
{
- apr_socket_t *sd;
- void *sd_baton;
- apr_pool_t *p;
+ APR_RING_ENTRY(fd_queue_elem_t) link; /* in ring */
+ struct fd_queue_elem_t *next; /* in spare list */
+ sock_event_t self_sock_event;
+ ap_queue_event_t self_event;
+ ap_queue_event_t *event;
};
-static apr_status_t queue_info_cleanup(void *data_)
+static apr_status_t queue_info_cleanup(void *qi)
{
- fd_queue_info_t *qi = data_;
- apr_thread_cond_destroy(qi->wait_for_idler);
- apr_thread_mutex_destroy(qi->idlers_mutex);
-
- /* Clean up any pools in the recycled list */
- for (;;) {
- struct recycled_pool *first_pool = qi->recycled_pools;
- if (first_pool == NULL) {
- break;
- }
- if (apr_atomic_casptr((void *)&qi->recycled_pools, first_pool->next,
- first_pool) == first_pool) {
- apr_pool_destroy(first_pool->pool);
- }
- }
-
+ /* Clean up all pools in the recycled list */
+ ap_queue_info_free_idle_pools(qi);
return APR_SUCCESS;
}
-apr_status_t ap_queue_info_create(fd_queue_info_t **queue_info,
- apr_pool_t *pool, int max_idlers,
- int max_recycled_pools)
+AP_DECLARE(apr_status_t) ap_queue_info_create(fd_queue_info_t **queue_info,
+ apr_pool_t *pool, int max_recycled_pools)
{
apr_status_t rv;
fd_queue_info_t *qi;
qi = apr_pcalloc(pool, sizeof(*qi));
- rv = apr_thread_mutex_create(&qi->idlers_mutex, APR_THREAD_MUTEX_DEFAULT,
- pool);
+ rv = apr_thread_mutex_create(&qi->idlers_mutex, APR_THREAD_MUTEX_DEFAULT, pool);
if (rv != APR_SUCCESS) {
return rv;
}
@@ -90,27 +90,30 @@ apr_status_t ap_queue_info_create(fd_queue_info_t **queue_info,
if (rv != APR_SUCCESS) {
return rv;
}
- qi->recycled_pools = NULL;
- qi->max_recycled_pools = max_recycled_pools;
- qi->max_idlers = max_idlers;
- qi->idlers = zero_pt;
+ qi->idlers = ZERO_PT;
+ if (max_recycled_pools >= 0) {
+ qi->max_recycled_pools = max_recycled_pools;
+ }
+ else {
+ qi->max_recycled_pools = APR_INT32_MAX;
+ }
+
apr_pool_cleanup_register(pool, qi, queue_info_cleanup,
apr_pool_cleanup_null);
*queue_info = qi;
-
return APR_SUCCESS;
}
-apr_status_t ap_queue_info_set_idle(fd_queue_info_t *queue_info,
- apr_pool_t *pool_to_recycle)
+AP_DECLARE(apr_status_t) ap_queue_info_set_idle(fd_queue_info_t *queue_info,
+ apr_pool_t *pool_to_recycle)
{
apr_status_t rv;
ap_queue_info_push_pool(queue_info, pool_to_recycle);
/* If other threads are waiting on a worker, wake one up */
- if (apr_atomic_inc32(&queue_info->idlers) < zero_pt) {
+ if (apr_atomic_inc32(&queue_info->idlers) < ZERO_PT) {
rv = apr_thread_mutex_lock(queue_info->idlers_mutex);
if (rv != APR_SUCCESS) {
AP_DEBUG_ASSERT(0);
@@ -130,23 +133,25 @@ apr_status_t ap_queue_info_set_idle(fd_queue_info_t *queue_info,
return APR_SUCCESS;
}
-apr_status_t ap_queue_info_try_get_idler(fd_queue_info_t *queue_info)
+AP_DECLARE(apr_status_t) ap_queue_info_try_get_idler(fd_queue_info_t *queue_info)
{
/* Don't block if there isn't any idle worker. */
+ apr_uint32_t idlers = queue_info->idlers, val;
for (;;) {
- apr_uint32_t idlers = queue_info->idlers;
- if (idlers <= zero_pt) {
+ if (idlers <= ZERO_PT) {
return APR_EAGAIN;
}
- if (apr_atomic_cas32(&queue_info->idlers, idlers - 1,
- idlers) == idlers) {
+
+ val = apr_atomic_cas32(&queue_info->idlers, idlers - 1, idlers);
+ if (val == idlers) {
return APR_SUCCESS;
}
+
+ idlers = val;
}
}
-apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
- int *had_to_block)
+AP_DECLARE(apr_status_t) ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info)
{
apr_status_t rv;
@@ -154,7 +159,7 @@ apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
* apr_atomic_add32(x, -1) does the same as dec32(x), except
* that it returns the previous value (unlike dec32's bool).
*/
- if (apr_atomic_add32(&queue_info->idlers, -1) <= zero_pt) {
+ if (apr_atomic_add32(&queue_info->idlers, -1) <= ZERO_PT) {
rv = apr_thread_mutex_lock(queue_info->idlers_mutex);
if (rv != APR_SUCCESS) {
AP_DEBUG_ASSERT(0);
@@ -177,13 +182,14 @@ apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
* now non-negative, it's safe for this function to
* return immediately.
*
- * A "negative value" (relative to zero_pt) in
+ * A "negative value" (relative to ZERO_PT) in
* queue_info->idlers tells how many
* threads are waiting on an idle worker.
*/
- if (queue_info->idlers < zero_pt) {
- if (had_to_block) {
- *had_to_block = 1;
+ if (apr_atomic_read32(&queue_info->idlers) < ZERO_PT) {
+ if (queue_info->terminated) {
+ apr_thread_mutex_unlock(queue_info->idlers_mutex);
+ return APR_EOF;
}
rv = apr_thread_cond_wait(queue_info->wait_for_idler,
queue_info->idlers_mutex);
@@ -199,7 +205,7 @@ apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
}
}
- if (queue_info->terminated) {
+ if (apr_atomic_read32(&queue_info->terminated)) {
return APR_EOF;
}
else {
@@ -207,52 +213,75 @@ apr_status_t ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
}
}
-apr_uint32_t ap_queue_info_num_idlers(fd_queue_info_t *queue_info)
+AP_DECLARE(apr_uint32_t) ap_queue_info_num_idlers(fd_queue_info_t *queue_info)
{
- apr_uint32_t val;
- val = apr_atomic_read32(&queue_info->idlers);
- return (val > zero_pt) ? val - zero_pt : 0;
+ apr_uint32_t val = apr_atomic_read32(&queue_info->idlers);
+ return (val > ZERO_PT) ? val - ZERO_PT : 0;
}
-void ap_queue_info_push_pool(fd_queue_info_t *queue_info,
- apr_pool_t *pool_to_recycle)
+AP_DECLARE(apr_int32_t) ap_queue_info_idlers_count(fd_queue_info_t *queue_info)
{
- struct recycled_pool *new_recycle;
+ return apr_atomic_read32(&queue_info->idlers) - ZERO_PT;
+}
+
+AP_DECLARE(apr_int32_t) ap_queue_info_idlers_inc(fd_queue_info_t *queue_info)
+{
+ /* apr_atomic_add32() returns the previous value; we return the new one */
+ return apr_atomic_add32(&queue_info->idlers, +1) + 1 - ZERO_PT;
+}
+
+AP_DECLARE(apr_int32_t) ap_queue_info_idlers_dec(fd_queue_info_t *queue_info)
+{
+ /* apr_atomic_add32() returns the previous value; we return the new one */
+ return apr_atomic_add32(&queue_info->idlers, -1) - 1 - ZERO_PT;
+}
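+
+/* (both return the new counter value relative to ZERO_PT: positive means
+ * that many idle threads, negative that many events waiting in backlog) */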
+
+AP_DECLARE(void) ap_queue_info_push_pool(fd_queue_info_t *queue_info,
+ apr_pool_t *pool_to_recycle)
+{
+ struct recycled_pool *new_recycle, *first_pool, *val;
+ apr_uint32_t count;
+
/* If we have been given a pool to recycle, atomically link
* it into the queue_info's list of recycled pools
*/
if (!pool_to_recycle)
return;
- if (queue_info->max_recycled_pools >= 0) {
- apr_uint32_t n = apr_atomic_read32(&queue_info->recycled_pools_count);
- if (n >= queue_info->max_recycled_pools) {
- apr_pool_destroy(pool_to_recycle);
- return;
- }
- apr_atomic_inc32(&queue_info->recycled_pools_count);
+ /* The counting is racy but we don't mind recycling a few more/fewer pools;
+ * it's lighter than a compare & swap loop or an inc + dec to back out.
+ */
+ count = apr_atomic_read32(&queue_info->recycled_pools_count);
+ if (count >= queue_info->max_recycled_pools) {
+ apr_pool_destroy(pool_to_recycle);
+ return;
}
+ apr_atomic_inc32(&queue_info->recycled_pools_count);
apr_pool_clear(pool_to_recycle);
new_recycle = apr_palloc(pool_to_recycle, sizeof *new_recycle);
new_recycle->pool = pool_to_recycle;
+
+ first_pool = queue_info->recycled_pools;
for (;;) {
- /*
- * Save queue_info->recycled_pool in local variable next because
- * new_recycle->next can be changed after apr_atomic_casptr
- * function call. For gory details see PR 44402.
+ new_recycle->next = first_pool;
+ val = apr_atomic_casptr((void *)&queue_info->recycled_pools,
+ new_recycle, first_pool);
+ /* Don't compare with new_recycle->next because it can change
+ * after apr_atomic_casptr(). For gory details see PR 44402.
*/
- struct recycled_pool *next = queue_info->recycled_pools;
- new_recycle->next = next;
- if (apr_atomic_casptr((void *)&queue_info->recycled_pools,
- new_recycle, next) == next)
- break;
+ if (val == first_pool) {
+ return;
+ }
+
+ first_pool = val;
}
}
-void ap_queue_info_pop_pool(fd_queue_info_t *queue_info,
- apr_pool_t **recycled_pool)
+AP_DECLARE(apr_pool_t *) ap_queue_info_pop_pool(fd_queue_info_t *queue_info)
{
+ struct recycled_pool *first_pool, *val;
+
/* Atomically pop a pool from the recycled list */
/* This function is safe only as long as it is single threaded because
@@ -262,41 +291,43 @@ void ap_queue_info_pop_pool(fd_queue_info_t *queue_info,
* happen concurrently with a single cas-based pop.
*/
- *recycled_pool = NULL;
-
-
- /* Atomically pop a pool from the recycled list */
+ first_pool = queue_info->recycled_pools;
for (;;) {
- struct recycled_pool *first_pool = queue_info->recycled_pools;
if (first_pool == NULL) {
- break;
+ return NULL;
}
- if (apr_atomic_casptr((void *)&queue_info->recycled_pools,
- first_pool->next, first_pool) == first_pool) {
- *recycled_pool = first_pool->pool;
- if (queue_info->max_recycled_pools >= 0)
- apr_atomic_dec32(&queue_info->recycled_pools_count);
- break;
+
+ val = apr_atomic_casptr((void *)&queue_info->recycled_pools,
+ first_pool->next, first_pool);
+ if (val == first_pool) {
+ apr_atomic_dec32(&queue_info->recycled_pools_count);
+ return first_pool->pool;
}
+
+ first_pool = val;
}
}
-void ap_queue_info_free_idle_pools(fd_queue_info_t *queue_info)
+AP_DECLARE(void) ap_queue_info_free_idle_pools(fd_queue_info_t *queue_info)
{
apr_pool_t *p;
- queue_info->max_recycled_pools = 0;
+ /* Atomically free the recycled list */
+
+ /* Per ap_queue_info_pop_pool(), this should not be called concurrently,
+ * but it's only called from the listener thread for now.
+ */
+
for (;;) {
- ap_queue_info_pop_pool(queue_info, &p);
+ p = ap_queue_info_pop_pool(queue_info);
if (p == NULL)
- break;
+ return;
apr_pool_destroy(p);
}
- apr_atomic_set32(&queue_info->recycled_pools_count, 0);
}
-apr_status_t ap_queue_info_term(fd_queue_info_t *queue_info)
+AP_DECLARE(apr_status_t) ap_queue_info_term(fd_queue_info_t *queue_info)
{
apr_status_t rv;
@@ -305,47 +336,35 @@ apr_status_t ap_queue_info_term(fd_queue_info_t *queue_info)
return rv;
}
- queue_info->terminated = 1;
+ apr_atomic_set32(&queue_info->terminated, 1);
apr_thread_cond_broadcast(queue_info->wait_for_idler);
return apr_thread_mutex_unlock(queue_info->idlers_mutex);
}
-/**
+/*
+ * Lock/unlock the fd_queue_t.
+ */
+#define queue_lock(q) apr_thread_mutex_lock((q)->one_big_mutex)
+#define queue_unlock(q) apr_thread_mutex_unlock((q)->one_big_mutex)
+
+/*
* Detects when the fd_queue_t is full. This utility function is expected
* to be called from within critical sections, and is not threadsafe.
*/
-#define ap_queue_full(queue) ((queue)->nelts == (queue)->bounds)
+#define queue_full(q) ((q)->nelts == (q)->bounds)
-/**
+/*
* Detects when the fd_queue_t is empty. This utility function is expected
* to be called from within critical sections, and is not threadsafe.
*/
-#define ap_queue_empty(queue) ((queue)->nelts == 0 && \
- APR_RING_EMPTY(&queue->timers, \
- timer_event_t, link))
+#define queue_empty(q) ((q)->nelts == 0)
-/**
- * Callback routine that is called to destroy this
- * fd_queue_t when its pool is destroyed.
- */
-static apr_status_t ap_queue_destroy(void *data)
-{
- fd_queue_t *queue = data;
-
- /* Ignore errors here, we can't do anything about them anyway.
- * XXX: We should at least try to signal an error here, it is
- * indicative of a programmer error. -aaron */
- apr_thread_cond_destroy(queue->not_empty);
- apr_thread_mutex_destroy(queue->one_big_mutex);
-
- return APR_SUCCESS;
-}
-
-/**
+/*
* Initialize the fd_queue_t.
*/
-apr_status_t ap_queue_create(fd_queue_t **pqueue, int capacity, apr_pool_t *p)
+AP_DECLARE(apr_status_t) ap_queue_create(fd_queue_t **pqueue, int capacity,
+ apr_pool_t *p)
{
apr_status_t rv;
fd_queue_t *queue;
@@ -361,143 +380,264 @@ apr_status_t ap_queue_create(fd_queue_t **pqueue, int capacity, apr_pool_t *p)
return rv;
}
- APR_RING_INIT(&queue->timers, timer_event_t, link);
-
- queue->data = apr_pcalloc(p, capacity * sizeof(fd_queue_elem_t));
- queue->bounds = capacity;
+ apr_pool_create(&queue->spare_pool, p);
+ APR_RING_INIT(&queue->elts, fd_queue_elem_t, link);
+ if (capacity > 0) {
+ queue->bounds = capacity;
+ }
+ else {
+ queue->bounds = APR_UINT32_MAX;
+ }
- apr_pool_cleanup_register(p, queue, ap_queue_destroy,
- apr_pool_cleanup_null);
*pqueue = queue;
-
return APR_SUCCESS;
}
-/**
- * Push a new socket onto the queue.
- *
- * precondition: ap_queue_info_wait_for_idler has already been called
- * to reserve an idle worker thread
- */
-apr_status_t ap_queue_push_socket(fd_queue_t *queue,
- apr_socket_t *sd, void *sd_baton,
- apr_pool_t *p)
+static APR_INLINE fd_queue_elem_t *get_spare_elem(fd_queue_t *queue)
+{
+ fd_queue_elem_t *elem = queue->spare_elems;
+ if (elem == NULL) {
+ elem = apr_pcalloc(queue->spare_pool, sizeof(*elem));
+ }
+ else {
+ queue->spare_elems = elem->next;
+ elem->next = NULL;
+ }
+ return elem;
+}
+
+static APR_INLINE void put_spare_elem(fd_queue_t *queue, fd_queue_elem_t *elem)
+{
+ elem->event = NULL;
+ elem->next = queue->spare_elems;
+ queue->spare_elems = elem;
+}
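+
+/* (spare elements are kept on a free list, so steady-state push/pop does
+ * not allocate; spare_pool only grows when the queue reaches a new high
+ * watermark of simultaneously queued events) */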
+
+static APR_INLINE void enqueue_elem(fd_queue_t *queue, fd_queue_elem_t *elem,
+ ap_queue_event_t *event)
+{
+ if (event) {
+ elem->event = event;
+ }
+ else {
+ elem->event = &elem->self_event;
+ }
+ elem->event->elem = elem;
+
+ APR_RING_INSERT_TAIL(&queue->elts, elem, fd_queue_elem_t, link);
+ queue->nelts++;
+}
+
+static APR_INLINE void dequeue_elem(fd_queue_t *queue, fd_queue_elem_t *elem)
+{
+ elem->event->elem = NULL;
+ ap_assert(queue->nelts > 0);
+ APR_RING_REMOVE(elem, link);
+ APR_RING_ELEM_INIT(elem, link);
+ queue->nelts--;
+}
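+
+/* (event->elem thus tracks queue membership: set by enqueue_elem(), reset
+ * here, which ap_queue_kill_event_locked() relies on below) */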
+
+/* Pushes a new element (reusing a spare one if any) to the tail of the queue. */
+static void push_elem(fd_queue_t *queue, fd_queue_elem_t **pushed_elem,
+ ap_queue_event_t *event)
{
fd_queue_elem_t *elem;
+
+ AP_DEBUG_ASSERT(!queue_full(queue));
+ AP_DEBUG_ASSERT(!queue->terminated);
+
+ elem = get_spare_elem(queue);
+ enqueue_elem(queue, elem, event);
+
+ if (pushed_elem) {
+ *pushed_elem = elem;
+ }
+}
+
+/*
+ * Retrieves the oldest available element from the queue, waiting until one
+ * becomes available.
+ */
+static apr_status_t pop_elem(fd_queue_t *queue, fd_queue_elem_t **pelem)
+{
apr_status_t rv;
- if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) {
+ for (;;) {
+ if (queue->terminated) {
+ return APR_EOF; /* no more elements ever again */
+ }
+
+ if (queue->interrupted) {
+ queue->interrupted--;
+ return APR_EINTR;
+ }
+
+ if (!queue_empty(queue)) {
+ *pelem = APR_RING_FIRST(&queue->elts);
+ dequeue_elem(queue, *pelem);
+ return APR_SUCCESS;
+ }
+
+ queue->num_waiters++;
+ rv = apr_thread_cond_wait(queue->not_empty, queue->one_big_mutex);
+ queue->num_waiters--;
+ if (rv != APR_SUCCESS) {
+ return rv;
+ }
+ }
+}
+
+AP_DECLARE(apr_status_t) ap_queue_push_event(fd_queue_t *queue,
+ ap_queue_event_t *event)
+{
+ apr_status_t rv;
+
+ if ((rv = queue_lock(queue)) != APR_SUCCESS) {
return rv;
}
- AP_DEBUG_ASSERT(!queue->terminated);
- AP_DEBUG_ASSERT(!ap_queue_full(queue));
-
- elem = &queue->data[queue->in++];
- if (queue->in >= queue->bounds)
- queue->in -= queue->bounds;
- elem->sd = sd;
- elem->sd_baton = sd_baton;
- elem->p = p;
- queue->nelts++;
+ switch (event->type) {
+ case AP_QUEUE_EVENT_SOCK:
+ case AP_QUEUE_EVENT_TIMER:
+ case AP_QUEUE_EVENT_BATON:
+ push_elem(queue, NULL, event);
+ if (event->cb) {
+ event->cb(event->cb_baton, 1);
+ }
+ apr_thread_cond_signal(queue->not_empty);
+ break;
- apr_thread_cond_signal(queue->not_empty);
+ default:
+ rv = APR_EINVAL;
+ break;
+ }
- return apr_thread_mutex_unlock(queue->one_big_mutex);
+ queue_unlock(queue);
+ return rv;
}
-apr_status_t ap_queue_push_timer(fd_queue_t *queue, timer_event_t *te)
+AP_DECLARE(apr_status_t) ap_queue_pop_event(fd_queue_t *queue,
+ ap_queue_event_t **pevent)
{
apr_status_t rv;
+ fd_queue_elem_t *elem;
+
+ *pevent = NULL;
- if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) {
+ if ((rv = queue_lock(queue)) != APR_SUCCESS) {
return rv;
}
- AP_DEBUG_ASSERT(!queue->terminated);
+ rv = pop_elem(queue, &elem);
+ if (rv == APR_SUCCESS) {
+ ap_queue_event_t *event = elem->event;
+ ap_assert(event && event != &elem->self_event);
+ put_spare_elem(queue, elem);
+ if (event->cb) {
+ event->cb(event->cb_baton, 0);
+ }
+ *pevent = event;
+ }
- APR_RING_INSERT_TAIL(&queue->timers, te, timer_event_t, link);
+ queue_unlock(queue);
+ return rv;
+}
- apr_thread_cond_signal(queue->not_empty);
+AP_DECLARE(void) ap_queue_kill_event_locked(fd_queue_t *queue,
+ ap_queue_event_t *event)
+{
+ fd_queue_elem_t *elem = event->elem;
+ ap_assert(elem && APR_RING_NEXT(elem, link) != elem);
- return apr_thread_mutex_unlock(queue->one_big_mutex);
+ dequeue_elem(queue, elem);
+ put_spare_elem(queue, elem);
+ if (event->cb) {
+ event->cb(event->cb_baton, 0);
+ }
+}
+
+AP_DECLARE(apr_status_t) ap_queue_lock(fd_queue_t *queue)
+{
+ return queue_lock(queue);
+}
+
+AP_DECLARE(apr_status_t) ap_queue_unlock(fd_queue_t *queue)
+{
+ return queue_unlock(queue);
}
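+
+/* (ap_queue_lock/unlock expose the queue mutex so callers may remove a
+ * still-queued event with ap_queue_kill_event_locked(), as the listener
+ * does when timing out backlogged connections) */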
/**
- * Retrieves the next available socket from the queue. If there are no
- * sockets available, it will block until one becomes available.
- * Once retrieved, the socket is placed into the address specified by
- * 'sd'.
+ * Push a socket onto the queue.
*/
-apr_status_t ap_queue_pop_something(fd_queue_t *queue,
- apr_socket_t **sd, void **sd_baton,
- apr_pool_t **p, timer_event_t **te_out)
+AP_DECLARE(apr_status_t) ap_queue_push_socket(fd_queue_t *queue, apr_socket_t *sd,
+ apr_pool_t *p)
{
- fd_queue_elem_t *elem;
- timer_event_t *te;
apr_status_t rv;
+ fd_queue_elem_t *elem;
+
+ ap_assert(sd != NULL);
- if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) {
+ if ((rv = queue_lock(queue)) != APR_SUCCESS) {
return rv;
}
- /* Keep waiting until we wake up and find that the queue is not empty. */
- if (ap_queue_empty(queue)) {
- if (!queue->terminated) {
- apr_thread_cond_wait(queue->not_empty, queue->one_big_mutex);
- }
- /* If we wake up and it's still empty, then we were interrupted */
- if (ap_queue_empty(queue)) {
- rv = apr_thread_mutex_unlock(queue->one_big_mutex);
- if (rv != APR_SUCCESS) {
- return rv;
- }
- if (queue->terminated) {
- return APR_EOF; /* no more elements ever again */
- }
- else {
- return APR_EINTR;
- }
- }
+ push_elem(queue, &elem, NULL);
+ elem->event->type = AP_QUEUE_EVENT_SOCK;
+ elem->event->data.se = &elem->self_sock_event;
+ elem->event->data.se->baton = NULL;
+ elem->event->data.se->sd = sd;
+ elem->event->data.se->p = p;
+
+ apr_thread_cond_signal(queue->not_empty);
+
+ queue_unlock(queue);
+ return APR_SUCCESS;
+}
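+
+/* (this reuses the element's embedded self_event/self_sock_event, so
+ * worker-style callers need not manage an ap_queue_event_t themselves) */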
+
+/**
+ * Pop a socket from the queue.
+ */
+AP_DECLARE(apr_status_t) ap_queue_pop_socket(fd_queue_t *queue, apr_socket_t **psd,
+ apr_pool_t **pp)
+{
+ apr_status_t rv;
+ fd_queue_elem_t *elem;
+
+ if (psd) {
+ *psd = NULL;
+ }
+ if (pp) {
+ *pp = NULL;
}
- te = NULL;
- if (te_out) {
- if (!APR_RING_EMPTY(&queue->timers, timer_event_t, link)) {
- te = APR_RING_FIRST(&queue->timers);
- APR_RING_REMOVE(te, link);
- }
- *te_out = te;
+ if ((rv = queue_lock(queue)) != APR_SUCCESS) {
+ return rv;
}
- if (!te) {
- elem = &queue->data[queue->out++];
- if (queue->out >= queue->bounds)
- queue->out -= queue->bounds;
- queue->nelts--;
- *sd = elem->sd;
- if (sd_baton) {
- *sd_baton = elem->sd_baton;
+ rv = pop_elem(queue, &elem);
+ if (rv == APR_SUCCESS) {
+ ap_queue_event_t *event = elem->event;
+ ap_assert(event && event == &elem->self_event);
+ ap_assert(event->data.se == &elem->self_sock_event);
+ ap_assert(event->type == AP_QUEUE_EVENT_SOCK);
+ if (psd) {
+ *psd = event->data.se->sd;
+ }
+ if (pp) {
+ *pp = event->data.se->p;
}
- *p = elem->p;
-#ifdef AP_DEBUG
- elem->sd = NULL;
- elem->p = NULL;
-#endif /* AP_DEBUG */
+ put_spare_elem(queue, elem);
}
- return apr_thread_mutex_unlock(queue->one_big_mutex);
+ queue_unlock(queue);
+ return rv;
}
static apr_status_t queue_interrupt(fd_queue_t *queue, int all, int term)
{
apr_status_t rv;
- if (queue->terminated) {
- return APR_EOF;
- }
-
- if ((rv = apr_thread_mutex_lock(queue->one_big_mutex)) != APR_SUCCESS) {
+ if ((rv = queue_lock(queue)) != APR_SUCCESS) {
return rv;
}
@@ -505,15 +645,21 @@ static apr_status_t queue_interrupt(fd_queue_t *queue, int all, int term)
* we could end up setting it and waking everybody up just after a
* would-be popper checks it but right before they block
*/
+ queue->interrupted = 1;
if (term) {
queue->terminated = 1;
}
- if (all)
+ if (all) {
+ if (queue->num_waiters > 1)
+ queue->interrupted += queue->num_waiters - 1;
apr_thread_cond_broadcast(queue->not_empty);
- else
+ }
+ else {
apr_thread_cond_signal(queue->not_empty);
+ }
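+ /* (each waiter woken in pop_elem() consumes one "interrupted" token,
+ * so a broadcast leaves one token per blocked waiter, at least one) */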
- return apr_thread_mutex_unlock(queue->one_big_mutex);
+ queue_unlock(queue);
+ return APR_SUCCESS;
}
apr_status_t ap_queue_interrupt_all(fd_queue_t *queue)
diff --git a/server/mpm_fdqueue.h b/server/mpm_fdqueue.h
index 260e22ab80e..29297fd60d5 100644
--- a/server/mpm_fdqueue.h
+++ b/server/mpm_fdqueue.h
@@ -27,7 +27,7 @@
#include
-/* This code is not AP_DECLARE()ed/exported, and used by MPMs event/worker
+/* This code is AP_DECLARE()ed/exported but used by MPMs event/worker
* only (for now), not worth thinking about w/o threads either...
*/
#if APR_HAS_THREADS
@@ -40,28 +40,48 @@
#include
#include
+struct fd_queue_t; /* opaque */
struct fd_queue_info_t; /* opaque */
struct fd_queue_elem_t; /* opaque */
+typedef struct fd_queue_t fd_queue_t;
typedef struct fd_queue_info_t fd_queue_info_t;
typedef struct fd_queue_elem_t fd_queue_elem_t;
AP_DECLARE(apr_status_t) ap_queue_info_create(fd_queue_info_t **queue_info,
- apr_pool_t *pool, int max_idlers,
- int max_recycled_pools);
+ apr_pool_t *pool, int max_recycled_pools);
AP_DECLARE(apr_status_t) ap_queue_info_set_idle(fd_queue_info_t *queue_info,
apr_pool_t *pool_to_recycle);
AP_DECLARE(apr_status_t) ap_queue_info_try_get_idler(fd_queue_info_t *queue_info);
-AP_DECLARE(apr_status_t) ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info,
- int *had_to_block);
+AP_DECLARE(apr_status_t) ap_queue_info_wait_for_idler(fd_queue_info_t *queue_info);
AP_DECLARE(apr_uint32_t) ap_queue_info_num_idlers(fd_queue_info_t *queue_info);
AP_DECLARE(apr_status_t) ap_queue_info_term(fd_queue_info_t *queue_info);
-AP_DECLARE(void) ap_queue_info_pop_pool(fd_queue_info_t *queue_info,
- apr_pool_t **recycled_pool);
+/* Async API */
+AP_DECLARE(apr_int32_t) ap_queue_info_idlers_inc(fd_queue_info_t *queue_info);
+AP_DECLARE(apr_int32_t) ap_queue_info_idlers_dec(fd_queue_info_t *queue_info);
+AP_DECLARE(apr_int32_t) ap_queue_info_idlers_count(fd_queue_info_t *queue_info);
+
+AP_DECLARE(apr_pool_t *) ap_queue_info_pop_pool(fd_queue_info_t *queue_info);
AP_DECLARE(void) ap_queue_info_push_pool(fd_queue_info_t *queue_info,
apr_pool_t *pool_to_recycle);
AP_DECLARE(void) ap_queue_info_free_idle_pools(fd_queue_info_t *queue_info);
+enum ap_queue_event_type_e
+{
+ AP_QUEUE_EVENT_SOCK,
+ AP_QUEUE_EVENT_TIMER,
+ AP_QUEUE_EVENT_BATON,
+};
+typedef enum ap_queue_event_type_e ap_queue_event_type_e;
+
+struct sock_event_t
+{
+ apr_pool_t *p;
+ apr_socket_t *sd;
+ void *baton;
+};
+typedef struct sock_event_t sock_event_t;
+
struct timer_event_t
{
APR_RING_ENTRY(timer_event_t) link;
@@ -74,33 +94,47 @@ struct timer_event_t
};
typedef struct timer_event_t timer_event_t;
-struct fd_queue_t
+struct ap_queue_event_t
{
- APR_RING_HEAD(timers_t, timer_event_t) timers;
- fd_queue_elem_t *data;
- unsigned int nelts;
- unsigned int bounds;
- unsigned int in;
- unsigned int out;
- apr_thread_mutex_t *one_big_mutex;
- apr_thread_cond_t *not_empty;
- volatile int terminated;
+ /* event data */
+ ap_queue_event_type_e type;
+ union {
+ sock_event_t *se;
+ timer_event_t *te;
+ void *baton;
+ } data;
+
+ /* called back when the event is pushed/popped,
+ * under the queue lock (must not block!)
+ */
+ void (*cb)(void *baton, int pushed);
+ void *cb_baton;
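+
+ /* (e.g. mpm_event's conn_state_backlog_cb presumably uses this to
+ * flag whether its connection is sitting in the backlog queue) */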
+
+ /* link in container when queued (for internal use) */
+ fd_queue_elem_t *elem;
};
-typedef struct fd_queue_t fd_queue_t;
+typedef struct ap_queue_event_t ap_queue_event_t;
+
+AP_DECLARE(apr_status_t) ap_queue_create(fd_queue_t **pqueue, int capacity,
+ apr_pool_t *p);
+
+/* mpm_event API (queue of any event) */
+AP_DECLARE(apr_status_t) ap_queue_push_event(fd_queue_t *queue,
+ ap_queue_event_t *event);
+AP_DECLARE(apr_status_t) ap_queue_pop_event(fd_queue_t *queue,
+ ap_queue_event_t **pevent);
+AP_DECLARE(apr_status_t) ap_queue_lock(fd_queue_t *queue);
+AP_DECLARE(void) ap_queue_kill_event_locked(fd_queue_t *queue,
+ ap_queue_event_t *event);
+AP_DECLARE(apr_status_t) ap_queue_unlock(fd_queue_t *queue);
-AP_DECLARE(apr_status_t) ap_queue_create(fd_queue_t **pqueue,
- int capacity, apr_pool_t *p);
-AP_DECLARE(apr_status_t) ap_queue_push_socket(fd_queue_t *queue,
- apr_socket_t *sd, void *sd_baton,
+/* mpm_worker API (queue of sock_event_t only) */
+AP_DECLARE(apr_status_t) ap_queue_push_socket(fd_queue_t *queue, apr_socket_t *sd,
apr_pool_t *p);
-AP_DECLARE(apr_status_t) ap_queue_push_timer(fd_queue_t *queue,
- timer_event_t *te);
-AP_DECLARE(apr_status_t) ap_queue_pop_something(fd_queue_t *queue,
- apr_socket_t **sd, void **sd_baton,
- apr_pool_t **p, timer_event_t **te);
-#define ap_queue_pop_socket(q_, s_, p_) \
- ap_queue_pop_something((q_), (s_), NULL, (p_), NULL)
+AP_DECLARE(apr_status_t) ap_queue_pop_socket(fd_queue_t *queue, apr_socket_t **psd,
+ apr_pool_t **pp);
+/* common API */
AP_DECLARE(apr_status_t) ap_queue_interrupt_all(fd_queue_t *queue);
AP_DECLARE(apr_status_t) ap_queue_interrupt_one(fd_queue_t *queue);
AP_DECLARE(apr_status_t) ap_queue_term(fd_queue_t *queue);
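Reviewer note: a minimal sketch of how the new event queue API above is meant to be driven. The helper names are hypothetical illustrations; only the types and ap_queue_*() functions come from this header.

    /* Hypothetical illustration of the Async API above. */
    static void on_queue_transition(void *baton, int pushed)
    {
        /* Runs under the queue lock when the event is pushed (1) or
         * popped (0): must not block.
         */
    }

    static apr_status_t push_socket_event(fd_queue_t *queue, apr_pool_t *p,
                                          apr_socket_t *sd)
    {
        sock_event_t *se = apr_pcalloc(p, sizeof(*se));
        ap_queue_event_t *event = apr_pcalloc(p, sizeof(*event));

        se->p = p;
        se->sd = sd;

        event->type = AP_QUEUE_EVENT_SOCK;
        event->data.se = se;
        event->cb = on_queue_transition;

        return ap_queue_push_event(queue, event);
    }

    static apr_status_t pop_any_event(fd_queue_t *queue)
    {
        ap_queue_event_t *event;
        apr_status_t rv = ap_queue_pop_event(queue, &event);

        if (rv != APR_SUCCESS) {
            return rv; /* interrupted or terminated */
        }
        if (event->type == AP_QUEUE_EVENT_SOCK) {
            /* process event->data.se->sd using event->data.se->p */
        }
        else if (event->type == AP_QUEUE_EVENT_TIMER) {
            /* run the timer_event_t in event->data.te */
        }
        return APR_SUCCESS;
    }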
From aa04f2aab4588075f0f63dc9b19d19f264a7dbfe Mon Sep 17 00:00:00 2001
From: ylavic
Date: Fri, 7 Jul 2023 13:04:42 +0200
Subject: [PATCH 11/22] core,mpm_event: Non-blocking shutdown.
---
include/http_connection.h | 9 ++-
include/scoreboard.h | 1 +
modules/generators/mod_status.c | 17 +++--
modules/lua/lua_request.c | 4 ++
server/connection.c | 23 +++---
server/mpm/event/event.c | 119 ++++++++++++++++++++++----------
6 files changed, 120 insertions(+), 53 deletions(-)
diff --git a/include/http_connection.h b/include/http_connection.h
index 601a4769109..78371efbb27 100644
--- a/include/http_connection.h
+++ b/include/http_connection.h
@@ -43,10 +43,15 @@ extern "C" {
*/
AP_CORE_DECLARE(void) ap_process_connection(conn_rec *c, void *csd);
+#define AP_SHUTDOWN_CONN_NOFLUSH 0
+#define AP_SHUTDOWN_CONN_FLUSH 1
+#define AP_SHUTDOWN_CONN_WC 2
+
/**
* Shutdown the connection for writing.
* @param c The connection to shutdown
- * @param flush Whether or not to flush pending data before
+ * @param flush Whether to flush pending data beforehand and, if so,
+ *              how (AP_SHUTDOWN_CONN_* flags)
* @return APR_SUCCESS or the underlying error
*/
AP_CORE_DECLARE(apr_status_t) ap_shutdown_conn(conn_rec *c, int flush);
@@ -54,7 +59,7 @@ AP_CORE_DECLARE(apr_status_t) ap_shutdown_conn(conn_rec *c, int flush);
/**
* Flushes all remaining data in the client send buffer
* @param c The connection to flush
- * @remark calls ap_shutdown_conn(c, 1)
+ * @remark calls ap_shutdown_conn(c, AP_SHUTDOWN_CONN_FLUSH)
*/
AP_CORE_DECLARE(void) ap_flush_conn(conn_rec *c);
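Reviewer note: a sketch of how callers are expected to pick a flag (illustrative only; the flags and prototype are the ones declared above).

    /* FLUSH blocks until pending data is written out (historical
     * behavior); WC instead queues a Write Completion bucket so an
     * async MPM can finish the write without blocking (see the
     * mpm_event changes below); NOFLUSH sends the EOC bucket alone.
     */
    static apr_status_t example_shutdown(conn_rec *c, int async)
    {
        return ap_shutdown_conn(c, async ? AP_SHUTDOWN_CONN_WC
                                         : AP_SHUTDOWN_CONN_FLUSH);
    }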
diff --git a/include/scoreboard.h b/include/scoreboard.h
index e83e52fdb16..581f86b866c 100644
--- a/include/scoreboard.h
+++ b/include/scoreboard.h
@@ -149,6 +149,7 @@ struct process_score {
apr_uint32_t keep_alive; /* async connections in keep alive */
apr_uint32_t suspended; /* connections suspended by some module */
apr_uint32_t wait_io; /* async connections waiting an IO in the MPM */
+ apr_uint32_t shutdown; /* async connections shutting down before close */
apr_uint32_t backlog; /* async connections waiting for a worker */
};
diff --git a/modules/generators/mod_status.c b/modules/generators/mod_status.c
index f0cff67ac45..5ff635cc96e 100644
--- a/modules/generators/mod_status.c
+++ b/modules/generators/mod_status.c
@@ -564,8 +564,8 @@ static int status_handler(request_rec *r)
ap_rputs("", r);
if (is_async) {
- int wait_io = 0, write_completion = 0, lingering_close = 0, keep_alive = 0,
- connections = 0, stopping = 0, procs = 0;
+ int wait_io = 0, write_completion = 0, shutdown = 0, lingering_close = 0,
+ keep_alive = 0, connections = 0, stopping = 0, procs = 0;
if (!short_report)
ap_rputs("\n\n\n"
"Slot | "
@@ -577,7 +577,7 @@ static int status_handler(request_rec *r)
"
---|
total | accepting | "
"busy | graceful | idle | "
"wait-io | writing | keep-alive | "
- "closing |
\n", r);
+ "shutdown | closing | \n", r);
for (i = 0; i < server_limit; ++i) {
ps_record = ap_get_scoreboard_process(i);
if (ps_record->pid) {
@@ -585,6 +585,7 @@ static int status_handler(request_rec *r)
wait_io += ps_record->wait_io;
write_completion += ps_record->write_completion;
keep_alive += ps_record->keep_alive;
+ shutdown += ps_record->shutdown;
lingering_close += ps_record->lingering_close;
procs++;
if (ps_record->quiescing) {
@@ -601,7 +602,7 @@ static int status_handler(request_rec *r)
ap_rprintf(r, "%u | %" APR_PID_T_FMT " | "
"%s%s | "
"%u | %s | "
- "%u | %u | %u | "
+ "%u | %u | %u | %u | "
"%u | %u | %u | %u | "
"
\n",
i, ps_record->pid,
@@ -614,6 +615,7 @@ static int status_handler(request_rec *r)
ps_record->wait_io,
ps_record->write_completion,
ps_record->keep_alive,
+ ps_record->shutdown,
ps_record->lingering_close);
}
}
@@ -622,14 +624,14 @@ static int status_handler(request_rec *r)
ap_rprintf(r, "Sum | "
"%d | %d | "
"%d | | "
- "%d | %d | %d | "
+ "%d | %d | %d | %d | "
"%d | %d | %d | %d | "
"
\n
\n",
procs, stopping,
connections,
busy, graceful, idle,
wait_io, write_completion, keep_alive,
- lingering_close);
+ shutdown, lingering_close);
}
else {
ap_rprintf(r, "Processes: %d\n"
@@ -638,11 +640,12 @@ static int status_handler(request_rec *r)
"ConnsAsyncWaitIO: %d\n"
"ConnsAsyncWriting: %d\n"
"ConnsAsyncKeepAlive: %d\n"
+ "ConnsAsyncShutdown: %d\n"
"ConnsAsyncClosing: %d\n",
procs, stopping,
connections,
wait_io, write_completion, keep_alive,
- lingering_close);
+ shutdown, lingering_close);
}
}
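Reviewer note: with this change the machine-readable report (?auto) gains a ConnsAsyncShutdown line; illustrative output with invented values:

    Processes: 2
    Stopping: 0
    ConnsTotal: 17
    ConnsAsyncWaitIO: 4
    ConnsAsyncWriting: 1
    ConnsAsyncKeepAlive: 7
    ConnsAsyncShutdown: 3
    ConnsAsyncClosing: 2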
diff --git a/modules/lua/lua_request.c b/modules/lua/lua_request.c
index 5fa3a968c6b..f93c3493af4 100644
--- a/modules/lua/lua_request.c
+++ b/modules/lua/lua_request.c
@@ -1276,6 +1276,10 @@ static int lua_ap_scoreboard_process(lua_State *L)
lua_pushnumber(L, ps_record->write_completion);
lua_settable(L, -3);
+ lua_pushstring(L, "shutdown");
+ lua_pushnumber(L, ps_record->shutdown);
+ lua_settable(L, -3);
+
lua_pushstring(L, "not_accepting");
lua_pushnumber(L, ps_record->not_accepting);
lua_settable(L, -3);
diff --git a/server/connection.c b/server/connection.c
index a1c4c1860f0..383b769660f 100644
--- a/server/connection.c
+++ b/server/connection.c
@@ -111,37 +111,42 @@ AP_CORE_DECLARE(apr_status_t) ap_shutdown_conn(conn_rec *c, int flush)
apr_bucket_brigade *bb;
apr_bucket *b;
- bb = apr_brigade_create(c->pool, c->bucket_alloc);
+ bb = ap_acquire_brigade(c);
- if (flush) {
+ if (flush == AP_SHUTDOWN_CONN_WC) {
+ /* Write Completion bucket */
+ b = ap_bucket_wc_create(c->bucket_alloc);
+ }
+ else {
/* FLUSH bucket */
b = apr_bucket_flush_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bb, b);
}
+ APR_BRIGADE_INSERT_TAIL(bb, b);
/* End Of Connection bucket */
b = ap_bucket_eoc_create(c->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(bb, b);
rv = ap_pass_brigade(c->output_filters, bb);
- apr_brigade_destroy(bb);
+ ap_release_brigade(c, bb);
return rv;
}
AP_CORE_DECLARE(void) ap_flush_conn(conn_rec *c)
{
- (void)ap_shutdown_conn(c, 1);
+ (void)ap_shutdown_conn(c, AP_SHUTDOWN_CONN_FLUSH);
}
AP_DECLARE(int) ap_prep_lingering_close(conn_rec *c)
{
/* Give protocol handlers one last chance to raise their voice */
- ap_run_pre_close_connection(c);
+ int rc = ap_run_pre_close_connection(c);
if (c->sbh) {
ap_update_child_status(c->sbh, SERVER_CLOSING, NULL);
}
- return 0;
+
+ return (rc == DECLINED) ? OK : rc;
}
/* we now proceed to read from the client until we get EOF, or until
@@ -172,7 +177,9 @@ AP_DECLARE(int) ap_start_lingering_close(conn_rec *c)
*/
/* Send any leftover data to the client, but never try to again */
- ap_flush_conn(c);
+ if (ap_shutdown_conn(c, AP_SHUTDOWN_CONN_FLUSH)) {
+ return 1;
+ }
#ifdef NO_LINGCLOSE
return 1;
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 5a9f4b676b4..8c5bee23115 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -301,7 +301,9 @@ struct event_conn_state_t {
/** Is lingering close from defer_lingering_close()? */
deferred_linger :1,
/** Has ap_start_lingering_close() been called? */
- linger_started :1;
+ linger_started :1,
+ /** Is lingering connection flushed and shutdown? */
+ linger_shutdown :1;
};
#define cs_se(cs) (&(cs)->bse.se)
#define cs_qe(cs) (&(cs)->bse.qe)
@@ -455,6 +457,7 @@ struct timeout_queue {
* waitio_q uses vhost's TimeOut
* write_completion_q uses vhost's TimeOut
* keepalive_q uses vhost's KeepAliveTimeOut
+ * shutdown_q uses vhost's TimeOut
* linger_q uses MAX_SECS_TO_LINGER
* short_linger_q uses SECONDS_TO_LINGER
* backlog_q uses vhost's TimeOut
@@ -462,6 +465,7 @@ struct timeout_queue {
static struct timeout_queue *waitio_q, /* wait for I/O to happen */
*write_completion_q, /* completion or user async poll */
*keepalive_q, /* in between requests */
+ *shutdown_q, /* shutting down (write) before close */
*linger_q, /* lingering (read) before close */
*short_linger_q, /* lingering (read) before close (short timeout) */
*backlog_q; /* waiting for a worker */
@@ -658,6 +662,7 @@ struct event_srv_cfg_s {
struct timeout_queue *io_q,
*wc_q,
*ka_q,
+ *sh_q,
*bl_q;
server_rec *s; /* backref */
};
@@ -724,14 +729,15 @@ static int disable_listensocks(void)
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(10381)
"Suspend listening sockets: idlers:%i conns:%u backlog:%u "
- "waitio:%u write:%u keepalive:%u linger:%u/%u "
- "timers:%u suspended:%u",
+ "waitio:%u write:%u keepalive:%u shutdown:%u "
+ "linger:%u/%u timers:%u suspended:%u",
ap_queue_info_idlers_count(worker_queue_info),
apr_atomic_read32(&connection_count),
apr_atomic_read32(backlog_q->total),
apr_atomic_read32(waitio_q->total),
apr_atomic_read32(write_completion_q->total),
apr_atomic_read32(keepalive_q->total),
+ apr_atomic_read32(shutdown_q->total),
apr_atomic_read32(linger_q->total),
apr_atomic_read32(short_linger_q->total),
apr_atomic_read32(&timers_count),
@@ -756,14 +762,15 @@ static int enable_listensocks(void)
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(00457)
"Resume listening sockets: idlers:%i conns:%u backlog:%u "
- "waitio:%u write:%u keepalive:%u linger:%u/%u "
- "timers:%u suspended:%u",
+ "waitio:%u write:%u keepalive:%u shutdown:%u "
+ "linger:%u/%u timers:%u suspended:%u",
ap_queue_info_idlers_count(worker_queue_info),
apr_atomic_read32(&connection_count),
apr_atomic_read32(backlog_q->total),
apr_atomic_read32(waitio_q->total),
apr_atomic_read32(write_completion_q->total),
apr_atomic_read32(keepalive_q->total),
+ apr_atomic_read32(shutdown_q->total),
apr_atomic_read32(linger_q->total),
apr_atomic_read32(short_linger_q->total),
apr_atomic_read32(&timers_count),
@@ -1917,7 +1924,7 @@ static void push2worker(event_conn_state_t *cs, timer_event_t *te,
ap_assert(!cs_in_backlog(cs));
ap_assert(!cs->q);
- if (busy && cs->pub.state == CONN_STATE_LINGER && cs->linger_started) {
+ if (busy && cs->pub.state == CONN_STATE_LINGER && cs->linger_shutdown) {
/* Not worth lingering more on this connection if we are short of
* workers and everything is flushed+shutdown already, back out
* and close.
@@ -2201,19 +2208,53 @@ static void process_lingering_close(event_conn_state_t *cs)
AP_DEBUG_ASSERT(CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state));
/* Flush and shutdown first */
- if (!cs->linger_started) {
- cs->linger_started = 1; /* once! */
- apr_atomic_inc32(&lingering_count);
+ if (!cs->linger_shutdown) {
+ conn_rec *c = cs->c;
+ int rc = OK;
+
cs->pub.state = CONN_STATE_LINGER;
- apr_socket_timeout_set(csd, apr_time_from_sec(SECONDS_TO_LINGER));
- if (ap_start_lingering_close(cs->c)) {
+ if (!cs->linger_started) {
+ cs->linger_started = 1; /* once! */
+ apr_atomic_inc32(&lingering_count);
notify_suspend(cs);
+
+ /* Shutdown the connection, i.e. pre_connection_close hooks,
+ * SSL/TLS close notify, WC bucket, etc.
+ */
+ rc = ap_prep_lingering_close(c);
+ if (rc == OK) {
+ rc = ap_shutdown_conn(c, AP_SHUTDOWN_CONN_WC);
+ if (rc == OK) {
+ if (c->aborted) {
+ rc = DONE;
+ }
+ else if (ap_filter_should_yield(c->output_filters)) {
+ rc = AGAIN;
+ }
+ }
+ }
+ }
+ else {
+ rc = ap_check_output_pending(c);
+ }
+
+ cs->pub.state = CONN_STATE_LINGER;
+ cs->pub.sense = CONN_SENSE_DEFAULT;
+ if (rc == AGAIN) {
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "queuing lingering close for connection %" CS_FMT,
+ CS_ARG(cs));
+ if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->sh_q, NULL)) {
+ return; /* queued */
+ }
+ }
+ if (rc != OK || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE)) {
close_connection(cs);
return;
}
-
- notify_suspend(cs);
+
+ cs->linger_shutdown = 1; /* once! */
/* All nonblocking from now, no need for APR_INCOMPLETE_READ either */
apr_socket_timeout_set(csd, 0);
@@ -2230,7 +2271,6 @@ static void process_lingering_close(event_conn_state_t *cs)
else {
cs->pub.state = CONN_STATE_LINGER_NORMAL;
}
- cs->pub.sense = CONN_SENSE_DEFAULT;
}
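Reviewer note: schematically, the new shutdown phase reduces to the following state machine. This is a simplified sketch, not the literal MPM code; start_shutdown() is a hypothetical stand-in for the ap_prep_lingering_close() + ap_shutdown_conn() sequence above.

    int rc = cs->linger_started ? ap_check_output_pending(c)
                                : start_shutdown(c); /* hypothetical */
    if (rc == AGAIN) {
        /* Output filters still hold data: poll for write in sh_q, retry. */
    }
    else if (rc == OK
             && apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) == APR_SUCCESS) {
        /* Flushed and shut down: fall through to the lingering read. */
    }
    else {
        /* Error or aborted connection (DONE): close immediately. */
    }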
/* Drain until EAGAIN or EOF/error, in the former case requeue and
@@ -2729,32 +2769,30 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
/* Recompute this by walking the timeout queues (under the lock) */
queues_next_expiry = 0;
- /* Step 1: keepalive queue timeouts */
+ /* Process shutdown_q first because the expired entries from the
+ * other queues will go there and don't need to be checked twice
+ * (nor do we want to potentially kill them before the shutdown).
+ */
+ process_timeout_queue(shutdown_q, now);
+
+ process_timeout_queue(waitio_q, now);
+ process_timeout_queue(write_completion_q, now);
+
+ /* The linger and keepalive queues can be shrunk any time
+ * under pressure.
+ */
if (workers_were_busy || dying) {
+ shrink_timeout_queue(linger_q, now);
+ shrink_timeout_queue(short_linger_q, now);
shrink_timeout_queue(keepalive_q, now);
}
else {
+ process_timeout_queue(linger_q, now);
+ process_timeout_queue(short_linger_q, now);
process_timeout_queue(keepalive_q, now);
}
- /* Step 2: waitio queue timeouts */
- process_timeout_queue(waitio_q, now);
-
- /* Step 3: write completion queue timeouts */
- process_timeout_queue(write_completion_q, now);
-
- /* Step 4: normal lingering close queue timeouts */
- if (dying && linger_q->timeout > short_linger_q->timeout) {
- /* Dying, force short timeout for normal lingering close */
- linger_q->timeout = short_linger_q->timeout;
- }
- process_timeout_queue(linger_q, now);
-
- /* Step 5: short lingering close queue timeouts */
- process_timeout_queue(short_linger_q, now);
-
- /* Step 6: backlog queue timeouts
- * Connections in backlog race with the workers (dequeuing) under
+ /* Connections in backlog race with the workers (dequeuing) under
* the worker_queue mutex.
*/
if (apr_atomic_read32(backlog_q->total)) {
@@ -2773,14 +2811,19 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ps->wait_io = apr_atomic_read32(waitio_q->total);
ps->write_completion = apr_atomic_read32(write_completion_q->total);
ps->keep_alive = apr_atomic_read32(keepalive_q->total);
+ ps->shutdown = apr_atomic_read32(shutdown_q->total);
ps->lingering_close = apr_atomic_read32(&lingering_count);
ps->backlog = apr_atomic_read32(backlog_q->total);
ps->suspended = apr_atomic_read32(&suspended_count);
ps->connections = apr_atomic_read32(&connection_count);
}
else if ((workers_were_busy || dying)
- && apr_atomic_read32(keepalive_q->total)) {
+ && (apr_atomic_read32(linger_q->total)
+ || apr_atomic_read32(short_linger_q->total)
+ || apr_atomic_read32(keepalive_q->total))) {
apr_thread_mutex_lock(timeout_mutex);
+ shrink_timeout_queue(linger_q, now);
+ shrink_timeout_queue(short_linger_q, now);
shrink_timeout_queue(keepalive_q, now);
apr_thread_mutex_unlock(timeout_mutex);
ps->keep_alive = 0;
@@ -4444,7 +4487,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
apr_pool_t *ptemp, server_rec *s)
{
- apr_hash_t *io_h, *wc_h, *ka_h, *bl_h;
+ apr_hash_t *io_h, *wc_h, *ka_h, *sh_h, *bl_h;
/* Not needed in pre_config stage */
if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
@@ -4454,6 +4497,7 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
io_h = apr_hash_make(ptemp);
wc_h = apr_hash_make(ptemp);
ka_h = apr_hash_make(ptemp);
+ sh_h = apr_hash_make(ptemp);
bl_h = apr_hash_make(ptemp);
linger_q = TO_QUEUE_MAKE(pconf, "linger",
@@ -4475,8 +4519,11 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
sc->ka_q = TO_QUEUE_CHAIN(pconf, "keepalive", s->keep_alive_timeout,
&keepalive_q, ka_h, ptemp);
+ sc->sh_q = TO_QUEUE_CHAIN(pconf, "shutdown", s->timeout,
+ &shutdown_q, sh_h, ptemp);
+
sc->bl_q = TO_QUEUE_CHAIN(pconf, "backlog", s->timeout,
- &backlog_q, bl_h, ptemp);
+ &backlog_q, bl_h, ptemp);
}
return OK;
From 364a3894b3b6c80211d615c6722f7607c3fe9d82 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Wed, 10 Jul 2024 15:08:28 +0200
Subject: [PATCH 12/22] mpm_event: Don't shrink keepalive queue when
busy/exiting.
---
server/mpm/event/event.c | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 8c5bee23115..f341f1daf87 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -1669,7 +1669,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
goto process_connection;
}
}
- if (pending != OK || listener_may_exit) {
+ if (pending != OK) {
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
}
@@ -2280,7 +2280,7 @@ static void process_lingering_close(event_conn_state_t *cs)
apr_size_t nbytes = sizeof(dummybuf);
rv = apr_socket_recv(csd, dummybuf, &nbytes);
} while (rv == APR_SUCCESS);
- if (APR_STATUS_IS_EAGAIN(rv)) {
+ if (APR_STATUS_IS_EAGAIN(rv) && !listensocks_disabled()) {
struct timeout_queue *q;
q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
if (pollset_add(cs, CONN_SENSE_WANT_READ, q, NULL)) {
@@ -2777,19 +2777,16 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
process_timeout_queue(waitio_q, now);
process_timeout_queue(write_completion_q, now);
+ process_timeout_queue(keepalive_q, now);
- /* The linger and keepalive queues can be shrunk any time
- * under pressure.
- */
+ /* The linger queues can be shrunk any time under pressure */
if (workers_were_busy || dying) {
shrink_timeout_queue(linger_q, now);
shrink_timeout_queue(short_linger_q, now);
- shrink_timeout_queue(keepalive_q, now);
}
else {
process_timeout_queue(linger_q, now);
process_timeout_queue(short_linger_q, now);
- process_timeout_queue(keepalive_q, now);
}
/* Connections in backlog race with the workers (dequeuing) under
@@ -2819,14 +2816,11 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
else if ((workers_were_busy || dying)
&& (apr_atomic_read32(linger_q->total)
- || apr_atomic_read32(short_linger_q->total)
- || apr_atomic_read32(keepalive_q->total))) {
+ || apr_atomic_read32(short_linger_q->total))) {
apr_thread_mutex_lock(timeout_mutex);
shrink_timeout_queue(linger_q, now);
shrink_timeout_queue(short_linger_q, now);
- shrink_timeout_queue(keepalive_q, now);
apr_thread_mutex_unlock(timeout_mutex);
- ps->keep_alive = 0;
}
} /* listener main loop */
From eb1eb7fb894dab129efd0f1181f3e2fd1a95ef74 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Mon, 26 Jun 2023 19:26:58 +0200
Subject: [PATCH 13/22] mpm_event: Single linger queue/timeout (short one, 2s).
---
server/mpm/event/event.c | 128 +++++++++++++--------------------------
1 file changed, 41 insertions(+), 87 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index f341f1daf87..0058ba20994 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -155,12 +155,8 @@
#define apr_time_from_msec(x) ((x) * 1000)
#endif
-#define CONN_STATE_IS_LINGERING_CLOSE(s) ((s) >= CONN_STATE_LINGER && \
- (s) <= CONN_STATE_LINGER_SHORT)
-#ifndef MAX_SECS_TO_LINGER
-#define MAX_SECS_TO_LINGER 30
-#endif
-#define SECONDS_TO_LINGER 2
+/* Lingering close (read) timeout */
+#define LINGER_READ_TIMEOUT apr_time_from_sec(2)
/* Don't wait more time in poll() if APR_POLLSET_WAKEABLE is not implemented */
#define NON_WAKEABLE_TIMEOUT apr_time_from_msec(100)
@@ -204,7 +200,6 @@ static volatile int start_thread_may_exit = 0;
static volatile int listener_may_exit = 0;
static apr_uint32_t connection_count = 0; /* Number of open connections */
static apr_uint32_t timers_count = 0; /* Number of queued timers */
-static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */
static apr_uint32_t suspended_count = 0; /* Number of suspended connections */
static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown
early during graceful termination */
@@ -458,8 +453,7 @@ struct timeout_queue {
* write_completion_q uses vhost's TimeOut
* keepalive_q uses vhost's KeepAliveTimeOut
* shutdown_q uses vhost's TimeOut
- * linger_q uses MAX_SECS_TO_LINGER
- * short_linger_q uses SECONDS_TO_LINGER
+ * linger_q uses LINGER_READ_TIMEOUT
* backlog_q uses vhost's TimeOut
*/
static struct timeout_queue *waitio_q, /* wait for I/O to happen */
@@ -467,7 +461,6 @@ static struct timeout_queue *waitio_q, /* wait for I/O to happen */
*keepalive_q, /* in between requests */
*shutdown_q, /* shutting down (write) before close */
*linger_q, /* lingering (read) before close */
- *short_linger_q, /* lingering (read) before close (short timeout) */
*backlog_q; /* waiting for a worker */
static volatile apr_time_t queues_next_expiry; /* next expiry time across all queues */
@@ -730,7 +723,7 @@ static int disable_listensocks(void)
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(10381)
"Suspend listening sockets: idlers:%i conns:%u backlog:%u "
"waitio:%u write:%u keepalive:%u shutdown:%u "
- "linger:%u/%u timers:%u suspended:%u",
+ "linger:%u timers:%u suspended:%u",
ap_queue_info_idlers_count(worker_queue_info),
apr_atomic_read32(&connection_count),
apr_atomic_read32(backlog_q->total),
@@ -739,7 +732,6 @@ static int disable_listensocks(void)
apr_atomic_read32(keepalive_q->total),
apr_atomic_read32(shutdown_q->total),
apr_atomic_read32(linger_q->total),
- apr_atomic_read32(short_linger_q->total),
apr_atomic_read32(&timers_count),
apr_atomic_read32(&suspended_count));
@@ -763,7 +755,7 @@ static int enable_listensocks(void)
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, APLOGNO(00457)
"Resume listening sockets: idlers:%i conns:%u backlog:%u "
"waitio:%u write:%u keepalive:%u shutdown:%u "
- "linger:%u/%u timers:%u suspended:%u",
+ "linger:%u timers:%u suspended:%u",
ap_queue_info_idlers_count(worker_queue_info),
apr_atomic_read32(&connection_count),
apr_atomic_read32(backlog_q->total),
@@ -772,7 +764,6 @@ static int enable_listensocks(void)
apr_atomic_read32(keepalive_q->total),
apr_atomic_read32(shutdown_q->total),
apr_atomic_read32(linger_q->total),
- apr_atomic_read32(short_linger_q->total),
apr_atomic_read32(&timers_count),
apr_atomic_read32(&suspended_count));
@@ -798,7 +789,7 @@ static APR_INLINE int connections_above_limit(int *busy)
apr_int32_t i_count = ap_queue_info_idlers_count(worker_queue_info);
if (i_count > 0) {
apr_uint32_t c_count = apr_atomic_read32(&connection_count);
- apr_uint32_t l_count = apr_atomic_read32(&lingering_count);
+ apr_uint32_t l_count = apr_atomic_read32(linger_q->total);
if (c_count <= l_count
/* Off by 'listensocks_disabled()' to avoid flip flop */
|| c_count - l_count < (apr_uint32_t)threads_per_child +
@@ -1092,17 +1083,12 @@ static apr_status_t decrement_connection_count(void *cs_)
CS_ARG_TO(cs));
switch (cs->pub.state) {
- case CONN_STATE_LINGER:
- case CONN_STATE_LINGER_NORMAL:
- case CONN_STATE_LINGER_SHORT:
- apr_atomic_dec32(&lingering_count);
- break;
- case CONN_STATE_SUSPENDED:
- apr_atomic_dec32(&suspended_count);
- break;
- default:
- break;
+ case CONN_STATE_SUSPENDED:
+ apr_atomic_dec32(&suspended_count);
+ default:
+ break;
}
+
/* Unblock the listener if it's waiting for connection_count = 0,
* or if the listening sockets were disabled due to limits and can
* now accept new connections.
@@ -1185,7 +1171,7 @@ static void push2worker(event_conn_state_t *cs, timer_event_t *te,
apr_time_t now, int *busy);
/* Shutdown the connection in case of timeout, error or resources shortage.
- * This starts short lingering close if not already there, or directly closes
+ * This starts lingering close if not already there, or directly closes
* the connection otherwise.
* Pre-condition: nonblocking, can be called from anywhere provided cs is not
* in the pollset nor any non-backlog timeout queue.
@@ -1199,8 +1185,6 @@ static void shutdown_connection(event_conn_state_t *cs, apr_time_t now,
int log_level = APLOG_INFO;
switch (cs->pub.state) {
case CONN_STATE_LINGER:
- case CONN_STATE_LINGER_NORMAL:
- case CONN_STATE_LINGER_SHORT:
case CONN_STATE_KEEPALIVE:
log_level = APLOG_TRACE2;
default:
@@ -1214,8 +1198,7 @@ static void shutdown_connection(event_conn_state_t *cs, apr_time_t now,
/* Don't re-schedule connections in lingering close, they had
* their chance already so just close them now.
*/
- if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
- apr_table_setn(cs->c->notes, "short-lingering-close", "1");
+ if (cs->pub.state != CONN_STATE_LINGER) {
cs->pub.state = CONN_STATE_LINGER;
push2worker(cs, NULL, now, NULL);
}
@@ -1530,7 +1513,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
"processing connection %" CS_FMT " (aborted %d, clogging %d)",
CS_ARG(cs), c->aborted, c->clogging_input_filters);
- if (CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
+ if (cs->pub.state == CONN_STATE_LINGER) {
goto lingering_close;
}
@@ -1628,7 +1611,6 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
q = cs->sc->io_q;
}
if (!pollset_add(cs, CONN_SENSE_WANT_READ, q, te)) {
- apr_table_setn(cs->c->notes, "short-lingering-close", "1");
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
}
@@ -1658,7 +1640,6 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
return; /* queued */
}
/* Fall through lingering close */
- apr_table_setn(cs->c->notes, "short-lingering-close", "1");
}
else if (pending == OK) {
/* Some data to process immediately? */
@@ -1692,7 +1673,6 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
notify_suspend(cs);
if (!pollset_add(cs, CONN_SENSE_WANT_READ, cs->ka_sc->ka_q, NULL)) {
- apr_table_setn(cs->c->notes, "short-lingering-close", "1");
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
}
@@ -1730,16 +1710,15 @@ static apr_status_t event_resume_suspended (conn_rec *c)
c->suspended_baton = NULL;
cs->pub.sense = CONN_SENSE_DEFAULT;
- if (!CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state)) {
+ if (cs->pub.state != CONN_STATE_LINGER) {
cs->pub.state = CONN_STATE_WRITE_COMPLETION;
if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q, NULL)) {
return APR_SUCCESS; /* queued */
}
/* fall through lingering close on error */
- apr_table_setn(cs->c->notes, "short-lingering-close", "1");
+ cs->pub.state = CONN_STATE_LINGER;
}
- cs->pub.state = CONN_STATE_LINGER;
process_lingering_close(cs);
return APR_SUCCESS;
}
@@ -2205,7 +2184,7 @@ static void process_lingering_close(event_conn_state_t *cs)
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
"lingering close for connection %" CS_FMT,
CS_ARG(cs));
- AP_DEBUG_ASSERT(CONN_STATE_IS_LINGERING_CLOSE(cs->pub.state));
+ AP_DEBUG_ASSERT(cs->pub.state == CONN_STATE_LINGER);
/* Flush and shutdown first */
if (!cs->linger_shutdown) {
@@ -2216,7 +2195,6 @@ static void process_lingering_close(event_conn_state_t *cs)
if (!cs->linger_started) {
cs->linger_started = 1; /* once! */
- apr_atomic_inc32(&lingering_count);
notify_suspend(cs);
/* Shutdown the connection, i.e. pre_connection_close hooks,
@@ -2259,18 +2237,6 @@ static void process_lingering_close(event_conn_state_t *cs)
/* All nonblocking from now, no need for APR_INCOMPLETE_READ either */
apr_socket_timeout_set(csd, 0);
apr_socket_opt_set(csd, APR_INCOMPLETE_READ, 0);
-
- /*
- * If some module requested a shortened waiting period, only wait for
- * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
- * DoS attacks.
- */
- if (apr_table_get(cs->c->notes, "short-lingering-close")) {
- cs->pub.state = CONN_STATE_LINGER_SHORT;
- }
- else {
- cs->pub.state = CONN_STATE_LINGER_NORMAL;
- }
}
/* Drain until EAGAIN or EOF/error, in the former case requeue and
@@ -2280,14 +2246,12 @@ static void process_lingering_close(event_conn_state_t *cs)
apr_size_t nbytes = sizeof(dummybuf);
rv = apr_socket_recv(csd, dummybuf, &nbytes);
} while (rv == APR_SUCCESS);
- if (APR_STATUS_IS_EAGAIN(rv) && !listensocks_disabled()) {
- struct timeout_queue *q;
- q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
- if (pollset_add(cs, CONN_SENSE_WANT_READ, q, NULL)) {
- return; /* queued */
- }
+
+ if (!APR_STATUS_IS_EAGAIN(rv)
+ || listensocks_disabled() /* busy enough */
+ || !pollset_add(cs, CONN_SENSE_WANT_READ, linger_q, NULL)) {
+ close_connection(cs);
}
- close_connection(cs);
}
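Reviewer note: the drain loop is the classic nonblocking pattern (the socket timeout was set to 0 above); as a standalone sketch:

    /* Read and discard until EAGAIN (peer may still send) or EOF/error. */
    static apr_status_t drain_input(apr_socket_t *csd)
    {
        char buf[512];
        apr_status_t rv;

        do {
            apr_size_t len = sizeof(buf);
            rv = apr_socket_recv(csd, buf, &len);
        } while (rv == APR_SUCCESS);

        return rv; /* APR_EAGAIN => requeue for read, else close */
    }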
/* Call shutdown_connection() for the elements of 'q' that timed out, or
@@ -2437,22 +2401,20 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
}
- if (APLOGtrace6(ap_server_conf)) {
- /* trace log status every second */
- if (now - last_log > apr_time_from_sec(1)) {
- ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
- "connections: %u (waitio:%u write:%u keepalive:%u "
- "lingering:%u suspended:%u), workers: %u/%u shutdown",
- apr_atomic_read32(&connection_count),
- apr_atomic_read32(waitio_q->total),
- apr_atomic_read32(write_completion_q->total),
- apr_atomic_read32(keepalive_q->total),
- apr_atomic_read32(&lingering_count),
- apr_atomic_read32(&suspended_count),
- apr_atomic_read32(&threads_shutdown),
- threads_per_child);
- last_log = now;
- }
+ /* trace log status every second */
+ if (APLOGtrace6(ap_server_conf) && now - last_log > apr_time_from_sec(1)) {
+ ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
+ "connections: %u (waitio:%d write:%d keepalive:%d "
+ "lingering:%d suspended:%u), workers: %u/%u shutdown",
+ apr_atomic_read32(&connection_count),
+ apr_atomic_read32(waitio_q->total),
+ apr_atomic_read32(write_completion_q->total),
+ apr_atomic_read32(keepalive_q->total),
+ apr_atomic_read32(linger_q->total),
+ apr_atomic_read32(&suspended_count),
+ apr_atomic_read32(&threads_shutdown),
+ threads_per_child);
+ last_log = now;
}
#if HAVE_SERF
@@ -2608,8 +2570,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
case CONN_STATE_ASYNC_WAITIO:
cs->pub.state = CONN_STATE_PROCESSING;
case CONN_STATE_WRITE_COMPLETION:
- case CONN_STATE_LINGER_NORMAL:
- case CONN_STATE_LINGER_SHORT:
+ case CONN_STATE_LINGER:
break;
default:
@@ -2779,14 +2740,12 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
process_timeout_queue(write_completion_q, now);
process_timeout_queue(keepalive_q, now);
- /* The linger queues can be shrunk any time under pressure */
+ /* The linger queue can be shrunk any time under pressure */
if (workers_were_busy || dying) {
shrink_timeout_queue(linger_q, now);
- shrink_timeout_queue(short_linger_q, now);
}
else {
process_timeout_queue(linger_q, now);
- process_timeout_queue(short_linger_q, now);
}
/* Connections in backlog race with the workers (dequeuing) under
@@ -2809,17 +2768,15 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ps->write_completion = apr_atomic_read32(write_completion_q->total);
ps->keep_alive = apr_atomic_read32(keepalive_q->total);
ps->shutdown = apr_atomic_read32(shutdown_q->total);
- ps->lingering_close = apr_atomic_read32(&lingering_count);
+ ps->lingering_close = apr_atomic_read32(linger_q->total);
ps->backlog = apr_atomic_read32(backlog_q->total);
ps->suspended = apr_atomic_read32(&suspended_count);
ps->connections = apr_atomic_read32(&connection_count);
}
else if ((workers_were_busy || dying)
- && (apr_atomic_read32(linger_q->total)
- || apr_atomic_read32(short_linger_q->total))) {
+ && apr_atomic_read32(linger_q->total)) {
apr_thread_mutex_lock(timeout_mutex);
shrink_timeout_queue(linger_q, now);
- shrink_timeout_queue(short_linger_q, now);
apr_thread_mutex_unlock(timeout_mutex);
}
} /* listener main loop */
@@ -4494,10 +4451,7 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
sh_h = apr_hash_make(ptemp);
bl_h = apr_hash_make(ptemp);
- linger_q = TO_QUEUE_MAKE(pconf, "linger",
- apr_time_from_sec(MAX_SECS_TO_LINGER), NULL);
- short_linger_q = TO_QUEUE_MAKE(pconf, "short_linger",
- apr_time_from_sec(SECONDS_TO_LINGER), NULL);
+ linger_q = TO_QUEUE_MAKE(pconf, "linger", LINGER_READ_TIMEOUT, NULL);
for (; s; s = s->next) {
event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
From c82d67ad99dd9f725135a8a0e5d8bf87b9a9b2d8 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Mon, 26 Jun 2023 21:55:25 +0200
Subject: [PATCH 14/22] mpm_event: Periodic linger queue shrink (500ms).
---
server/mpm/event/event.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 0058ba20994..2d33613c41f 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -158,6 +158,9 @@
/* Lingering close (read) timeout */
#define LINGER_READ_TIMEOUT apr_time_from_sec(2)
+/* Shrink linger_q at most once per this period when busy */
+#define QUEUES_SHRINK_TIMEOUT apr_time_from_msec(500)
+
/* Don't wait more time in poll() if APR_POLLSET_WAKEABLE is not implemented */
#define NON_WAKEABLE_TIMEOUT apr_time_from_msec(100)
@@ -2348,7 +2351,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
proc_info *ti = dummy;
int process_slot = ti->pslot;
process_score *ps = ap_get_scoreboard_process(process_slot);
- apr_time_t last_log;
+ apr_time_t last_log, next_shrink_time = 0;
last_log = event_time_now();
free(ti);
@@ -2743,6 +2746,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
/* The linger queue can be shrunk any time under pressure */
if (workers_were_busy || dying) {
shrink_timeout_queue(linger_q, now);
+ next_shrink_time = now + QUEUES_SHRINK_TIMEOUT;
}
else {
process_timeout_queue(linger_q, now);
@@ -2773,11 +2777,13 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ps->suspended = apr_atomic_read32(&suspended_count);
ps->connections = apr_atomic_read32(&connection_count);
}
- else if ((workers_were_busy || dying)
+ else if (next_shrink_time <= now
+ && (workers_were_busy || dying)
&& apr_atomic_read32(linger_q->total)) {
apr_thread_mutex_lock(timeout_mutex);
shrink_timeout_queue(linger_q, now);
apr_thread_mutex_unlock(timeout_mutex);
+ next_shrink_time = now + QUEUES_SHRINK_TIMEOUT;
}
} /* listener main loop */
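Reviewer note: the next_shrink_time bookkeeping is a plain "at most once per period" rate limit; schematically (under_pressure is a stand-in here for workers_were_busy || dying, the rest comes from the patch):

    /* Shrink linger_q at most once per QUEUES_SHRINK_TIMEOUT while the
     * child is busy or dying.
     */
    if (under_pressure && now >= next_shrink_time
            && apr_atomic_read32(linger_q->total)) {
        apr_thread_mutex_lock(timeout_mutex);
        shrink_timeout_queue(linger_q, now);
        apr_thread_mutex_unlock(timeout_mutex);
        next_shrink_time = now + QUEUES_SHRINK_TIMEOUT;
    }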
From eddf29957cfe27fb6243a16767d1508fa1295254 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Mon, 26 Jun 2023 20:05:33 +0200
Subject: [PATCH 15/22] mpm_event: Use atomic reads/writes for shared
resources.
---
server/mpm/event/event.c | 123 ++++++++++++++++++++++++---------------
1 file changed, 75 insertions(+), 48 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 2d33613c41f..37e6f1b63fd 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -197,15 +197,16 @@ static int server_limit = 0; /* ServerLimit */
static int thread_limit = 0; /* ThreadLimit */
static int conns_this_child = 0; /* MaxConnectionsPerChild, only accessed
in listener thread */
-static volatile int dying = 0;
-static volatile int workers_may_exit = 0;
-static volatile int start_thread_may_exit = 0;
-static volatile int listener_may_exit = 0;
-static apr_uint32_t connection_count = 0; /* Number of open connections */
-static apr_uint32_t timers_count = 0; /* Number of queued timers */
-static apr_uint32_t suspended_count = 0; /* Number of suspended connections */
-static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown
- early during graceful termination */
+static /*atomic*/ apr_uint32_t dying = 0;
+static /*atomic*/ apr_uint32_t workers_may_exit = 0;
+static /*atomic*/ apr_uint32_t start_thread_may_exit = 0;
+static /*atomic*/ apr_uint32_t listener_may_exit = 0;
+static /*atomic*/ apr_uint32_t connection_count = 0; /* Number of open connections */
+static /*atomic*/ apr_uint32_t timers_count = 0; /* Number of queued timers */
+static /*atomic*/ apr_uint32_t suspended_count = 0; /* Number of suspended connections */
+static /*atomic*/ apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown
+ early during graceful termination */
+
static int had_healthy_child = 0;
static int resource_shortage = 0;
@@ -481,9 +482,14 @@ static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *cs)
cs->q = q;
cs->queue_timestamp = event_time_now();
APR_RING_INSERT_TAIL(&q->head, cs, event_conn_state_t, timeout_list);
- ++*q->total;
++q->count;
+ /* Use atomic_set to be ordered/consistent with potential atomic reads
+ * outside the critical section, but writes are protected so a more
+ * expensive atomic_inc is not needed.
+ */
+ apr_atomic_set32(q->total, *q->total + 1);
+
/* Cheaply update the global queues_next_expiry with the one of the
* first entry of this queue (oldest) if it expires before.
*/
@@ -506,8 +512,13 @@ static void TO_QUEUE_REMOVE(struct timeout_queue *q, event_conn_state_t *cs)
APR_RING_REMOVE(cs, timeout_list);
APR_RING_ELEM_INIT(cs, timeout_list);
- --*q->total;
--q->count;
+
+ /* Use atomic_set to be ordered/consistent with potential atomic reads
+ * outside the critical section, but writes are protected so a more
+ * expensive atomic_dec is not needed.
+ */
+ apr_atomic_set32(q->total, *q->total - 1);
}
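Reviewer note on the pattern used here, as a sketch: mutations of *q->total are already serialized by the timeout mutex, so a non-atomic read-modify-write between writers is safe; the atomic store only guarantees that lock-free readers (the stats and trace-log paths using apr_atomic_read32()) never observe a torn value.

    /* Writer side (always under the lock): cheap set, no atomic inc/dec. */
    apr_atomic_set32(q->total, *q->total + 1);   /* append */
    apr_atomic_set32(q->total, *q->total - 1);   /* remove */

    /* Reader side (any thread, lockless): */
    apr_uint32_t total = apr_atomic_read32(q->total);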
static struct timeout_queue *TO_QUEUE_MAKE(apr_pool_t *p,
@@ -717,6 +728,7 @@ static /*atomic*/ apr_uint32_t listensocks_off = 0;
static int disable_listensocks(void)
{
+ volatile process_score *ps;
int i;
if (apr_atomic_cas32(&listensocks_off, 1, 0) != 0) {
@@ -738,7 +750,8 @@ static int disable_listensocks(void)
apr_atomic_read32(&timers_count),
apr_atomic_read32(&suspended_count));
- ap_scoreboard_image->parent[ap_child_slot].not_accepting = 1;
+ ps = &ap_scoreboard_image->parent[ap_child_slot];
+ ps->not_accepting = 1;
for (i = 0; i < num_listensocks; i++) {
apr_pollset_remove(event_pollset, &listener_pollfd[i]);
@@ -748,9 +761,10 @@ static int disable_listensocks(void)
static int enable_listensocks(void)
{
+ volatile process_score *ps;
int i;
- if (listener_may_exit
+ if (apr_atomic_read32(&dying)
|| apr_atomic_cas32(&listensocks_off, 0, 1) != 1) {
return 0;
}
@@ -774,7 +788,8 @@ static int enable_listensocks(void)
* XXX: This is not yet optimal. If many workers suddenly become available,
* XXX: the parent may kill some processes off too soon.
*/
- ap_scoreboard_image->parent[ap_child_slot].not_accepting = 0;
+ ps = &ap_scoreboard_image->parent[ap_child_slot];
+ ps->not_accepting = 0;
for (i = 0; i < num_listensocks; i++) {
apr_pollset_add(event_pollset, &listener_pollfd[i]);
@@ -809,7 +824,9 @@ static APR_INLINE int connections_above_limit(int *busy)
static APR_INLINE int should_enable_listensocks(void)
{
- return !dying && listensocks_disabled() && !connections_above_limit(NULL);
+ return (listensocks_disabled()
+ && !apr_atomic_read32(&dying)
+ && !connections_above_limit(NULL));
}
static void close_socket_at(apr_socket_t *csd,
@@ -855,10 +872,9 @@ static void shutdown_listener(void)
{
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
"shutting down listener%s",
- listener_may_exit ? " again" : "");
+ apr_atomic_read32(&listener_may_exit) ? " again" : "");
- listener_may_exit = 1;
- disable_listensocks();
+ apr_atomic_set32(&listener_may_exit, 1);
/* Unblock the listener if it's poll()ing */
if (event_pollset && listener_is_wakeable) {
@@ -914,7 +930,7 @@ static void signal_threads(int mode)
* workers to exit once it has stopped accepting new connections
*/
if (mode == ST_UNGRACEFUL) {
- workers_may_exit = 1;
+ apr_atomic_set32(&workers_may_exit, 1);
ap_queue_interrupt_all(worker_queue);
close_worker_sockets(); /* forcefully kill all current connections */
}
@@ -993,7 +1009,7 @@ static int event_query(int query_code, int *result, apr_status_t *rv)
static void event_note_child_stopped(int slot, pid_t pid, ap_generation_t gen)
{
if (slot != -1) { /* child had a scoreboard slot? */
- process_score *ps = &ap_scoreboard_image->parent[slot];
+ volatile process_score *ps = &ap_scoreboard_image->parent[slot];
int i;
pid = ps->pid;
@@ -1079,8 +1095,9 @@ static int child_fatal;
static apr_status_t decrement_connection_count(void *cs_)
{
- int is_last_connection;
event_conn_state_t *cs = cs_;
+ int is_last_connection, is_dying;
+
ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
"connection %" CS_FMT_TO " cleaned up",
CS_ARG_TO(cs));
@@ -1097,12 +1114,13 @@ static apr_status_t decrement_connection_count(void *cs_)
* now accept new connections.
*/
is_last_connection = !apr_atomic_dec32(&connection_count);
+ is_dying = apr_atomic_read32(&dying);
if (listener_is_wakeable
- && ((is_last_connection && listener_may_exit)
+ && ((is_last_connection && is_dying)
|| should_enable_listensocks())) {
apr_pollset_wakeup(event_pollset);
}
- if (dying) {
+ if (is_dying) {
/* Help worker_thread_should_exit_early() */
ap_queue_interrupt_one(worker_queue);
}
@@ -1325,7 +1343,7 @@ static int pollset_add_at(event_conn_state_t *cs, int sense,
}
/* close_worker_sockets() may have closed it already */
- if (workers_may_exit) {
+ if (apr_atomic_read32(&workers_may_exit)) {
AP_DEBUG_ASSERT(APR_STATUS_IS_EBADF(rv));
}
else {
@@ -1742,10 +1760,14 @@ static void check_infinite_requests(void)
static void set_child_dying(void)
{
+ volatile process_score *ps;
+
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, "quiescing");
+ ps = &ap_scoreboard_image->parent[ap_child_slot];
+ ps->quiescing = 1;
- dying = 1;
- ap_scoreboard_image->parent[ap_child_slot].quiescing = 1;
+ apr_atomic_set32(&dying, 1);
+ disable_listensocks(); /* definitively with dying = 1 */
ap_close_listeners_ex(my_bucket->listeners);
#if 0
@@ -2340,7 +2362,7 @@ static APR_INLINE void shrink_timeout_queue(struct timeout_queue *queue,
if (count) {
ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
"All workers are %s, %s queue shrinked (%u done, %u left)",
- dying ? "dying" : "busy", queue->name,
+ apr_atomic_read32(&dying) ? "dying" : "busy", queue->name,
count, apr_atomic_read32(queue->total));
}
}
@@ -2384,8 +2406,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
now = poll_time = event_time_now();
- if (listener_may_exit) {
- int once = !dying;
+ if (apr_atomic_read32(&listener_may_exit)) {
+ int once = !apr_atomic_read32(&dying);
if (once) {
set_child_dying();
}
@@ -2519,7 +2541,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
timers_next_expiry ? timers_next_expiry - now : 0,
listensocks_disabled() ? "no" : "yes",
apr_atomic_read32(&connection_count),
- listener_may_exit, dying);
+ apr_atomic_read32(&listener_may_exit),
+ apr_atomic_read32(&dying));
rc = apr_pollset_poll(event_pollset, timeout, &num, &out_pfd);
if (rc != APR_SUCCESS) {
@@ -2554,7 +2577,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
timers_next_expiry ? timers_next_expiry - now : 0,
listensocks_disabled() ? "no" : "yes",
apr_atomic_read32(&connection_count),
- listener_may_exit, dying);
+ apr_atomic_read32(&listener_may_exit),
+ apr_atomic_read32(&dying));
for (user_chain = NULL; num > 0; --num, ++out_pfd) {
listener_poll_type *pt = out_pfd->client_data;
@@ -2601,7 +2625,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
*/
continue;
}
- if (!dying) {
+ if (!apr_atomic_read32(&dying)) {
void *csd = NULL;
ap_listen_rec *lr = (ap_listen_rec *) pt->baton;
apr_pool_t *ptrans; /* Pool for per-transaction stuff */
@@ -2744,7 +2768,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
process_timeout_queue(keepalive_q, now);
/* The linger queue can be shrunk any time under pressure */
- if (workers_were_busy || dying) {
+ if (workers_were_busy || apr_atomic_read32(&dying)) {
shrink_timeout_queue(linger_q, now);
next_shrink_time = now + QUEUES_SHRINK_TIMEOUT;
}
@@ -2778,7 +2802,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ps->connections = apr_atomic_read32(&connection_count);
}
else if (next_shrink_time <= now
- && (workers_were_busy || dying)
+ && (workers_were_busy || apr_atomic_read32(&dying))
&& apr_atomic_read32(linger_q->total)) {
apr_thread_mutex_lock(timeout_mutex);
shrink_timeout_queue(linger_q, now);
@@ -2870,17 +2894,18 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
}
ap_update_child_status_from_indexes(process_slot, thread_slot,
- dying ? SERVER_GRACEFUL
- : SERVER_READY,
+ (apr_atomic_read32(&dying)
+ ? SERVER_GRACEFUL : SERVER_READY),
NULL);
- if (workers_may_exit) {
+ if (apr_atomic_read32(&workers_may_exit)) {
ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
"worker thread %i/%i may exit",
thread_slot, threads_per_child);
break;
}
- if (dying && worker_thread_should_exit_early(thread_slot)) {
+ if (apr_atomic_read32(&dying)
+ && worker_thread_should_exit_early(thread_slot)) {
break;
}
@@ -2907,7 +2932,7 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
* may have already been cleaned up. Don't log the "error" if
* workers_may_exit is set.
*/
- if (!APR_STATUS_IS_EINTR(rv) && !workers_may_exit) {
+ if (!APR_STATUS_IS_EINTR(rv) && !apr_atomic_read32(&workers_may_exit)) {
ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
APLOGNO(03099) "ap_queue_pop_event failed");
AP_DEBUG_ASSERT(0);
@@ -2966,8 +2991,8 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
}
ap_update_child_status_from_indexes(process_slot, thread_slot,
- dying ? SERVER_DEAD
- : SERVER_GRACEFUL,
+ (apr_atomic_read32(&dying)
+ ? SERVER_DEAD : SERVER_GRACEFUL),
NULL);
apr_thread_exit(thd, APR_SUCCESS);
@@ -3240,7 +3265,8 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
}
- if (start_thread_may_exit || threads_created == threads_per_child) {
+ if (apr_atomic_read32(&start_thread_may_exit)
+ || threads_created == threads_per_child) {
break;
}
/* wait for previous generation to clean up an entry */
@@ -3290,9 +3316,9 @@ static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
*/
iter = 0;
- while (!dying) {
+ while (!apr_atomic_read32(&dying)) {
apr_sleep(apr_time_from_msec(500));
- if (dying || ++iter > 10) {
+ if (apr_atomic_read32(&dying) || ++iter > 10) {
break;
}
/* listener has not stopped accepting yet */
@@ -3332,10 +3358,11 @@ static void join_start_thread(apr_thread_t * start_thread_id)
{
apr_status_t rv, thread_rv;
- start_thread_may_exit = 1; /* tell it to give up in case it is still
- * trying to take over slots from a
- * previous generation
- */
+ /* tell it to give up in case it is still trying to take over slots
+ * from a previous generation
+ */
+ apr_atomic_set32(&start_thread_may_exit, 1);
+
rv = apr_thread_join(&thread_rv, start_thread_id);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00478)
From 143a83e09ba4496cfe6e41e4eb003ce17a76ce2d Mon Sep 17 00:00:00 2001
From: ylavic
Date: Mon, 3 Jun 2024 16:47:50 +0200
Subject: [PATCH 16/22] mpm_event: Periodic scoreboard stats update (1s).
---
server/mpm/event/event.c | 115 +++++++++++++++++++++++++++------------
1 file changed, 79 insertions(+), 36 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 37e6f1b63fd..7141c46ce87 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -161,6 +161,9 @@
/* Shrink linger_q at this period (min) when busy */
#define QUEUES_SHRINK_TIMEOUT apr_time_from_msec(500)
+/* Update scoreboard stats at this period */
+#define STATS_UPDATE_TIMEOUT apr_time_from_msec(1000)
+
/* Don't wait more time in poll() if APR_POLLSET_WAKEABLE is not implemented */
#define NON_WAKEABLE_TIMEOUT apr_time_from_msec(100)
@@ -2367,15 +2370,53 @@ static APR_INLINE void shrink_timeout_queue(struct timeout_queue *queue,
}
}
+static void update_stats(process_score *ps, apr_time_t now,
+ apr_time_t *when, int force)
+{
+ int expired = (*when <= now);
+
+ if (expired || force) {
+ apr_atomic_set32(&ps->wait_io, apr_atomic_read32(waitio_q->total));
+ apr_atomic_set32(&ps->write_completion, apr_atomic_read32(write_completion_q->total));
+ apr_atomic_set32(&ps->keep_alive, apr_atomic_read32(keepalive_q->total));
+ apr_atomic_set32(&ps->shutdown, apr_atomic_read32(shutdown_q->total));
+ apr_atomic_set32(&ps->lingering_close, apr_atomic_read32(linger_q->total));
+ apr_atomic_set32(&ps->backlog, apr_atomic_read32(backlog_q->total));
+ apr_atomic_set32(&ps->suspended, apr_atomic_read32(&suspended_count));
+ apr_atomic_set32(&ps->connections, apr_atomic_read32(&connection_count));
+ }
+
+ if (expired) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
+ "child: idlers:%i conns:%u backlog:%u "
+ "waitio:%u write:%u keepalive:%u shutdown:%u linger:%u "
+ "timers:%u suspended:%u (%u/%u workers shutdown)",
+ ap_queue_info_idlers_count(worker_queue_info),
+ apr_atomic_read32(&connection_count),
+ apr_atomic_read32(backlog_q->total),
+ apr_atomic_read32(waitio_q->total),
+ apr_atomic_read32(write_completion_q->total),
+ apr_atomic_read32(keepalive_q->total),
+ apr_atomic_read32(shutdown_q->total),
+ apr_atomic_read32(linger_q->total),
+ apr_atomic_read32(&timers_count),
+ apr_atomic_read32(&suspended_count),
+ apr_atomic_read32(&threads_shutdown),
+ threads_per_child);
+
+ *when = now + STATS_UPDATE_TIMEOUT;
+ }
+}
+
static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
{
apr_status_t rc;
proc_info *ti = dummy;
int process_slot = ti->pslot;
process_score *ps = ap_get_scoreboard_process(process_slot);
- apr_time_t last_log, next_shrink_time = 0;
+ apr_time_t next_stats_time = 0, next_shrink_time = 0;
+ apr_interval_time_t min_poll_timeout = -1;
- last_log = event_time_now();
free(ti);
#if HAVE_SERF
@@ -2388,11 +2429,21 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
unblock_signal(LISTENER_SIGNAL);
+ /* Don't wait in poll() for more than NON_WAKEABLE_TIMEOUT if the pollset
+ * is not wakeable, and not more than the stats update period either.
+ */
+ if (!listener_is_wakeable) {
+ min_poll_timeout = NON_WAKEABLE_TIMEOUT;
+ }
+ if (min_poll_timeout < 0 || min_poll_timeout > STATS_UPDATE_TIMEOUT) {
+ min_poll_timeout = STATS_UPDATE_TIMEOUT;
+ }
+
for (;;) {
apr_int32_t num = 0;
apr_time_t next_expiry = -1;
apr_interval_time_t timeout = -1;
- int workers_were_busy = 0;
+ int workers_were_busy = 0, force_stats = 0;
socket_callback_baton_t *user_chain;
const apr_pollfd_t *out_pfd;
apr_time_t now, poll_time;
@@ -2426,22 +2477,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
}
}
- /* trace log status every second */
- if (APLOGtrace6(ap_server_conf) && now - last_log > apr_time_from_sec(1)) {
- ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
- "connections: %u (waitio:%d write:%d keepalive:%d "
- "lingering:%d suspended:%u), workers: %u/%u shutdown",
- apr_atomic_read32(&connection_count),
- apr_atomic_read32(waitio_q->total),
- apr_atomic_read32(write_completion_q->total),
- apr_atomic_read32(keepalive_q->total),
- apr_atomic_read32(linger_q->total),
- apr_atomic_read32(&suspended_count),
- apr_atomic_read32(&threads_shutdown),
- threads_per_child);
- last_log = now;
- }
-
#if HAVE_SERF
rc = serf_context_prerun(g_serf);
if (rc != APR_SUCCESS) {
@@ -2512,15 +2547,32 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
timeout = next_expiry > now ? next_expiry - now : 0;
}
- /* When non-wakeable, don't wait more than 100 ms, in any case. */
- if (!listener_is_wakeable && (timeout < 0 || timeout > NON_WAKEABLE_TIMEOUT)) {
- timeout = NON_WAKEABLE_TIMEOUT;
+ /* So long as there are connections, wake up at most every
+ * min_poll_timeout to refresh the scoreboard stats.
+ */
+ if (timeout < 0 || timeout > min_poll_timeout) {
+ if (timeout > 0
+ || !listener_is_wakeable
+ || apr_atomic_read32(&connection_count)) {
+ timeout = next_stats_time - now;
+ if (timeout <= 0 || timeout > min_poll_timeout) {
+ timeout = min_poll_timeout;
+ }
+ }
+ else {
+ /* No connections and entering infinite poll(),
+ * clear the stats first.
+ */
+ force_stats = 1;
+ }
}
- else if (timeout > 0) {
- /* apr_pollset_poll() might round down the timeout to
- * milliseconds, let's forcibly round up here to never
- * return before the timeout.
- */
+ update_stats(ps, now, &next_stats_time, force_stats);
+
+ /* apr_pollset_poll() might round down the timeout to
+ * milliseconds, let's forcibly round up here to never
+ * return before the timeout.
+ */
+ if (timeout > 0) {
timeout = apr_time_from_msec(
apr_time_as_msec(timeout + apr_time_from_msec(1) - 1)
);
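Reviewer note, to make the round-up concrete (illustrative numbers): for timeout = 1500 usec, timeout + apr_time_from_msec(1) - 1 = 2499 usec, apr_time_as_msec() truncates that to 2 ms, and apr_time_from_msec() turns it back into 2000 usec, so poll() cannot fire before the requested 1.5 ms; an exact multiple such as 2000 usec maps back to 2000 usec unchanged.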
@@ -2791,15 +2843,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
ap_log_error(APLOG_MARK, APLOG_TRACE7, 0, ap_server_conf,
"queues maintained: next timeout=%" APR_TIME_T_FMT,
next_expiry ? next_expiry - now : -1);
-
- ps->wait_io = apr_atomic_read32(waitio_q->total);
- ps->write_completion = apr_atomic_read32(write_completion_q->total);
- ps->keep_alive = apr_atomic_read32(keepalive_q->total);
- ps->shutdown = apr_atomic_read32(shutdown_q->total);
- ps->lingering_close = apr_atomic_read32(linger_q->total);
- ps->backlog = apr_atomic_read32(backlog_q->total);
- ps->suspended = apr_atomic_read32(&suspended_count);
- ps->connections = apr_atomic_read32(&connection_count);
}
else if (next_shrink_time <= now
&& (workers_were_busy || apr_atomic_read32(&dying))
From fccc1622e5b321815e57c7304cdee27f736f4211 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 27 Jun 2023 05:33:34 +0200
Subject: [PATCH 17/22] mpm_event: Autotuning from MaxRequestWorkers.
---
server/mpm/event/event.c | 494 ++++++++++++++++++++++++++-------------
1 file changed, 329 insertions(+), 165 deletions(-)
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 7141c46ce87..3007dc8b33b 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -149,6 +149,21 @@
#define MAX_THREAD_LIMIT 100000
#endif
+#ifndef DEFAULT_ASYNC_FACTOR
+#define DEFAULT_ASYNC_FACTOR 2.0
+#endif
+
+#ifndef MAX_SPARE_THREADS_RATIO
+#define MAX_SPARE_THREADS_RATIO 0.75 /* of MaxRequestWorkers */
+#endif
+#ifndef MAX_DAEMONS_THREADS_RATIO
+#define MAX_DAEMONS_THREADS_RATIO 32
+#endif
+
+#ifndef SCOREBOARD_DAEMONS_FACTOR
+#define SCOREBOARD_DAEMONS_FACTOR 4
+#endif
+
#define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
#if !APR_VERSION_AT_LEAST(1,4,0)
@@ -183,13 +198,6 @@
* Actual definitions of config globals
*/
-#ifndef DEFAULT_WORKER_FACTOR
-#define DEFAULT_WORKER_FACTOR 2
-#endif
-#define WORKER_FACTOR_SCALE 16 /* scale factor to allow fractional values */
-static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
- /* AsyncRequestWorkerFactor * 16 */
-
static int threads_per_child = 0; /* ThreadsPerChild */
static int ap_daemons_to_start = 0; /* StartServers */
static int min_spare_threads = 0; /* MinSpareThreads */
@@ -200,6 +208,12 @@ static int server_limit = 0; /* ServerLimit */
static int thread_limit = 0; /* ThreadLimit */
static int conns_this_child = 0; /* MaxConnectionsPerChild, only accessed
in listener thread */
+static double async_factor = DEFAULT_ASYNC_FACTOR; /* AsyncRequestWorkerFactor */
+
+static int auto_settings = 0; /* Auto settings based on max_workers
+ and num_online_cpus */
+static int num_online_cpus = 0; /* Number of CPUs detected */
+
static /*atomic*/ apr_uint32_t dying = 0;
static /*atomic*/ apr_uint32_t workers_may_exit = 0;
static /*atomic*/ apr_uint32_t start_thread_may_exit = 0;
@@ -627,11 +641,16 @@ typedef struct event_retained_data {
apr_pool_t *gen_pool; /* generation pool (children start->stop lifetime) */
event_child_bucket *buckets; /* children buckets (reset per generation) */
+ ap_listen_rec **listen_buckets;
+ int num_listen_buckets;
+
int first_server_limit;
int first_thread_limit;
+ int first_server_sb_limit;
int sick_child_detected;
int maxclients_reported;
int near_maxclients_reported;
+
/*
* The max child slot ever assigned, preserved across restarts. Necessary
* to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
@@ -815,7 +834,7 @@ static APR_INLINE int connections_above_limit(int *busy)
/* Off by 'listensocks_disabled()' to avoid flip flop */
|| c_count - l_count < (apr_uint32_t)threads_per_child +
(i_count - listensocks_disabled()) *
- (worker_factor / WORKER_FACTOR_SCALE)) {
+ async_factor) {
return 0;
}
}
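Reviewer note, for intuition (numbers invented): with threads_per_child = 25, async_factor = 2.0, 10 idle workers and listeners enabled (listensocks_disabled() == 0), the child keeps accepting while c_count - l_count stays below 25 + 10 * 2.0 = 45; and since l_count now reads linger_q->total, only connections actually queued for lingering close are discounted from the limit.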
@@ -3082,11 +3101,12 @@ static void setup_threads_runtime(void)
const int good_methods[] = { APR_POLLSET_PORT,
APR_POLLSET_KQUEUE,
APR_POLLSET_EPOLL };
- /* XXX: K-A or lingering close connection included in the async factor */
- unsigned int async_factor = (worker_factor < WORKER_FACTOR_SCALE * 2
- ? WORKER_FACTOR_SCALE * 2 : worker_factor);
- unsigned int async_threads = (threads_per_child * async_factor / WORKER_FACTOR_SCALE);
- const apr_size_t pollset_size = (num_listensocks + async_threads + POLLSET_RESERVE_SIZE);
+ const double threads_factor = (async_factor < DEFAULT_ASYNC_FACTOR
+ ? DEFAULT_ASYNC_FACTOR
+ : async_factor);
+ const apr_size_t pollset_size = ((unsigned int)(threads_per_child * threads_factor) +
+ (unsigned int)num_listensocks +
+ POLLSET_RESERVE_SIZE);
int pollset_flags;
/* Event's skiplist operations will happen concurrently with other modules'
@@ -4063,76 +4083,27 @@ static void server_main_loop(int remaining_children_to_start)
static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
{
- ap_listen_rec **listen_buckets = NULL;
- int num_buckets = retained->mpm->num_buckets;
int remaining_children_to_start;
+ int num_buckets, i;
apr_status_t rv;
- int i;
ap_log_pid(pconf, ap_pid_fname);
- /* On first startup create gen_pool to satisfy the lifetime of the
- * parent's PODs and listeners; on restart stop the children from the
- * previous generation and clear gen_pool for the next one.
- */
- if (!retained->gen_pool) {
- apr_pool_create(&retained->gen_pool, ap_pglobal);
- }
- else {
- if (retained->mpm->was_graceful) {
- /* wake up the children...time to die. But we'll have more soon */
- for (i = 0; i < num_buckets; i++) {
- ap_mpm_podx_killpg(retained->buckets[i].pod,
- active_daemons_limit, AP_MPM_PODX_GRACEFUL);
- }
- }
- else {
- /* Kill 'em all. Since the child acts the same on the parents SIGTERM
- * and a SIGHUP, we may as well use the same signal, because some user
- * pthreads are stealing signals from us left and right.
- */
- for (i = 0; i < num_buckets; i++) {
- ap_mpm_podx_killpg(retained->buckets[i].pod,
- active_daemons_limit, AP_MPM_PODX_RESTART);
- }
- ap_reclaim_child_processes(1, /* Start with SIGTERM */
- event_note_child_stopped);
- }
- apr_pool_clear(retained->gen_pool);
- retained->buckets = NULL;
-
- /* advance to the next generation */
- /* XXX: we really need to make sure this new generation number isn't in
- * use by any of the previous children.
- */
- ++retained->mpm->my_generation;
- }
-
- /* On graceful restart, preserve the scoreboard and the listeners buckets.
- * When ungraceful, clear the scoreboard and set num_buckets to zero to let
- * ap_duplicate_listeners() below determine how many are needed/configured.
- */
- if (!retained->mpm->was_graceful) {
- if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
- retained->mpm->mpm_state = AP_MPMQ_STOPPING;
- return !OK;
- }
- num_buckets = (one_process) ? 1 : 0; /* one_process => one bucket */
- retained->mpm->num_buckets = 0; /* reset idle_spawn_rate below */
+ /* Preserve the scoreboard on graceful restart, reset when ungraceful */
+ if (!retained->mpm->was_graceful
+ && ap_run_pre_mpm(s->process->pool, SB_SHARED)) {
+ retained->mpm->mpm_state = AP_MPMQ_STOPPING;
+ return !OK;
}
/* Now on for the new generation. */
ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
ap_unixd_mpm_set_signals(pconf, one_process);
- if ((rv = ap_duplicate_listeners(retained->gen_pool, ap_server_conf,
- &listen_buckets, &num_buckets))) {
- ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
- ap_server_conf, APLOGNO(03273)
- "could not duplicate listeners");
- return !OK;
- }
-
+ /* Set the buckets' listeners from the listen_buckets initialized
+ * in event_open_logs().
+ */
+ num_buckets = retained->num_listen_buckets;
retained->buckets = apr_pcalloc(retained->gen_pool,
num_buckets * sizeof(event_child_bucket));
for (i = 0; i < num_buckets; i++) {
@@ -4144,8 +4115,11 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
"could not open pipe-of-death");
return !OK;
}
- retained->buckets[i].listeners = listen_buckets[i];
+ retained->buckets[i].listeners = retained->listen_buckets[i];
}
+ /* Reset for the next generation/restart */
+ retained->listen_buckets = NULL;
+ retained->num_listen_buckets = 0;
/* If num_buckets changed, adjust max_spawn_rate and the free_slots buffer */
if (retained->mpm->num_buckets != num_buckets) {
@@ -4178,23 +4152,14 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
retained->mpm->num_buckets = num_buckets;
}
- /* Don't thrash since num_buckets depends on the
- * system and the number of online CPU cores...
- */
- if (active_daemons_limit < num_buckets)
- active_daemons_limit = num_buckets;
- if (ap_daemons_to_start < num_buckets)
- ap_daemons_to_start = num_buckets;
- /* We want to create as much children at a time as the number of buckets,
- * so to optimally accept connections (evenly distributed across buckets).
- * Thus min_spare_threads should at least maintain num_buckets children,
- * and max_spare_threads allow num_buckets more children w/o triggering
- * immediately (e.g. num_buckets idle threads margin, one per bucket).
- */
- if (min_spare_threads < threads_per_child * (num_buckets - 1) + num_buckets)
- min_spare_threads = threads_per_child * (num_buckets - 1) + num_buckets;
- if (max_spare_threads < min_spare_threads + (threads_per_child + 1) * num_buckets)
- max_spare_threads = min_spare_threads + (threads_per_child + 1) * num_buckets;
+ ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(10464)
+ "MPM event settings%s: MaxRequestWorkers=%d AsyncRequestWorkerFactor=%.1lf "
+ "ThreadsPerChild=%d ThreadLimit=%d MinSpareThreads=%d MaxSpareThreads=%d "
+ "ServerLimit=%d/%d StartServers=%d Buckets=%d CPUs=%d",
+ auto_settings ? " (auto)" : "", max_workers, async_factor,
+ threads_per_child, thread_limit, min_spare_threads, max_spare_threads,
+ active_daemons_limit, server_limit, ap_daemons_to_start,
+ num_buckets, num_online_cpus);
/* If we're doing a graceful_restart then we're going to see a lot
* of children exiting immediately when we get into the main loop
@@ -4382,12 +4347,18 @@ static int event_protocol_switch(conn_rec *c, request_rec *r, server_rec *s,
/* This really should be a post_config hook, but the error log is already
* redirected by that point, so we need to do this in the open_logs phase.
+ * We also compute num_buckets here, and thus the definitive AP_MPMQ_*
+ * settings that depend on it, which may be needed by the post_config
+ * hooks of other modules.
*/
static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
apr_pool_t * ptemp, server_rec * s)
{
int startup = 0;
int level_flags = 0;
+ int num_buckets = 0, i;
+ int min_threads;
+ apr_status_t rv;
pconf = p;
@@ -4408,6 +4379,152 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
return !OK;
}
+ /* On first startup create gen_pool to satisfy the lifetime of the
+ * parent's PODs and listeners; on restart stop the children from the
+ * previous generation and clear gen_pool for the next one.
+ */
+ if (!retained->gen_pool) {
+ apr_pool_create(&retained->gen_pool, ap_pglobal);
+ }
+ else {
+ num_buckets = retained->mpm->num_buckets;
+ if (retained->mpm->was_graceful) {
+ /* wake up the children...time to die. But we'll have more soon */
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(retained->buckets[i].pod,
+ active_daemons_limit, AP_MPM_PODX_GRACEFUL);
+ }
+ }
+ else {
+ /* Kill 'em all. Since the child acts the same on the parents SIGTERM
+ * and a SIGHUP, we may as well use the same signal, because some user
+ * pthreads are stealing signals from us left and right.
+ */
+ for (i = 0; i < num_buckets; i++) {
+ ap_mpm_podx_killpg(retained->buckets[i].pod,
+ active_daemons_limit, AP_MPM_PODX_RESTART);
+ }
+ ap_reclaim_child_processes(1, /* Start with SIGTERM */
+ event_note_child_stopped);
+ }
+ apr_pool_clear(retained->gen_pool);
+ retained->buckets = NULL;
+
+ /* advance to the next generation */
+ /* XXX: we really need to make sure this new generation number isn't in
+ * use by any of the previous children.
+ */
+ ++retained->mpm->my_generation;
+ }
+
+ /* On graceful restart, preserve the listeners buckets. When ungraceful,
+ * set num_buckets to zero to let ap_duplicate_listeners() below determine
+ * how many are needed/configured.
+ */
+ if (!retained->mpm->was_graceful) {
+ num_buckets = (one_process) ? 1 : 0; /* one_process => one bucket */
+ retained->mpm->num_buckets = 0; /* old gen's until event_run() */
+ }
+ if ((rv = ap_duplicate_listeners(retained->gen_pool, ap_server_conf,
+ &retained->listen_buckets,
+ &num_buckets))) {
+ ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, rv,
+ (startup ? NULL : s), APLOGNO(03273)
+ "could not duplicate listeners, shutting down");
+ return !OK;
+ }
+ retained->num_listen_buckets = num_buckets;
+
+ /* Don't thrash since num_buckets depends on the system and the
+ * number of CPU cores, so make the settings consistent.
+ */
+ if (retained->first_thread_limit) {
+ if (threads_per_child > retained->first_thread_limit) {
+ ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(10465)
+ "ThreadsPerChild (%d) exceeds initial ThreadLimit, "
+ "forcing ThreadsPerChild to %d",
+ threads_per_child, retained->first_thread_limit);
+ threads_per_child = retained->first_thread_limit;
+ }
+ }
+ else {
+ if (thread_limit < threads_per_child) {
+ thread_limit = threads_per_child;
+ }
+ retained->first_thread_limit = thread_limit;
+ }
+ min_threads = threads_per_child * num_buckets;
+ if (max_workers < min_threads) {
+ max_workers = min_threads;
+ }
+ else {
+ max_workers = (max_workers / min_threads) * min_threads;
+ }
+ active_daemons_limit = max_workers / threads_per_child;
+ if (retained->first_server_limit) {
+ if (active_daemons_limit > retained->first_server_sb_limit) {
+ int new_max_workers = retained->first_server_sb_limit * threads_per_child;
+ if (new_max_workers < min_threads) {
+ new_max_workers = min_threads;
+ }
+ else {
+ new_max_workers = (new_max_workers / min_threads) * min_threads;
+ }
+ ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(10466)
+ "MaxRequestWorkers (%d) / ThreadsPerChild (%d) would "
+ "exceed initial scoreboard limit (%d), forcing "
+ "MaxRequestWorkers to %d",
+ max_workers, threads_per_child,
+ retained->first_server_sb_limit,
+ new_max_workers);
+ max_workers = new_max_workers;
+ active_daemons_limit = retained->first_server_sb_limit;
+ }
+ server_limit = retained->first_server_sb_limit;
+ }
+ else {
+ /* Save the initial ServerLimit which cannot be changed on restart, but
+ * leave some spare room in the actual server_[sb_]limit (used to size
+ * the scoreboard) to allow for children restarting while the old gen
+ * is gracefully exiting.
+ */
+ retained->first_server_limit = server_limit;
+ if (server_limit < active_daemons_limit * SCOREBOARD_DAEMONS_FACTOR) {
+ server_limit = active_daemons_limit * SCOREBOARD_DAEMONS_FACTOR;
+ }
+ retained->first_server_sb_limit = server_limit;
+ }
+ if (ap_daemons_to_start < num_buckets) {
+ ap_daemons_to_start = num_buckets;
+ }
+ else if (ap_daemons_to_start < active_daemons_limit) {
+ ap_daemons_to_start = (ap_daemons_to_start / num_buckets) * num_buckets;
+ }
+ else {
+ ap_daemons_to_start = active_daemons_limit;
+ }
+ if (min_spare_threads < ap_daemons_to_start * threads_per_child) {
+ min_spare_threads = ap_daemons_to_start * threads_per_child;
+ }
+ else if (min_spare_threads < max_workers) {
+ min_spare_threads = (min_spare_threads / min_threads) * min_threads;
+ }
+ else {
+ min_spare_threads = max_workers;
+ }
+ if (max_spare_threads < 0) { /* auto settings */
+ max_spare_threads = max_workers * MAX_SPARE_THREADS_RATIO;
+ }
+ if (max_spare_threads < min_spare_threads + min_threads) {
+ max_spare_threads = min_spare_threads + min_threads;
+ }
+ else if (max_spare_threads < max_workers) {
+ max_spare_threads = (max_spare_threads / min_threads) * min_threads;
+ }
+ else {
+ max_spare_threads = max_workers;
+ }
+
return OK;
}
@@ -4465,7 +4582,8 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
/* sigh, want this only the second time around */
if (retained->mpm->module_loads == 2) {
- rv = apr_pollset_create(&event_pollset, 1, plog,
+ apr_pollset_t *tmp = NULL;
+ rv = apr_pollset_create(&tmp, 1, plog,
APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00495)
@@ -4474,7 +4592,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
"Also check system or user limits!");
return HTTP_INTERNAL_SERVER_ERROR;
}
- apr_pollset_destroy(event_pollset);
+ apr_pollset_destroy(tmp);
if (!one_process && !foreground) {
/* before we detach, setup crash handlers to log to errorlog */
@@ -4492,21 +4610,25 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
parent_pid = ap_my_pid = getpid();
ap_listen_pre_config();
- ap_daemons_to_start = DEFAULT_START_DAEMON;
- min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
- max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
- server_limit = DEFAULT_SERVER_LIMIT;
- thread_limit = DEFAULT_THREAD_LIMIT;
- active_daemons_limit = server_limit;
- threads_per_child = DEFAULT_THREADS_PER_CHILD;
- max_workers = active_daemons_limit * threads_per_child;
had_healthy_child = 0;
ap_extended_status = 0;
- event_pollset = NULL;
- worker_queue_info = NULL;
- listener_os_thread = NULL;
- listener_is_wakeable = 0;
+ max_workers = -1;
+ threads_per_child = -1;
+ min_spare_threads = max_spare_threads = -1;
+ server_limit = thread_limit = -1;
+ ap_daemons_to_start = -1;
+ auto_settings = 0;
+
+#ifndef _SC_NPROCESSORS_ONLN
+ num_online_cpus = 1;
+#else
+ num_online_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (num_online_cpus < 1) {
+ num_online_cpus = 1;
+ }
+#endif
+ async_factor = DEFAULT_ASYNC_FACTOR;
return OK;
}
@@ -4563,7 +4685,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
startup = 1;
}
- if (server_limit > MAX_SERVER_LIMIT) {
+ if (server_limit < 0) {
+ server_limit = DEFAULT_SERVER_LIMIT;
+ }
+ else if (server_limit > MAX_SERVER_LIMIT) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00497)
"WARNING: ServerLimit of %d exceeds compile-time "
@@ -4577,7 +4702,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
}
server_limit = MAX_SERVER_LIMIT;
}
- else if (server_limit < 1) {
+ else if (server_limit == 0) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00499)
"WARNING: ServerLimit of %d not allowed, "
@@ -4589,14 +4714,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
}
server_limit = 1;
}
-
/* you cannot change ServerLimit across a restart; ignore
* any such attempts
*/
- if (!retained->first_server_limit) {
- retained->first_server_limit = server_limit;
- }
- else if (server_limit != retained->first_server_limit) {
+ if (retained->first_server_limit && server_limit != retained->first_server_limit) {
/* don't need a startup console version here */
ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00501)
"changing ServerLimit to %d from original value of %d "
@@ -4605,7 +4726,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
server_limit = retained->first_server_limit;
}
- if (thread_limit > MAX_THREAD_LIMIT) {
+ if (thread_limit < 0) {
+ thread_limit = DEFAULT_THREAD_LIMIT;
+ }
+ else if (thread_limit > MAX_THREAD_LIMIT) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00502)
"WARNING: ThreadLimit of %d exceeds compile-time "
@@ -4619,7 +4743,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
}
thread_limit = MAX_THREAD_LIMIT;
}
- else if (thread_limit < 1) {
+ else if (thread_limit == 0) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00504)
"WARNING: ThreadLimit of %d not allowed, "
@@ -4631,14 +4755,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
}
thread_limit = 1;
}
-
/* you cannot change ThreadLimit across a restart; ignore
* any such attempts
*/
- if (!retained->first_thread_limit) {
- retained->first_thread_limit = thread_limit;
- }
- else if (thread_limit != retained->first_thread_limit) {
+ if (retained->first_thread_limit && thread_limit != retained->first_thread_limit) {
/* don't need a startup console version here */
ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00506)
"changing ThreadLimit to %d from original value of %d "
@@ -4647,7 +4767,41 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
thread_limit = retained->first_thread_limit;
}
- if (threads_per_child > thread_limit) {
+ /* Auto settings depend on max_workers and num_buckets, the latter being
+ * known only in event_open_logs(). So defer the work to there (without
+ * warnings, since the settings are automatic anyway).
+ */
+ if (auto_settings) {
+ if (max_workers <= 0) {
+ /* This used to warn before auto settings existed; still take
+ * the default value, but silently.
+ */
+ max_workers = DEFAULT_SERVER_LIMIT * DEFAULT_THREADS_PER_CHILD;
+ }
+ if (threads_per_child <= 0) {
+ /* Default threads_per_child is the number of CPUs */
+ threads_per_child = num_online_cpus;
+
+ /* With a lot of workers and not so many CPUs to handle them,
+ * spawn more threads to get a reasonable active_daemons_limit
+ * i.e. processes / threads ratio.
+ */
+ while (max_workers / threads_per_child >
+ threads_per_child * MAX_DAEMONS_THREADS_RATIO) {
+ threads_per_child *= 2;
+ }
+ }
+ return OK; /* => event_open_logs() */
+ }
+
+ /* No auto settings; use the default for anything not set (or set to
+ * some negative value), warn about nonsense values and adjust otherwise.
+ */
+
+ if (threads_per_child < 0) {
+ threads_per_child = DEFAULT_THREADS_PER_CHILD;
+ }
+ else if (threads_per_child > thread_limit) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00507)
"WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
@@ -4662,7 +4816,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
}
threads_per_child = thread_limit;
}
- else if (threads_per_child < 1) {
+ else if (threads_per_child == 0) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00509)
"WARNING: ThreadsPerChild of %d not allowed, "
@@ -4675,7 +4829,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
threads_per_child = 1;
}
- if (max_workers < threads_per_child) {
+ if (max_workers < 0) {
+ max_workers = DEFAULT_SERVER_LIMIT * DEFAULT_THREADS_PER_CHILD;
+ }
+ else if (max_workers < threads_per_child) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00511)
"WARNING: MaxRequestWorkers of %d is less than "
@@ -4693,27 +4850,6 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
}
active_daemons_limit = max_workers / threads_per_child;
-
- if (max_workers % threads_per_child) {
- int tmp_max_workers = active_daemons_limit * threads_per_child;
-
- if (startup) {
- ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
- "WARNING: MaxRequestWorkers of %d is not an integer "
- "multiple of ThreadsPerChild of %d, decreasing to nearest "
- "multiple %d, for a maximum of %d servers.",
- max_workers, threads_per_child, tmp_max_workers,
- active_daemons_limit);
- } else {
- ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
- "MaxRequestWorkers of %d is not an integer multiple "
- "of ThreadsPerChild of %d, decreasing to nearest "
- "multiple %d", max_workers, threads_per_child,
- tmp_max_workers);
- }
- max_workers = tmp_max_workers;
- }
-
if (active_daemons_limit > server_limit) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
@@ -4730,10 +4866,34 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
server_limit * threads_per_child);
}
active_daemons_limit = server_limit;
+ max_workers = active_daemons_limit * threads_per_child;
+ }
+ else if (max_workers % threads_per_child) {
+ int new_max_workers = active_daemons_limit * threads_per_child;
+ if (startup) {
+ ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
+ "WARNING: MaxRequestWorkers of %d is not an integer "
+ "multiple of ThreadsPerChild of %d, decreasing to nearest "
+ "multiple %d, for a maximum of %d servers.",
+ max_workers, threads_per_child, new_max_workers,
+ active_daemons_limit);
+ } else {
+ ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
+ "MaxRequestWorkers of %d is not an integer multiple "
+ "of ThreadsPerChild of %d, decreasing to nearest "
+ "multiple %d", max_workers, threads_per_child,
+ new_max_workers);
+ }
+ max_workers = new_max_workers;
}
- /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */
- if (ap_daemons_to_start < 1) {
+ if (ap_daemons_to_start < 0) {
+ ap_daemons_to_start = DEFAULT_START_DAEMON;
+ }
+ else if (ap_daemons_to_start > active_daemons_limit) {
+ ap_daemons_to_start = active_daemons_limit;
+ }
+ else if (ap_daemons_to_start == 0) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
"WARNING: StartServers of %d not allowed, "
@@ -4746,7 +4906,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
ap_daemons_to_start = 1;
}
- if (min_spare_threads < 1) {
+ if (min_spare_threads < 0) {
+ min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
+ }
+ else if (min_spare_threads == 0) {
if (startup) {
ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00519)
"WARNING: MinSpareThreads of %d not allowed, "
@@ -4758,12 +4921,18 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
"MinSpareThreads of %d not allowed, increasing to 1",
min_spare_threads);
}
- min_spare_threads = 1;
+ min_spare_threads = threads_per_child;
}
- /* max_spare_threads < min_spare_threads + threads_per_child
- * checked in ap_mpm_run()
- */
+ if (max_spare_threads < 0) {
+ max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
+ }
+ else {
+ /* The max_spare_threads value has never been checked; it's silently
+ * adjusted in event_open_logs() such that max_spare_threads >=
+ * min_spare_threads + threads_per_child.
+ */
+ }
return OK;
}
@@ -4839,7 +5008,7 @@ static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
}
static const char *set_max_workers(cmd_parms * cmd, void *dummy,
- const char *arg)
+ const char *arg, const char *arg2)
{
const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
if (err != NULL) {
@@ -4850,7 +5019,10 @@ static const char *set_max_workers(cmd_parms * cmd, void *dummy,
"MaxClients is deprecated, use MaxRequestWorkers "
"instead.");
}
+
max_workers = atoi(arg);
+ auto_settings = (arg2 && !strcasecmp(arg2, "auto"));
+
return NULL;
}
@@ -4891,23 +5063,15 @@ static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
const char *arg)
{
- double val;
char *endptr;
const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
if (err != NULL) {
return err;
}
- val = strtod(arg, &endptr);
- if (*endptr)
- return "error parsing value";
-
- if (val <= 0)
- return "AsyncRequestWorkerFactor argument must be a positive number";
-
- worker_factor = val * WORKER_FACTOR_SCALE;
- if (worker_factor < WORKER_FACTOR_SCALE) {
- worker_factor = WORKER_FACTOR_SCALE;
+ async_factor = strtod(arg, &endptr);
+ if (*endptr || async_factor < 1.0) {
+ return "AsyncRequestWorkerFactor must be a rational number greater or equal to 1";
}
return NULL;
}
@@ -4923,10 +5087,10 @@ static const command_rec event_cmds[] = {
"Minimum number of idle threads, to handle request spikes"),
AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
"Maximum number of idle threads"),
- AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
- "Deprecated name of MaxRequestWorkers"),
- AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
- "Maximum number of threads alive at the same time"),
+ AP_INIT_TAKE12("MaxClients", set_max_workers, NULL, RSRC_CONF,
+ "Deprecated name of MaxRequestWorkers"),
+ AP_INIT_TAKE12("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
+ "Maximum number of threads alive at the same time"),
AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
"Number of threads each child creates"),
AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
From fb8839306b1eb6f8a8633989f2504d7b45696edb Mon Sep 17 00:00:00 2001
From: ylavic
Date: Wed, 10 Jul 2024 15:10:50 +0200
Subject: [PATCH 18/22] mpm_event: Propose some new connections_above_limit()
heuristics.
---
server/mpm/event/event.c | 155 +++++++++++++++++++++++++++++++++++----
1 file changed, 141 insertions(+), 14 deletions(-)
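The alternatives below are selected at compile time; the "winner",
LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_VS_IDLERS, boils down to the
following sketch (not httpd code, the helper and its parameters are
illustrative only):

    /* Stop accepting when the potentially blocking part of the backlog
     * outgrows the idle workers, or when the whole backlog exceeds a
     * hard limit. A negative 'idlers' means that many connections are
     * already queued in the backlog waiting for a worker.
     */
    static int above_limit(int idlers, int backlog_nonblock,
                           int backlog_limit)
    {
        if (idlers >= -backlog_limit) {
            /* Connections in "nonblocking" states cannot stall a
             * worker, so add them back before comparing. */
            idlers += backlog_nonblock;
            if (idlers >= 0)
                return 0; /* keep accepting */
        }
        return 1; /* disable the listening sockets for now */
    }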
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 3007dc8b33b..e0ba249bbf7 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -214,6 +214,9 @@ static int auto_settings = 0; /* Auto settings based on max_worker
and num_online_cpus */
static int num_online_cpus = 0; /* Number of CPUs detected */
+static int workers_backlog_limit = 0; /* Max number of events in the workers' backlog
+ (above which new connections are not accepted) */
+
static /*atomic*/ apr_uint32_t dying = 0;
static /*atomic*/ apr_uint32_t workers_may_exit = 0;
static /*atomic*/ apr_uint32_t start_thread_may_exit = 0;
@@ -824,23 +827,119 @@ static APR_INLINE int listensocks_disabled(void)
return apr_atomic_read32(&listensocks_off) != 0;
}
-static APR_INLINE int connections_above_limit(int *busy)
+/* Choose one of these */
+#define LIMIT_BY_CONNS_TOTAL_VS_IDLERS 0
+#define LIMIT_BY_BACKLOG_MAYBLOCK_VS_IDLERS 0
+#define LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_VS_IDLERS 1 /* the winner? */
+#define LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_AND_QUEUES_VS_IDLERS 0
+
+#if LIMIT_BY_BACKLOG_MAYBLOCK_VS_IDLERS \
+ || LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_VS_IDLERS \
+ || LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_AND_QUEUES_VS_IDLERS
+/* The rationale for backlog_nonblock_count is that only connections about
+ * to be processed outside the MPM can make a worker thread block, since we
+ * have no guarantee that modules won't block processing them. The core will
+ * not block processing TLS handshakes or reading the HTTP header for instance,
+ * but once the connections are passed to modules they may block in a handler
+ * reading the body or whatever. Those connections are in CONN_STATE_PROCESSING
+ * state in the backlog, which includes newly accepted connections and the ones
+ * waking up from CONN_STATE_KEEPALIVE and CONN_STATE_ASYNC_WAITIO.
+ * But processing by/inside the event MPM itself never blocks and is fast
+ * enough that connections fully handled by the MPM can be treated differently
+ * in connections_above_limit(), which is where backlog_nonblock_count helps.
+ */
+static /*atomic*/ apr_uint32_t backlog_nonblock_count;
+#endif
+
+static APR_INLINE int connections_above_limit(void)
{
- apr_int32_t i_count = ap_queue_info_idlers_count(worker_queue_info);
- if (i_count > 0) {
- apr_uint32_t c_count = apr_atomic_read32(&connection_count);
- apr_uint32_t l_count = apr_atomic_read32(linger_q->total);
- if (c_count <= l_count
- /* Off by 'listensocks_disabled()' to avoid flip flop */
- || c_count - l_count < (apr_uint32_t)threads_per_child +
- (i_count - listensocks_disabled()) *
- async_factor) {
+ /* Note that idlers >= 0 gives the number of idle workers, while a negative
+ * idlers means -idlers connections are in the backlog waiting for a worker.
+ */
+ int idlers = ap_queue_info_idlers_count(worker_queue_info);
+
+#if LIMIT_BY_CONNS_TOTAL_VS_IDLERS
+
+ /* Limit reached when the number of connections (excluding the ones in
+ * lingering close) is above the number of idle workers.
+ */
+ if (idlers >= 0) {
+ int conns = (apr_atomic_read32(&connection_count) -
+ apr_atomic_read32(linger_q->total));
+ AP_DEBUG_ASSERT(conns >= 0);
+ if (idlers >= conns) {
+ return 0;
+ }
+ }
+
+#elif LIMIT_BY_BACKLOG_MAYBLOCK_VS_IDLERS
+
+ /* Limit reached when the number of potentially blocking connections in
+ * the backlog is above the number of idle workers.
+ *
+ * Ignore connections in the backlog with "nonblocking" states by adding
+ * them back.
+ */
+ idlers += apr_atomic_read32(&backlog_nonblock_count);
+ if (idlers >= 0) {
+ return 0;
+ }
+
+#elif LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_VS_IDLERS
+
+ /* Limit reached when the number of potentially blocking connections in
+ * the backlog is above the number of idle workers, or the total number
+ * of connections waiting for a worker in the backlog is above some hard
+ * workers_backlog_limit.
+ */
+ if (idlers >= -workers_backlog_limit) {
+ /* Ignore connections in the backlog with "nonblocking" states by
+ * adding them back.
+ */
+ idlers += apr_atomic_read32(&backlog_nonblock_count);
+ if (idlers >= 0) {
+ return 0;
+ }
+ }
+
+#elif LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_AND_QUEUES_VS_IDLERS
+
+ /* Limit reached when the number of potentially blocking connections in
+ * the backlog *and* the queues is above the number of idle workers, or
+ * the total number of connections waiting for a worker in the backlog
+ * is above some hard workers_backlog_limit.
+ */
+ if (idlers >= -workers_backlog_limit) {
+ /* Ignore connections in the backlog with "nonblocking" states by
+ * adding them back.
+ */
+ idlers += apr_atomic_read32(&backlog_nonblock_count);
+ if (idlers >= (apr_atomic_read32(keepalive_q->total) +
+ apr_atomic_read32(waitio_q->total))) {
return 0;
}
}
- else if (busy) {
- *busy = 1;
+
+#else
+
+ /* Legacy but w/o ignoring the keepalive_q (not shrunk anymore).
+ * Limit reached when the number of conns (besides lingering close ones)
+ * is above some unclear limit (the total number of workers plus the
+ * number of idle workers times the async factor...).
+ */
+ int off = listensocks_disabled(); /* off by disabled() to limit flip flop */
+ if (idlers >= off) {
+ int avail = (threads_per_child + (int)((idlers - off) * async_factor));
+ int conns = (apr_atomic_read32(&connection_count) -
+ apr_atomic_read32(linger_q->total));
+ AP_DEBUG_ASSERT(conns >= 0);
+ if (avail >= conns) {
+ return 0;
+ }
}
+
+#endif
+
return 1;
}
@@ -848,7 +947,7 @@ static APR_INLINE int should_enable_listensocks(void)
{
return (listensocks_disabled()
&& !apr_atomic_read32(&dying)
- && !connections_above_limit(NULL));
+ && !connections_above_limit());
}
static void close_socket_at(apr_socket_t *csd,
@@ -1888,8 +1987,34 @@ static void conn_state_backlog_cb(void *baton, int pushed)
if (pushed) {
TO_QUEUE_APPEND(cs->sc->bl_q, cs);
+#if LIMIT_BY_BACKLOG_MAYBLOCK_VS_IDLERS \
+ || LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_VS_IDLERS \
+ || LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_AND_QUEUES_VS_IDLERS
+ if (cs->pub.state != CONN_STATE_PROCESSING) {
+ /* These connections won't block when processed.
+ *
+ * Increment *after* TO_QUEUE_APPEND() to make sure that:
+ * cs->sc->bl_q->total >= backlog_nonblock_count
+ * always holds.
+ */
+ apr_atomic_inc32(&backlog_nonblock_count);
+ }
+#endif
}
else { /* popped */
+#if LIMIT_BY_BACKLOG_MAYBLOCK_VS_IDLERS \
+ || LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_VS_IDLERS \
+ || LIMIT_BY_BACKLOG_TOTAL_AND_MAYBLOCK_AND_QUEUES_VS_IDLERS
+ if (cs->pub.state != CONN_STATE_PROCESSING) {
+ /* These connections won't block when processed.
+ *
+ * Decrement *before* TO_QUEUE_REMOVE() to make sure that:
+ * cs->sc->bl_q->total >= backlog_nonblock_count
+ * always holds.
+ */
+ apr_atomic_dec32(&backlog_nonblock_count);
+ }
+#endif
TO_QUEUE_REMOVE(cs->sc->bl_q, cs);
/* not in backlog anymore */
@@ -1932,7 +2057,7 @@ static void push2worker(event_conn_state_t *cs, timer_event_t *te,
* the situation settles down. The listener and new idling workers will
* test for should_enable_listensocks() to recover (when suitable).
*/
- if (connections_above_limit(NULL)) {
+ if (connections_above_limit()) {
disable_listensocks();
if (above_limit) {
*above_limit = 1;
@@ -4525,6 +4650,8 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
max_spare_threads = max_workers;
}
+ workers_backlog_limit = threads_per_child * async_factor;
+
return OK;
}
From 94baa05601f6c0ea936ae42c18f2acd923859091 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Mon, 8 Jul 2024 19:19:22 +0200
Subject: [PATCH 19/22] mod_ssl: Nonblocking/async handshakes in
CONN_STATE_PROCESSING phase.
If AP_MPMQ_CAN_WAITIO, make mod_ssl perform non-blocking TLS handshakes and
go async when it would block.
---
changes-entries/mod_ssl_async_handshakes.txt | 1 +
modules/ssl/mod_ssl.c | 66 ++++++++++++++++----
modules/ssl/ssl_engine_io.c | 59 ++++++++++++++---
modules/ssl/ssl_private.h | 7 +++
4 files changed, 112 insertions(+), 21 deletions(-)
create mode 100644 changes-entries/mod_ssl_async_handshakes.txt
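The driving pattern, roughly (a sketch under the patch's assumptions,
not its actual code): run a handshake step non-blocking, map OpenSSL's
WANT_READ/WANT_WRITE to the connection's poll sense, and return EAGAIN
so the MPM parks the connection in CONN_STATE_ASYNC_WAITIO instead of
blocking a worker:

    #include "httpd.h"        /* conn_state_t, CONN_SENSE_* */
    #include <openssl/ssl.h>

    static apr_status_t handshake_step(SSL *ssl, conn_state_t *cs)
    {
        int rc = SSL_do_handshake(ssl);
        if (rc == 1)
            return APR_SUCCESS;    /* handshake completed */

        switch (SSL_get_error(ssl, rc)) {
        case SSL_ERROR_WANT_READ:
            if (cs) cs->sense = CONN_SENSE_WANT_READ;
            return APR_EAGAIN;     /* poll for read, then retry */
        case SSL_ERROR_WANT_WRITE:
            if (cs) cs->sense = CONN_SENSE_WANT_WRITE;
            return APR_EAGAIN;     /* poll for write, then retry */
        default:
            return APR_EGENERAL;   /* fatal, close the connection */
        }
    }

In the patch itself the handshake is triggered through the input
filters with AP_MODE_INIT and APR_NONBLOCK_READ, but the state mapping
is the same.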
diff --git a/changes-entries/mod_ssl_async_handshakes.txt b/changes-entries/mod_ssl_async_handshakes.txt
new file mode 100644
index 00000000000..e19eeb629de
--- /dev/null
+++ b/changes-entries/mod_ssl_async_handshakes.txt
@@ -0,0 +1 @@
 *) mod_ssl: Perform non-blocking and async TLS handshakes. [Graham Leggett]
diff --git a/modules/ssl/mod_ssl.c b/modules/ssl/mod_ssl.c
index 420ae6b79ac..5cae44a64a8 100644
--- a/modules/ssl/mod_ssl.c
+++ b/modules/ssl/mod_ssl.c
@@ -29,6 +29,7 @@
#include "util_md5.h"
#include "util_mutex.h"
#include "ap_provider.h"
+#include "ap_mpm.h"
#include "http_config.h"
#include "mod_proxy.h" /* for proxy_hook_section_post_config() */
@@ -40,6 +41,8 @@
int ssl_running_on_valgrind = 0;
#endif
+static int mpm_can_waitio = 0;
+
#if HAVE_OPENSSL_INIT_SSL || (OPENSSL_VERSION_NUMBER >= 0x10100000L && \
!defined(LIBRESSL_VERSION_NUMBER))
/* Openssl v1.1+ handles all termination automatically from
@@ -464,6 +467,16 @@ static int ssl_hook_pre_config(apr_pool_t *pconf,
return OK;
}
+static int ssl_hook_post_config(apr_pool_t *pconf, apr_pool_t *plog,
+ apr_pool_t *ptemp, server_rec *s)
+{
+ if (ap_mpm_query(AP_MPMQ_CAN_WAITIO, &mpm_can_waitio) != APR_SUCCESS) {
+ mpm_can_waitio = 0;
+ }
+
+ return OK;
+}
+
static SSLConnRec *ssl_init_connection_ctx(conn_rec *c,
ap_conf_vector_t *per_dir_config,
int reinit)
@@ -692,8 +705,9 @@ static int ssl_hook_pre_connection(conn_rec *c, void *csd)
static int ssl_hook_process_connection(conn_rec* c)
{
SSLConnRec *sslconn = myConnConfig(c);
+ int status = DECLINED;
- if (sslconn && !sslconn->disabled) {
+ if (sslconn && !sslconn->disabled && !sslconn->initialized) {
/* On an active SSL connection, let the input filters initialize
* themselves which triggers the handshake, which again triggers
* all kinds of useful things such as SNI and ALPN.
@@ -701,23 +715,50 @@ static int ssl_hook_process_connection(conn_rec* c)
apr_bucket_brigade* temp;
apr_status_t rv;
- temp = apr_brigade_create(c->pool, c->bucket_alloc);
- rv = ap_get_brigade(c->input_filters, temp,
- AP_MODE_INIT, APR_BLOCK_READ, 0);
- apr_brigade_destroy(temp);
-
- if (APR_SUCCESS != APR_SUCCESS) {
+ temp = ap_acquire_brigade(c);
+ rv = ap_get_brigade(c->input_filters, temp, AP_MODE_INIT,
+ mpm_can_waitio ? APR_NONBLOCK_READ : APR_BLOCK_READ,
+ 0);
+ ap_release_brigade(c, temp);
+
+ if (rv == APR_SUCCESS) {
+ /* great news, let's continue */
+ ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(10370)
+ "SSL handshake completed, continuing");
+ sslconn->initialized = 1;
+ }
+ else if (rv == MODSSL_ERROR_HTTP_ON_HTTPS) {
+ /* Plain HTTP spoken on https port, mod_ssl wants to be called
+ * without AP_MODE_INIT.
+ */
+ ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(10371)
+ "SSL handshake with plain HTTP, continuing");
+ sslconn->initialized = 1;
+ }
+ else if (mpm_can_waitio && APR_STATUS_IS_EAGAIN(rv)) {
+ /* Take advantage of an async MPM. If we see an EAGAIN,
+ * loop round and don't block.
+ */
+ ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(10372)
+ "SSL handshake in progress, try again later");
if (c->cs) {
- c->cs->state = CONN_STATE_LINGER;
+ c->cs->state = CONN_STATE_ASYNC_WAITIO;
}
- ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, c, APLOGNO(10373)
+ status = OK;
+ }
+ else {
+ /* we failed, give up */
+ ap_log_cerror(APLOG_MARK, APLOG_INFO, rv, c, APLOGNO(10373)
"SSL handshake was not completed, "
"closing connection");
- return OK;
+ if (c->cs) {
+ c->cs->state = CONN_STATE_LINGER;
+ }
+ status = OK;
}
}
-
- return DECLINED;
+
+ return status;
}
/*
@@ -746,6 +787,7 @@ static void ssl_register_hooks(apr_pool_t *p)
ap_hook_http_scheme (ssl_hook_http_scheme, NULL,NULL, APR_HOOK_MIDDLE);
ap_hook_default_port (ssl_hook_default_port, NULL,NULL, APR_HOOK_MIDDLE);
ap_hook_pre_config (ssl_hook_pre_config, NULL,NULL, APR_HOOK_MIDDLE);
+ ap_hook_post_config (ssl_hook_post_config, NULL,NULL, APR_HOOK_MIDDLE);
ap_hook_child_init (ssl_init_Child, NULL,NULL, APR_HOOK_MIDDLE);
ap_hook_post_read_request(ssl_hook_ReadReq, pre_prr,NULL, APR_HOOK_MIDDLE);
ap_hook_check_access (ssl_hook_Access, NULL,NULL, APR_HOOK_MIDDLE,
diff --git a/modules/ssl/ssl_engine_io.c b/modules/ssl/ssl_engine_io.c
index 3a2e841ae02..06ebeac2247 100644
--- a/modules/ssl/ssl_engine_io.c
+++ b/modules/ssl/ssl_engine_io.c
@@ -292,6 +292,7 @@ typedef struct {
} char_buffer_t;
typedef struct {
+ conn_rec *c;
SSL *ssl;
BIO *bio_out;
ap_filter_t *f;
@@ -730,6 +731,32 @@ static apr_status_t ssl_io_input_read(bio_filter_in_ctx_t *inctx,
* (This is usually the case when the client forces an SSL
* renegotiation which is handled implicitly by OpenSSL.)
*/
+ if (inctx->c->cs) {
+ inctx->c->cs->sense = CONN_SENSE_WANT_READ;
+ }
+ inctx->rc = APR_EAGAIN;
+
+ if (*len > 0) {
+ inctx->rc = APR_SUCCESS;
+ break;
+ }
+ if (inctx->block == APR_NONBLOCK_READ) {
+ break;
+ }
+ continue; /* Blocking and nothing yet? Try again. */
+ }
+ if (ssl_err == SSL_ERROR_WANT_WRITE) {
+ /*
+ * If OpenSSL wants to write during read, and we were
+ * nonblocking, report as an EAGAIN. Otherwise loop,
+ * pulling more data from network filter.
+ *
+ * (This is usually the case when the client forces an SSL
+ * renegotiation which is handled implicitly by OpenSSL.)
+ */
+ if (inctx->c->cs) {
+ inctx->c->cs->sense = CONN_SENSE_WANT_WRITE;
+ }
inctx->rc = APR_EAGAIN;
if (*len > 0) {
@@ -895,7 +922,9 @@ static apr_status_t ssl_filter_write(ap_filter_t *f,
* (This is usually the case when the client forces an SSL
* renegotiation which is handled implicitly by OpenSSL.)
*/
- outctx->c->cs->sense = CONN_SENSE_WANT_READ;
+ if (outctx->c->cs) {
+ outctx->c->cs->sense = CONN_SENSE_WANT_READ;
+ }
outctx->rc = APR_EAGAIN;
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, outctx->c,
"Want read during nonblocking write");
@@ -950,10 +979,6 @@ static apr_status_t ssl_filter_write(ap_filter_t *f,
sizeof(HTTP_ON_HTTPS_PORT) - 1, \
alloc)
-/* Custom apr_status_t error code, used when a plain HTTP request is
- * received on an SSL port. */
-#define MODSSL_ERROR_HTTP_ON_HTTPS (APR_OS_START_USERERR + 0)
-
/* Custom apr_status_t error code, used when the proxy cannot
* establish an outgoing SSL connection. */
#define MODSSL_ERROR_BAD_GATEWAY (APR_OS_START_USERERR + 1)
@@ -989,7 +1014,7 @@ static apr_status_t ssl_io_filter_error(bio_filter_in_ctx_t *inctx,
f->c->keepalive = AP_CONN_CLOSE;
if (is_init) {
sslconn->non_ssl_request = NON_SSL_SEND_REQLINE;
- return AP_FILTER_ERROR;
+ return MODSSL_ERROR_HTTP_ON_HTTPS;
}
sslconn->non_ssl_request = NON_SSL_SEND_HDR_SEP;
@@ -1424,10 +1449,25 @@ static apr_status_t ssl_io_filter_handshake(ssl_filter_ctx_t *filter_ctx)
}
else if (ssl_err == SSL_ERROR_WANT_READ) {
/*
- * This is in addition to what was present earlier. It is
- * borrowed from openssl_state_machine.c [mod_tls].
- * TBD.
+ * Call us back when ready to read.
*/
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, outctx->c,
+ "Want read during nonblocking accept");
+ if (outctx->c->cs) {
+ outctx->c->cs->sense = CONN_SENSE_WANT_READ;
+ }
+ outctx->rc = APR_EAGAIN;
+ return APR_EAGAIN;
+ }
+ else if (ssl_err == SSL_ERROR_WANT_WRITE) {
+ /*
+ * Call us back when ready to write.
+ */
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, outctx->c,
+ "Want write during nonblocking accept");
+ if (outctx->c->cs) {
+ outctx->c->cs->sense = CONN_SENSE_WANT_WRITE;
+ }
outctx->rc = APR_EAGAIN;
return APR_EAGAIN;
}
@@ -2230,6 +2270,7 @@ static apr_status_t ssl_io_input_add_filter(ssl_filter_ctx_t *filter_ctx, conn_r
}
BIO_set_data(filter_ctx->pbioRead, (void *)inctx);
+ inctx->c = c;
inctx->ssl = ssl;
inctx->bio_out = filter_ctx->pbioWrite;
inctx->f = filter_ctx->pInputFilter;
diff --git a/modules/ssl/ssl_private.h b/modules/ssl/ssl_private.h
index 2f7bb51fa5a..dc2f4f0d98b 100644
--- a/modules/ssl/ssl_private.h
+++ b/modules/ssl/ssl_private.h
@@ -367,6 +367,12 @@ APLOG_USE_MODULE(ssl);
#define mySrvConfigFromConn(c) mySrvConfig(mySrvFromConn(c))
#define myModConfigFromConn(c) myModConfig(mySrvFromConn(c))
+/**
+ * Custom apr_status_t error code, used when a plain HTTP request is
+ * received on an SSL port.
+ */
+#define MODSSL_ERROR_HTTP_ON_HTTPS (APR_OS_START_USERERR + 0)
+
/**
* Defaults for the configuration
*/
@@ -582,6 +588,7 @@ typedef struct {
const char *verify_info;
const char *verify_error;
int verify_depth;
+ int initialized;
int disabled;
enum {
NON_SSL_OK = 0, /* is SSL request, or error handling completed */
From 6cbda1f1fa81fc7fdfdeeb8e45619978c1f8950f Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 9 Jul 2024 11:37:58 +0200
Subject: [PATCH 20/22] core,http: Non-blocking HTTP header read.
---
include/http_protocol.h | 75 +++-
include/httpd.h | 3 +
include/mod_core.h | 8 +-
modules/http/http_core.c | 43 +-
modules/http/http_filters.c | 158 ++++---
modules/http2/h2_stream.c | 2 +-
modules/proxy/mod_proxy_http.c | 6 +-
server/core.c | 2 +-
server/core_filters.c | 71 ++--
server/protocol.c | 753 +++++++++++++++++----------------
10 files changed, 647 insertions(+), 474 deletions(-)
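From an async MPM's point of view the new reentrant API is driven like
this (a sketch based on the http_core.c hunk below; c and cs are the
usual conn_rec and conn_state):

    request_rec *r = NULL;
    apr_status_t rv = ap_read_request_ex(&r, c, APR_NONBLOCK_READ);
    if (APR_STATUS_IS_EAGAIN(rv)) {
        /* Header not complete yet; the partially parsed request is
         * kept in c->partial_request, so park the connection and call
         * again when the socket is readable. */
        cs->state = CONN_STATE_ASYNC_WAITIO;
        return OK;
    }
    if (rv != APR_SUCCESS) {
        cs->state = CONN_STATE_LINGER; /* read/parse error, close */
        return OK;
    }
    /* The request line and headers are fully read here. */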
diff --git a/include/http_protocol.h b/include/http_protocol.h
index 2b509b341fe..0290abef450 100644
--- a/include/http_protocol.h
+++ b/include/http_protocol.h
@@ -54,19 +54,30 @@ AP_DECLARE_DATA extern ap_filter_rec_t *ap_old_write_func;
*/
/**
- * Read an empty request and set reasonable defaults.
+ * Create an empty request and set reasonable defaults.
* @param c The current connection
* @return The new request_rec
*/
AP_DECLARE(request_rec *) ap_create_request(conn_rec *c);
/**
- * Read a request and fill in the fields.
+ * Read the request line and header fields.
* @param c The current connection
* @return The new request_rec
*/
AP_DECLARE(request_rec *) ap_read_request(conn_rec *c);
+/**
+ * Read the request line and header fields, possibly non-blocking.
+ * @param r The request read
+ * @param c The connection to read from
+ * @param block How the read should be performed
+ * ::APR_BLOCK_READ, ::APR_NONBLOCK_READ
+ * @return APR_SUCCESS, APR_EAGAIN or APR_EGENERAL
+ */
+AP_DECLARE(apr_status_t) ap_read_request_ex(request_rec **r, conn_rec *c,
+ apr_read_type_e block);
+
/**
* Assign the method, uri and protocol (in HTTP/1.x the
* items from the first line) to the request.
@@ -107,6 +118,12 @@ AP_DECLARE(int) ap_parse_request_line(request_rec *r);
*/
AP_DECLARE(int) ap_check_request_header(request_rec *r);
+/**
+ * Reentrant state for ap_fgetline_ex() and ap_get_mime_headers_ex()
+ */
+struct ap_getline_state; /* opaque */
+typedef struct ap_getline_state ap_getline_state_t;
+
/**
* Read the mime-encoded headers.
* @param r The current request
@@ -122,6 +139,23 @@ AP_DECLARE(void) ap_get_mime_headers(request_rec *r);
AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r,
apr_bucket_brigade *bb);
+/**
+ * Reentrant version of ap_get_mime_headers() reading from an input
+ * filter in blocking or non-blocking mode.
+ * @param r The current request
+ * @param f Input filter to read from
+ * @param block How the operations should be performed
+ * ::APR_BLOCK_READ, ::APR_NONBLOCK_READ
+ * @param bb temp brigade
+ * @param state_p State of the parsing, must point to NULL on first call
+ * and points to NULL on output if APR_EAGAIN is not returned
+ */
+AP_DECLARE(apr_status_t) ap_get_mime_headers_ex(request_rec *r,
+ ap_filter_t *f,
+ apr_read_type_e block,
+ apr_bucket_brigade *bb,
+ ap_getline_state_t **state_p);
+
/**
* Run post_read_request hook and validate.
* @param r The current request
@@ -744,11 +778,13 @@ AP_DECLARE(apr_status_t) ap_get_basic_auth_components(const request_rec *r,
*/
AP_CORE_DECLARE(void) ap_parse_uri(request_rec *r, const char *uri);
-#define AP_GETLINE_FOLD (1 << 0) /* Whether to merge continuation lines */
-#define AP_GETLINE_CRLF (1 << 1) /* Whether line ends must be CRLF */
-#define AP_GETLINE_NOSPC_EOL (1 << 2) /* Whether to consume up to and including
- the end of line on APR_ENOSPC */
-#define AP_GETLINE_NONBLOCK (1 << 3) /* Whether to read non-blocking */
+#define AP_GETLINE_FOLD (1 << 0) /* Whether to merge continuation lines */
+#define AP_GETLINE_CRLF (1 << 1) /* Whether line ends must be CRLF */
+#define AP_GETLINE_NOSPC_EOL (1 << 2) /* Whether to consume up to and including
+ the end of line on APR_ENOSPC */
+#define AP_GETLINE_NONBLOCK (1 << 3) /* Whether to read non-blocking */
+#define AP_GETLINE_ALLOC (1 << 4) /* Whether to allocate the returned line */
+#define AP_GETLINE_FOLD_COL (1 << 5 | AP_GETLINE_FOLD) /* Fold after colon only */
/**
* Get the next line of input for the request
@@ -783,6 +819,31 @@ AP_DECLARE(apr_status_t) ap_fgetline(char **s, apr_size_t n,
int flags, apr_bucket_brigade *bb,
apr_pool_t *p);
+/**
+ * Get the next line from an input filter, reentrant (e.g. EAGAIN).
+ *
+ * @param s Pointer to the pointer to the buffer into which the line
+ * should be read; if *s==NULL, a buffer of the necessary size
+ * to hold the data will be allocated from \p p
+ * @param n The size of the buffer
+ * @param read The length of the line.
+ * @param f Input filter to read from
+ * @param flags Bit mask of AP_GETLINE_* options
+ * @param bb Working brigade to use when reading buckets
+ * @param state_p State of the parsing, must point to NULL on first call
+ * and points to NULL on output if APR_EAGAIN is not returned
+ * @param p The pool to allocate the buffer from (if needed)
+ * @return APR_SUCCESS, if successful
+ * APR_ENOSPC, if the line is too big to fit in the buffer
+ * APR_EAGAIN, if non-blocking IO would block
+ * Other errors where appropriate
+ */
+AP_DECLARE(apr_status_t) ap_fgetline_ex(char **s, apr_size_t n,
+ apr_size_t *read, ap_filter_t *f,
+ int flags, apr_bucket_brigade *bb,
+ ap_getline_state_t **state_p,
+ apr_pool_t *p);
+
/**
* @see ap_fgetline
*
diff --git a/include/httpd.h b/include/httpd.h
index c3f72fceb7e..ae08740b227 100644
--- a/include/httpd.h
+++ b/include/httpd.h
@@ -1315,6 +1315,9 @@ struct conn_rec {
int async_filter;
int outgoing;
+
+ /** Partial request being read (non-blocking) */
+ request_rec *partial_request;
};
struct conn_slave_rec {
diff --git a/include/mod_core.h b/include/mod_core.h
index f9cc0611f4c..b4a40de2d5d 100644
--- a/include/mod_core.h
+++ b/include/mod_core.h
@@ -41,7 +41,7 @@ extern "C" {
/* Handles for core filters */
AP_DECLARE_DATA extern ap_filter_rec_t *ap_http_input_filter_handle;
-AP_DECLARE_DATA extern ap_filter_rec_t *ap_h1_request_in_filter_handle;
+AP_DECLARE_DATA extern ap_filter_rec_t *ap_h1_header_in_filter_handle;
AP_DECLARE_DATA extern ap_filter_rec_t *ap_h1_body_in_filter_handle;
AP_DECLARE_DATA extern ap_filter_rec_t *ap_http_header_filter_handle;
AP_DECLARE_DATA extern ap_filter_rec_t *ap_chunk_filter_handle;
@@ -55,9 +55,9 @@ apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b,
ap_input_mode_t mode, apr_read_type_e block,
apr_off_t readbytes);
-apr_status_t ap_h1_request_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
- ap_input_mode_t mode, apr_read_type_e block,
- apr_off_t readbytes);
+apr_status_t ap_h1_header_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
+ ap_input_mode_t mode, apr_read_type_e block,
+ apr_off_t readbytes);
apr_status_t ap_h1_body_in_filter(ap_filter_t *f, apr_bucket_brigade *b,
ap_input_mode_t mode, apr_read_type_e block,
diff --git a/modules/http/http_core.c b/modules/http/http_core.c
index 85858ab2b57..7e9f82f87dd 100644
--- a/modules/http/http_core.c
+++ b/modules/http/http_core.c
@@ -37,7 +37,7 @@
/* Handles for core filters */
AP_DECLARE_DATA ap_filter_rec_t *ap_http_input_filter_handle;
-AP_DECLARE_DATA ap_filter_rec_t *ap_h1_request_in_filter_handle;
+AP_DECLARE_DATA ap_filter_rec_t *ap_h1_header_in_filter_handle;
AP_DECLARE_DATA ap_filter_rec_t *ap_h1_body_in_filter_handle;
AP_DECLARE_DATA ap_filter_rec_t *ap_http_header_filter_handle;
AP_DECLARE_DATA ap_filter_rec_t *ap_h1_response_out_filter_handle;
@@ -50,7 +50,8 @@ AP_DECLARE_DATA const char *ap_multipart_boundary;
/* If we are using an MPM That Supports Async Connections,
* use a different processing function
*/
-static int async_mpm = 0;
+static int mpm_is_async = 0;
+static int mpm_can_waitio = 0;
static const char *set_keep_alive_timeout(cmd_parms *cmd, void *dummy,
const char *arg)
@@ -145,18 +146,34 @@ static int ap_process_http_async_connection(conn_rec *c)
AP_DEBUG_ASSERT(cs->state == CONN_STATE_PROCESSING);
if (cs->state == CONN_STATE_PROCESSING) {
+ apr_read_type_e block = APR_BLOCK_READ;
+ apr_status_t rv;
+
+ /* slave connections (i.e. h2_c2) not ready for WAITIO yet */
+ if (mpm_can_waitio && !c->master) {
+ block = APR_NONBLOCK_READ;
+ }
+
ap_update_child_status_from_conn(c->sbh, SERVER_BUSY_READ, c);
if (ap_extended_status) {
ap_set_conn_count(c->sbh, r, c->keepalives);
}
- if ((r = ap_read_request(c))) {
+
+ rv = ap_read_request_ex(&r, c, block);
+ if (APR_STATUS_IS_EAGAIN(rv)) {
+ cs->state = CONN_STATE_ASYNC_WAITIO;
+ return OK;
+ }
+ if (rv == APR_SUCCESS) {
if (r->status == HTTP_OK) {
cs->state = CONN_STATE_HANDLER;
+
if (ap_extended_status) {
ap_set_conn_count(c->sbh, r, c->keepalives + 1);
}
ap_update_child_status(c->sbh, SERVER_BUSY_WRITE, r);
ap_process_async_request(r);
+
/* After the call to ap_process_request, the
* request pool may have been deleted. We set
* r=NULL here to ensure that any dereference
@@ -168,7 +185,8 @@ static int ap_process_http_async_connection(conn_rec *c)
}
if (cs->state != CONN_STATE_WRITE_COMPLETION &&
- cs->state != CONN_STATE_SUSPENDED) {
+ cs->state != CONN_STATE_SUSPENDED &&
+ cs->state != CONN_STATE_LINGER) {
/* Something went wrong; close the connection */
cs->state = CONN_STATE_LINGER;
}
@@ -246,7 +264,7 @@ static int ap_process_http_sync_connection(conn_rec *c)
static int ap_process_http_connection(conn_rec *c)
{
- if (async_mpm && !c->clogging_input_filters) {
+ if (mpm_is_async && !c->clogging_input_filters) {
return ap_process_http_async_connection(c);
}
else {
@@ -276,7 +294,7 @@ static void h1_pre_read_request(request_rec *r, conn_rec *c)
if (!r->main && !r->prev
&& !strcmp(AP_PROTOCOL_HTTP1, ap_get_protocol(c))) {
if (r->proxyreq == PROXYREQ_NONE) {
- ap_add_input_filter_handle(ap_h1_request_in_filter_handle,
+ ap_add_input_filter_handle(ap_h1_header_in_filter_handle,
NULL, r, r->connection);
}
ap_add_output_filter_handle(ap_h1_response_out_filter_handle,
@@ -343,9 +361,14 @@ static int http_send_options(request_rec *r)
static int http_post_config(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *s)
{
apr_uint64_t val;
- if (ap_mpm_query(AP_MPMQ_IS_ASYNC, &async_mpm) != APR_SUCCESS) {
- async_mpm = 0;
+
+ if (ap_mpm_query(AP_MPMQ_IS_ASYNC, &mpm_is_async) != APR_SUCCESS) {
+ mpm_is_async = 0;
}
+ if (ap_mpm_query(AP_MPMQ_CAN_WAITIO, &mpm_can_waitio) != APR_SUCCESS) {
+ mpm_can_waitio = 0;
+ }
+
ap_random_insecure_bytes(&val, sizeof(val));
ap_multipart_boundary = apr_psprintf(p, "%0" APR_UINT64_T_HEX_FMT, val);
@@ -369,8 +392,8 @@ static void register_hooks(apr_pool_t *p)
ap_http_input_filter_handle =
ap_register_input_filter("HTTP_IN", ap_http_filter,
NULL, AP_FTYPE_PROTOCOL);
- ap_h1_request_in_filter_handle =
- ap_register_input_filter("HTTP1_REQUEST_IN", ap_h1_request_in_filter,
+ ap_h1_header_in_filter_handle =
+ ap_register_input_filter("HTTP1_HEADER_IN", ap_h1_header_in_filter,
NULL, AP_FTYPE_PROTOCOL);
ap_h1_body_in_filter_handle =
ap_register_input_filter("HTTP1_BODY_IN", ap_h1_body_in_filter,
diff --git a/modules/http/http_filters.c b/modules/http/http_filters.c
index 426fe2fcb97..d7667c8c361 100644
--- a/modules/http/http_filters.c
+++ b/modules/http/http_filters.c
@@ -264,9 +264,10 @@ static apr_status_t read_chunked_trailers(http_ctx_t *ctx, ap_filter_t *f,
apr_bucket *e;
request_rec *r = f->r;
apr_table_t *trailers;
- apr_table_t *saved_headers_in = r->headers_in;
+ apr_table_t *saved_headers_in;
int saved_status = r->status;
+ saved_headers_in = r->headers_in;
trailers = apr_table_make(r->pool, 5);
r->status = HTTP_OK;
r->headers_in = trailers;
@@ -2174,18 +2175,34 @@ typedef struct h1_request_ctx {
const char *method;
const char *uri;
const char *protocol;
+
+ /* parsing context */
+ ap_getline_state_t *getline_state;
+ apr_bucket_brigade *tmp_bb;
+ int num_blank_lines;
} h1_request_ctx;
-static apr_status_t read_request_line(h1_request_ctx *ctx, apr_bucket_brigade *bb)
+static apr_status_t read_request_line(h1_request_ctx *ctx,
+ ap_filter_t *f, apr_read_type_e block,
+ apr_bucket_brigade *bb)
{
- apr_size_t len;
- int num_blank_lines = DEFAULT_LIMIT_BLANK_LINES;
- core_server_config *conf = ap_get_core_module_config(ctx->r->server->module_config);
+ request_rec *r = ctx->r;
+ apr_size_t max_size = r->server->limit_req_line + 2 + 1; /* + CRLF + \0 */
+ core_server_config *conf = ap_get_core_module_config(r->server->module_config);
int strict = (conf->http_conformance != AP_HTTP_CONFORMANCE_UNSAFE);
+ int flags = AP_GETLINE_ALLOC;
apr_status_t rv;
+ if (strict) {
+ flags |= AP_GETLINE_CRLF;
+ }
+ if (block == APR_NONBLOCK_READ) {
+ flags |= AP_GETLINE_NONBLOCK;
+ }
+
/* Read past empty lines until we get a real request line,
* a read error, the connection closes (EOF), or we timeout.
+ * Reentrance on EAGAIN is handled via ctx->getline_state.
*
* We skip empty lines because browsers have to tack a CRLF on to the end
* of POSTs to support old CERN webservers. But note that we may not
@@ -2199,52 +2216,35 @@ static apr_status_t read_request_line(h1_request_ctx *ctx, apr_bucket_brigade *b
* have to block during a read.
*/
do {
- /* ensure ap_rgetline allocates memory each time thru the loop
- * if there are empty lines
- */
- ctx->request_line = NULL;
- len = 0;
- rv = ap_rgetline(&ctx->request_line, (apr_size_t)(ctx->r->server->limit_req_line + 2),
- &len, ctx->r, strict ? AP_GETLINE_CRLF : 0, bb);
+ apr_size_t len = 0;
+ /* allocates memory each time thru the loop */
+ rv = ap_fgetline_ex(&ctx->request_line, max_size, &len, f, flags,
+ bb, &ctx->getline_state, r->pool);
if (rv != APR_SUCCESS) {
return rv;
}
- else if (len > 0) {
- /* got the line in ctx->r->the_request */
+ if (len > 0) {
+ /* got full line */
return APR_SUCCESS;
}
- } while (--num_blank_lines >= 0);
+ } while (--ctx->num_blank_lines >= 0);
+
/* too many blank lines */
return APR_EINVAL;
}
-static void sanitize_brigade(apr_bucket_brigade *bb)
-{
- apr_bucket *e, *next;
-
- for (e = APR_BRIGADE_FIRST(bb);
- e != APR_BRIGADE_SENTINEL(bb);
- e = next)
- {
- next = APR_BUCKET_NEXT(e);
- if (!APR_BUCKET_IS_METADATA(e) && e->length == 0) {
- apr_bucket_delete(e);
- }
- }
-}
-
-apr_status_t ap_h1_request_in_filter(ap_filter_t *f,
- apr_bucket_brigade *bb,
- ap_input_mode_t mode,
- apr_read_type_e block,
- apr_off_t readbytes)
+apr_status_t ap_h1_header_in_filter(ap_filter_t *f,
+ apr_bucket_brigade *bb,
+ ap_input_mode_t mode,
+ apr_read_type_e block,
+ apr_off_t readbytes)
{
request_rec *r = f->r;
- apr_bucket *e;
h1_request_ctx *ctx = f->ctx;
apr_status_t rv = APR_SUCCESS;
int http_status = HTTP_OK;
+ apr_bucket *e;
/* just get out of the way for things we don't want to handle. */
if (mode != AP_MODE_READBYTES && mode != AP_MODE_GETLINE) {
@@ -2255,15 +2255,23 @@ apr_status_t ap_h1_request_in_filter(ap_filter_t *f,
f->ctx = ctx = apr_pcalloc(r->pool, sizeof(*ctx));
ctx->r = r;
ctx->state = REQ_LINE;
+ ctx->num_blank_lines = DEFAULT_LIMIT_BLANK_LINES;
+ ctx->tmp_bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
}
- /* This filter needs to get out of the way of read_request_line() */
- ap_remove_input_filter(f);
-
- while (APR_SUCCESS == rv) {
+ for (;;) {
switch (ctx->state) {
case REQ_LINE:
- if ((rv = read_request_line(ctx, bb)) != APR_SUCCESS) {
+ rv = read_request_line(ctx, f->next, block, ctx->tmp_bb);
+ apr_brigade_cleanup(ctx->tmp_bb);
+
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE6, rv, r,
+ "reading request line");
+ rv = APR_EAGAIN;
+ goto cleanup;
+ }
+ if (rv != APR_SUCCESS) {
/* certain failures are answered with a HTTP error bucket
* and are terminal for parsing a request */
ctx->method = ctx->uri = "-";
@@ -2280,60 +2288,76 @@ apr_status_t ap_h1_request_in_filter(ap_filter_t *f,
else if (APR_STATUS_IS_EINVAL(rv)) {
http_status = HTTP_BAD_REQUEST;
}
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, rv, r,
+ "failed reading request line (status %d)",
+ http_status != HTTP_OK ? http_status : -1);
goto cleanup;
}
if (!ap_h1_tokenize_request_line(r, ctx->request_line,
- &ctx->method, &ctx->uri, &ctx->protocol)) {
+ &ctx->method, &ctx->uri,
+ &ctx->protocol)) {
http_status = HTTP_BAD_REQUEST;
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+ "failed tokenizing request line, "
+ "returning error bucket %d",
+ http_status);
goto cleanup;
}
+
/* got the request line and it looked to contain what we need */
ctx->state = REQ_HEADERS;
break;
case REQ_HEADERS:
- ap_get_mime_headers_core(r, bb);
- if (r->status != HTTP_OK) {
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00567)
- "request failed: error reading the headers");
- http_status = r->status;
+ rv = ap_get_mime_headers_ex(r, f->next, block, ctx->tmp_bb,
+ &ctx->getline_state);
+ apr_brigade_cleanup(ctx->tmp_bb);
+
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE6, rv, r,
+ "reading request headers");
+ goto cleanup;
+ }
+ if (rv != APR_SUCCESS || r->status != HTTP_OK) {
+ http_status = (r->status == HTTP_OK
+ ? HTTP_INTERNAL_SERVER_ERROR
+ : r->status);
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, r, APLOGNO(00567)
+ "request failed: error reading the headers (%i)",
+ http_status);
+ r->status = HTTP_OK;
goto cleanup;
}
- /* clear the brigade, as ap_get_mime_headers_core() leaves the last
- * empty line in there, insert the REQUEST bucket and return */
- apr_brigade_cleanup(bb);
+
e = ap_bucket_request_createn(ctx->method, ctx->uri,
ctx->protocol, r->headers_in,
r->pool, r->connection->bucket_alloc);
- /* reading may leave 0 length data buckets in the brigade,
- * get rid of those. */
- sanitize_brigade(bb);
- APR_BRIGADE_INSERT_HEAD(bb, e);
- ctx->state = REQ_BODY;
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, rv, r,
+ APR_BRIGADE_INSERT_TAIL(bb, e);
+
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
"http1 request and headers parsed: %s %s %s",
ctx->method, ctx->uri, ctx->protocol);
- goto cleanup;
-
- case REQ_BODY:
- /* we should not come here */
- AP_DEBUG_ASSERT(0);
- rv = ap_get_brigade(f->next, bb, mode, block, readbytes);
+ /* Got the header, done with this filter */
+ ap_remove_input_filter(f);
+ ctx->state = REQ_BODY;
goto cleanup;
case REQ_ERROR:
- default:
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+ "invalid request read while in error");
rv = APR_EINVAL;
goto cleanup;
+
+ default:
+ /* we should never get here */
+ ap_assert(0);
+ break;
}
- } /* while(APR_SUCCESS == rv) */
+ }
cleanup:
if (http_status != HTTP_OK) {
- ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
- "failed reading request line, returning error bucket %d", http_status);
- apr_brigade_cleanup(bb);
e = ap_bucket_error_create(http_status, NULL, r->pool,
f->c->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(bb, e);
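To illustrate the reentrancy this buys: a caller can now drive the header
filter without blocking and simply retry on EAGAIN, since all parsing
progress lives in f->ctx. A minimal sketch, assuming an event-driven
caller; my_read_request_head() and my_wait_for_readability() are
hypothetical names, not part of the patch:

    static apr_status_t my_read_request_head(ap_filter_t *f,
                                             apr_bucket_brigade *bb)
    {
        apr_status_t rv;

        do {
            apr_brigade_cleanup(bb);
            rv = ap_get_brigade(f, bb, AP_MODE_READBYTES,
                                APR_NONBLOCK_READ, 0);
            if (APR_STATUS_IS_EAGAIN(rv)) {
                /* Parsing state is retained in f->ctx, so coming back
                 * later with the same filter resumes where we left off. */
                my_wait_for_readability(f->c);  /* hypothetical */
            }
        } while (APR_STATUS_IS_EAGAIN(rv));

        return rv;  /* bb holds the REQUEST bucket on APR_SUCCESS */
    }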
diff --git a/modules/http2/h2_stream.c b/modules/http2/h2_stream.c
index ee87555f9f3..b050b4d962c 100644
--- a/modules/http2/h2_stream.c
+++ b/modules/http2/h2_stream.c
@@ -755,7 +755,7 @@ apr_status_t h2_stream_add_header(h2_stream *stream,
}
if (session->s->limit_req_fields > 0
- && stream->request_headers_added > session->s->limit_req_fields) {
+ && stream->request_headers_added >= session->s->limit_req_fields) {
/* already over limit, count this attempt, but do not take it in */
++stream->request_headers_added;
}
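The operator change is a genuine off-by-one fix; with a limit of N, the
old test only started rejecting at the (N+2)th header field. A worked
boundary, assuming limit_req_fields = 3 and three fields already added:

    /*   old: 3 >  3  -> false, a 4th field is taken in (limit exceeded)
     *   new: 3 >= 3  -> true,  the 4th is counted but not taken in   */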
diff --git a/modules/proxy/mod_proxy_http.c b/modules/proxy/mod_proxy_http.c
index bfeee868558..38da5b0f7f6 100644
--- a/modules/proxy/mod_proxy_http.c
+++ b/modules/proxy/mod_proxy_http.c
@@ -888,10 +888,8 @@ static apr_status_t ap_proxy_read_headers(request_rec *r, request_rec *rr,
tmp_bb = apr_brigade_create(r->pool, c->bucket_alloc);
while (1) {
- rc = ap_proxygetline(tmp_bb, buffer, size, rr,
- AP_GETLINE_FOLD | AP_GETLINE_NOSPC_EOL, &len);
-
-
+ const int flags = AP_GETLINE_FOLD_COL;
+ rc = ap_proxygetline(tmp_bb, buffer, size, rr, flags, &len);
if (rc != APR_SUCCESS) {
if (APR_STATUS_IS_ENOSPC(rc)) {
int trunc = (len > 128 ? 128 : len) / 2;
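AP_GETLINE_FOLD_COL (introduced in server/protocol.c below) folds
obs-fold continuation lines like AP_GETLINE_FOLD, but only accepts a
continuation once a ':' has been seen in the accumulated field; a fold
before any colon fails with APR_EINVAL. A hedged usage sketch, with the
buffer size illustrative:

    char *line = NULL;
    apr_size_t len = 0;
    apr_status_t rv;

    /* "X-Name: a\r\n" + " b\r\n"   -> "X-Name: a b" (accepted)
     * "no-colon-yet\r\n" + " b\r\n" -> APR_EINVAL    (rejected) */
    rv = ap_fgetline(&line, HUGE_STRING_LEN, &len, f,
                     AP_GETLINE_ALLOC | AP_GETLINE_FOLD_COL, bb, p);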
diff --git a/server/core.c b/server/core.c
index 4d5d569d93b..632af394d8f 100644
--- a/server/core.c
+++ b/server/core.c
@@ -5551,7 +5551,7 @@ static conn_rec *core_create_conn(apr_pool_t *ptrans, server_rec *s,
c->id = id;
c->bucket_alloc = alloc;
c->async_filter = sconf->async_filter;
-
+ c->keepalive = AP_CONN_UNKNOWN;
c->clogging_input_filters = 0;
if (sconf->conn_log_level) {
diff --git a/server/core_filters.c b/server/core_filters.c
index 0887603b9ab..2dbc5afbb83 100644
--- a/server/core_filters.c
+++ b/server/core_filters.c
@@ -142,13 +142,18 @@ apr_status_t ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
if (mode == AP_MODE_GETLINE) {
/* we are reading a single LF line, e.g. the HTTP headers */
rv = apr_brigade_split_line(b, ctx->bb, block, HUGE_STRING_LEN);
- /* We should treat EAGAIN here the same as we do for EOF (brigade is
- * empty). We do this by returning whatever we have read. This may
- * or may not be bogus, but is consistent (for now) with EOF logic.
+
+ /* To distinguish EAGAIN from EOS (for which apr_brigade_split_line()
+ * returns an empty brigade), return an empty brigade only for the
+ * former and APR_EOF for the latter.
*/
if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
rv = APR_SUCCESS;
}
+ else if (rv == APR_SUCCESS && APR_BRIGADE_EMPTY(b)) {
+ AP_DEBUG_ASSERT(APR_BRIGADE_EMPTY(ctx->bb));
+ rv = APR_EOF;
+ }
goto cleanup;
}
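With this change, callers of the core input filter in AP_MODE_GETLINE
can distinguish the three outcomes; a sketch of the caller-side logic
the new contract allows:

    rv = ap_get_brigade(f, bb, AP_MODE_GETLINE, APR_NONBLOCK_READ, 0);
    if (rv == APR_SUCCESS && APR_BRIGADE_EMPTY(bb)) {
        /* would block (former EAGAIN case): retry later */
    }
    else if (rv == APR_EOF) {
        /* connection closed before a (complete) line */
    }
    else if (rv == APR_SUCCESS) {
        /* bb holds line data, ending at LF when complete */
    }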
@@ -234,31 +239,43 @@ apr_status_t ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
AP_DEBUG_ASSERT(readbytes > 0);
- e = APR_BRIGADE_FIRST(ctx->bb);
- rv = apr_bucket_read(e, &str, &len, block);
- if (rv != APR_SUCCESS) {
- if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ do {
+ e = APR_BRIGADE_FIRST(ctx->bb);
+ rv = apr_bucket_read(e, &str, &len, block);
+ if (rv != APR_SUCCESS) {
/* getting EAGAIN for a blocking read is an error; not for a
- * non-blocking read, return an empty brigade. */
- rv = APR_SUCCESS;
+ * non-blocking read, return an empty brigade w/ APR_SUCCESS */
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ rv = APR_SUCCESS;
+ }
+ goto cleanup;
}
- goto cleanup;
- }
- else if (block == APR_BLOCK_READ && len == 0) {
- /* We wanted to read some bytes in blocking mode. We read
- * 0 bytes. Hence, we now assume we are EOS.
- *
- * When we are in normal mode, return an EOS bucket to the
- * caller.
- * When we are in speculative mode, leave ctx->bb empty, so
- * that the next call returns an EOS bucket.
- */
- apr_bucket_delete(e);
+ if (len > 0) {
+ break;
+ }
+ if (APR_BUCKET_IS_METADATA(e)) {
+ APR_BUCKET_REMOVE(e);
+ APR_BRIGADE_INSERT_TAIL(b, e);
+ }
+ else {
+ apr_bucket_delete(e);
+ }
+ } while (!APR_BRIGADE_EMPTY(ctx->bb));
- if (mode == AP_MODE_READBYTES) {
+ if (len == 0) {
+ /* We are at EOS.
+ * In normal blocking mode, return an EOS bucket.
+ * Otherwise it's not expected by the caller, so return APR_EOF
+ * directly.
+ */
+ AP_DEBUG_ASSERT(APR_BRIGADE_EMPTY(ctx->bb));
+ if (mode == AP_MODE_READBYTES && block == APR_BLOCK_READ) {
e = apr_bucket_eos_create(c->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(b, e);
}
+ else if (APR_BRIGADE_EMPTY(b)) {
+ rv = APR_EOF;
+ }
goto cleanup;
}
@@ -266,7 +283,7 @@ apr_status_t ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
if (len < readbytes) {
apr_size_t bucket_len;
- /* We already registered the data in e in len */
+ /* We already accounted for e in len */
e = APR_BUCKET_NEXT(e);
while ((len < readbytes) && (rv == APR_SUCCESS)
&& (e != APR_BRIGADE_SENTINEL(ctx->bb))) {
@@ -290,11 +307,11 @@ apr_status_t ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
}
}
}
- }
- /* We can only return at most what we read. */
- if (len < readbytes) {
- readbytes = len;
+ /* We can only return at most what we read. */
+ if (len < readbytes) {
+ readbytes = len;
+ }
}
rv = apr_brigade_partition(ctx->bb, readbytes, &e);
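The new loop generalizes the old single-bucket read: zero-length data
buckets are consumed, metadata buckets are passed through to the caller,
and only a fully drained brigade means EOS. Extracted as a standalone
sketch under those assumptions (the my_ prefix marks the illustrative
helper):

    static apr_status_t my_first_data_bucket(apr_bucket_brigade *in,
                                             apr_bucket_brigade *out,
                                             apr_read_type_e block,
                                             const char **str,
                                             apr_size_t *len)
    {
        apr_status_t rv;
        *len = 0;
        while (!APR_BRIGADE_EMPTY(in)) {
            apr_bucket *e = APR_BRIGADE_FIRST(in);
            rv = apr_bucket_read(e, str, len, block);
            if (rv != APR_SUCCESS)
                return rv;
            if (*len > 0)
                return APR_SUCCESS;       /* first real data */
            if (APR_BUCKET_IS_METADATA(e)) {
                APR_BUCKET_REMOVE(e);     /* pass FLUSH & co through */
                APR_BRIGADE_INSERT_TAIL(out, e);
            }
            else {
                apr_bucket_delete(e);     /* drop zero-length data */
            }
        }
        return APR_EOF;                   /* drained without data */
    }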
diff --git a/server/protocol.c b/server/protocol.c
index 9ac4e3fe929..e0334722cda 100644
--- a/server/protocol.c
+++ b/server/protocol.c
@@ -61,6 +61,10 @@
#undef APLOG_MODULE_INDEX
#define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
+#ifndef AP_ASCII_COLON
+#define AP_ASCII_COLON '\x3a'
+#endif
+
APR_HOOK_STRUCT(
APR_HOOK_LINK(pre_read_request)
APR_HOOK_LINK(post_read_request)
@@ -210,55 +214,66 @@ AP_DECLARE(apr_time_t) ap_rationalize_mtime(request_rec *r, apr_time_t mtime)
* If no LF is detected on the last line due to a dropped connection
* or a full buffer, that's considered an error.
*/
-static apr_status_t ap_fgetline_core(char **s, apr_size_t n,
- apr_size_t *read, ap_filter_t *f,
- int flags, apr_bucket_brigade *bb,
- apr_pool_t *p)
+enum folding_state_e {
+ NOT_FOLDING = 0,
+ FOLDING_FIND,
+ FOLDING_READ,
+ FOLDING_DONE,
+};
+struct ap_getline_state {
+ char *buf;
+ apr_size_t len;
+ apr_size_t max_size;
+ apr_size_t alloc_size;
+ apr_size_t folding_len;
+ enum folding_state_e folding_state;
+ unsigned int folding_col :1,
+ allocate :1,
+ reusable :1;
+};
+static apr_status_t ap_fgetline_core(ap_getline_state_t *state,
+ ap_filter_t *f, int flags,
+ apr_bucket_brigade *bb,
+ apr_pool_t *p,
+ int rec)
{
apr_status_t rv;
- apr_bucket *e;
- apr_size_t bytes_handled = 0, current_alloc = 0;
- char *pos, *last_char = *s;
- int do_alloc = (*s == NULL), saw_eos = 0;
+ apr_read_type_e block;
int fold = flags & AP_GETLINE_FOLD;
int crlf = flags & AP_GETLINE_CRLF;
+ int do_alloc = (flags & AP_GETLINE_ALLOC) || state->allocate;
int nospc_eol = flags & AP_GETLINE_NOSPC_EOL;
- int saw_eol = 0, saw_nospc = 0;
- apr_read_type_e block;
+ apr_status_t late_rv = APR_SUCCESS;
+ int seen_eol = 0, seen_nospc = 0;
+ apr_bucket *e;
- if (!n) {
+ state->reusable = 0; /* until further notice */
+
+ if (state->max_size == 0) {
/* Needs room for NUL byte at least */
- *read = 0;
return APR_BADARG;
}
block = (flags & AP_GETLINE_NONBLOCK) ? APR_NONBLOCK_READ
: APR_BLOCK_READ;
- /*
- * Initialize last_char as otherwise a random value will be compared
- * against APR_ASCII_LF at the end of the loop if bb only contains
- * zero-length buckets.
- */
- if (last_char)
- *last_char = '\0';
-
+ if (state->folding_state == FOLDING_FIND) {
+ /* Got EAGAIN while looking for a folding line, resume there */
+ goto find_folding;
+ }
do {
apr_brigade_cleanup(bb);
rv = ap_get_brigade(f, bb, AP_MODE_GETLINE, block, 0);
if (rv != APR_SUCCESS) {
goto cleanup;
}
-
- /* Something horribly wrong happened. Someone didn't block!
- * (this also happens at the end of each keepalive connection)
- * (this also happens when non-blocking is asked too, not that wrong)
- */
if (APR_BRIGADE_EMPTY(bb)) {
- if (block != APR_NONBLOCK_READ) {
+ if (block == APR_BLOCK_READ) {
+ /* Something horribly wrong happened. Someone didn't block! */
rv = APR_EGENERAL;
}
else {
+ /* A non-blocking read (which would block) gets us here */
rv = APR_EAGAIN;
}
goto cleanup;
@@ -271,10 +286,10 @@ static apr_status_t ap_fgetline_core(char **s, apr_size_t n,
const char *str;
apr_size_t len;
- /* If we see an EOS, don't bother doing anything more. */
+ /* APR_EOF on EOS (CRLF is missing) */
if (APR_BUCKET_IS_EOS(e)) {
- saw_eos = 1;
- break;
+ rv = APR_EOF;
+ goto cleanup;
}
rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
@@ -282,6 +297,27 @@ static apr_status_t ap_fgetline_core(char **s, apr_size_t n,
goto cleanup;
}
+ /* If folding, trim leading blanks */
+ if (state->folding_state == FOLDING_READ && len > 0) {
+ size_t i;
+ for (i = 0; i < len; ++i) {
+ const char c = str[i];
+ if (c != APR_ASCII_BLANK && c != APR_ASCII_TAB) {
+ break;
+ }
+ }
+ state->folding_len += i;
+ ap_assert(state->folding_len > 0);
+ str += i;
+ len -= i;
+
+ /* Fail if the line is composed of blanks only */
+ if ((len > 0 && str[0] == APR_ASCII_LF)
+ || (len > 1 && str[0] == APR_ASCII_CR
+ && str[1] == APR_ASCII_LF)) {
+ late_rv = APR_EINVAL;
+ }
+ }
if (len == 0) {
/* no use attempting a zero-byte alloc (hurts when
* using --with-efence --enable-pool-debug) or
@@ -290,11 +326,13 @@ static apr_status_t ap_fgetline_core(char **s, apr_size_t n,
continue;
}
- /* Would this overrun our buffer? If so, we'll die. */
- if (n < bytes_handled + len) {
+ /* Would this exceed the limit? If so, we'll die. */
+ if (state->len + state->folding_len + len >= state->max_size) {
+ apr_size_t read_len = state->len + state->folding_len;
+
/* Before we die, let's fill the buffer up to its limit (i.e.
* fall through with the remaining length, if any), setting
- * saw_eol on LF to stop the outer loop appropriately; we may
+ * seen_eol on LF to stop the outer loop appropriately; we may
* come back here once the buffer is filled (no LF seen), and
* either be done at that time or continue to wait for LF here
* if nospc_eol is set.
@@ -306,248 +344,299 @@ static apr_status_t ap_fgetline_core(char **s, apr_size_t n,
* we have to handle the case so that it's not returned to the
* caller as part of the truncated line (it's not!). This is
* easier to consider that LF is out of counting and thus fall
- * through with no error (saw_eol is set to 2 so that we later
+ * through with no error (seen_eol is set to 2 so that we later
* ignore LF handling already done here), while folding and
* nospc_eol logics continue to work (or fail) appropriately.
*/
- saw_eol = (str[len - 1] == APR_ASCII_LF);
- if (/* First time around */
- saw_eol && !saw_nospc
- /* Single LF completing the buffered CR, */
- && ((len == 1 && ((*s)[bytes_handled - 1] == APR_ASCII_CR))
- /* or trailing CRLF overuns by LF only */
- || (len > 1 && str[len - 2] == APR_ASCII_CR
- && n - bytes_handled + 1 == len))) {
- /* In both cases *last_char is (to be) the CR stripped by
- * later 'bytes_handled = last_char - *s'.
- */
- saw_eol = 2;
+ seen_eol = (str[len - 1] == APR_ASCII_LF);
+ if (!seen_eol
+ || seen_nospc
+ || read_len + len != state->max_size) {
+ /* Some data lost */
+ late_rv = APR_ENOSPC;
+ seen_nospc = 1;
+ }
+ else if ((len == 1
+ && state->len > 0
+ && state->buf[state->len - 1] == APR_ASCII_CR)
+ || (len > 1 && str[len - 2] == APR_ASCII_CR)) {
+ /* CR[LF] is to be stripped */
+ seen_eol = 2;
}
else {
- /* In any other case we'd lose data. */
- rv = APR_ENOSPC;
- saw_nospc = 1;
+ /* Single LF to be stripped (or fail if AP_GETLINE_CRLF) */
+ AP_DEBUG_ASSERT(seen_eol == 1);
}
- len = n - bytes_handled;
- if (!len) {
- if (saw_eol) {
- break;
- }
- if (nospc_eol) {
- continue;
- }
- goto cleanup;
+
+ if (read_len + 1 >= state->max_size) {
+ /* Full, check loop condition */
+ continue;
}
+
+ /* Fall through (fill buf up to len) */
+ len = state->max_size - read_len - 1;
}
/* Do we have to handle the allocation ourselves? */
if (do_alloc) {
+ apr_size_t more_len = len + (state->folding_state == FOLDING_READ);
+
/* We'll assume the common case where one bucket is enough. */
- if (!*s) {
- current_alloc = len;
- *s = apr_palloc(p, current_alloc + 1);
+ if (state->buf == NULL) {
+ state->alloc_size = more_len + 1;
+ state->buf = apr_palloc(p, state->alloc_size);
}
- else if (bytes_handled + len > current_alloc) {
+ else if (state->len + more_len >= state->alloc_size) {
/* Increase the buffer size */
- apr_size_t new_size = current_alloc * 2;
+ apr_size_t new_size;
char *new_buffer;
- if (bytes_handled + len > new_size) {
- new_size = (bytes_handled + len) * 2;
+ if (state->alloc_size >= state->max_size / 2) {
+ new_size = state->max_size;
}
+ else {
+ new_size = state->alloc_size * 2;
+ if (state->len + more_len >= new_size) {
+ new_size = state->len + more_len + 1;
+ }
+ }
+ ap_assert(new_size > state->len + more_len);
- new_buffer = apr_palloc(p, new_size + 1);
+ new_buffer = apr_palloc(p, new_size);
/* Copy what we already had. */
- memcpy(new_buffer, *s, bytes_handled);
- current_alloc = new_size;
- *s = new_buffer;
+ memcpy(new_buffer, state->buf, state->len);
+ state->alloc_size = new_size;
+ state->buf = new_buffer;
}
}
- /* Just copy the rest of the data to the end of the old buffer. */
- pos = *s + bytes_handled;
- memcpy(pos, str, len);
- last_char = pos + len - 1;
-
- /* We've now processed that new data - update accordingly. */
- bytes_handled += len;
+ if (state->folding_state == FOLDING_READ) {
+ /* Replace all blanks with a single one. */
+ state->buf[state->len++] = APR_ASCII_BLANK;
+ state->folding_state = FOLDING_DONE;
+ }
+ /* Just copy new data to the end of the buffer. */
+ memcpy(state->buf + state->len, str, len);
+ state->len += len;
}
/* If we got a full line of input, stop reading */
- if (last_char && (*last_char == APR_ASCII_LF)) {
- saw_eol = 1;
+ if (state->len && state->buf[state->len - 1] == APR_ASCII_LF) {
+ seen_eol = 1;
}
- } while (!saw_eol);
+ } while (!seen_eol && (!seen_nospc || nospc_eol));
- if (rv != APR_SUCCESS) {
- /* End of line after APR_ENOSPC above */
+ if (late_rv != APR_SUCCESS) {
+ rv = late_rv;
+ goto cleanup;
+ }
+ if (state->folding_state == FOLDING_READ) {
+ /* Folding is blank only */
+ rv = APR_EINVAL;
goto cleanup;
}
/* Now terminate the string at the end of the line;
* if the last-but-one character is a CR, terminate there.
- * LF is handled above (not accounted) when saw_eol == 2,
+ * LF is handled above (not accounted) when seen_eol == 2,
* the last char is CR to terminate at still.
*/
- if (saw_eol < 2) {
- if (last_char > *s && last_char[-1] == APR_ASCII_CR) {
- last_char--;
+ state->len--;
+ if (seen_eol != 2) {
+ if (state->len && state->buf[state->len - 1] == APR_ASCII_CR) {
+ state->len--;
}
else if (crlf) {
rv = APR_EINVAL;
goto cleanup;
}
}
- bytes_handled = last_char - *s;
- /* If we're folding, we have more work to do.
+ /* If we have to search for folding, we have more work to do.
+ * If folding already, let the (recursive) caller loop for the next
+ * folding line if any and thus issue terminal recursions only.
*
- * Note that if an EOS was seen, we know we can't have another line.
+ * Note that if an empty line or an EOS was seen, we know we can't have
+ * another line.
*/
- if (fold && bytes_handled && !saw_eos) {
+ if (fold && !state->folding_state && state->len) {
+ state->folding_state = FOLDING_FIND;
+find_folding:
+ flags &= ~AP_GETLINE_FOLD;
for (;;) {
const char *str;
apr_size_t len;
- char c;
-
- /* Clear the temp brigade for this filter read. */
- apr_brigade_cleanup(bb);
+ char c = 0;
/* We only care about the first byte. */
+ apr_brigade_cleanup(bb);
rv = ap_get_brigade(f, bb, AP_MODE_SPECULATIVE, block, 1);
if (rv != APR_SUCCESS) {
goto cleanup;
}
-
if (APR_BRIGADE_EMPTY(bb)) {
+ if (block != APR_NONBLOCK_READ) {
+ rv = APR_EGENERAL;
+ }
+ else {
+ rv = APR_EAGAIN;
+ }
break;
}
+ do {
+ e = APR_BRIGADE_FIRST(bb);
- e = APR_BRIGADE_FIRST(bb);
-
- /* If we see an EOS, don't bother doing anything more. */
- if (APR_BUCKET_IS_EOS(e)) {
- break;
- }
-
- rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
- if (rv != APR_SUCCESS) {
- apr_brigade_cleanup(bb);
- goto cleanup;
- }
-
- /* Found one, so call ourselves again to get the next line.
- *
- * FIXME: If the folding line is completely blank, should we
- * stop folding? Does that require also looking at the next
- * char?
- */
- /* When we call destroy, the buckets are deleted, so save that
- * one character we need. This simplifies our execution paths
- * at the cost of one character read.
- */
- c = *str;
- if (c == APR_ASCII_BLANK || c == APR_ASCII_TAB) {
- /* Do we have enough space? We may be full now. */
- if (bytes_handled >= n) {
- rv = APR_ENOSPC;
+ /* APR_EOF on EOS (CRLF is missing) */
+ if (APR_BUCKET_IS_EOS(e)) {
+ rv = APR_EOF;
goto cleanup;
}
- else {
- apr_size_t next_size, next_len;
- char *tmp;
- /* If we're doing the allocations for them, we have to
- * give ourselves a NULL and copy it on return.
- */
- if (do_alloc) {
- tmp = NULL;
- }
- else {
- tmp = last_char;
- }
-
- next_size = n - bytes_handled;
-
- rv = ap_fgetline_core(&tmp, next_size, &next_len, f,
- flags & ~AP_GETLINE_FOLD, bb, p);
- if (rv != APR_SUCCESS) {
- goto cleanup;
- }
-
- if (do_alloc && next_len > 0) {
- char *new_buffer;
- apr_size_t new_size = bytes_handled + next_len + 1;
-
- /* we need to alloc an extra byte for a null */
- new_buffer = apr_palloc(p, new_size);
+ rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
+ }
+ if (len > 0) {
+ c = *str;
+ break;
+ }
- /* Copy what we already had. */
- memcpy(new_buffer, *s, bytes_handled);
+ apr_bucket_delete(e);
+ } while (!APR_BRIGADE_EMPTY(bb));
- /* copy the new line, including the trailing null */
- memcpy(new_buffer + bytes_handled, tmp, next_len);
- *s = new_buffer;
- }
+ if (APR_BRIGADE_EMPTY(bb)) {
+ /* No useful data, continue reading */
+ continue;
+ }
+ if (c != APR_ASCII_BLANK && c != APR_ASCII_TAB) {
+ /* Not a continuation line */
+ state->folding_state = NOT_FOLDING;
+ state->folding_col = 0;
+ break;
+ }
- last_char += next_len;
- bytes_handled += next_len;
+ /* Found one; it is only allowed after a colon char */
+ if ((flags & AP_GETLINE_FOLD_COL) && !state->folding_col) {
+ if (!memchr(state->buf, AP_ASCII_COLON, state->len)) {
+ rv = APR_EINVAL;
+ goto cleanup;
}
+ state->folding_col = 1;
}
- else { /* next character is not tab or space */
- break;
+
+ /* Before folding, trim trailing blanks */
+ while (state->len
+ && (state->buf[state->len - 1] == APR_ASCII_BLANK
+ || state->buf[state->len - 1] == APR_ASCII_TAB)) {
+ state->folding_len++;
+ state->len--;
+ }
+
+ /* Call ourselves again to get the next line. */
+ state->folding_state = FOLDING_READ;
+ rv = ap_fgetline_core(state, f, flags, bb, p, 1);
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
}
+ state->folding_state = FOLDING_FIND;
}
}
cleanup:
- if (bytes_handled >= n) {
- bytes_handled = n - 1;
+ if (rec) {
+ /* On recursion, let the caller do the finalization */
+ return rv;
}
+ if (state->buf) {
+ apr_size_t len;
- *read = bytes_handled;
- if (*s) {
/* ensure the string is NUL terminated */
- (*s)[*read] = '\0';
+ state->buf[state->len] = '\0';
/* PR#43039: We shouldn't accept NULL bytes within the line */
- bytes_handled = strlen(*s);
- if (bytes_handled < *read) {
+ len = strlen(state->buf);
+ if (len < state->len) {
ap_log_data(APLOG_MARK, APLOG_DEBUG, ap_server_conf,
- "NULL bytes in header", *s, *read, 0);
- *read = bytes_handled;
+ "NULL bytes in header", state->buf, state->len, 0);
if (rv == APR_SUCCESS) {
rv = APR_EINVAL;
}
+ state->len = len;
}
}
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ state->reusable = 1;
+ rv = APR_EAGAIN;
+ }
+ apr_brigade_cleanup(bb);
return rv;
}
-AP_DECLARE(apr_status_t) ap_fgetline(char **s, apr_size_t n,
- apr_size_t *read, ap_filter_t *f,
- int flags, apr_bucket_brigade *bb,
- apr_pool_t *p)
+AP_DECLARE(apr_status_t) ap_fgetline_ex(char **s, apr_size_t n,
+ apr_size_t *read, ap_filter_t *f,
+ int flags, apr_bucket_brigade *bb,
+ ap_getline_state_t **state_p,
+ apr_pool_t *p)
{
apr_status_t rv;
-
- rv = ap_fgetline_core(s, n, read, f, flags, bb, p);
+ ap_getline_state_t *state = *state_p;
+#if APR_CHARSET_EBCDIC
+ apr_size_t prev_len = 0;
+#endif
+ if (!state || !state->reusable) {
+ if (!state) {
+ *state_p = state = apr_pcalloc(p, sizeof(*state));
+ }
+ else {
+ memset(state, 0, sizeof(*state));
+ }
+ if (*s && !(flags & AP_GETLINE_ALLOC)) {
+ state->buf = *s;
+ }
+ else {
+ state->allocate = 1;
+ *s = NULL;
+ }
+ state->max_size = n;
+ }
+#if APR_CHARSET_EBCDIC
+ else {
+ prev_len = state->len;
+ }
+#endif
+
+ rv = ap_fgetline_core(state, f, flags, bb, p, 0);
+
+ *s = state->buf;
+ *read = state->len;
#if APR_CHARSET_EBCDIC
/* On EBCDIC boxes, each complete http protocol input line needs to be
* translated into the code page used by the compiler. Since
* ap_fgetline_core uses recursion, we do the translation in a wrapper
* function to ensure that each input character gets translated only once.
*/
- if (*read) {
- ap_xlate_proto_from_ascii(*s, *read);
+ if (*read > prev_len) {
+ ap_xlate_proto_from_ascii(*s + prev_len, *read - prev_len);
}
#endif
return rv;
}
+AP_DECLARE(apr_status_t) ap_fgetline(char **s, apr_size_t n,
+ apr_size_t *read, ap_filter_t *f,
+ int flags, apr_bucket_brigade *bb,
+ apr_pool_t *p)
+{
+ ap_getline_state_t stack_state;
+ ap_getline_state_t *state = &stack_state;
+ state->reusable = 0;
+
+ return ap_fgetline_ex(s, n, read, f, flags, bb, &state, p);
+}
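A usage sketch for the reentrant variant: the state pointer must persist
across calls (ap_fgetline_ex() pool-allocates it on first use), and on
EAGAIN the partial line is retained in the state so the same call can be
repeated. my_wait_for_readability() is a hypothetical stand-in for the
caller's scheduling:

    char *line = NULL;
    apr_size_t len = 0;
    ap_getline_state_t *state = NULL;  /* allocated on first call */
    apr_status_t rv;

    do {
        rv = ap_fgetline_ex(&line, HUGE_STRING_LEN, &len, f,
                            AP_GETLINE_ALLOC | AP_GETLINE_NONBLOCK,
                            bb, &state, p);
        if (APR_STATUS_IS_EAGAIN(rv)) {
            /* Partial line kept in *state; call again with the
             * same state pointer once the connection is readable. */
            my_wait_for_readability(f->c);  /* hypothetical */
        }
    } while (APR_STATUS_IS_EAGAIN(rv));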
+
/* Same as ap_fgetline(), working on r's pool and protocol input filters.
* Pulls from r->proto_input_filters instead of r->input_filters for
* stricter protocol adherence and better input filter behavior during
@@ -557,22 +646,8 @@ AP_DECLARE(apr_status_t) ap_rgetline(char **s, apr_size_t n,
apr_size_t *read, request_rec *r,
int flags, apr_bucket_brigade *bb)
{
- apr_status_t rv;
-
- rv = ap_fgetline_core(s, n, read, r->proto_input_filters, flags,
- bb, r->pool);
-#if APR_CHARSET_EBCDIC
- /* On EBCDIC boxes, each complete http protocol input line needs to be
- * translated into the code page used by the compiler. Since
- * ap_fgetline_core uses recursion, we do the translation in a wrapper
- * function to ensure that each input character gets translated only once.
- */
- if (*read) {
- ap_xlate_proto_from_ascii(*s, *read);
- }
-#endif
-
- return rv;
+ return ap_fgetline(s, n, read, r->proto_input_filters,
+ flags, bb, r->pool);
}
AP_DECLARE(int) ap_getline(char *s, int n, request_rec *r, int flags)
@@ -790,30 +865,40 @@ static int table_do_fn_check_lengths(void *r_, const char *key,
return 0;
}
-AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb)
+AP_DECLARE(apr_status_t) ap_get_mime_headers_ex(request_rec *r,
+ ap_filter_t *f,
+ apr_read_type_e block,
+ apr_bucket_brigade *bb,
+ ap_getline_state_t **state_p)
{
- char *last_field = NULL;
- apr_size_t last_len = 0;
- apr_size_t alloc_len = 0;
- char *field;
- char *value;
- apr_size_t len;
- int fields_read = 0;
- char *tmp_field;
+ apr_status_t rv = APR_SUCCESS;
core_server_config *conf = ap_get_core_module_config(r->server->module_config);
int strict = (conf->http_conformance != AP_HTTP_CONFORMANCE_UNSAFE);
+ apr_size_t max_size = r->server->limit_req_fieldsize + 1;
+ int flags = AP_GETLINE_ALLOC | AP_GETLINE_FOLD_COL;
+ int fields_read = 0;
+
+ if (strict) {
+ flags |= AP_GETLINE_CRLF;
+ }
+ if (block == APR_NONBLOCK_READ) {
+ flags |= AP_GETLINE_NONBLOCK;
+ }
/*
* Read header lines until we get the empty separator line, a read error,
* the connection closes (EOF), reach the server limit, or we timeout.
*/
while(1) {
- apr_status_t rv;
-
- field = NULL;
- rv = ap_rgetline(&field, r->server->limit_req_fieldsize + 2,
- &len, r, strict ? AP_GETLINE_CRLF : 0, bb);
+ char *field = NULL;
+ apr_size_t len = 0;
+ /* max_size + 2 for CRLF */
+ rv = ap_fgetline_ex(&field, max_size + 2, &len, f, flags, bb,
+ state_p, r->pool);
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ goto cleanup;
+ }
if (rv != APR_SUCCESS) {
if (APR_STATUS_IS_TIMEUP(rv)) {
r->status = HTTP_REQUEST_TIME_OUT;
@@ -822,7 +907,7 @@ AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb
r->status = HTTP_BAD_REQUEST;
}
- /* ap_rgetline returns APR_ENOSPC if it fills up the buffer before
+ /* ap_fgetline returns APR_ENOSPC if it fills up the buffer before
* finding the end-of-line. This is only going to happen if it
* exceeds the configured limit for a field size.
*/
@@ -837,7 +922,12 @@ AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb
(field) ? field_name_len(field) : 0,
(field) ? field : "");
}
- return;
+ goto cleanup;
+ }
+
+ /* Found the terminating empty end-of-headers line, stop. */
+ if (len == 0) {
+ break;
}
/* For all header values, and all obs-fold lines, the presence of
@@ -849,82 +939,11 @@ AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb
field[--len] = '\0';
}
- if (*field == '\t' || *field == ' ') {
-
- /* Append any newly-read obs-fold line onto the preceding
- * last_field line we are processing
- */
- apr_size_t fold_len;
-
- if (last_field == NULL) {
- r->status = HTTP_BAD_REQUEST;
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(03442)
- "Line folding encountered before first"
- " header line");
- return;
- }
-
- if (field[1] == '\0') {
- r->status = HTTP_BAD_REQUEST;
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(03443)
- "Empty folded line encountered");
- return;
- }
-
- /* Leading whitespace on an obs-fold line can be
- * similarly discarded */
- while (field[1] == '\t' || field[1] == ' ') {
- ++field; --len;
- }
-
- /* This line is a continuation of the preceding line(s),
- * so append it to the line that we've set aside.
- * Note: this uses a power-of-two allocator to avoid
- * doing O(n) allocs and using O(n^2) space for
- * continuations that span many many lines.
- */
- fold_len = last_len + len + 1; /* trailing null */
-
- if (fold_len >= (apr_size_t)(r->server->limit_req_fieldsize)) {
- r->status = HTTP_BAD_REQUEST;
- /* report what we have accumulated so far before the
- * overflow (last_field) as the field with the problem
- */
- apr_table_setn(r->notes, "error-notes",
- "Size of a request header field "
- "exceeds server limit.");
- ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00562)
- "Request header exceeds LimitRequestFieldSize "
- "after folding: %.*s",
- field_name_len(last_field), last_field);
- return;
- }
-
- if (fold_len > alloc_len) {
- char *fold_buf;
- alloc_len += alloc_len;
- if (fold_len > alloc_len) {
- alloc_len = fold_len;
- }
- fold_buf = (char *)apr_palloc(r->pool, alloc_len);
- memcpy(fold_buf, last_field, last_len);
- last_field = fold_buf;
- }
- memcpy(last_field + last_len, field, len +1); /* +1 for nul */
- /* Replace obs-fold w/ SP per RFC 7230 3.2.4 */
- last_field[last_len] = ' ';
- last_len += len;
-
- /* We've appended this obs-fold line to last_len, proceed to
- * read the next input line
- */
- continue;
- }
- else if (last_field != NULL) {
+ {
+ char *value;
- /* Process the previous last_field header line with all obs-folded
- * segments already concatenated (this is not operating on the
- * most recently read input line).
+ /* Process the header line with all obs-folded segments already
+ * concatenated.
*/
if (r->server->limit_req_fields
@@ -936,37 +955,40 @@ AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb
ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00563)
"Number of request headers exceeds "
"LimitRequestFields");
- return;
+ rv = APR_ENOSPC;
+ goto cleanup;
}
- if (!strict)
- {
+ if (!strict) {
/* Not Strict ('Unsafe' mode), using the legacy parser */
- if (!(value = strchr(last_field, ':'))) { /* Find ':' or */
+ if (!(value = strchr(field, ':'))) { /* Find ':' or */
r->status = HTTP_BAD_REQUEST; /* abort bad request */
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00564)
"Request header field is missing ':' "
"separator: %.*s", (int)LOG_NAME_MAX_LEN,
- last_field);
- return;
+ field);
+ rv = APR_EINVAL;
+ goto cleanup;
}
- if (value == last_field) {
+ if (value == field) {
r->status = HTTP_BAD_REQUEST;
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(03453)
"Request header field name was empty");
- return;
+ rv = APR_EINVAL;
+ goto cleanup;
}
*value++ = '\0'; /* NUL-terminate at colon */
- if (strpbrk(last_field, "\t\n\v\f\r ")) {
+ if (strpbrk(field, "\t\n\v\f\r ")) {
r->status = HTTP_BAD_REQUEST;
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(03452)
"Request header field name presented"
" invalid whitespace");
- return;
+ rv = APR_EINVAL;
+ goto cleanup;
}
while (*value == ' ' || *value == '\t') {
@@ -978,64 +1000,51 @@ AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(03451)
"Request header field value presented"
" bad whitespace");
- return;
+ rv = APR_EINVAL;
+ goto cleanup;
}
}
- else /* Using strict RFC7230 parsing */
- {
+ else {
+ /* Using strict RFC7230 parsing */
+
/* Ensure valid token chars before ':' per RFC 7230 3.2.4 */
- value = (char *)ap_scan_http_token(last_field);
- if ((value == last_field) || *value != ':') {
+ value = (char *)ap_scan_http_token(field);
+ if ((value == field) || *value != ':') {
r->status = HTTP_BAD_REQUEST;
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(02426)
"Request header field name is malformed: "
- "%.*s", (int)LOG_NAME_MAX_LEN, last_field);
- return;
+ "%.*s", (int)LOG_NAME_MAX_LEN, field);
+ rv = APR_EINVAL;
+ goto cleanup;
}
- *value++ = '\0'; /* NUL-terminate last_field name at ':' */
+ *value++ = '\0'; /* NUL-terminate field name at ':' */
while (*value == ' ' || *value == '\t') {
++value; /* Skip LWS of value */
}
- /* Find invalid, non-HT ctrl char, or the trailing NULL */
- tmp_field = (char *)ap_scan_http_field_content(value);
-
/* Reject value for all garbage input (CTRLs excluding HT)
* e.g. only VCHAR / SP / HT / obs-text are allowed per
* RFC7230 3.2.6 - leave all more explicit rule enforcement
* for specific header handler logic later in the cycle
*/
- if (*tmp_field != '\0') {
+ if (*ap_scan_http_field_content(value) != '\0') {
r->status = HTTP_BAD_REQUEST;
ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(02427)
"Request header value is malformed: "
"%.*s", (int)LOG_NAME_MAX_LEN, value);
- return;
+ rv = APR_EINVAL;
+ goto cleanup;
}
}
- apr_table_addn(r->headers_in, last_field, value);
+ apr_table_addn(r->headers_in, field, value);
- /* This last_field header is now stored in headers_in,
+ /* This field header is now stored in headers_in,
* resume processing of the current input line.
*/
}
-
- /* Found the terminating empty end-of-headers line, stop. */
- if (len == 0) {
- break;
- }
-
- /* Keep track of this new header line so that we can extend it across
- * any obs-fold or parse it on the next loop iteration. We referenced
- * our previously allocated buffer in r->headers_in,
- * so allocate a fresh buffer if required.
- */
- alloc_len = 0;
- last_field = field;
- last_len = len;
}
/* Combine multiple message-header fields with the same
@@ -1045,14 +1054,25 @@ AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb
/* enforce LimitRequestFieldSize for merged headers */
apr_table_do(table_do_fn_check_lengths, r, r->headers_in, NULL);
+
+cleanup:
+ apr_brigade_cleanup(bb);
+ return rv;
+}
+
+AP_DECLARE(void) ap_get_mime_headers_core(request_rec *r, apr_bucket_brigade *bb)
+{
+ ap_getline_state_t *state = NULL;
+ (void)ap_get_mime_headers_ex(r, r->proto_input_filters, APR_BLOCK_READ,
+ bb, &state);
}
AP_DECLARE(void) ap_get_mime_headers(request_rec *r)
{
- apr_bucket_brigade *tmp_bb;
- tmp_bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
+ conn_rec *c = r->connection;
+ apr_bucket_brigade *tmp_bb = ap_acquire_brigade(c);
ap_get_mime_headers_core(r, tmp_bb);
- apr_brigade_destroy(tmp_bb);
+ ap_release_brigade(c, tmp_bb);
}
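ap_acquire_brigade()/ap_release_brigade() recycle a per-connection
brigade rather than creating and destroying one per call; the pattern
used above, sketched with the work elided:

    apr_bucket_brigade *bb = ap_acquire_brigade(c);
    /* ... use bb ... */
    ap_release_brigade(c, bb);  /* hand it back for reuse */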
AP_DECLARE(request_rec *) ap_create_request(conn_rec *conn)
@@ -1305,23 +1325,42 @@ AP_DECLARE(int) ap_assign_request_line(request_rec *r,
AP_DECLARE(request_rec *) ap_read_request(conn_rec *conn)
{
+ request_rec *r = NULL;
+ (void)ap_read_request_ex(&r, conn, APR_BLOCK_READ);
+ return r;
+}
+
+AP_DECLARE(apr_status_t) ap_read_request_ex(request_rec **out_r, conn_rec *conn,
+ apr_read_type_e block)
+{
+ apr_status_t rv;
int access_status;
apr_bucket_brigade *tmp_bb;
- apr_bucket *e, *bdata = NULL, *berr = NULL;
+ apr_bucket *e, *bdata = NULL;
+ ap_bucket_error *berr = NULL;
ap_bucket_request *breq = NULL;
const char *method, *uri, *protocol;
apr_table_t *headers;
- apr_status_t rv;
-
- request_rec *r = ap_create_request(conn);
+ request_rec *r;
- tmp_bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
- conn->keepalive = AP_CONN_UNKNOWN;
+ r = conn->partial_request;
+ if (conn->keepalive == AP_CONN_KEEPALIVE) {
+ conn->keepalive = AP_CONN_UNKNOWN;
+ }
+ if (!r) {
+ r = ap_create_request(conn);
+ ap_run_pre_read_request(r, conn);
+ r->request_time = apr_time_now();
+ }
- ap_run_pre_read_request(r, conn);
+ tmp_bb = ap_acquire_brigade(conn);
- r->request_time = apr_time_now();
- rv = ap_get_brigade(r->proto_input_filters, tmp_bb, AP_MODE_READBYTES, APR_BLOCK_READ, 0);
+ rv = ap_get_brigade(r->proto_input_filters, tmp_bb, AP_MODE_READBYTES, block, 0);
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ conn->partial_request = r;
+ r = NULL;
+ goto done;
+ }
if (rv != APR_SUCCESS || APR_BRIGADE_EMPTY(tmp_bb)) {
/* Not worth dying with. */
conn->keepalive = AP_CONN_CLOSE;
@@ -1337,7 +1376,7 @@ AP_DECLARE(request_rec *) ap_read_request(conn_rec *conn)
if (!breq) breq = e->data;
}
else if (AP_BUCKET_IS_ERROR(e)) {
- if (!berr) berr = e;
+ if (!berr) berr = e->data;
}
else if (!APR_BUCKET_IS_METADATA(e) && e->length != 0) {
if (!bdata) bdata = e;
@@ -1345,16 +1384,11 @@ AP_DECLARE(request_rec *) ap_read_request(conn_rec *conn)
}
}
- if (!breq && !berr) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10389)
- "request failed: neither request bucket nor error at start of input");
- access_status = HTTP_INTERNAL_SERVER_ERROR;
- goto die_unusable_input;
- }
-
+ /* If there is a request, we always process it, as it defines
+ * the context in which a potential error bucket is handled. */
if (breq) {
- /* If there is a request, we always process it, as it defines
- * the context in which a potential error bucket is handled. */
+ conn->partial_request = NULL;
+
if (apr_pool_is_ancestor(r->pool, breq->pool)) {
method = breq->method;
uri = breq->uri;
@@ -1369,8 +1403,7 @@ AP_DECLARE(request_rec *) ap_read_request(conn_rec *conn)
}
if (!method || !uri || !protocol) {
- access_status = berr? ((ap_bucket_error *)(berr->data))->status :
- HTTP_INTERNAL_SERVER_ERROR;
+ access_status = berr ? berr->status : HTTP_INTERNAL_SERVER_ERROR;
goto die_unusable_input;
}
@@ -1414,20 +1447,31 @@ AP_DECLARE(request_rec *) ap_read_request(conn_rec *conn)
goto ignore;
}
}
-
if (berr) {
- access_status = ((ap_bucket_error *)(berr->data))->status;
+ /* APLOG_ERR already raised by filters (eventually). */
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(10467)
+ "request failed: error %i at start of input",
+ berr->status);
+ access_status = berr->status;
goto die_unusable_input;
}
- else if (bdata) {
+ if (!breq) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10389)
+ "request failed: neither request bucket nor error "
+ "at start of input");
+ access_status = HTTP_INTERNAL_SERVER_ERROR;
+ goto die_unusable_input;
+ }
+ if (bdata) {
/* Since processing of a request body depends on knowing the request, we
* cannot handle any data here. For example, chunked-encoding filters are
* added after the request is read, so any data buckets here will not
* have been de-chunked.
*/
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10391)
- "request failed: seeing DATA bucket(len=%d) of request "
- "body, too early to process", (int)bdata->length);
+ "request failed: seeing DATA bucket (len=%" APR_SIZE_T_FMT ") "
+ "of request body, too early to process",
+ bdata->length);
access_status = HTTP_INTERNAL_SERVER_ERROR;
goto die_unusable_input;
}
@@ -1480,7 +1524,9 @@ AP_DECLARE(request_rec *) ap_read_request(conn_rec *conn)
AP_READ_REQUEST_SUCCESS((uintptr_t)r, (char *)r->method,
(char *)r->uri, (char *)r->server->defn_name,
r->status);
- return r;
+done:
+ ap_release_brigade(conn, tmp_bb);
+ return (*out_r = r) ? APR_SUCCESS : APR_EAGAIN;
/* Everything falls through on failure */
@@ -1523,9 +1569,10 @@ AP_DECLARE(request_rec *) ap_read_request(conn_rec *conn)
}
ignore:
- r = NULL;
+ ap_release_brigade(conn, tmp_bb);
+ *out_r = conn->partial_request = r = NULL;
AP_READ_REQUEST_FAILURE((uintptr_t)r);
- return NULL;
+ return APR_EGENERAL;
}
AP_DECLARE(int) ap_post_read_request(request_rec *r)
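The new entry point makes request reading resumable across the event
loop; a sketch of the intended caller-side usage, with the scheduling
left illustrative:

    request_rec *r = NULL;
    apr_status_t rv = ap_read_request_ex(&r, conn, APR_NONBLOCK_READ);
    if (APR_STATUS_IS_EAGAIN(rv)) {
        /* Partial request parked in conn->partial_request; poll the
         * connection and call ap_read_request_ex() again later. */
    }
    else if (rv == APR_SUCCESS) {
        /* r is complete, process it */
    }
    else {
        /* APR_EGENERAL: unusable input, already answered or ignored */
    }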
From 6eee3f3c338292f4782d71bd07094e90edfe18d7 Mon Sep 17 00:00:00 2001
From: ylavic
Date: Thu, 11 Jul 2024 15:24:36 +0200
Subject: [PATCH 21/22] mod_proxy,mpm_event: Replace
ap_mpm_register_poll_callback*() by ap_mpm_poll_suspended() to avoid races.
---
include/ap_mmn.h | 8 +-
include/ap_mpm.h | 51 +--
include/httpd.h | 2 +-
include/mpm_common.h | 18 +-
modules/http/http_core.c | 20 +-
modules/proxy/mod_proxy_http.c | 266 ++++++++++------
modules/proxy/mod_proxy_wstunnel.c | 220 +++++++------
modules/proxy/proxy_util.c | 2 +-
server/mpm/event/event.c | 487 ++++++++++++++++-------------
server/mpm_common.c | 39 +--
server/mpm_fdqueue.h | 1 -
11 files changed, 605 insertions(+), 509 deletions(-)
diff --git a/include/ap_mmn.h b/include/ap_mmn.h
index fb8f4512d47..aac4e1a3401 100644
--- a/include/ap_mmn.h
+++ b/include/ap_mmn.h
@@ -735,14 +735,18 @@
* ap_check_output_pending()
* 20211221.27 (2.5.1-dev) Add min_connection_timeout hook and
* ap_get_connection_timeout()
+ * 20211221.28 (2.5.1-dev) Add ap_mpm_poll_suspended() and
+ * AP_MPMQ_CAN_POLL_SUSPENDED
+ * 20240701.0 (2.5.1-dev) Axe ap_mpm_register_poll_callback and
+ * ap_mpm_register_poll_callback_timeout
*/
#define MODULE_MAGIC_COOKIE 0x41503235UL /* "AP25" */
#ifndef MODULE_MAGIC_NUMBER_MAJOR
-#define MODULE_MAGIC_NUMBER_MAJOR 20211221
+#define MODULE_MAGIC_NUMBER_MAJOR 20240701
#endif
-#define MODULE_MAGIC_NUMBER_MINOR 27 /* 0...n */
+#define MODULE_MAGIC_NUMBER_MINOR 0 /* 0...n */
/**
* Determine if the server's current MODULE_MAGIC_NUMBER is at least a
diff --git a/include/ap_mpm.h b/include/ap_mpm.h
index f2fd436d508..9a7ec6eeaa3 100644
--- a/include/ap_mpm.h
+++ b/include/ap_mpm.h
@@ -184,6 +184,8 @@ AP_DECLARE(apr_status_t) ap_os_create_privileged_process(
#define AP_MPMQ_CAN_POLL 18
/** MPM supports CONN_STATE_ASYNC_WAITIO */
#define AP_MPMQ_CAN_WAITIO 19
+/** MPM implements the poll_suspended hook */
+#define AP_MPMQ_CAN_POLL_SUSPENDED 20
/** @} */
/**
@@ -206,54 +208,13 @@ typedef void (ap_mpm_callback_fn_t)(void *baton);
/* only added support in the Event MPM.... check for APR_ENOTIMPL */
AP_DECLARE(apr_status_t) ap_mpm_resume_suspended(conn_rec *c);
/* only added support in the Event MPM.... check for APR_ENOTIMPL */
+AP_DECLARE(apr_status_t) ap_mpm_poll_suspended(conn_rec *c, apr_pool_t *p,
+ const apr_array_header_t *pfds,
+ apr_interval_time_t timeout);
+/* only added support in the Event MPM.... check for APR_ENOTIMPL */
AP_DECLARE(apr_status_t) ap_mpm_register_timed_callback(
apr_time_t t, ap_mpm_callback_fn_t *cbfn, void *baton);
-/**
- * Register a callback on the readability or writability on a group of
- * sockets/pipes.
- * @param p Pool used by the MPM for its internal allocations
- * @param pfds Array of apr_pollfd_t
- * @param cbfn The callback function
- * @param baton userdata for the callback function
- * @return APR_SUCCESS if all sockets/pipes could be added to a pollset,
- * APR_ENOTIMPL if no asynch support, or an apr_pollset_add error.
- * @remark When activity is found on any 1 socket/pipe in the list, all are removed
- * from the pollset and only 1 callback is issued.
- * @remark The passed in pool can be cleared by cbfn and tofn when called back,
- * it retains no MPM persistent data and won't be used until the next call
- * to ap_mpm_register_poll_callback[_timeout].
- */
-
-AP_DECLARE(apr_status_t) ap_mpm_register_poll_callback(
- apr_pool_t *p, const apr_array_header_t *pfds,
- ap_mpm_callback_fn_t *cbfn, void *baton);
-
-/**
- * Register a callback on the readability or writability on a group of sockets/pipes,
- * with a timeout.
- * @param p Pool used by the MPM for its internal allocations
- * @param pfds Array of apr_pollfd_t
- * @param cbfn The callback function
- * @param tofn The callback function if the timeout expires
- * @param baton userdata for the callback function
- * @param timeout timeout for I/O in microseconds, unlimited if <= 0
- * @return APR_SUCCESS if all sockets/pipes could be added to a pollset,
- * APR_ENOTIMPL if no asynch support, or an apr_pollset_add error.
- * @remark When activity is found on any 1 socket/pipe in the list, all are removed
- * from the pollset and only 1 callback is issued.
- * @remark For each call, only one of tofn or cbfn will be called, never both.
- * @remark The passed in pool can be cleared by cbfn and tofn when called back,
- * it retains no MPM persistent data and won't be used until the next call
- * to ap_mpm_register_poll_callback[_timeout].
- */
-
-AP_DECLARE(apr_status_t) ap_mpm_register_poll_callback_timeout(
- apr_pool_t *p, const apr_array_header_t *pfds,
- ap_mpm_callback_fn_t *cbfn, ap_mpm_callback_fn_t *tofn,
- void *baton, apr_time_t timeout);
-
-
typedef enum mpm_child_status {
MPM_CHILD_STARTED,
MPM_CHILD_EXITED,
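A usage sketch of the new call from a module's suspend_connection hook,
in the style mod_proxy_http adopts below; my_req_t, my_module and their
fields are illustrative only:

    static void my_suspend_connection(conn_rec *c, request_rec *r)
    {
        my_req_t *req = ap_get_module_config(r->request_config,
                                             &my_module);

        /* Hand the pollfds to the MPM; the resume_connection hook
         * runs on I/O activity or when the timeout expires.  Only
         * the Event MPM implements this (APR_ENOTIMPL otherwise). */
        ap_mpm_poll_suspended(c, req->async_pool, req->pfds,
                              req->idle_timeout);
    }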
diff --git a/include/httpd.h b/include/httpd.h
index ae08740b227..931f5fff49a 100644
--- a/include/httpd.h
+++ b/include/httpd.h
@@ -1334,7 +1334,7 @@ typedef enum {
CONN_STATE_PROCESSING, /* Processed by process_connection hooks */
CONN_STATE_HANDLER, /* Processed by the modules handlers */
CONN_STATE_WRITE_COMPLETION, /* Flushed by the MPM before entering CONN_STATE_KEEPALIVE */
- CONN_STATE_SUSPENDED, /* Suspended in the MPM until ap_run_resume_suspended() */
+ CONN_STATE_SUSPENDED, /* Suspended from the MPM until ap_run_resume_suspended() */
CONN_STATE_LINGER, /* MPM flushes then closes the connection with lingering */
CONN_STATE_LINGER_NORMAL, /* MPM has started lingering close with normal timeout */
CONN_STATE_LINGER_SHORT, /* MPM has started lingering close with short timeout */
diff --git a/include/mpm_common.h b/include/mpm_common.h
index 34c61e2a6c2..43320b2b5c9 100644
--- a/include/mpm_common.h
+++ b/include/mpm_common.h
@@ -422,22 +422,12 @@ AP_DECLARE_HOOK(int, mpm_query, (int query_code, int *result, apr_status_t *rv))
AP_DECLARE_HOOK(apr_status_t, mpm_register_timed_callback,
(apr_time_t t, ap_mpm_callback_fn_t *cbfn, void *baton))
-/**
- * register the specified callback
- * @ingroup hooks
- */
-AP_DECLARE_HOOK(apr_status_t, mpm_register_poll_callback,
- (apr_pool_t *p, const apr_array_header_t *pds,
- ap_mpm_callback_fn_t *cbfn, void *baton))
-
-/* register the specified callback, with timeout
+/** Put a suspended connection's pollfds into the MPM's pollset
* @ingroup hooks
- *
*/
-AP_DECLARE_HOOK(apr_status_t, mpm_register_poll_callback_timeout,
- (apr_pool_t *p, const apr_array_header_t *pds,
- ap_mpm_callback_fn_t *cbfn, ap_mpm_callback_fn_t *tofn,
- void *baton, apr_time_t timeout))
+AP_DECLARE_HOOK(apr_status_t, mpm_poll_suspended,
+ (conn_rec *c, apr_pool_t *p, const apr_array_header_t *pfds,
+ apr_interval_time_t timeout))
/** Resume the suspended connection
* @ingroup hooks
diff --git a/modules/http/http_core.c b/modules/http/http_core.c
index 7e9f82f87dd..92a472d3fa7 100644
--- a/modules/http/http_core.c
+++ b/modules/http/http_core.c
@@ -182,20 +182,22 @@ static int ap_process_http_async_connection(conn_rec *c)
* of nondeterministic failures later.
*/
r = NULL;
- }
- if (cs->state != CONN_STATE_WRITE_COMPLETION &&
- cs->state != CONN_STATE_SUSPENDED &&
- cs->state != CONN_STATE_LINGER) {
- /* Something went wrong; close the connection */
- cs->state = CONN_STATE_LINGER;
+ switch (cs->state) {
+ case CONN_STATE_WRITE_COMPLETION:
+ case CONN_STATE_SUSPENDED:
+ case CONN_STATE_LINGER:
+ return OK;
+ default:
+ /* Unexpected, close */
+ break;
+ }
}
}
- else { /* ap_read_request failed - client may have closed */
- cs->state = CONN_STATE_LINGER;
- }
}
+ /* Something went wrong; close the connection */
+ cs->state = CONN_STATE_LINGER;
return OK;
}
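The switch enumerates the only states a handler may legitimately leave
behind (per the conn_state_e comments in httpd.h); everything else is
coerced to LINGER:

    /* CONN_STATE_WRITE_COMPLETION  MPM flushes, then keepalive
     * CONN_STATE_SUSPENDED         parked until ap_run_resume_suspended()
     * CONN_STATE_LINGER            MPM flushes, then closes
     * anything else                treated as an error -> LINGER     */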
diff --git a/modules/proxy/mod_proxy_http.c b/modules/proxy/mod_proxy_http.c
index 38da5b0f7f6..66a66af7949 100644
--- a/modules/proxy/mod_proxy_http.c
+++ b/modules/proxy/mod_proxy_http.c
@@ -19,9 +19,12 @@
#include "mod_proxy.h"
#include "ap_regex.h"
#include "ap_mpm.h"
+#include "mpm_common.h"
module AP_MODULE_DECLARE_DATA proxy_http_module;
+static int mpm_can_poll_suspended = 0;
+
static int (*ap_proxy_clear_connection_fn)(request_rec *r, apr_table_t *headers) =
NULL;
@@ -275,12 +278,6 @@ static void add_cl(apr_pool_t *p,
#define MAX_MEM_SPOOL 16384
-typedef enum {
- PROXY_HTTP_REQ_HAVE_HEADER = 0,
-
- PROXY_HTTP_TUNNELING
-} proxy_http_state;
-
typedef enum {
RB_INIT = 0,
RB_STREAM_CL,
@@ -307,7 +304,6 @@ typedef struct {
char *old_cl_val, *old_te_val;
apr_off_t cl_val;
- proxy_http_state state;
rb_methods rb_method;
const char *upgrade;
@@ -316,108 +312,148 @@ typedef struct {
apr_pool_t *async_pool;
apr_interval_time_t idle_timeout;
- unsigned int can_go_async :1,
+ unsigned int can_suspend :1,
do_100_continue :1,
prefetch_nonblocking :1,
- force10 :1;
+ force10 :1,
+ suspended :1,
+ upgraded :1;
} proxy_http_req_t;
-static void proxy_http_async_finish(proxy_http_req_t *req)
+static int proxy_http_tunnel_pump(proxy_http_req_t *req)
+{
+ int status = ap_proxy_tunnel_run(req->tunnel);
+ if (status == HTTP_GATEWAY_TIME_OUT) {
+ if (!req->can_suspend) {
+ /* ap_proxy_tunnel_run() didn't log this */
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, req->r, APLOGNO()
+ "proxy: %s tunneling timed out",
+ req->proto);
+ }
+ else {
+ status = SUSPENDED;
+ }
+ }
+ return status;
+}
+
+/* The backend and SUSPENDED client connections are done,
+ * release them (the latter back to the MPM).
+ */
+static void proxy_http_async_done(proxy_http_req_t *req, int cancelled)
{
- conn_rec *c = req->r->connection;
+ request_rec *r = req->r;
+ conn_rec *c = r->connection;
+ proxy_conn_rec *backend = req->backend;
+ proxy_tunnel_rec *tunnel = req->tunnel;
+ int reusable = (!cancelled && !req->upgraded);
+
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE5, 0, r, "proxy %s: %s async",
+ req->proto, cancelled ? "cancel" : "finish");
+
+ if (req->async_pool) {
+ apr_pool_destroy(req->async_pool);
+ req->async_pool = NULL;
+ }
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, req->r,
- "proxy %s: finish async", req->proto);
+ if (!reusable) {
+ c->keepalive = AP_CONN_CLOSE;
+ backend->close = 1;
+ }
/* Report bytes exchanged by the backend */
- req->backend->worker->s->read +=
- ap_proxy_tunnel_conn_bytes_in(req->tunnel->origin);
- req->backend->worker->s->transferred +=
- ap_proxy_tunnel_conn_bytes_out(req->tunnel->origin);
+ backend->worker->s->read +=
+ ap_proxy_tunnel_conn_bytes_in(tunnel->origin);
+ backend->worker->s->transferred +=
+ ap_proxy_tunnel_conn_bytes_out(tunnel->origin);
- proxy_run_detach_backend(req->r, req->backend);
- ap_proxy_release_connection(req->proto, req->backend, req->r->server);
+ proxy_run_detach_backend(r, backend);
+ ap_proxy_release_connection(req->proto, backend, r->server);
- ap_finalize_request_protocol(req->r);
- ap_process_request_after_handler(req->r);
- /* don't touch req or req->r from here */
+ ap_finalize_request_protocol(r);
+ ap_process_request_after_handler(r);
+ /* don't dereference req or r from here! */
- c->cs->state = CONN_STATE_LINGER;
+ /* Return the client connection to the MPM */
+ if (reusable) {
+ c->cs->state = CONN_STATE_WRITE_COMPLETION;
+ }
+ else {
+ c->cs->state = CONN_STATE_LINGER;
+ }
ap_mpm_resume_suspended(c);
}
-/* If neither socket becomes readable in the specified timeout,
- * this callback will kill the request.
- * We do not have to worry about having a cancel and a IO both queued.
- */
-static void proxy_http_async_cancel_cb(void *baton)
-{
- proxy_http_req_t *req = (proxy_http_req_t *)baton;
-
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, req->r,
- "proxy %s: cancel async", req->proto);
-
- req->r->connection->keepalive = AP_CONN_CLOSE;
- req->backend->close = 1;
- proxy_http_async_finish(req);
-}
+/* Tell the MPM to poll the connections and resume when ready */
+static void proxy_http_async_poll(proxy_http_req_t *req)
+{
+ conn_rec *c = req->r->connection;
+ proxy_tunnel_rec *tunnel = req->tunnel;
-/* Invoked by the event loop when data is ready on either end.
- * We don't need the invoke_mtx, since we never put multiple callback events
- * in the queue.
- */
-static void proxy_http_async_cb(void *baton)
-{
- proxy_http_req_t *req = (proxy_http_req_t *)baton;
- int status;
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE5, 0, req->r,
+ "proxy %s: going async", req->proto);
+ /* Create/clear the subpool used by the MPM to allocate
+ * the temporary data needed for this polling.
+ */
if (req->async_pool) {
- /* Clear MPM's temporary data */
apr_pool_clear(req->async_pool);
}
+ else {
+ apr_pool_create(&req->async_pool, req->p);
+ }
- switch (req->state) {
- case PROXY_HTTP_TUNNELING:
- /* Pump both ends until they'd block and then start over again */
- status = ap_proxy_tunnel_run(req->tunnel);
- if (status == HTTP_GATEWAY_TIME_OUT) {
- status = SUSPENDED;
- }
- break;
+ ap_mpm_poll_suspended(c, req->async_pool, tunnel->pfds, req->idle_timeout);
+}
- default:
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, req->r,
- "proxy %s: unexpected async state (%i)",
- req->proto, (int)req->state);
- status = HTTP_INTERNAL_SERVER_ERROR;
- break;
- }
+/* The resume_connection hook called by the MPM when async polling completes (or times out) */
+static void proxy_http_resume_connection(conn_rec *c, request_rec *r)
+{
+ proxy_http_req_t *req = NULL;
+ int status;
- if (status == SUSPENDED) {
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, req->r,
- "proxy %s: suspended, going async",
- req->proto);
-
- if (!req->async_pool) {
- /* Create the subpool used by the MPM to alloc its own
- * temporary data, which we want to clear on the next
- * round (above) to avoid leaks.
- */
- apr_pool_create(&req->async_pool, req->p);
- }
+ if (r) {
+ req = ap_get_module_config(r->request_config, &proxy_http_module);
+ }
+ if (!req || !req->suspended) {
+ return;
+ }
+ ap_assert(req->r == r);
- ap_mpm_register_poll_callback_timeout(req->async_pool,
- req->tunnel->pfds,
- proxy_http_async_cb,
- proxy_http_async_cancel_cb,
- req, req->idle_timeout);
+ if (c->cs->state == CONN_STATE_SUSPENDED) {
+ status = proxy_http_tunnel_pump(req);
+ }
+ else {
+ AP_DEBUG_ASSERT(c->cs->state == CONN_STATE_LINGER);
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO()
+ "proxy: %s async tunneling timed out (state %i)",
+ req->proto, c->cs->state);
+ status = DONE;
}
- else if (ap_is_HTTP_ERROR(status)) {
- proxy_http_async_cancel_cb(req);
+ if (status == SUSPENDED) {
+ /* Keep polling in the MPM */
+ proxy_http_async_poll(req);
}
else {
- proxy_http_async_finish(req);
+ /* Done with tunneling */
+ proxy_http_async_done(req, status != OK);
+ }
+}
+
+/* The suspend_connection hook called once the MPM gets the SUSPENDED connection */
+static void proxy_http_suspend_connection(conn_rec *c, request_rec *r)
+{
+ proxy_http_req_t *req = NULL;
+
+ if (r) {
+ req = ap_get_module_config(r->request_config, &proxy_http_module);
}
+ if (!req || !req->suspended) {
+ return;
+ }
+ ap_assert(req->r == r);
+
+ proxy_http_async_poll(req);
}
static int stream_reqbody(proxy_http_req_t *req)
@@ -1553,22 +1589,40 @@ int ap_proxy_http_process_response(proxy_http_req_t *req)
"can't create tunnel for %s", upgrade);
return HTTP_INTERNAL_SERVER_ERROR;
}
+ if (req->can_suspend) {
+ /* If the MPM allows async polling, this thread will tunnel
+ * all it can now, so long as it does not time out on the (short)
+ * async delay; otherwise it returns to the MPM to get scheduled
+ * again when the connections are ready.
+ */
+ req->tunnel->timeout = dconf->async_delay;
+ }
+ else {
+ /* If the MPM doesn't allow async polling, the full tunneling
+ * happens now in this thread and a timeout is a showstopper.
+ */
+ req->tunnel->timeout = req->idle_timeout;
+ }
r->status = HTTP_SWITCHING_PROTOCOLS;
req->proto = upgrade;
-
- if (req->can_go_async) {
- /* Let the MPM schedule the work when idle */
- req->state = PROXY_HTTP_TUNNELING;
- req->tunnel->timeout = dconf->async_delay;
- proxy_http_async_cb(req);
+ req->upgraded = 1;
+
+ status = proxy_http_tunnel_pump(req);
+ if (status == SUSPENDED) {
+ /* Let the MPM call proxy_http_suspend_connection() when the
+ * connection is returned to it (i.e. no longer handled anywhere
+ * else). This prevents the connection from being seen or handled
+ * by multiple threads at the same time, which could happen if we
+ * called ap_mpm_poll_suspended() directly from here and a new IO
+ * got the connection rescheduled quickly, before it had actually
+ * reached the MPM.
+ */
+ req->suspended = 1;
return SUSPENDED;
}
- /* Let proxy tunnel forward everything within this thread */
- req->tunnel->timeout = req->idle_timeout;
- status = ap_proxy_tunnel_run(req->tunnel);
-
/* Report bytes exchanged by the backend */
backend->worker->s->read +=
ap_proxy_tunnel_conn_bytes_in(req->tunnel->origin);
@@ -1932,7 +1986,6 @@ static int proxy_http_handler(request_rec *r, proxy_worker *worker,
proxy_http_req_t *req = NULL;
proxy_conn_rec *backend = NULL;
apr_bucket_brigade *input_brigade = NULL;
- int mpm_can_poll = 0;
int is_ssl = 0;
conn_rec *c = r->connection;
proxy_dir_conf *dconf;
@@ -1972,7 +2025,6 @@ static int proxy_http_handler(request_rec *r, proxy_worker *worker,
backend->is_ssl = is_ssl;
dconf = ap_get_module_config(r->per_dir_config, &proxy_module);
- ap_mpm_query(AP_MPMQ_CAN_POLL, &mpm_can_poll);
req = apr_pcalloc(p, sizeof(*req));
req->p = p;
@@ -1983,12 +2035,13 @@ static int proxy_http_handler(request_rec *r, proxy_worker *worker,
req->backend = backend;
req->proto = scheme;
req->bucket_alloc = c->bucket_alloc;
- req->can_go_async = (mpm_can_poll &&
- dconf->async_delay_set &&
- dconf->async_delay >= 0);
- req->state = PROXY_HTTP_REQ_HAVE_HEADER;
+ req->can_suspend = (mpm_can_poll_suspended &&
+ dconf->async_delay_set &&
+ dconf->async_delay >= 0);
req->rb_method = RB_INIT;
+ ap_set_module_config(r->request_config, &proxy_http_module, req);
+
if (apr_table_get(r->subprocess_env, "force-proxy-request-1.0")) {
req->force10 = 1;
}
@@ -2004,9 +2057,9 @@ static int proxy_http_handler(request_rec *r, proxy_worker *worker,
}
}
- if (req->can_go_async || req->upgrade) {
+ if (req->can_suspend || req->upgrade) {
/* If ProxyAsyncIdleTimeout is not set, use backend timeout */
- if (req->can_go_async && dconf->async_idle_timeout_set) {
+ if (req->can_suspend && dconf->async_idle_timeout_set) {
req->idle_timeout = dconf->async_idle_timeout;
}
else if (worker->s->timeout_set) {
@@ -2045,7 +2098,7 @@ static int proxy_http_handler(request_rec *r, proxy_worker *worker,
* data to the backend ASAP?
*/
if (input_brigade
- || req->can_go_async
+ || req->can_suspend
|| req->do_100_continue
|| apr_table_get(r->subprocess_env,
"proxy-prefetch-nonblocking")) {
@@ -2190,13 +2243,18 @@ static int proxy_http_handler(request_rec *r, proxy_worker *worker,
static int proxy_http_post_config(apr_pool_t *pconf, apr_pool_t *plog,
apr_pool_t *ptemp, server_rec *s)
{
-
/* proxy_http_post_config() will be called twice during startup. So, don't
* set up the static data the 1st time through. */
if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
return OK;
}
+#ifdef AP_MPMQ_CAN_POLL_SUSPENDED
+ if (ap_mpm_query(AP_MPMQ_CAN_POLL_SUSPENDED, &mpm_can_poll_suspended)) {
+ mpm_can_poll_suspended = 0;
+ }
+#endif
+
ap_proxy_clear_connection_fn =
APR_RETRIEVE_OPTIONAL_FN(ap_proxy_clear_connection);
if (!ap_proxy_clear_connection_fn) {
@@ -2214,6 +2272,10 @@ static void ap_proxy_http_register_hook(apr_pool_t *p)
proxy_hook_scheme_handler(proxy_http_handler, NULL, NULL, APR_HOOK_FIRST);
proxy_hook_canon_handler(proxy_http_canon, NULL, NULL, APR_HOOK_FIRST);
warn_rx = ap_pregcomp(p, "[0-9]{3}[ \t]+[^ \t]+[ \t]+\"[^\"]*\"([ \t]+\"([^\"]+)\")?", 0);
+
+ /* Hooks run when tunnel connections are suspended to and resumed from the MPM */
+ ap_hook_suspend_connection(proxy_http_suspend_connection, NULL, NULL, APR_HOOK_FIRST);
+ ap_hook_resume_connection(proxy_http_resume_connection, NULL, NULL, APR_HOOK_FIRST);
}
AP_DECLARE_MODULE(proxy_http) = {
diff --git a/modules/proxy/mod_proxy_wstunnel.c b/modules/proxy/mod_proxy_wstunnel.c
index 0e5e6cb8128..3439b08b18d 100644
--- a/modules/proxy/mod_proxy_wstunnel.c
+++ b/modules/proxy/mod_proxy_wstunnel.c
@@ -17,13 +17,15 @@
#include "mod_proxy.h"
#include "http_config.h"
#include "ap_mpm.h"
+#include "mpm_common.h"
module AP_MODULE_DECLARE_DATA proxy_wstunnel_module;
+static int mpm_can_poll_suspended = 0;
+
typedef struct {
unsigned int fallback_to_proxy_http :1,
fallback_to_proxy_http_set :1;
- int mpm_can_poll;
apr_time_t idle_timeout;
apr_time_t async_delay;
} proxyws_dir_conf;
@@ -32,83 +34,130 @@ typedef struct ws_baton_t {
request_rec *r;
proxy_conn_rec *backend;
proxy_tunnel_rec *tunnel;
+ apr_time_t idle_timeout;
apr_pool_t *async_pool;
const char *scheme;
+ int suspended;
} ws_baton_t;
static int can_fallback_to_proxy_http;
-static void proxy_wstunnel_callback(void *b);
-
-static int proxy_wstunnel_pump(ws_baton_t *baton, int async)
+static int proxy_wstunnel_pump(ws_baton_t *baton)
{
int status = ap_proxy_tunnel_run(baton->tunnel);
if (status == HTTP_GATEWAY_TIME_OUT) {
- if (!async) {
+ if (!mpm_can_poll_suspended) {
/* ap_proxy_tunnel_run() didn't log this */
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, baton->r, APLOGNO(10225)
- "Tunnel timed out");
+ "proxy: %s tunneling timed out",
+ baton->scheme);
}
else {
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, baton->r, APLOGNO(02542)
- "Attempting to go async");
status = SUSPENDED;
}
}
return status;
}
-static void proxy_wstunnel_finish(ws_baton_t *baton)
-{
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, baton->r, "proxy_wstunnel_finish");
- ap_proxy_release_connection(baton->scheme, baton->backend, baton->r->server);
- ap_finalize_request_protocol(baton->r);
- ap_lingering_close(baton->r->connection);
- ap_mpm_resume_suspended(baton->r->connection);
- ap_process_request_after_handler(baton->r); /* don't touch baton or r after here */
+/* The backend and SUSPENDED client connections are done,
+ * release them (the latter in the MPM).
+ */
+static void proxy_wstunnel_done(ws_baton_t *baton, int cancelled)
+{
+ request_rec *r = baton->r;
+ conn_rec *c = r->connection;
+ proxy_conn_rec *backend = baton->backend;
+
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE5, 0, r, "proxy %s: %s async",
+ baton->scheme, cancelled ? "cancel" : "finish");
+
+ /* Upgraded connections are not reusable */
+ c->keepalive = AP_CONN_CLOSE;
+ backend->close = 1;
+
+ ap_proxy_release_connection(baton->scheme, backend, r->server);
+
+ ap_finalize_request_protocol(r);
+ ap_process_request_after_handler(r);
+ /* don't dereference baton or r from here! */
+
+ /* Return the client connection to the MPM */
+ c->cs->state = CONN_STATE_LINGER;
+ ap_mpm_resume_suspended(c);
}
-/* If neither socket becomes readable in the specified timeout,
- * this callback will kill the request. We do not have to worry about
- * having a cancel and a IO both queued.
- */
-static void proxy_wstunnel_cancel_callback(void *b)
-{
- ws_baton_t *baton = (ws_baton_t*)b;
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, baton->r,
- "proxy_wstunnel_cancel_callback, IO timed out");
- proxy_wstunnel_finish(baton);
+/* Tell the MPM to poll the connections and resume when ready */
+static void proxy_wstunnel_poll(ws_baton_t *baton)
+{
+ request_rec *r = baton->r;
+ conn_rec *c = r->connection;
+
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE5, 0, r,
+ "proxy %s: going async", baton->scheme);
+
+ /* Create/clear the subpool used by the MPM to allocate
+ * the temporary data needed for this polling.
+ */
+ if (baton->async_pool) {
+ apr_pool_clear(baton->async_pool);
+ }
+ else {
+ apr_pool_create(&baton->async_pool, r->pool);
+ }
+
+ c->cs->state = CONN_STATE_SUSPENDED;
+ ap_mpm_poll_suspended(c, baton->async_pool, baton->tunnel->pfds,
+ baton->idle_timeout);
}
-/* Invoked by the event loop when data is ready on either end.
- * Pump both ends until they'd block and then start over again
- * We don't need the invoke_mtx, since we never put multiple callback events
- * in the queue.
- */
-static void proxy_wstunnel_callback(void *b)
-{
- ws_baton_t *baton = (ws_baton_t*)b;
+/* The resume_connection hook called by the MPM when polling completes (or times out) */
+static void proxy_wstunnel_resume_connection(conn_rec *c, request_rec *r)
+{
+ ws_baton_t *baton = NULL;
+ int status;
- /* Clear MPM's temporary data */
- AP_DEBUG_ASSERT(baton->async_pool != NULL);
- apr_pool_clear(baton->async_pool);
+ if (r) {
+ baton = ap_get_module_config(r->request_config, &proxy_wstunnel_module);
+ }
+ if (!baton || !baton->suspended) {
+ return;
+ }
+ ap_assert(baton->r == r);
- if (proxy_wstunnel_pump(baton, 1) == SUSPENDED) {
- proxyws_dir_conf *dconf = ap_get_module_config(baton->r->per_dir_config,
- &proxy_wstunnel_module);
+ if (c->cs->state == CONN_STATE_SUSPENDED) {
+ status = proxy_wstunnel_pump(baton);
+ }
+ else {
+ AP_DEBUG_ASSERT(c->cs->state == CONN_STATE_LINGER);
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO()
+ "proxy: %s async tunneling timed out (state %i)",
+ baton->scheme, c->cs->state);
+ status = DONE;
+ }
+ if (status == SUSPENDED) {
+ /* Keep polling in the MPM */
+ proxy_wstunnel_poll(baton);
+ }
+ else {
+ /* Done with tunneling */
+ proxy_wstunnel_done(baton, status != OK);
+ }
+}
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, baton->r,
- "proxy_wstunnel_callback suspend");
+/* The suspend_connection hook called once the MPM gets the SUSPENDED connection */
+static void proxy_wstunnel_suspend_connection(conn_rec *c, request_rec *r)
+{
+ ws_baton_t *baton = NULL;
- ap_mpm_register_poll_callback_timeout(baton->async_pool,
- baton->tunnel->pfds,
- proxy_wstunnel_callback,
- proxy_wstunnel_cancel_callback,
- baton, dconf->idle_timeout);
+ if (r) {
+ baton = ap_get_module_config(r->request_config, &proxy_wstunnel_module);
}
- else {
- proxy_wstunnel_finish(baton);
+ if (!baton || !baton->suspended) {
+ return;
}
+ ap_assert(baton->r == r);
+
+ proxy_wstunnel_poll(baton);
}
static int proxy_wstunnel_check_trans(request_rec *r, const char *url)
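The async_pool handling in proxy_wstunnel_poll() above is a small but important idiom: the MPM allocates per-round bookkeeping from the pool it is given, so a subpool of r->pool is cleared on every new round instead of letting each round leak into r->pool for the lifetime of the tunnel. The idiom in isolation (a sketch; do_poll_round() is a hypothetical stand-in for the ap_mpm_poll_suspended() call):

    #include "apr_pools.h"

    static void poll_round(apr_pool_t *parent, apr_pool_t **subpool)
    {
        if (*subpool) {
            apr_pool_clear(*subpool);  /* frees the previous round's data */
        }
        else {
            apr_pool_create(subpool, parent);
        }
        /* do_poll_round(*subpool); */
    }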
@@ -296,51 +345,35 @@ static int proxy_wstunnel_request(apr_pool_t *p, request_rec *r,
"error creating websocket tunnel");
return HTTP_INTERNAL_SERVER_ERROR;
}
+ if (mpm_can_poll_suspended) {
+ tunnel->timeout = dconf->async_delay;
+ }
+ else {
+ tunnel->timeout = dconf->idle_timeout;
+ }
baton = apr_pcalloc(r->pool, sizeof(*baton));
baton->r = r;
baton->backend = conn;
baton->tunnel = tunnel;
baton->scheme = scheme;
-
- if (!dconf->mpm_can_poll) {
- tunnel->timeout = dconf->idle_timeout;
- status = proxy_wstunnel_pump(baton, 0);
- }
- else {
- tunnel->timeout = dconf->async_delay;
- status = proxy_wstunnel_pump(baton, 1);
- if (status == SUSPENDED) {
- /* Create the subpool used by the MPM to alloc its own
- * temporary data, which we want to clear on the next
- * round (above) to avoid leaks.
- */
- apr_pool_create(&baton->async_pool, baton->r->pool);
-
- rv = ap_mpm_register_poll_callback_timeout(
- baton->async_pool,
- baton->tunnel->pfds,
- proxy_wstunnel_callback,
- proxy_wstunnel_cancel_callback,
- baton,
- dconf->idle_timeout);
- if (rv == APR_SUCCESS) {
- return SUSPENDED;
- }
-
- if (APR_STATUS_IS_ENOTIMPL(rv)) {
- ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, APLOGNO(02544) "No async support");
- tunnel->timeout = dconf->idle_timeout;
- status = proxy_wstunnel_pump(baton, 0); /* force no async */
- }
- else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(10211)
- "error registering websocket tunnel");
- status = HTTP_INTERNAL_SERVER_ERROR;
- }
- }
+ baton->idle_timeout = dconf->idle_timeout;
+ ap_set_module_config(r->request_config, &proxy_wstunnel_module, baton);
+
+ status = proxy_wstunnel_pump(baton);
+ if (status == SUSPENDED) {
+ /* Let the MPM call proxy_wstunnel_suspend_connection() when
+ * the connection is returned to it (i.e. not handled anywhere
+ * else anymore). This prevents the connection from being seen
+ * or handled by multiple threads at the same time, which could
+ * happen if we called ap_mpm_poll_suspended() directly from
+ * here, in the window before the connection actually reaches
+ * the MPM, should a new IO cause the connection to be
+ * rescheduled quickly.
+ */
+ baton->suspended = 1;
+ return SUSPENDED;
}
-
if (ap_is_HTTP_ERROR(status)) {
/* Don't send an error page down an upgraded connection */
if (!tunnel->replied) {
@@ -462,8 +495,6 @@ static void *create_proxyws_dir_config(apr_pool_t *p, char *dummy)
new->fallback_to_proxy_http = 1;
new->idle_timeout = -1; /* no timeout */
- ap_mpm_query(AP_MPMQ_CAN_POLL, &new->mpm_can_poll);
-
return (void *) new;
}
@@ -477,7 +508,6 @@ static void *merge_proxyws_dir_config(apr_pool_t *p, void *vbase, void *vadd)
: base->fallback_to_proxy_http;
new->fallback_to_proxy_http_set = (add->fallback_to_proxy_http_set
|| base->fallback_to_proxy_http_set);
- new->mpm_can_poll = add->mpm_can_poll;
new->idle_timeout = add->idle_timeout;
new->async_delay = add->async_delay;
@@ -514,6 +544,12 @@ static int proxy_wstunnel_post_config(apr_pool_t *pconf, apr_pool_t *plog,
can_fallback_to_proxy_http =
(ap_find_linked_module("mod_proxy_http.c") != NULL);
+#ifdef AP_MPMQ_CAN_POLL_SUSPENDED
+ if (ap_mpm_query(AP_MPMQ_CAN_POLL_SUSPENDED, &mpm_can_poll_suspended)) {
+ mpm_can_poll_suspended = 0;
+ }
+#endif
+
return OK;
}
@@ -542,6 +578,10 @@ static void ws_proxy_hooks(apr_pool_t *p)
proxy_hook_scheme_handler(proxy_wstunnel_handler, NULL, aszSucc, APR_HOOK_FIRST);
proxy_hook_check_trans(proxy_wstunnel_check_trans, NULL, aszSucc, APR_HOOK_MIDDLE);
proxy_hook_canon_handler(proxy_wstunnel_canon, NULL, aszSucc, APR_HOOK_FIRST);
+
+ /* Hooks run when tunnel connections are suspended to and resumed from the MPM */
+ ap_hook_suspend_connection(proxy_wstunnel_suspend_connection, NULL, NULL, APR_HOOK_FIRST);
+ ap_hook_resume_connection(proxy_wstunnel_resume_connection, NULL, NULL, APR_HOOK_FIRST);
}
AP_DECLARE_MODULE(proxy_wstunnel) = {
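Seen from a module, the whole new MPM interface reduces to one call made from the suspend_connection (or resume_connection) hook. A hedged usage sketch using only the API surface this series introduces (error handling elided; pfds would typically be the tunnel's pre-built client+backend pollfd pair):

    #include "httpd.h"
    #include "mpm_common.h"
    #include "apr_poll.h"

    /* Ask the MPM to poll pfds for the SUSPENDED connection c; the MPM
     * resumes it through the resume_connection hook on IO, or with
     * CONN_STATE_LINGER once the idle timeout fires.
     */
    static apr_status_t start_polling(conn_rec *c, apr_pool_t *subpool,
                                      const apr_array_header_t *pfds,
                                      apr_interval_time_t timeout)
    {
        c->cs->state = CONN_STATE_SUSPENDED;
        return ap_mpm_poll_suspended(c, subpool, pfds, timeout);
    }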
diff --git a/modules/proxy/proxy_util.c b/modules/proxy/proxy_util.c
index 88d174220d8..52595a03ec5 100644
--- a/modules/proxy/proxy_util.c
+++ b/modules/proxy/proxy_util.c
@@ -5898,7 +5898,7 @@ PROXY_DECLARE(int) ap_proxy_tunnel_run(proxy_tunnel_rec *tunnel)
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(10221)
"proxy: %s: %s flushing failed (%i)",
scheme, out->name, rc);
- status = rc;
+ status = HTTP_BAD_GATEWAY;
goto done;
}
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index e0ba249bbf7..1a71f214c8c 100644
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -246,6 +246,8 @@ typedef struct event_srv_cfg_s event_srv_cfg;
struct timeout_queue;
static apr_thread_mutex_t *timeout_mutex;
+struct user_poll_baton;
+
/*
* The pollset for sockets that are in any of the timeout queues. Currently
* we use the timeout_mutex to make sure that connections are added/removed
@@ -297,6 +299,8 @@ struct event_conn_state_t {
struct timeout_queue *q;
/** the timer event for this entry */
timer_event_t *te;
+ /** user pollfds (for suspended connection) */
+ struct user_poll_baton *user_baton;
/*
* when queued to workers
@@ -317,6 +321,8 @@ struct event_conn_state_t {
* hooks)
*/
suspended :1,
+ /** Did the connection time out? */
+ timed_out :1,
/** Is lingering close from defer_lingering_close()? */
deferred_linger :1,
/** Has ap_start_lingering_close() been called? */
@@ -497,6 +503,15 @@ static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *cs)
apr_time_t elem_expiry;
apr_time_t next_expiry;
+ /* It greatly simplifies the logic to use a single timeout value per q
+ * because the new element can just be added to the end of the list and
+ * it will stay sorted in expiration time sequence. If brand new
+ * sockets are sent to the event thread for a readability check, this
+ * will be a slight behavior change - they use the non-keepalive
+ * timeout today. With a normal client, the socket will be readable in
+ * a few milliseconds anyway.
+ */
+
ap_assert(q && !cs->q);
cs->q = q;
@@ -619,14 +634,14 @@ typedef struct
void *baton;
} listener_poll_type;
-typedef struct socket_callback_baton
-{
- ap_mpm_callback_fn_t *cbfunc;
- void *user_baton;
+struct user_poll_baton {
+ apr_pool_t *pool;
+ event_conn_state_t *cs;
apr_array_header_t *pfds;
+ apr_thread_mutex_t *mutex; /* pfds added/removed atomically */
timer_event_t *cancel_event; /* If a timeout was requested, a pointer to the timer event */
- struct socket_callback_baton *next;
-} socket_callback_baton_t;
+ struct user_poll_baton *next; /* chaining */
+};
typedef struct event_child_bucket {
ap_pod_t *pod;
@@ -1120,6 +1135,9 @@ static int event_query(int query_code, int *result, apr_status_t *rv)
case AP_MPMQ_CAN_WAITIO:
*result = 1;
break;
+ case AP_MPMQ_CAN_POLL_SUSPENDED:
+ *result = 1;
+ break;
default:
*rv = APR_ENOTIMPL;
break;
@@ -1223,11 +1241,8 @@ static apr_status_t decrement_connection_count(void *cs_)
"connection %" CS_FMT_TO " cleaned up",
CS_ARG_TO(cs));
- switch (cs->pub.state) {
- case CONN_STATE_SUSPENDED:
+ if (cs->suspended) {
apr_atomic_dec32(&suspended_count);
- default:
- break;
}
/* Unblock the listener if it's waiting for connection_count = 0,
@@ -1250,15 +1265,24 @@ static apr_status_t decrement_connection_count(void *cs_)
static void notify_suspend(event_conn_state_t *cs)
{
- ap_run_suspend_connection(cs->c, cs->r);
- cs->c->sbh = NULL;
+ AP_DEBUG_ASSERT(!cs->suspended);
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "Suspend connection %" CS_FMT, CS_ARG(cs));
+ apr_atomic_inc32(&suspended_count);
cs->suspended = 1;
+
+ cs->c->sbh = NULL;
+ cs->c->suspended_baton = cs;
+ ap_run_suspend_connection(cs->c, cs->r);
}
-static void notify_resume(event_conn_state_t *cs, int cleanup)
+static void notify_resume(event_conn_state_t *cs)
{
- cs->suspended = 0;
- cs->c->sbh = cleanup ? NULL : cs->sbh;
+ AP_DEBUG_ASSERT(cs->suspended);
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "Resume connection %" CS_FMT, CS_ARG(cs));
+
+ cs->c->sbh = cs->sbh;
ap_run_resume_connection(cs->c, cs->r);
}
@@ -1360,12 +1384,13 @@ static void shutdown_connection(event_conn_state_t *cs, apr_time_t now,
* if the connection is currently suspended as far as modules
* know, provide notification of resumption.
*/
-static apr_status_t ptrans_pre_cleanup(void *dummy)
+static apr_status_t ptrans_pre_cleanup(void *arg)
{
- event_conn_state_t *cs = dummy;
-
+ event_conn_state_t *cs = arg;
if (cs->suspended) {
- notify_resume(cs, 1);
+ cs->sbh = NULL;
+ cs->pub.state = CONN_STATE_LINGER;
+ notify_resume(cs);
}
return APR_SUCCESS;
}
@@ -1440,7 +1465,8 @@ static int pollset_add_at(event_conn_state_t *cs, int sense,
(int)cs->pfd.reqevents,
CS_ARG(cs), at, line);
- ap_assert(cs->q == NULL && cs->te == NULL && ((q != NULL) ^ (te != NULL)));
+ ap_assert((q != NULL) ^ (te != NULL));
+ ap_assert(cs->q == NULL && cs->te == NULL);
set_conn_state_sense(cs, sense);
@@ -1497,8 +1523,6 @@ static int pollset_del_at(event_conn_state_t *cs, int locked,
(int)cs->pfd.reqevents,
CS_ARG(cs), at, line);
- ap_assert((cs->q != NULL) ^ (cs->te != NULL));
-
if (cs->q) {
if (!locked) {
apr_thread_mutex_lock(timeout_mutex);
@@ -1508,7 +1532,7 @@ static int pollset_del_at(event_conn_state_t *cs, int locked,
apr_thread_mutex_unlock(timeout_mutex);
}
}
- else {
+ else if (cs->te) {
cs->te->canceled = 1;
cs->te = NULL;
}
@@ -1537,8 +1561,7 @@ static int pollset_del_at(event_conn_state_t *cs, int locked,
/* Forward declare */
static timer_event_t *get_timer_event(apr_time_t timeout,
ap_mpm_callback_fn_t *cbfn, void *baton,
- int insert,
- apr_array_header_t *pfds);
+ int insert);
static void process_lingering_close(event_conn_state_t *cs);
static event_conn_state_t *make_conn_state(apr_pool_t *p, apr_socket_t *csd)
@@ -1640,22 +1663,28 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
close_connection(cs);
return;
}
-
- cs->pub.sense = CONN_SENSE_DEFAULT;
}
else { /* The connection is scheduled back */
c = cs->c;
c->current_thread = thd;
c->id = conn_id; /* thread number is part of ID */
ap_update_sb_handle(cs->sbh, my_child_num, my_thread_num);
- notify_resume(cs, 0);
+ }
+
+ /* Suspended connections run their hooks here and don't fall through */
+ if (cs->suspended) {
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "resuming connection %" CS_FMT, CS_ARG(cs));
+ notify_resume(cs);
+ return;
}
ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
"processing connection %" CS_FMT " (aborted %d, clogging %d)",
CS_ARG(cs), c->aborted, c->clogging_input_filters);
- if (cs->pub.state == CONN_STATE_LINGER) {
+ if (cs->pub.state == CONN_STATE_LINGER || c->aborted) {
+ cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
}
@@ -1697,16 +1726,15 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
* worker or prefork MPMs for instance.
*/
switch (rc) {
- case DONE:
- rc = OK; /* same as OK, fall through */
case OK:
+ case DONE: /* same as OK, fall through */
if (cs->pub.state == CONN_STATE_PROCESSING) {
cs->pub.state = CONN_STATE_LINGER;
}
else if (cs->pub.state == CONN_STATE_KEEPALIVE) {
cs->pub.state = CONN_STATE_WRITE_COMPLETION;
}
- break;
+ rc = OK;
}
if (rc != OK || (cs->pub.state != CONN_STATE_LINGER
&& cs->pub.state != CONN_STATE_ASYNC_WAITIO
@@ -1735,7 +1763,6 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
* event thread poll for read/writeability.
*/
ap_update_child_status(cs->sbh, SERVER_BUSY_READ, NULL);
- notify_suspend(cs);
/* If the connection timeout is actually different than the waitio_q's,
* use a timer event to honor it (e.g. mod_reqtimeout may enforce its
@@ -1747,7 +1774,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
if (timeout < TIMERS_FUDGE_TIMEOUT) {
timeout = TIMERS_FUDGE_TIMEOUT;
}
- te = get_timer_event(timeout, NULL, cs, 1, NULL);
+ te = get_timer_event(timeout, NULL, cs, 1);
}
else {
q = cs->sc->io_q;
@@ -1776,7 +1803,6 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
}
if (pending == AGAIN) {
/* Let the event thread poll for write */
- notify_suspend(cs);
cs->pub.sense = CONN_SENSE_DEFAULT;
if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q, NULL)) {
return; /* queued */
@@ -1804,16 +1830,7 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
if (cs->pub.state == CONN_STATE_KEEPALIVE) {
ap_update_child_status(cs->sbh, SERVER_BUSY_KEEPALIVE, NULL);
- /* It greatly simplifies the logic to use a single timeout value per q
- * because the new element can just be added to the end of the list and
- * it will stay sorted in expiration time sequence. If brand new
- * sockets are sent to the event thread for a readability check, this
- * will be a slight behavior change - they use the non-keepalive
- * timeout today. With a normal client, the socket will be readable in
- * a few milliseconds anyway.
- */
- notify_suspend(cs);
-
+ cs->pub.sense = CONN_SENSE_DEFAULT;
if (!pollset_add(cs, CONN_SENSE_WANT_READ, cs->ka_sc->ka_q, NULL)) {
cs->pub.state = CONN_STATE_LINGER;
goto lingering_close;
@@ -1823,33 +1840,149 @@ static void process_socket(apr_thread_t *thd, apr_pool_t *p,
}
if (cs->pub.state == CONN_STATE_SUSPENDED) {
- cs->c->suspended_baton = cs;
- apr_atomic_inc32(&suspended_count);
notify_suspend(cs);
- return; /* done */
+ return; /* suspended */
}
lingering_close:
process_lingering_close(cs);
}
+static apr_status_t user_poll_cleanup(void *data)
+{
+ struct user_poll_baton *user_baton = data;
+ apr_array_header_t *pfds = user_baton->pfds;
+ apr_status_t rc, final_rc = APR_SUCCESS;
+ int i;
+
+ /* All the pollfds should be added/removed atomically, so synchronize
+ * with register_user_poll().
+ */
+ apr_thread_mutex_lock(user_baton->mutex);
+ for (i = 0; i < pfds->nelts; i++) {
+ apr_pollfd_t *pfd = (apr_pollfd_t *)pfds->elts + i;
+ if (pfd->client_data) {
+ rc = apr_pollset_remove(event_pollset, pfd);
+ if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
+ final_rc = rc;
+ }
+ pfd->client_data = NULL;
+ }
+ }
+ apr_thread_mutex_unlock(user_baton->mutex);
+
+ if (final_rc) {
+ AP_DEBUG_ASSERT(0);
+ signal_threads(ST_GRACEFUL);
+ }
+ return final_rc;
+}
+
+/* Put some user pollfds into the listener pollset for a SUSPENDED connection */
+static apr_status_t event_poll_suspended(conn_rec *c, apr_pool_t *p,
+ const apr_array_header_t *user_pfds,
+ apr_interval_time_t timeout)
+{
+ event_conn_state_t *cs = c->suspended_baton;
+ apr_status_t rc, final_rc = APR_SUCCESS;
+ struct user_poll_baton *user_baton;
+ apr_array_header_t *pfds;
+ listener_poll_type *pt;
+ int i;
+
+ AP_DEBUG_ASSERT(cs != NULL);
+ AP_DEBUG_ASSERT(cs->suspended);
+ AP_DEBUG_ASSERT(user_pfds->nelts > 0);
+ if (cs == NULL) {
+ ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO()
+ "event_poll_suspended: suspended_baton is NULL");
+ return APR_EINVAL;
+ }
+ if (!cs->suspended) {
+ ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO()
+ "event_poll_suspended: thread isn't suspended");
+ return APR_EINVAL;
+ }
+ if (user_pfds->nelts <= 0) {
+ ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO()
+ "event_poll_suspended: no poll FDs");
+ return APR_EINVAL;
+ }
+
+ cs->pub.state = CONN_STATE_SUSPENDED;
+ cs->user_baton = user_baton = apr_pcalloc(p, sizeof(*user_baton));
+ apr_thread_mutex_create(&user_baton->mutex, APR_THREAD_MUTEX_DEFAULT, p);
+ user_baton->pfds = pfds = apr_array_copy(p, user_pfds);
+ user_baton->pool = p;
+ user_baton->cs = cs;
+
+ apr_pool_pre_cleanup_register(p, user_baton, user_poll_cleanup);
+
+ pt = apr_pcalloc(p, sizeof(*pt));
+ pt->baton = user_baton;
+ pt->type = PT_USER;
+
+ if (timeout >= 0) {
+ /* Prevent the timer from firing before the pollset is updated */
+ if (timeout < TIMERS_FUDGE_TIMEOUT) {
+ timeout = TIMERS_FUDGE_TIMEOUT;
+ }
+ user_baton->cancel_event = get_timer_event(timeout, NULL, cs, 1);
+ }
+ cs->te = user_baton->cancel_event;
+
+ /* All the pollfds should be added/removed atomically, so synchronize
+ * with user_poll_cleanup().
+ */
+ apr_thread_mutex_lock(user_baton->mutex);
+ for (i = 0; i < pfds->nelts; i++) {
+ apr_pollfd_t *pfd = (apr_pollfd_t *)pfds->elts + i;
+ if (pfd->reqevents) {
+ if (pfd->reqevents & APR_POLLIN) {
+ pfd->reqevents |= APR_POLLHUP;
+ }
+ pfd->reqevents |= APR_POLLERR;
+ pfd->client_data = pt;
+
+ rc = apr_pollset_add(event_pollset, pfd);
+ if (rc != APR_SUCCESS) {
+ final_rc = rc;
+ }
+ }
+ else {
+ pfd->client_data = NULL;
+ }
+ }
+ apr_thread_mutex_unlock(user_baton->mutex);
+
+ if (final_rc) {
+ AP_DEBUG_ASSERT(0);
+ signal_threads(ST_GRACEFUL);
+ }
+ return final_rc;
+}
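Note how event_poll_suspended() registers user_poll_cleanup() as a pre-cleanup on the caller's pool: pre-cleanups run before the pool's memory is reclaimed, so the pollfds are guaranteed to leave the pollset while their storage is still valid. The idiom in isolation (a sketch; unregister() stands in for the apr_pollset_remove() loop above):

    #include "apr_pools.h"

    static apr_status_t unregister(void *data)
    {
        /* remove each still-registered pollfd in 'data' from the pollset */
        return APR_SUCCESS;
    }

    static void track(apr_pool_t *p, void *pollfd_baton)
    {
        /* runs at apr_pool_clear()/destroy time, before memory is reused */
        apr_pool_pre_cleanup_register(p, pollfd_baton, unregister);
    }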
+
/* Put a SUSPENDED connection back into a queue. */
-static apr_status_t event_resume_suspended (conn_rec *c)
+static apr_status_t event_resume_suspended(conn_rec *c)
{
- event_conn_state_t* cs = (event_conn_state_t*) c->suspended_baton;
+ event_conn_state_t *cs = c->suspended_baton;
+
+ AP_DEBUG_ASSERT(cs != NULL);
+ AP_DEBUG_ASSERT(cs->suspended);
if (cs == NULL) {
ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02615)
"event_resume_suspended: suspended_baton is NULL");
- return APR_EGENERAL;
+ return APR_EINVAL;
}
if (!cs->suspended) {
ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02616)
- "event_resume_suspended: Thread isn't suspended");
- return APR_EGENERAL;
+ "event_resume_suspended: thread isn't suspended");
+ return APR_EINVAL;
}
-
apr_atomic_dec32(&suspended_count);
- c->suspended_baton = NULL;
+ cs->c->suspended_baton = NULL;
+ cs->c->sbh = cs->sbh;
+ cs->suspended = 0;
cs->pub.sense = CONN_SENSE_DEFAULT;
if (cs->pub.state != CONN_STATE_LINGER) {
@@ -1857,7 +1990,6 @@ static apr_status_t event_resume_suspended (conn_rec *c)
if (pollset_add(cs, CONN_SENSE_WANT_WRITE, cs->sc->wc_q, NULL)) {
return APR_SUCCESS; /* queued */
}
-
/* fall through lingering close on error */
cs->pub.state = CONN_STATE_LINGER;
}
@@ -2150,8 +2282,7 @@ static apr_thread_mutex_t *g_timer_skiplist_mtx;
static timer_event_t *get_timer_event(apr_time_t timeout,
ap_mpm_callback_fn_t *cbfn, void *baton,
- int insert,
- apr_array_header_t *pfds)
+ int insert)
{
timer_event_t *te;
apr_time_t now = (timeout < 0) ? 0 : event_time_now();
@@ -2179,7 +2310,6 @@ static timer_event_t *get_timer_event(apr_time_t timeout,
te->baton = baton;
te->when = now + timeout;
te->timeout = timeout;
- te->pfds = pfds;
if (insert) {
apr_time_t next_expiry;
@@ -2219,122 +2349,15 @@ static void put_timer_event(timer_event_t *te, int locked)
}
}
-static apr_status_t event_register_timed_callback_ex(apr_time_t timeout,
- ap_mpm_callback_fn_t *cbfn,
- void *baton,
- apr_array_header_t *pfds)
-{
- if (!cbfn) {
- return APR_EINVAL;
- }
- get_timer_event(timeout, cbfn, baton, 1, pfds);
- return APR_SUCCESS;
-}
-
static apr_status_t event_register_timed_callback(apr_time_t timeout,
ap_mpm_callback_fn_t *cbfn,
void *baton)
{
- event_register_timed_callback_ex(timeout, cbfn, baton, NULL);
- return APR_SUCCESS;
-}
-
-static apr_status_t event_cleanup_poll_callback(void *data)
-{
- apr_status_t final_rc = APR_SUCCESS;
- apr_array_header_t *pfds = data;
- int i;
-
- for (i = 0; i < pfds->nelts; i++) {
- apr_pollfd_t *pfd = (apr_pollfd_t *)pfds->elts + i;
- if (pfd->client_data) {
- apr_status_t rc;
- rc = apr_pollset_remove(event_pollset, pfd);
- if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
- final_rc = rc;
- }
- pfd->client_data = NULL;
- }
- }
-
- if (final_rc) {
- AP_DEBUG_ASSERT(0);
- signal_threads(ST_GRACEFUL);
- }
- return final_rc;
-}
-
-static apr_status_t event_register_poll_callback_ex(apr_pool_t *p,
- const apr_array_header_t *pfds,
- ap_mpm_callback_fn_t *cbfn,
- ap_mpm_callback_fn_t *tofn,
- void *baton,
- apr_time_t timeout)
-{
- listener_poll_type *pt;
- socket_callback_baton_t *scb;
- apr_status_t rc, final_rc = APR_SUCCESS;
- int i;
-
- if (!cbfn || !tofn) {
+ if (!cbfn) {
return APR_EINVAL;
}
-
- scb = apr_pcalloc(p, sizeof(*scb));
- scb->cbfunc = cbfn;
- scb->user_baton = baton;
- scb->pfds = apr_array_copy(p, pfds);
-
- pt = apr_palloc(p, sizeof(*pt));
- pt->type = PT_USER;
- pt->baton = scb;
-
- apr_pool_pre_cleanup_register(p, scb->pfds, event_cleanup_poll_callback);
-
- for (i = 0; i < scb->pfds->nelts; i++) {
- apr_pollfd_t *pfd = (apr_pollfd_t *)scb->pfds->elts + i;
- if (pfd->reqevents) {
- if (pfd->reqevents & APR_POLLIN) {
- pfd->reqevents |= APR_POLLHUP;
- }
- pfd->reqevents |= APR_POLLERR;
- pfd->client_data = pt;
- }
- else {
- pfd->client_data = NULL;
- }
- }
-
- if (timeout > 0) {
- /* Prevent the timer from firing before the pollset is updated */
- if (timeout < TIMERS_FUDGE_TIMEOUT) {
- timeout = TIMERS_FUDGE_TIMEOUT;
- }
- scb->cancel_event = get_timer_event(timeout, tofn, baton, 1, scb->pfds);
- }
- for (i = 0; i < scb->pfds->nelts; i++) {
- apr_pollfd_t *pfd = (apr_pollfd_t *)scb->pfds->elts + i;
- if (pfd->client_data) {
- rc = apr_pollset_add(event_pollset, pfd);
- if (rc != APR_SUCCESS) {
- final_rc = rc;
- }
- }
- }
- return final_rc;
-}
-
-static apr_status_t event_register_poll_callback(apr_pool_t *p,
- const apr_array_header_t *pfds,
- ap_mpm_callback_fn_t *cbfn,
- void *baton)
-{
- return event_register_poll_callback_ex(p,
- pfds,
- cbfn,
- NULL, /* no timeout function */
- baton,
- 0 /* no timeout */);
+ get_timer_event(timeout, cbfn, baton, 1);
+ return APR_SUCCESS;
}
/*
@@ -2363,11 +2386,9 @@ static void process_lingering_close(event_conn_state_t *cs)
conn_rec *c = cs->c;
int rc = OK;
- cs->pub.state = CONN_STATE_LINGER;
-
if (!cs->linger_started) {
cs->linger_started = 1; /* once! */
- notify_suspend(cs);
+ cs->pub.state = CONN_STATE_LINGER;
/* Shutdown the connection, i.e. pre_connection_close hooks,
* SSL/TLS close notify, WC bucket, etc..
@@ -2431,8 +2452,7 @@ static void process_lingering_close(event_conn_state_t *cs)
* Pre-condition: timeout_mutex must already be locked
*/
static unsigned int process_timeout_queue_ex(struct timeout_queue *queue,
- apr_time_t now,
- int shrink)
+ apr_time_t now, int shrink)
{
unsigned int count = 0;
struct timeout_queue *q;
@@ -2466,6 +2486,7 @@ static unsigned int process_timeout_queue_ex(struct timeout_queue *queue,
break;
}
}
+ cs->timed_out = 1;
if (cs_in_backlog(cs)) {
/* Remove the backlog connection from worker_queue (note that
@@ -2473,8 +2494,8 @@ static unsigned int process_timeout_queue_ex(struct timeout_queue *queue,
* the backlog_q), and unreserve/set a worker/idler since
* none could handle the event.
*/
- ap_assert(cs_qe(cs)->cb_baton == cs);
ap_assert(cs->q == cs->sc->bl_q);
+ ap_assert(cs_qe(cs)->cb_baton == cs);
ap_queue_info_idlers_inc(worker_queue_info);
ap_queue_kill_event_locked(worker_queue, cs_qe(cs));
shutdown_connection(cs, now, 1);
@@ -2588,7 +2609,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
apr_time_t next_expiry = -1;
apr_interval_time_t timeout = -1;
int workers_were_busy = 0, force_stats = 0;
- socket_callback_baton_t *user_chain;
+ struct user_poll_baton *user_chain;
const apr_pollfd_t *out_pfd;
apr_time_t now, poll_time;
event_conn_state_t *cs;
@@ -2653,24 +2674,54 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
continue;
}
+ /* A timer without a callback is a cancel event for a cs in
+ * either:
+ * 1. CONN_STATE_ASYNC_WAITIO: the timer enforces a timeout
+ * different from the cs->sc->io_q's;
+ * 2. CONN_STATE_SUSPENDED: the timer enforces a timeout for
+ * some user pollfds bound to the cs.
+ * In both cases te->baton is the (timed out) cs.
+ * For 1. we can shut down the connection now, but for 2. we
+ * need to resume the suspended connection in a worker thread
+ * so that the responsible module is notified, which we do by
+ * setting CONN_STATE_LINGER and cs->timed_out to make sure that,
+ * after the next/last ap_run_resume_connection(), this state
+ * is maintained/restored to issue the actual close.
+ */
if (!te->cbfunc) {
cs = te->baton;
+ AP_DEBUG_ASSERT(cs != NULL);
+ AP_DEBUG_ASSERT(cs->te == te);
put_timer_event(te, 1);
- ap_assert(cs && cs->te == te);
- ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
- "timed out connection %" CS_FMT,
- CS_ARG(cs));
- (void)pollset_del(cs, 0);
- kill_connection(cs, APR_TIMEUP);
- continue;
- }
+ cs->te = te = NULL;
+ cs->timed_out = 1;
+
+ if (!cs->user_baton) {
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, cs->c,
+ "timed out connection %" CS_FMT,
+ CS_ARG(cs));
+ (void)pollset_del(cs, 0);
+ shutdown_connection(cs, now, 0);
+ continue;
+ }
+
+ /* Remove all user pollfds from the pollset */
+ AP_DEBUG_ASSERT(cs->user_baton->pfds != NULL);
+ apr_pool_cleanup_run(cs->user_baton->pool, cs->user_baton,
+ user_poll_cleanup);
+#ifdef AP_DEBUG
+ memset(cs->user_baton, 0, sizeof(*cs->user_baton));
+#endif
+ cs->user_baton = NULL;
- if (te->pfds) {
- /* remove all sockets from the pollset */
- apr_pool_cleanup_run(te->pfds->pool, te->pfds,
- event_cleanup_poll_callback);
+ AP_DEBUG_ASSERT(cs->suspended);
+ cs->pub.state = CONN_STATE_LINGER;
}
- push2worker(NULL, te, now, &workers_were_busy);
+ else {
+ cs = NULL;
+ }
+
+ push2worker(cs, te, now, &workers_were_busy);
}
if (te) {
next_expiry = te->when;
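For readers tracing the listener, the callback-less-timer branch above condenses to the following restatement (editorial sketch of the control flow, not literal code):

    if (!te->cbfunc) {              /* callback-less timer = cancel event */
        cs->timed_out = 1;
        if (!cs->user_baton) {      /* case 1: ASYNC_WAITIO custom timeout */
            (void)pollset_del(cs, 0);
            shutdown_connection(cs, now, 0);   /* close in the listener */
        }
        else {                      /* case 2: SUSPENDED user pollfds */
            /* unregister the pollfds, then wake a worker so the module's
             * resume_connection hook observes CONN_STATE_LINGER */
            cs->pub.state = CONN_STATE_LINGER;
            push2worker(cs, NULL, now, &workers_were_busy);
        }
    }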
@@ -2778,7 +2829,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
for (user_chain = NULL; num > 0; --num, ++out_pfd) {
listener_poll_type *pt = out_pfd->client_data;
- socket_callback_baton_t *baton;
+ struct user_poll_baton *user_baton;
switch (pt->type) {
case PT_CSD:
@@ -2894,13 +2945,13 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
case PT_USER:
/* Multiple pfds of the same baton might trigger in this pass
* so chain once here and run the cleanup only after this loop
- * to avoid lifetime issues (i.e. pfds->pool cleared while some
- * of its pfd->client_data are still to be dereferenced here).
+ * to avoid lifetime issues (i.e. user_baton->pool cleared while
+ * some of its pfd->client_data are still to be dereferenced here).
*/
- baton = pt->baton;
- if (baton != user_chain && !baton->next) {
- baton->next = user_chain;
- user_chain = baton;
+ user_baton = pt->baton;
+ if (user_baton != user_chain && !user_baton->next) {
+ user_baton->next = user_chain;
+ user_chain = user_baton;
}
break;
}
@@ -2908,27 +2959,32 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
/* Time to queue user callbacks chained above */
while (user_chain) {
- socket_callback_baton_t *baton = user_chain;
- user_chain = user_chain->next;
- baton->next = NULL;
+ struct user_poll_baton *user_baton = user_chain;
+ user_chain = user_baton->next;
+ user_baton->next = NULL;
+
+ cs = user_baton->cs;
+ AP_DEBUG_ASSERT(cs != NULL);
+ AP_DEBUG_ASSERT(cs->user_baton == user_baton);
+ AP_DEBUG_ASSERT(cs->te == user_baton->cancel_event);
+ AP_DEBUG_ASSERT(cs->pub.state == CONN_STATE_SUSPENDED);
+ AP_DEBUG_ASSERT(cs->suspended);
/* Not expirable anymore */
- if (baton->cancel_event) {
- baton->cancel_event->canceled = 1;
- baton->cancel_event = NULL;
+ if (cs->te) {
+ cs->te->canceled = 1;
+ cs->te = NULL;
}
- /* remove all sockets from the pollset */
- apr_pool_cleanup_run(baton->pfds->pool, baton->pfds,
- event_cleanup_poll_callback);
+ /* Remove all user pollfds from the pollset */
+ apr_pool_cleanup_run(user_baton->pool, user_baton,
+ user_poll_cleanup);
+#ifdef AP_DEBUG
+ memset(user_baton, 0, sizeof(*user_baton));
+#endif
- /* masquerade as a timer event that is firing */
- te = get_timer_event(-1 /* fake timer */,
- baton->cbfunc,
- baton->user_baton,
- 0, /* don't insert it */
- NULL /* no associated socket callback */);
- push2worker(NULL, te, now, &workers_were_busy);
+ /* Schedule ap_run_resume_connection() */
+ push2worker(cs, NULL, now, &workers_were_busy);
}
/* We process the timeout queues here only when the global
@@ -2959,6 +3015,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
*/
process_timeout_queue(shutdown_q, now);
+ /* No specific ordering requirement for these */
process_timeout_queue(waitio_q, now);
process_timeout_queue(write_completion_q, now);
process_timeout_queue(keepalive_q, now);
@@ -4433,7 +4490,6 @@ static void setup_slave_conn(conn_rec *c, void *csd)
cs = make_conn_state(c->pool, csd);
cs->c = c;
cs->sc = mcs->sc;
- cs->suspended = 0;
cs->bucket_alloc = c->bucket_alloc;
cs->pfd = mcs->pfd;
cs->pub = mcs->pub;
@@ -5085,14 +5141,11 @@ static void event_hooks(apr_pool_t * p)
ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
APR_HOOK_MIDDLE);
- ap_hook_mpm_register_poll_callback(event_register_poll_callback,
- NULL, NULL, APR_HOOK_MIDDLE);
- ap_hook_mpm_register_poll_callback_timeout(event_register_poll_callback_ex,
- NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_pre_read_request(event_pre_read_request, NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_post_read_request(event_post_read_request, NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_mpm_resume_suspended(event_resume_suspended, NULL, NULL, APR_HOOK_MIDDLE);
+ ap_hook_mpm_poll_suspended(event_poll_suspended, NULL, NULL, APR_HOOK_MIDDLE);
ap_hook_pre_connection(event_pre_connection, NULL, NULL, APR_HOOK_REALLY_FIRST);
ap_hook_protocol_switch(event_protocol_switch, NULL, NULL, APR_HOOK_REALLY_FIRST);
diff --git a/server/mpm_common.c b/server/mpm_common.c
index 2973bc9f4f2..d055fa2fd99 100644
--- a/server/mpm_common.c
+++ b/server/mpm_common.c
@@ -68,10 +68,9 @@
APR_HOOK_LINK(mpm) \
APR_HOOK_LINK(mpm_query) \
APR_HOOK_LINK(mpm_register_timed_callback) \
- APR_HOOK_LINK(mpm_register_poll_callback) \
- APR_HOOK_LINK(mpm_register_poll_callback_timeout) \
APR_HOOK_LINK(mpm_get_name) \
APR_HOOK_LINK(mpm_resume_suspended) \
+ APR_HOOK_LINK(mpm_poll_suspended) \
APR_HOOK_LINK(end_generation) \
APR_HOOK_LINK(child_status) \
APR_HOOK_LINK(output_pending) \
@@ -111,16 +110,11 @@ AP_IMPLEMENT_HOOK_RUN_FIRST(apr_status_t, mpm_register_timed_callback,
AP_IMPLEMENT_HOOK_RUN_FIRST(apr_status_t, mpm_resume_suspended,
(conn_rec *c),
(c), APR_ENOTIMPL)
-AP_IMPLEMENT_HOOK_RUN_FIRST(apr_status_t, mpm_register_poll_callback,
- (apr_pool_t *p, const apr_array_header_t *pds,
- ap_mpm_callback_fn_t *cbfn, void *baton),
- (p, pds, cbfn, baton), APR_ENOTIMPL)
-AP_IMPLEMENT_HOOK_RUN_FIRST(apr_status_t, mpm_register_poll_callback_timeout,
- (apr_pool_t *p, const apr_array_header_t *pds,
- ap_mpm_callback_fn_t *cbfn,
- ap_mpm_callback_fn_t *tofn,
- void *baton, apr_time_t timeout),
- (p, pds, cbfn, tofn, baton, timeout), APR_ENOTIMPL)
+AP_IMPLEMENT_HOOK_RUN_FIRST(apr_status_t, mpm_poll_suspended,
+ (conn_rec *c, apr_pool_t *p,
+ const apr_array_header_t *pfds,
+ apr_interval_time_t timeout),
+ (c, p, pfds, timeout), APR_ENOTIMPL)
AP_IMPLEMENT_HOOK_RUN_FIRST(int, output_pending,
(conn_rec *c), (c), DECLINED)
AP_IMPLEMENT_HOOK_RUN_FIRST(int, input_pending,
@@ -573,26 +567,17 @@ AP_DECLARE(apr_status_t) ap_mpm_resume_suspended(conn_rec *c)
return ap_run_mpm_resume_suspended(c);
}
-AP_DECLARE(apr_status_t) ap_mpm_register_timed_callback(apr_time_t t,
- ap_mpm_callback_fn_t *cbfn, void *baton)
+AP_DECLARE(apr_status_t) ap_mpm_poll_suspended(conn_rec *c, apr_pool_t *p,
+ const apr_array_header_t *pfds,
+ apr_interval_time_t timeout)
{
- return ap_run_mpm_register_timed_callback(t, cbfn, baton);
+ return ap_run_mpm_poll_suspended(c, p, pfds, timeout);
}
-AP_DECLARE(apr_status_t) ap_mpm_register_poll_callback(
- apr_pool_t *p, const apr_array_header_t *pfds,
+AP_DECLARE(apr_status_t) ap_mpm_register_timed_callback(apr_time_t t,
ap_mpm_callback_fn_t *cbfn, void *baton)
{
- return ap_run_mpm_register_poll_callback(p, pfds, cbfn, baton);
-}
-
-AP_DECLARE(apr_status_t) ap_mpm_register_poll_callback_timeout(
- apr_pool_t *p, const apr_array_header_t *pfds,
- ap_mpm_callback_fn_t *cbfn, ap_mpm_callback_fn_t *tofn,
- void *baton, apr_time_t timeout)
-{
- return ap_run_mpm_register_poll_callback_timeout(p, pfds, cbfn, tofn,
- baton, timeout);
+ return ap_run_mpm_register_timed_callback(t, cbfn, baton);
}
AP_DECLARE(const char *)ap_show_mpm(void)
diff --git a/server/mpm_fdqueue.h b/server/mpm_fdqueue.h
index 29297fd60d5..4bb17c82955 100644
--- a/server/mpm_fdqueue.h
+++ b/server/mpm_fdqueue.h
@@ -89,7 +89,6 @@ struct timer_event_t
ap_mpm_callback_fn_t *cbfunc;
void *baton;
int canceled;
- apr_array_header_t *pfds;
apr_interval_time_t timeout;
};
typedef struct timer_event_t timer_event_t;
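Because mpm_poll_suspended is a RUN_FIRST hook that defaults to APR_ENOTIMPL, modules must degrade gracefully on MPMs that never implement it. A hedged sketch of the capability guard used by the proxy modules in this series:

    #include "ap_mpm.h"

    static int can_poll_suspended = 0;

    /* Query once at post_config; handlers then pick between suspending
     * to the MPM and tunneling synchronously in the worker thread.
     */
    static void query_mpm(void)
    {
    #ifdef AP_MPMQ_CAN_POLL_SUSPENDED
        if (ap_mpm_query(AP_MPMQ_CAN_POLL_SUSPENDED, &can_poll_suspended)) {
            can_poll_suspended = 0;   /* query failed: assume unsupported */
        }
    #endif
    }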
From 92d0cdd150e7cab265dbd52b01a32954333eaf8a Mon Sep 17 00:00:00 2001
From: ylavic
Date: Tue, 27 Jun 2023 01:54:48 +0200
Subject: [PATCH 22/22] mod_status: Be less racy, improve rendering, and show
suspended connections.
---
modules/generators/mod_status.c | 240 ++++++++++++++++++++------------
modules/lua/lua_request.c | 4 +-
2 files changed, 154 insertions(+), 90 deletions(-)
diff --git a/modules/generators/mod_status.c b/modules/generators/mod_status.c
index 5ff635cc96e..20187af882b 100644
--- a/modules/generators/mod_status.c
+++ b/modules/generators/mod_status.c
@@ -71,6 +71,7 @@
#define APR_WANT_STRFUNC
#include "apr_want.h"
#include "apr_strings.h"
+#include "apr_atomic.h"
#define STATUS_MAXLINE 64
@@ -199,10 +200,15 @@ static int status_handler(request_rec *r)
int short_report;
int no_table_report;
global_score *global_record;
- worker_score *ws_record;
+ volatile process_score *ps;
process_score *ps_record;
+ worker_score *ws_record;
char *stat_buffer;
- pid_t *pid_buffer, worker_pid;
+ pid_t worker_pid;
+ struct {
+ pid_t pid;
+ ap_generation_t gen;
+ } *proc_buffer;
int *thread_idle_buffer = NULL;
int *thread_graceful_buffer = NULL;
int *thread_busy_buffer = NULL;
@@ -249,7 +255,7 @@ static int status_handler(request_rec *r)
return HTTP_INTERNAL_SERVER_ERROR;
}
- pid_buffer = apr_palloc(r->pool, server_limit * sizeof(pid_t));
+ proc_buffer = apr_palloc(r->pool, server_limit * sizeof(*proc_buffer));
stat_buffer = apr_palloc(r->pool, server_limit * thread_limit * sizeof(char));
if (is_async) {
thread_idle_buffer = apr_palloc(r->pool, server_limit * sizeof(int));
@@ -311,6 +317,7 @@ static int status_handler(request_rec *r)
}
}
+ ps_record = apr_palloc(r->pool, sizeof *ps_record);
ws_record = apr_palloc(r->pool, sizeof *ws_record);
for (i = 0; i < server_limit; ++i) {
@@ -319,7 +326,15 @@ static int status_handler(request_rec *r)
clock_t tmp_tu, tmp_ts, tmp_tcu, tmp_tcs;
#endif
- ps_record = ap_get_scoreboard_process(i);
+ /* Snapshot all in one go */
+ ps = ap_get_scoreboard_process(i);
+ do {
+ proc_buffer[i].pid = ps->pid;
+ proc_buffer[i].gen = ps->generation;
+ memcpy(ps_record, (void *)ps, sizeof(*ps_record));
+ } while (ps_record->generation != proc_buffer[i].gen
+ || ps_record->pid != proc_buffer[i].pid);
+
if (is_async) {
thread_idle_buffer[i] = 0;
thread_graceful_buffer[i] = 0;
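The do/while snapshot above is a lock-free consistent read: the pid and generation fields change whenever a scoreboard slot is recycled, so a copy torn by a concurrent restart is simply retried. The same idea in isolation (a sketch, not the scoreboard API; like the patch it relies on the key fields changing on reuse rather than on memory barriers):

    #include <string.h>

    typedef struct { int pid; int gen; long counters[8]; } slot_t;

    static void snapshot(const volatile slot_t *shared, slot_t *copy)
    {
        int pid, gen;
        do {
            pid = shared->pid;   /* key fields first */
            gen = shared->gen;
            memcpy(copy, (const void *)shared, sizeof(*copy));
        } while (copy->pid != pid || copy->gen != gen);  /* torn: retry */
    }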
@@ -328,7 +343,12 @@ static int status_handler(request_rec *r)
for (j = 0; j < thread_limit; ++j) {
int indx = (i * thread_limit) + j;
- ap_copy_scoreboard_worker(ws_record, i, j);
+ if (ps_record->pid) {
+ ap_copy_scoreboard_worker(ws_record, i, j);
+ }
+ else {
+ memset(ws_record, 0, sizeof(*ws_record));
+ }
res = ws_record->status;
if ((i >= max_servers || j >= threads_per_child)
@@ -337,8 +357,8 @@ static int status_handler(request_rec *r)
else
stat_buffer[indx] = status_flags[res];
- if (!ps_record->quiescing
- && ps_record->pid) {
+ if (ps_record->pid
+ && !ps_record->quiescing) {
if (res == SERVER_READY) {
if (ps_record->generation == mpm_generation)
idle++;
@@ -410,7 +430,6 @@ static int status_handler(request_rec *r)
tcu += proc_tcu;
tcs += proc_tcs;
#endif
- pid_buffer[i] = ps_record->pid;
}
/* up_time in seconds */
@@ -426,14 +445,15 @@ static int status_handler(request_rec *r)
"Apache Server Status for ", r);
ap_rvputs(r, ap_escape_html(r->pool, ap_get_server_name(r)),
" (via ", r->connection->local_ip,
- ")
\n\n", NULL);
- ap_rvputs(r, "- Server Version: ",
+ ")\n", NULL);
+ ap_rvputs(r, "
\n- Server Version: ",
ap_get_server_description(), "
\n", NULL);
- ap_rvputs(r, "- Server MPM: ",
- ap_show_mpm(), "
\n", NULL);
ap_rvputs(r, "- Server Built: ",
- ap_get_server_built(), "\n
\n", NULL);
- ap_rvputs(r, "- Current Time: ",
+ ap_get_server_built(), "
\n", NULL);
+ ap_rvputs(r, "- Server MPM: ",
+ ap_show_mpm(), "
\n
\n"
+ "
\n", NULL);
+ ap_rvputs(r, "\n- Current Time: ",
ap_ht_time(r->pool, nowtime, DEFAULT_TIME_FORMAT, 0),
"
\n", NULL);
ap_rvputs(r, "- Restart Time: ",
@@ -561,97 +581,131 @@ static int status_handler(request_rec *r)
ap_rprintf(r, "BusyWorkers: %d\nGracefulWorkers: %d\nIdleWorkers: %d\n", busy, graceful, idle);
if (!short_report)
- ap_rputs("
", r);
+ ap_rputs("
\n", r);
if (is_async) {
- int wait_io = 0, write_completion = 0, shutdown = 0, lingering_close = 0,
- keep_alive = 0, connections = 0, stopping = 0, procs = 0;
+ apr_uint32_t procs = 0, stopping = 0, accepting = 0,
+ connections = 0, backlog = 0, wait_io = 0, writing = 0,
+ keep_alive = 0, shutdown = 0, suspended = 0, closing = 0;
if (!short_report)
- ap_rputs("\n\n\n"
- "Slot | "
- "PID | "
- "Stopping | "
- "Connections | \n"
+ ap_rputs("\n"
+ "Processes | "
"Threads | "
- "Async connections |
\n"
- "total | accepting | "
- "busy | graceful | idle | "
- "wait-io | writing | keep-alive | "
- "shutdown | closing |
\n", r);
+ "Connections | "
+ "Async queues | \n"
+ "Slot | PID | stopping | accepting | "
+ "idle | busy | graceful | "
+ "total | backlog | "
+ "wait-io | writing | "
+ "keep-alive | shutdown | "
+ "suspended | closing |
\n",
+ r);
for (i = 0; i < server_limit; ++i) {
- ps_record = ap_get_scoreboard_process(i);
- if (ps_record->pid) {
+ ps = ap_get_scoreboard_process(i);
+ if (!proc_buffer[i].pid
+ || ps->pid != proc_buffer[i].pid
+ || ps->generation != proc_buffer[i].gen) {
+ continue;
+ }
+
+ /* Still the same as what we accounted for earlier? */
+ memcpy(ps_record, (void *)ps, sizeof(*ps_record));
+ if (ps_record->pid == proc_buffer[i].pid
+ && ps_record->generation == proc_buffer[i].gen) {
connections += ps_record->connections;
+ backlog += ps_record->backlog;
wait_io += ps_record->wait_io;
- write_completion += ps_record->write_completion;
+ writing += ps_record->write_completion;
keep_alive += ps_record->keep_alive;
shutdown += ps_record->shutdown;
- lingering_close += ps_record->lingering_close;
+ suspended += ps_record->suspended;
+ closing += ps_record->lingering_close;
procs++;
if (ps_record->quiescing) {
stopping++;
}
+ if (!ps_record->not_accepting) {
+ accepting++;
+ }
if (!short_report) {
const char *dying = "no";
const char *old = "";
+ const char *listening = "yes";
if (ps_record->quiescing) {
dying = "yes";
}
- if (ps_record->generation != mpm_generation)
+ if (ps_record->generation != mpm_generation) {
old = " (old gen)";
+ }
+ if (ps_record->not_accepting) {
+ listening = "no";
+ }
ap_rprintf(r, "%u | %" APR_PID_T_FMT " | "
- "%s%s | "
- "%u | %s | "
- "%u | %u | %u | %u | "
- "%u | %u | %u | %u | "
- "
\n",
+ "%s%s | %s | "
+ "%d | %d | %d | "
+ "%u | %u | "
+ "%u | %u | "
+ "%u | %u | "
+ "%u | %u | \n",
i, ps_record->pid,
- dying, old,
- ps_record->connections,
- ps_record->not_accepting ? "no" : "yes",
+ dying, old, listening,
+ thread_idle_buffer[i],
thread_busy_buffer[i],
thread_graceful_buffer[i],
- thread_idle_buffer[i],
+ ps_record->connections,
+ ps_record->backlog,
ps_record->wait_io,
ps_record->write_completion,
ps_record->keep_alive,
ps_record->shutdown,
+ ps_record->suspended,
ps_record->lingering_close);
}
}
}
if (!short_report) {
ap_rprintf(r, "Sum | "
- "%d | %d | "
- "%d | | "
- "%d | %d | %d | %d | "
- "%d | %d | %d | %d | "
- "
\n
\n",
- procs, stopping,
- connections,
- busy, graceful, idle,
- wait_io, write_completion, keep_alive,
- shutdown, lingering_close);
+ "%u | %u | %u | "
+ "%u | %u | %u | "
+ "%u | %u | "
+ "%u | %u | "
+ "%u | %u | "
+ "%u | %u |
\n"
+ "
\n",
+ procs, stopping, accepting,
+ idle, busy, graceful,
+ connections, backlog,
+ wait_io, writing,
+ keep_alive, shutdown,
+ suspended, closing);
}
else {
- ap_rprintf(r, "Processes: %d\n"
- "Stopping: %d\n"
- "ConnsTotal: %d\n"
- "ConnsAsyncWaitIO: %d\n"
- "ConnsAsyncWriting: %d\n"
- "ConnsAsyncKeepAlive: %d\n"
- "ConnsAsyncShutdown: %d\n"
- "ConnsAsyncClosing: %d\n",
- procs, stopping,
- connections,
- wait_io, write_completion, keep_alive,
- shutdown, lingering_close);
+ ap_rprintf(r, "Processes: %u\n"
+ "Stopping: %u\n"
+ "Accepting: %u\n"
+ "ThreadsIdle: %u\n"
+ "ThreadsBusy: %u\n"
+ "ThreadsGraceful: %u\n"
+ "ConnsTotal: %u\n"
+ "ConnsBacklog: %u\n"
+ "ConnsAsyncWaitIO: %u\n"
+ "ConnsAsyncWriting: %u\n"
+ "ConnsAsyncKeepAlive: %u\n"
+ "ConnsAsyncShutdown: %u\n"
+ "ConnsAsyncSuspended: %u\n"
+ "ConnsAsyncClosing: %u\n",
+ procs, stopping, accepting,
+ busy, idle, graceful,
+ connections, backlog,
+ wait_io, writing,
+ keep_alive, shutdown,
+ suspended, closing);
}
}
/* send the scoreboard 'table' out */
if (!short_report)
- ap_rputs("", r);
+ ap_rputs("\n", r);
else
ap_rputs("Scoreboard: ", r);
@@ -673,11 +727,11 @@ static int status_handler(request_rec *r)
if (short_report)
ap_rputs("\n", r);
else {
- ap_rputs("
\n"
+ ap_rputs("\n
\n"
"Scoreboard Key:
\n"
"\"_
\" Waiting for Connection, \n"
"\"S
\" Starting up, \n"
- "\"R
\" Reading Request,
\n"
+ "\"R
\" Waiting I/O,
\n"
"\"W
\" Sending Reply, \n"
"\"K
\" Keepalive (read), \n"
"\"D
\" DNS Lookup,
\n"
@@ -690,17 +744,21 @@ static int status_handler(request_rec *r)
if (!ap_extended_status) {
int j;
int k = 0;
- ap_rputs("PID Key:
\n"
+ ap_rputs("
PID Key:
\n"
"
\n", r);
for (i = 0; i < server_limit; ++i) {
+ ps = ap_get_scoreboard_process(i);
+ if (!proc_buffer[i].pid
+ || ps->pid != proc_buffer[i].pid
+ || ps->generation != proc_buffer[i].gen) {
+ continue;
+ }
for (j = 0; j < thread_limit; ++j) {
int indx = (i * thread_limit) + j;
- if (stat_buffer[indx] != '.') {
- ap_rprintf(r, " %" APR_PID_T_FMT
- " in state: %c ", pid_buffer[i],
- stat_buffer[indx]);
-
+ if (stat_buffer[indx] != status_flags[SERVER_DISABLED]) {
+ ap_rprintf(r, " %8" APR_PID_T_FMT " in state: %c ",
+ proc_buffer[i].pid, stat_buffer[indx]);
if (++k >= 3) {
ap_rputs("\n", r);
k = 0;
@@ -709,17 +767,16 @@ static int status_handler(request_rec *r)
}
}
}
-
- ap_rputs("\n"
- "</pre>\n", r);
+ ap_rvputs(r, k ? "\n" : "", "\n", "</pre>\n", NULL);
}
}
if (ap_extended_status && !short_report) {
if (no_table_report)
- ap_rputs("
Server Details
\n\n", r);
+ ap_rputs("
\nServer Details
\n", r);
else
- ap_rputs("\n\n"
+ ap_rputs("
\n"
+ ""
"Srv | PID | Acc | "
"M | "
#ifdef HAVE_TIMES
@@ -728,9 +785,16 @@ static int status_handler(request_rec *r)
"SS | Req | Dur | "
"Conn | Child | Slot | "
"Client | Protocol | VHost | "
- "Request |
\n\n", r);
+ "Request | \n", r);
for (i = 0; i < server_limit; ++i) {
+ ps = ap_get_scoreboard_process(i);
+ if (!proc_buffer[i].pid
+ || ps->pid != proc_buffer[i].pid
+ || ps->generation != proc_buffer[i].gen) {
+ continue;
+ }
+
for (j = 0; j < thread_limit; ++j) {
ap_copy_scoreboard_worker(ws_record, i, j);
@@ -740,8 +804,6 @@ static int status_handler(request_rec *r)
continue;
}
- ps_record = ap_get_scoreboard_process(i);
-
if (ws_record->start_time == 0L)
req_time = 0L;
else
@@ -763,8 +825,8 @@ static int status_handler(request_rec *r)
worker_generation = ws_record->generation;
}
else {
- worker_pid = ps_record->pid;
- worker_generation = ps_record->generation;
+ worker_pid = proc_buffer[i].pid;
+ worker_generation = proc_buffer[i].gen;
}
if (no_table_report) {
@@ -842,7 +904,7 @@ static int status_handler(request_rec *r)
format_byte_out(r, bytes);
ap_rputs(")\n", r);
ap_rprintf(r,
- " %s {%s} (%s) [%s]
\n\n",
+ " %s {%s} (%s) [%s]
\n",
ap_escape_html(r->pool,
ws_record->client64),
ap_escape_html(r->pool,
@@ -929,7 +991,7 @@ static int status_handler(request_rec *r)
(float)bytes / MBYTE);
ap_rprintf(r, "%s | %s | %s | "
- "%s | \n\n",
+ "%s | \n",
ap_escape_html(r->pool,
ws_record->client64),
ap_escape_html(r->pool,
@@ -945,7 +1007,7 @@ static int status_handler(request_rec *r)
if (!no_table_report) {
ap_rputs("
\n \
-
\
+\n \
\n \
Srv | Child Server number - generation |
\n \
PID | OS process ID |
\n \
@@ -962,13 +1024,15 @@ static int status_handler(request_rec *r)
<tr><th>Conn</th><td>Kilobytes transferred this connection</td></tr>\n \
<tr><th>Child</th><td>Megabytes transferred this child</td></tr>\n \
<tr><th>Slot</th><td>Total megabytes transferred this slot</td></tr>\n \
-</table>\n", r);
+</table>\n \
+", r);
}
} /* if (ap_extended_status && !short_report) */
else {
if (!short_report) {
- ap_rputs("
To obtain a full report with current status "
+ ap_rputs("
\n"
+ "To obtain a full report with current status "
"information you need to use the "
"ExtendedStatus On
directive.\n", r);
}
@@ -986,7 +1050,7 @@ static int status_handler(request_rec *r)
if (!short_report) {
ap_rputs(ap_psignature("
\n",r), r);
- ap_rputs("