diff --git a/Documentation/technical/api-trace2.txt b/Documentation/technical/api-trace2.txt index b9f3198fbe7..ef7fe02a8f7 100644 --- a/Documentation/technical/api-trace2.txt +++ b/Documentation/technical/api-trace2.txt @@ -613,6 +613,46 @@ stopping after the waitpid() and includes OS process creation overhead). So this time will be slightly larger than the atexit time reported by the child process itself. +`"child_ready"`:: + This event is generated after the current process has started + a background process and released all handles to it. ++ +------------ +{ + "event":"child_ready", + ... + "child_id":2, + "pid":14708, # child PID + "ready":"ready", # child ready state + "t_rel":0.110605 # observed run-time of child process +} +------------ ++ +Note that the session-id of the child process is not available to +the current/spawning process, so the child's PID is reported here as +a hint for post-processing. (But it is only a hint because the child +process may be a shell script which doesn't have a session-id.) ++ +This event is generated after the child is started in the background +and given a little time to boot up and start working. If the child +startups normally and while the parent is still waiting, the "ready" +field will have the value "ready". +If the child is too slow to start and the parent times out, the field +will have the value "timeout". +If the child starts but the parent is unable to probe it, the field +will have the value "error". ++ +After the parent process emits this event, it will release all of its +handles to the child process and treat the child as a background +daemon. So even if the child does eventually finish booting up, +the parent will not emit an updated event. ++ +Note that the `t_rel` field contains the observed run time in seconds +when the parent released the child process into the background. +The child is assumed to be a long-running daemon process and may +outlive the parent process. So the parent's child event times should +not be compared to the child's atexit times. + `"exec"`:: This event is generated before git attempts to `exec()` another command rather than starting a child process. diff --git a/compat/simple-ipc/ipc-unix-socket.c b/compat/simple-ipc/ipc-unix-socket.c index 1927e6ef4bc..4e28857a0a1 100644 --- a/compat/simple-ipc/ipc-unix-socket.c +++ b/compat/simple-ipc/ipc-unix-socket.c @@ -168,7 +168,8 @@ void ipc_client_close_connection(struct ipc_client_connection *connection) int ipc_client_send_command_to_connection( struct ipc_client_connection *connection, - const char *message, struct strbuf *answer) + const char *message, size_t message_len, + struct strbuf *answer) { int ret = 0; @@ -176,7 +177,7 @@ int ipc_client_send_command_to_connection( trace2_region_enter("ipc-client", "send-command", NULL); - if (write_packetized_from_buf_no_flush(message, strlen(message), + if (write_packetized_from_buf_no_flush(message, message_len, connection->fd) < 0 || packet_flush_gently(connection->fd) < 0) { ret = error(_("could not send IPC command")); @@ -197,7 +198,8 @@ int ipc_client_send_command_to_connection( int ipc_client_send_command(const char *path, const struct ipc_client_connect_options *options, - const char *message, struct strbuf *answer) + const char *message, size_t message_len, + struct strbuf *answer) { int ret = -1; enum ipc_active_state state; @@ -208,7 +210,9 @@ int ipc_client_send_command(const char *path, if (state != IPC_STATE__LISTENING) return ret; - ret = ipc_client_send_command_to_connection(connection, message, answer); + ret = ipc_client_send_command_to_connection(connection, + message, message_len, + answer); ipc_client_close_connection(connection); @@ -503,7 +507,7 @@ static int worker_thread__do_io( if (ret >= 0) { ret = worker_thread_data->server_data->application_cb( worker_thread_data->server_data->application_data, - buf.buf, do_io_reply_callback, &reply_data); + buf.buf, buf.len, do_io_reply_callback, &reply_data); packet_flush_gently(reply_data.fd); } diff --git a/compat/simple-ipc/ipc-win32.c b/compat/simple-ipc/ipc-win32.c index 8dc7bda087d..20ea7b65e0b 100644 --- a/compat/simple-ipc/ipc-win32.c +++ b/compat/simple-ipc/ipc-win32.c @@ -3,6 +3,8 @@ #include "strbuf.h" #include "pkt-line.h" #include "thread-utils.h" +#include "accctrl.h" +#include "aclapi.h" #ifndef SUPPORTS_SIMPLE_IPC /* @@ -49,6 +51,9 @@ static enum ipc_active_state get_active_state(wchar_t *pipe_path) if (GetLastError() == ERROR_FILE_NOT_FOUND) return IPC_STATE__PATH_NOT_FOUND; + trace2_data_intmax("ipc-debug", NULL, "getstate/waitpipe/gle", + (intmax_t)GetLastError()); + return IPC_STATE__OTHER_ERROR; } @@ -109,9 +114,15 @@ static enum ipc_active_state connect_to_server( t_start_ms = (DWORD)(getnanotime() / 1000000); if (!WaitNamedPipeW(wpath, timeout_ms)) { - if (GetLastError() == ERROR_SEM_TIMEOUT) + DWORD gleWait = GetLastError(); + + if (gleWait == ERROR_SEM_TIMEOUT) return IPC_STATE__NOT_LISTENING; + trace2_data_intmax("ipc-debug", NULL, + "connect/waitpipe/gle", + (intmax_t)gleWait); + return IPC_STATE__OTHER_ERROR; } @@ -133,17 +144,31 @@ static enum ipc_active_state connect_to_server( break; /* try again */ default: + trace2_data_intmax("ipc-debug", NULL, + "connect/createfile/gle", + (intmax_t)gle); + return IPC_STATE__OTHER_ERROR; } } if (!SetNamedPipeHandleState(hPipe, &mode, NULL, NULL)) { + gle = GetLastError(); + trace2_data_intmax("ipc-debug", NULL, + "connect/setpipestate/gle", + (intmax_t)gle); + CloseHandle(hPipe); return IPC_STATE__OTHER_ERROR; } *pfd = _open_osfhandle((intptr_t)hPipe, O_RDWR|O_BINARY); if (*pfd < 0) { + gle = GetLastError(); + trace2_data_intmax("ipc-debug", NULL, + "connect/openosfhandle/gle", + (intmax_t)gle); + CloseHandle(hPipe); return IPC_STATE__OTHER_ERROR; } @@ -208,7 +233,8 @@ void ipc_client_close_connection(struct ipc_client_connection *connection) int ipc_client_send_command_to_connection( struct ipc_client_connection *connection, - const char *message, struct strbuf *answer) + const char *message, size_t message_len, + struct strbuf *answer) { int ret = 0; @@ -216,7 +242,7 @@ int ipc_client_send_command_to_connection( trace2_region_enter("ipc-client", "send-command", NULL); - if (write_packetized_from_buf_no_flush(message, strlen(message), + if (write_packetized_from_buf_no_flush(message, message_len, connection->fd) < 0 || packet_flush_gently(connection->fd) < 0) { ret = error(_("could not send IPC command")); @@ -239,7 +265,8 @@ int ipc_client_send_command_to_connection( int ipc_client_send_command(const char *path, const struct ipc_client_connect_options *options, - const char *message, struct strbuf *response) + const char *message, size_t message_len, + struct strbuf *response) { int ret = -1; enum ipc_active_state state; @@ -250,7 +277,9 @@ int ipc_client_send_command(const char *path, if (state != IPC_STATE__LISTENING) return ret; - ret = ipc_client_send_command_to_connection(connection, message, response); + ret = ipc_client_send_command_to_connection(connection, + message, message_len, + response); ipc_client_close_connection(connection); @@ -458,7 +487,7 @@ static int do_io(struct ipc_server_thread_data *server_thread_data) if (ret >= 0) { ret = server_thread_data->server_data->application_cb( server_thread_data->server_data->application_data, - buf.buf, do_io_reply_callback, &reply_data); + buf.buf, buf.len, do_io_reply_callback, &reply_data); packet_flush_gently(reply_data.fd); @@ -565,11 +594,132 @@ static void *server_thread_proc(void *_server_thread_data) return NULL; } +/* + * We need to build a Windows "SECURITY_ATTRIBUTES" object and use it + * to apply an ACL when we create the initial instance of the Named + * Pipe. The construction is somewhat involved and consists of + * several sequential steps and intermediate objects. + * + * We use this structure to hold these intermediate pointers so that + * we can free them as a group. (It is unclear from the docs whether + * some of these intermediate pointers can be freed before we are + * finished using the "lpSA" member.) + */ +struct my_sa_data +{ + PSID pEveryoneSID; + PACL pACL; + PSECURITY_DESCRIPTOR pSD; + LPSECURITY_ATTRIBUTES lpSA; +}; + +static void init_sa(struct my_sa_data *d) +{ + memset(d, 0, sizeof(*d)); +} + +static void release_sa(struct my_sa_data *d) +{ + if (d->pEveryoneSID) + FreeSid(d->pEveryoneSID); + if (d->pACL) + LocalFree(d->pACL); + if (d->pSD) + LocalFree(d->pSD); + if (d->lpSA) + LocalFree(d->lpSA); + + memset(d, 0, sizeof(*d)); +} + +/* + * Create SECURITY_ATTRIBUTES to apply to the initial named pipe. The + * creator of the first server instance gets to set the ACLs on it. + * + * We allow the well-known group `EVERYONE` to have read+write access + * to the named pipe so that clients can send queries to the daemon + * and receive the response. + * + * Normally, this is not necessary since the daemon is usually + * automatically started by a foreground command like `git status`, + * but in those cases where an elevated Git command started the daemon + * (such that the daemon itself runs with elevation), we need to add + * the ACL so that non-elevated commands can write to it. + * + * The following document was helpful: + * https://docs.microsoft.com/en-us/windows/win32/secauthz/creating-a-security-descriptor-for-a-new-object-in-c-- + * + * Returns d->lpSA set to a SA or NULL. + */ +static LPSECURITY_ATTRIBUTES get_sa(struct my_sa_data *d) +{ + SID_IDENTIFIER_AUTHORITY sid_auth_world = SECURITY_WORLD_SID_AUTHORITY; +#define NR_EA (1) + EXPLICIT_ACCESS ea[NR_EA]; + DWORD dwResult; + + if (!AllocateAndInitializeSid(&sid_auth_world, 1, + SECURITY_WORLD_RID, 0,0,0,0,0,0,0, + &d->pEveryoneSID)) { + DWORD gle = GetLastError(); + trace2_data_intmax("ipc-debug", NULL, "alloc-world-sid/gle", + (intmax_t)gle); + goto fail; + } + + memset(ea, 0, NR_EA * sizeof(EXPLICIT_ACCESS)); + + ea[0].grfAccessPermissions = GENERIC_READ | GENERIC_WRITE; + ea[0].grfAccessMode = SET_ACCESS; + ea[0].grfInheritance = NO_INHERITANCE; + ea[0].Trustee.MultipleTrusteeOperation = NO_MULTIPLE_TRUSTEE; + ea[0].Trustee.TrusteeForm = TRUSTEE_IS_SID; + ea[0].Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP; + ea[0].Trustee.ptstrName = (LPTSTR)d->pEveryoneSID; + + dwResult = SetEntriesInAcl(NR_EA, ea, NULL, &d->pACL); + if (dwResult != ERROR_SUCCESS) { + DWORD gle = GetLastError(); + trace2_data_intmax("ipc-debug", NULL, "set-acl-entry/gle", + (intmax_t)gle); + trace2_data_intmax("ipc-debug", NULL, "set-acl-entry/dw", + (intmax_t)dwResult); + goto fail; + } + + d->pSD = (PSECURITY_DESCRIPTOR)LocalAlloc( + LPTR, SECURITY_DESCRIPTOR_MIN_LENGTH); + if (!InitializeSecurityDescriptor(d->pSD, SECURITY_DESCRIPTOR_REVISION)) { + DWORD gle = GetLastError(); + trace2_data_intmax("ipc-debug", NULL, "init-sd/gle", (intmax_t)gle); + goto fail; + } + + if (!SetSecurityDescriptorDacl(d->pSD, TRUE, d->pACL, FALSE)) { + DWORD gle = GetLastError(); + trace2_data_intmax("ipc-debug", NULL, "set-sd-dacl/gle", (intmax_t)gle); + goto fail; + } + + d->lpSA = (LPSECURITY_ATTRIBUTES)LocalAlloc(LPTR, sizeof(SECURITY_ATTRIBUTES)); + d->lpSA->nLength = sizeof(SECURITY_ATTRIBUTES); + d->lpSA->lpSecurityDescriptor = d->pSD; + d->lpSA->bInheritHandle = FALSE; + + return d->lpSA; + +fail: + release_sa(d); + return NULL; +} + static HANDLE create_new_pipe(wchar_t *wpath, int is_first) { HANDLE hPipe; DWORD dwOpenMode, dwPipeMode; - LPSECURITY_ATTRIBUTES lpsa = NULL; + struct my_sa_data my_sa_data; + + init_sa(&my_sa_data); dwOpenMode = PIPE_ACCESS_INBOUND | PIPE_ACCESS_OUTBOUND | FILE_FLAG_OVERLAPPED; @@ -585,20 +735,15 @@ static HANDLE create_new_pipe(wchar_t *wpath, int is_first) * set the ACL / Security Attributes on the named * pipe; subsequent instances inherit and cannot * change them. - * - * TODO Should we allow the application layer to - * specify security attributes, such as `LocalService` - * or `LocalSystem`, when we create the named pipe? - * This question is probably not important when the - * daemon is started by a foreground user process and - * only needs to talk to the current user, but may be - * if the daemon is run via the Control Panel as a - * System Service. */ + get_sa(&my_sa_data); } hPipe = CreateNamedPipeW(wpath, dwOpenMode, dwPipeMode, - PIPE_UNLIMITED_INSTANCES, 1024, 1024, 0, lpsa); + PIPE_UNLIMITED_INSTANCES, 1024, 1024, 0, + my_sa_data.lpSA); + + release_sa(&my_sa_data); return hPipe; } diff --git a/run-command.c b/run-command.c index e4862b8f46d..7ef5cc712a9 100644 --- a/run-command.c +++ b/run-command.c @@ -1874,3 +1874,132 @@ void prepare_other_repo_env(struct strvec *env_array, const char *new_git_dir) } strvec_pushf(env_array, "%s=%s", GIT_DIR_ENVIRONMENT, new_git_dir); } + +enum start_bg_result start_bg_command(struct child_process *cmd, + start_bg_wait_cb *wait_cb, + void *cb_data, + unsigned int timeout_sec) +{ + enum start_bg_result sbgr = SBGR_ERROR; + int ret; + int wait_status; + pid_t pid_seen; + time_t time_limit; + + /* + * We do not allow clean-on-exit because the child process + * should persist in the background and possibly/probably + * after this process exits. So we don't want to kill the + * child during our atexit routine. + */ + if (cmd->clean_on_exit) + BUG("start_bg_command() does not allow non-zero clean_on_exit"); + + if (!cmd->trace2_child_class) + cmd->trace2_child_class = "background"; + + ret = start_command(cmd); + if (ret) { + /* + * We assume that if `start_command()` fails, we + * either get a complete `trace2_child_start() / + * trace2_child_exit()` pair or it fails before the + * `trace2_child_start()` is emitted, so we do not + * need to worry about it here. + * + * We also assume that `start_command()` does not add + * us to the cleanup list. And that it calls + * calls `child_process_clear()`. + */ + sbgr = SBGR_ERROR; + goto done; + } + + time(&time_limit); + time_limit += timeout_sec; + +wait: + pid_seen = waitpid(cmd->pid, &wait_status, WNOHANG); + + if (!pid_seen) { + /* + * The child is currently running. Ask the callback + * if the child is ready to do work or whether we + * should keep waiting for it to boot up. + */ + ret = (*wait_cb)(cmd, cb_data); + if (!ret) { + /* + * The child is running and "ready". + */ + trace2_child_ready(cmd, "ready"); + sbgr = SBGR_READY; + goto done; + } else if (ret > 0) { + /* + * The callback said to give it more time to boot up + * (subject to our timeout limit). + */ + time_t now; + + time(&now); + if (now < time_limit) + goto wait; + + /* + * Our timeout has expired. We don't try to + * kill the child, but rather let it continue + * (hopefully) trying to startup. + */ + trace2_child_ready(cmd, "timeout"); + sbgr = SBGR_TIMEOUT; + goto done; + } else { + /* + * The cb gave up on this child. It is still running, + * but our cb got an error trying to probe it. + */ + trace2_child_ready(cmd, "error"); + sbgr = SBGR_CB_ERROR; + goto done; + } + } + + else if (pid_seen == cmd->pid) { + int child_code = -1; + + /* + * The child started, but exited or was terminated + * before becoming "ready". + * + * We try to match the behavior of `wait_or_whine()` + * WRT the handling of WIFSIGNALED() and WIFEXITED() + * and convert the child's status to a return code for + * tracing purposes and emit the `trace2_child_exit()` + * event. + * + * We do not want the wait_or_whine() error message + * because we will be called by client-side library + * routines. + */ + if (WIFEXITED(wait_status)) + child_code = WEXITSTATUS(wait_status); + else if (WIFSIGNALED(wait_status)) + child_code = WTERMSIG(wait_status) + 128; + trace2_child_exit(cmd, child_code); + + sbgr = SBGR_DIED; + goto done; + } + + else if (pid_seen < 0 && errno == EINTR) + goto wait; + + trace2_child_exit(cmd, -1); + sbgr = SBGR_ERROR; + +done: + child_process_clear(cmd); + invalidate_lstat_cache(); + return sbgr; +} diff --git a/run-command.h b/run-command.h index 5e544acf4bc..49878262584 100644 --- a/run-command.h +++ b/run-command.h @@ -510,4 +510,61 @@ int run_processes_parallel_tr2(int n, get_next_task_fn, start_failure_fn, */ void prepare_other_repo_env(struct strvec *env_array, const char *new_git_dir); +/** + * Possible return values for start_bg_command(). + */ +enum start_bg_result { + /* child process is "ready" */ + SBGR_READY = 0, + + /* child process could not be started */ + SBGR_ERROR, + + /* callback error when testing for "ready" */ + SBGR_CB_ERROR, + + /* timeout expired waiting for child to become "ready" */ + SBGR_TIMEOUT, + + /* child process exited or was signalled before becomming "ready" */ + SBGR_DIED, +}; + +/** + * Callback used by start_bg_command() to ask whether the + * child process is ready or needs more time to become "ready". + * + * The callback will receive the cmd and cb_data arguments given to + * start_bg_command(). + * + * Returns 1 is child needs more time (subject to the requested timeout). + * Returns 0 if child is "ready". + * Returns -1 on any error and cause start_bg_command() to also error out. + */ +typedef int(start_bg_wait_cb)(const struct child_process *cmd, void *cb_data); + +/** + * Start a command in the background. Wait long enough for the child + * to become "ready" (as defined by the provided callback). Capture + * immediate errors (like failure to start) and any immediate exit + * status (such as a shutdown/signal before the child became "ready") + * and return this like start_command(). + * + * We run a custom wait loop using the provided callback to wait for + * the child to start and become "ready". This is limited by the given + * timeout value. + * + * If the child does successfully start and become "ready", we orphan + * it into the background. + * + * The caller must not call finish_command(). + * + * The opaque cb_data argument will be forwarded to the callback for + * any instance data that it might require. This may be NULL. + */ +enum start_bg_result start_bg_command(struct child_process *cmd, + start_bg_wait_cb *wait_cb, + void *cb_data, + unsigned int timeout_sec); + #endif diff --git a/simple-ipc.h b/simple-ipc.h index 08b2908d5f8..a849d9f8411 100644 --- a/simple-ipc.h +++ b/simple-ipc.h @@ -5,13 +5,6 @@ * See Documentation/technical/api-simple-ipc.txt */ -#ifdef SUPPORTS_SIMPLE_IPC -#include "pkt-line.h" - -/* - * Simple IPC Client Side API. - */ - enum ipc_active_state { /* * The pipe/socket exists and the daemon is waiting for connections. @@ -43,6 +36,13 @@ enum ipc_active_state { IPC_STATE__OTHER_ERROR, }; +#ifdef SUPPORTS_SIMPLE_IPC +#include "pkt-line.h" + +/* + * Simple IPC Client Side API. + */ + struct ipc_client_connect_options { /* * Spin under timeout if the server is running but can't @@ -103,7 +103,8 @@ void ipc_client_close_connection(struct ipc_client_connection *connection); */ int ipc_client_send_command_to_connection( struct ipc_client_connection *connection, - const char *message, struct strbuf *answer); + const char *message, size_t message_len, + struct strbuf *answer); /* * Used by the client to synchronously connect and send and receive a @@ -115,7 +116,8 @@ int ipc_client_send_command_to_connection( */ int ipc_client_send_command(const char *path, const struct ipc_client_connect_options *options, - const char *message, struct strbuf *answer); + const char *message, size_t message_len, + struct strbuf *answer); /* * Simple IPC Server Side API. @@ -140,6 +142,7 @@ typedef int (ipc_server_reply_cb)(struct ipc_server_reply_data *, */ typedef int (ipc_server_application_cb)(void *application_data, const char *request, + size_t request_len, ipc_server_reply_cb *reply_cb, struct ipc_server_reply_data *reply_data); diff --git a/t/helper/test-simple-ipc.c b/t/helper/test-simple-ipc.c index 42040ef81b1..28365ff85b6 100644 --- a/t/helper/test-simple-ipc.c +++ b/t/helper/test-simple-ipc.c @@ -9,6 +9,7 @@ #include "parse-options.h" #include "thread-utils.h" #include "strvec.h" +#include "run-command.h" #ifndef SUPPORTS_SIMPLE_IPC int cmd__simple_ipc(int argc, const char **argv) @@ -112,7 +113,7 @@ static int app__slow_command(ipc_server_reply_cb *reply_cb, /* * The client sent a command followed by a (possibly very) large buffer. */ -static int app__sendbytes_command(const char *received, +static int app__sendbytes_command(const char *received, size_t received_len, ipc_server_reply_cb *reply_cb, struct ipc_server_reply_data *reply_data) { @@ -123,6 +124,13 @@ static int app__sendbytes_command(const char *received, int errs = 0; int ret; + /* + * The test is setup to send: + * "sendbytes" SP + */ + if (received_len < strlen("sendbytes ")) + BUG("received_len is short in app__sendbytes_command"); + if (skip_prefix(received, "sendbytes ", &p)) len_ballast = strlen(p); @@ -160,7 +168,7 @@ static ipc_server_application_cb test_app_cb; * by this application. */ static int test_app_cb(void *application_data, - const char *command, + const char *command, size_t command_len, ipc_server_reply_cb *reply_cb, struct ipc_server_reply_data *reply_data) { @@ -173,7 +181,7 @@ static int test_app_cb(void *application_data, if (application_data != (void*)&my_app_data) BUG("application_cb: application_data pointer wrong"); - if (!strcmp(command, "quit")) { + if (command_len == 4 && !strncmp(command, "quit", 4)) { /* * The client sent a "quit" command. This is an async * request for the server to shutdown. @@ -193,22 +201,23 @@ static int test_app_cb(void *application_data, return SIMPLE_IPC_QUIT; } - if (!strcmp(command, "ping")) { + if (command_len == 4 && !strncmp(command, "ping", 4)) { const char *answer = "pong"; return reply_cb(reply_data, answer, strlen(answer)); } - if (!strcmp(command, "big")) + if (command_len == 3 && !strncmp(command, "big", 3)) return app__big_command(reply_cb, reply_data); - if (!strcmp(command, "chunk")) + if (command_len == 5 && !strncmp(command, "chunk", 5)) return app__chunk_command(reply_cb, reply_data); - if (!strcmp(command, "slow")) + if (command_len == 4 && !strncmp(command, "slow", 4)) return app__slow_command(reply_cb, reply_data); - if (starts_with(command, "sendbytes ")) - return app__sendbytes_command(command, reply_cb, reply_data); + if (command_len >= 10 && starts_with(command, "sendbytes ")) + return app__sendbytes_command(command, command_len, + reply_cb, reply_data); return app__unhandled_command(command, reply_cb, reply_data); } @@ -259,183 +268,69 @@ static int daemon__run_server(void) */ ret = ipc_server_run(cl_args.path, &opts, test_app_cb, (void*)&my_app_data); if (ret == -2) - error(_("socket/pipe already in use: '%s'"), cl_args.path); + error("socket/pipe already in use: '%s'", cl_args.path); else if (ret == -1) - error_errno(_("could not start server on: '%s'"), cl_args.path); + error_errno("could not start server on: '%s'", cl_args.path); return ret; } -#ifndef GIT_WINDOWS_NATIVE -/* - * This is adapted from `daemonize()`. Use `fork()` to directly create and - * run the daemon in a child process. - */ -static int spawn_server(pid_t *pid) +static start_bg_wait_cb bg_wait_cb; + +static int bg_wait_cb(const struct child_process *cp, void *cb_data) { - struct ipc_server_opts opts = { - .nr_threads = cl_args.nr_threads, - }; + int s = ipc_get_active_state(cl_args.path); - *pid = fork(); + switch (s) { + case IPC_STATE__LISTENING: + /* child is "ready" */ + return 0; - switch (*pid) { - case 0: - if (setsid() == -1) - error_errno(_("setsid failed")); - close(0); - close(1); - close(2); - sanitize_stdfds(); - - return ipc_server_run(cl_args.path, &opts, test_app_cb, - (void*)&my_app_data); - - case -1: - return error_errno(_("could not spawn daemon in the background")); + case IPC_STATE__NOT_LISTENING: + case IPC_STATE__PATH_NOT_FOUND: + /* give child more time */ + return 1; default: - return 0; - } -} -#else -/* - * Conceptually like `daemonize()` but different because Windows does not - * have `fork(2)`. Spawn a normal Windows child process but without the - * limitations of `start_command()` and `finish_command()`. - */ -static int spawn_server(pid_t *pid) -{ - char test_tool_exe[MAX_PATH]; - struct strvec args = STRVEC_INIT; - int in, out; - - GetModuleFileNameA(NULL, test_tool_exe, MAX_PATH); - - in = open("/dev/null", O_RDONLY); - out = open("/dev/null", O_WRONLY); - - strvec_push(&args, test_tool_exe); - strvec_push(&args, "simple-ipc"); - strvec_push(&args, "run-daemon"); - strvec_pushf(&args, "--name=%s", cl_args.path); - strvec_pushf(&args, "--threads=%d", cl_args.nr_threads); - - *pid = mingw_spawnvpe(args.v[0], args.v, NULL, NULL, in, out, out); - close(in); - close(out); - - strvec_clear(&args); - - if (*pid < 0) - return error(_("could not spawn daemon in the background")); - - return 0; -} -#endif - -/* - * This is adapted from `wait_or_whine()`. Watch the child process and - * let it get started and begin listening for requests on the socket - * before reporting our success. - */ -static int wait_for_server_startup(pid_t pid_child) -{ - int status; - pid_t pid_seen; - enum ipc_active_state s; - time_t time_limit, now; - - time(&time_limit); - time_limit += cl_args.max_wait_sec; - - for (;;) { - pid_seen = waitpid(pid_child, &status, WNOHANG); - - if (pid_seen == -1) - return error_errno(_("waitpid failed")); - - else if (pid_seen == 0) { - /* - * The child is still running (this should be - * the normal case). Try to connect to it on - * the socket and see if it is ready for - * business. - * - * If there is another daemon already running, - * our child will fail to start (possibly - * after a timeout on the lock), but we don't - * care (who responds) if the socket is live. - */ - s = ipc_get_active_state(cl_args.path); - if (s == IPC_STATE__LISTENING) - return 0; - - time(&now); - if (now > time_limit) - return error(_("daemon not online yet")); - - continue; - } - - else if (pid_seen == pid_child) { - /* - * The new child daemon process shutdown while - * it was starting up, so it is not listening - * on the socket. - * - * Try to ping the socket in the odd chance - * that another daemon started (or was already - * running) while our child was starting. - * - * Again, we don't care who services the socket. - */ - s = ipc_get_active_state(cl_args.path); - if (s == IPC_STATE__LISTENING) - return 0; - - /* - * We don't care about the WEXITSTATUS() nor - * any of the WIF*(status) values because - * `cmd__simple_ipc()` does the `!!result` - * trick on all function return values. - * - * So it is sufficient to just report the - * early shutdown as an error. - */ - return error(_("daemon failed to start")); - } - - else - return error(_("waitpid is confused")); + case IPC_STATE__INVALID_PATH: + case IPC_STATE__OTHER_ERROR: + /* all the time in world won't help */ + return -1; } } -/* - * This process will start a simple-ipc server in a background process and - * wait for it to become ready. This is like `daemonize()` but gives us - * more control and better error reporting (and makes it easier to write - * unit tests). - */ static int daemon__start_server(void) { - pid_t pid_child; - int ret; + struct child_process cp = CHILD_PROCESS_INIT; + enum start_bg_result sbgr; - /* - * Run the actual daemon in a background process. - */ - ret = spawn_server(&pid_child); - if (pid_child <= 0) - return ret; + strvec_push(&cp.args, "test-tool"); + strvec_push(&cp.args, "simple-ipc"); + strvec_push(&cp.args, "run-daemon"); + strvec_pushf(&cp.args, "--name=%s", cl_args.path); + strvec_pushf(&cp.args, "--threads=%d", cl_args.nr_threads); - /* - * Let the parent wait for the child process to get started - * and begin listening for requests on the socket. - */ - ret = wait_for_server_startup(pid_child); + cp.no_stdin = 1; + cp.no_stdout = 1; + cp.no_stderr = 1; - return ret; + sbgr = start_bg_command(&cp, bg_wait_cb, NULL, cl_args.max_wait_sec); + + switch (sbgr) { + case SBGR_READY: + return 0; + + default: + case SBGR_ERROR: + case SBGR_CB_ERROR: + return error("daemon failed to start"); + + case SBGR_TIMEOUT: + return error("daemon not online yet"); + + case SBGR_DIED: + return error("daemon terminated"); + } } /* @@ -488,7 +383,9 @@ static int client__send_ipc(void) options.wait_if_busy = 1; options.wait_if_not_found = 0; - if (!ipc_client_send_command(cl_args.path, &options, command, &buf)) { + if (!ipc_client_send_command(cl_args.path, &options, + command, strlen(command), + &buf)) { if (buf.len) { printf("%s\n", buf.buf); fflush(stdout); @@ -538,7 +435,7 @@ static int client__stop_server(void) time(&now); if (now > time_limit) - return error(_("daemon has not shutdown yet")); + return error("daemon has not shutdown yet"); } } @@ -556,7 +453,9 @@ static int do_sendbytes(int bytecount, char byte, const char *path, strbuf_addstr(&buf_send, "sendbytes "); strbuf_addchars(&buf_send, byte, bytecount); - if (!ipc_client_send_command(path, options, buf_send.buf, &buf_resp)) { + if (!ipc_client_send_command(path, options, + buf_send.buf, buf_send.len, + &buf_resp)) { strbuf_rtrim(&buf_resp); printf("sent:%c%08d %s\n", byte, bytecount, buf_resp.buf); fflush(stdout); diff --git a/trace2.c b/trace2.c index b9b154ac440..b2d471526fd 100644 --- a/trace2.c +++ b/trace2.c @@ -394,6 +394,37 @@ void trace2_child_exit_fl(const char *file, int line, struct child_process *cmd, us_elapsed_child); } +void trace2_child_ready_fl(const char *file, int line, + struct child_process *cmd, + const char *ready) +{ + struct tr2_tgt *tgt_j; + int j; + uint64_t us_now; + uint64_t us_elapsed_absolute; + uint64_t us_elapsed_child; + + if (!trace2_enabled) + return; + + us_now = getnanotime() / 1000; + us_elapsed_absolute = tr2tls_absolute_elapsed(us_now); + + if (cmd->trace2_child_us_start) + us_elapsed_child = us_now - cmd->trace2_child_us_start; + else + us_elapsed_child = 0; + + for_each_wanted_builtin (j, tgt_j) + if (tgt_j->pfn_child_ready_fl) + tgt_j->pfn_child_ready_fl(file, line, + us_elapsed_absolute, + cmd->trace2_child_id, + cmd->pid, + ready, + us_elapsed_child); +} + int trace2_exec_fl(const char *file, int line, const char *exe, const char **argv) { diff --git a/trace2.h b/trace2.h index 2f450ab5bed..0cc7b5f5312 100644 --- a/trace2.h +++ b/trace2.h @@ -253,6 +253,31 @@ void trace2_child_exit_fl(const char *file, int line, struct child_process *cmd, #define trace2_child_exit(cmd, code) \ trace2_child_exit_fl(__FILE__, __LINE__, (cmd), (code)) +/** + * Emits a "child_ready" message containing the "child-id" and a flag + * indicating whether the child was considered "ready" when we + * released it. + * + * This function should be called after starting a daemon process in + * the background (and after giving it sufficient time to boot + * up) to indicate that we no longer control or own it. + * + * The "ready" argument should contain one of { "ready", "timeout", + * "error" } to indicate the state of the running daemon when we + * released it. + * + * If the daemon process fails to start or it exits or is terminated + * while we are still waiting for it, the caller should emit a + * regular "child_exit" to report the normal process exit information. + * + */ +void trace2_child_ready_fl(const char *file, int line, + struct child_process *cmd, + const char *ready); + +#define trace2_child_ready(cmd, ready) \ + trace2_child_ready_fl(__FILE__, __LINE__, (cmd), (ready)) + /** * Emit an 'exec' event prior to calling one of exec(), execv(), * execvp(), and etc. On Unix-derived systems, this will be the diff --git a/trace2/tr2_tgt.h b/trace2/tr2_tgt.h index 1f66fd65730..65f94e15748 100644 --- a/trace2/tr2_tgt.h +++ b/trace2/tr2_tgt.h @@ -45,6 +45,10 @@ typedef void(tr2_tgt_evt_child_exit_fl_t)(const char *file, int line, uint64_t us_elapsed_absolute, int cid, int pid, int code, uint64_t us_elapsed_child); +typedef void(tr2_tgt_evt_child_ready_fl_t)(const char *file, int line, + uint64_t us_elapsed_absolute, + int cid, int pid, const char *ready, + uint64_t us_elapsed_child); typedef void(tr2_tgt_evt_thread_start_fl_t)(const char *file, int line, uint64_t us_elapsed_absolute); @@ -116,6 +120,7 @@ struct tr2_tgt { tr2_tgt_evt_alias_fl_t *pfn_alias_fl; tr2_tgt_evt_child_start_fl_t *pfn_child_start_fl; tr2_tgt_evt_child_exit_fl_t *pfn_child_exit_fl; + tr2_tgt_evt_child_ready_fl_t *pfn_child_ready_fl; tr2_tgt_evt_thread_start_fl_t *pfn_thread_start_fl; tr2_tgt_evt_thread_exit_fl_t *pfn_thread_exit_fl; tr2_tgt_evt_exec_fl_t *pfn_exec_fl; diff --git a/trace2/tr2_tgt_event.c b/trace2/tr2_tgt_event.c index 578a9a5287a..70cfc2f77cc 100644 --- a/trace2/tr2_tgt_event.c +++ b/trace2/tr2_tgt_event.c @@ -383,6 +383,27 @@ static void fn_child_exit_fl(const char *file, int line, jw_release(&jw); } +static void fn_child_ready_fl(const char *file, int line, + uint64_t us_elapsed_absolute, int cid, int pid, + const char *ready, uint64_t us_elapsed_child) +{ + const char *event_name = "child_ready"; + struct json_writer jw = JSON_WRITER_INIT; + double t_rel = (double)us_elapsed_child / 1000000.0; + + jw_object_begin(&jw, 0); + event_fmt_prepare(event_name, file, line, NULL, &jw); + jw_object_intmax(&jw, "child_id", cid); + jw_object_intmax(&jw, "pid", pid); + jw_object_string(&jw, "ready", ready); + jw_object_double(&jw, "t_rel", 6, t_rel); + jw_end(&jw); + + tr2_dst_write_line(&tr2dst_event, &jw.json); + + jw_release(&jw); +} + static void fn_thread_start_fl(const char *file, int line, uint64_t us_elapsed_absolute) { @@ -610,6 +631,7 @@ struct tr2_tgt tr2_tgt_event = { fn_alias_fl, fn_child_start_fl, fn_child_exit_fl, + fn_child_ready_fl, fn_thread_start_fl, fn_thread_exit_fl, fn_exec_fl, diff --git a/trace2/tr2_tgt_normal.c b/trace2/tr2_tgt_normal.c index a5751c88644..58d9e430f05 100644 --- a/trace2/tr2_tgt_normal.c +++ b/trace2/tr2_tgt_normal.c @@ -251,6 +251,19 @@ static void fn_child_exit_fl(const char *file, int line, strbuf_release(&buf_payload); } +static void fn_child_ready_fl(const char *file, int line, + uint64_t us_elapsed_absolute, int cid, int pid, + const char *ready, uint64_t us_elapsed_child) +{ + struct strbuf buf_payload = STRBUF_INIT; + double elapsed = (double)us_elapsed_child / 1000000.0; + + strbuf_addf(&buf_payload, "child_ready[%d] pid:%d ready:%s elapsed:%.6f", + cid, pid, ready, elapsed); + normal_io_write_fl(file, line, &buf_payload); + strbuf_release(&buf_payload); +} + static void fn_exec_fl(const char *file, int line, uint64_t us_elapsed_absolute, int exec_id, const char *exe, const char **argv) { @@ -330,6 +343,7 @@ struct tr2_tgt tr2_tgt_normal = { fn_alias_fl, fn_child_start_fl, fn_child_exit_fl, + fn_child_ready_fl, NULL, /* thread_start */ NULL, /* thread_exit */ fn_exec_fl, diff --git a/trace2/tr2_tgt_perf.c b/trace2/tr2_tgt_perf.c index af4d65a0a5f..e4acca13d64 100644 --- a/trace2/tr2_tgt_perf.c +++ b/trace2/tr2_tgt_perf.c @@ -360,6 +360,20 @@ static void fn_child_exit_fl(const char *file, int line, strbuf_release(&buf_payload); } +static void fn_child_ready_fl(const char *file, int line, + uint64_t us_elapsed_absolute, int cid, int pid, + const char *ready, uint64_t us_elapsed_child) +{ + const char *event_name = "child_ready"; + struct strbuf buf_payload = STRBUF_INIT; + + strbuf_addf(&buf_payload, "[ch%d] pid:%d ready:%s", cid, pid, ready); + + perf_io_write_fl(file, line, event_name, NULL, &us_elapsed_absolute, + &us_elapsed_child, NULL, &buf_payload); + strbuf_release(&buf_payload); +} + static void fn_thread_start_fl(const char *file, int line, uint64_t us_elapsed_absolute) { @@ -553,6 +567,7 @@ struct tr2_tgt tr2_tgt_perf = { fn_alias_fl, fn_child_start_fl, fn_child_exit_fl, + fn_child_ready_fl, fn_thread_start_fl, fn_thread_exit_fl, fn_exec_fl,