Skip to content

Commit

Permalink
fsmonitor: fix hangs by delayed fs event listening
Browse files Browse the repository at this point in the history
The thread serving the client (ipc-thread) calls
with_lock__wait_for_cookie() in which a cookie file is
created. with_lock__wait_for_cookie() then waits for the event caused by
the cookie file from the thread for fs events (fsevent-thread).

However, in high load situations, the fsevent-thread may start actual fs
event listening (triggered by FSEventStreamStart() for Darwin, for
example) *after* the cookie file is created. In this case, the
fsevent-thread cannot detect the cookie file and
with_lock__wait_for_cookie() waits forever, so that the whole daemon
hangs [1].

Extend listen_error_code to express that actual fs event listening
starts. listen_error_code is accessed in a thread-safe manner by
utilizing a dedicated mutex.

[1]: https://lore.kernel.org/git/[email protected]/

Suggested-by: Jeff King <[email protected]>
Signed-off-by: Koji Nakamaru <[email protected]>
  • Loading branch information
KojiNakamaru committed Oct 2, 2024
1 parent e9356ba commit 83c6040
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
21 changes: 21 additions & 0 deletions builtin/fsmonitor--daemon.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ static enum fsmonitor_cookie_item_result with_lock__wait_for_cookie(
trace_printf_key(&trace_fsmonitor, "cookie-wait: '%s' '%s'",
cookie->name, cookie_pathname.buf);

while (fsmonitor_get_listen_error_code(state) == 0)
sleep_millisec(50);

/*
* Create the cookie file on disk and then wait for a notification
* that the listener thread has seen it.
Expand Down Expand Up @@ -606,6 +609,23 @@ void fsmonitor_force_resync(struct fsmonitor_daemon_state *state)
pthread_mutex_unlock(&state->main_lock);
}

int fsmonitor_get_listen_error_code(struct fsmonitor_daemon_state *state)
{
int error_code;

pthread_mutex_lock(&state->listen_lock);
error_code = state->listen_error_code;
pthread_mutex_unlock(&state->listen_lock);
return error_code;
}

void fsmonitor_set_listen_error_code(struct fsmonitor_daemon_state *state, int error_code)
{
pthread_mutex_lock(&state->listen_lock);
state->listen_error_code = error_code;
pthread_mutex_unlock(&state->listen_lock);
}

/*
* Format an opaque token string to send to the client.
*/
Expand Down Expand Up @@ -1285,6 +1305,7 @@ static int fsmonitor_run_daemon(void)
hashmap_init(&state.cookies, cookies_cmp, NULL, 0);
pthread_mutex_init(&state.main_lock, NULL);
pthread_cond_init(&state.cookies_cond, NULL);
pthread_mutex_init(&state.listen_lock, NULL);
state.listen_error_code = 0;
state.health_error_code = 0;
state.current_token_data = fsmonitor_new_token_data();
Expand Down
5 changes: 3 additions & 2 deletions compat/fsmonitor/fsm-listen-darwin.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,14 +515,15 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
goto force_error_stop_without_loop;
}
data->stream_started = 1;
fsmonitor_set_listen_error_code(state, 1);

pthread_mutex_lock(&data->dq_lock);
pthread_cond_wait(&data->dq_finished, &data->dq_lock);
pthread_mutex_unlock(&data->dq_lock);

switch (data->shutdown_style) {
case FORCE_ERROR_STOP:
state->listen_error_code = -1;
fsmonitor_set_listen_error_code(state, -1);
/* fall thru */
case FORCE_SHUTDOWN:
ipc_server_stop_async(state->ipc_server_data);
Expand All @@ -534,7 +535,7 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
return;

force_error_stop_without_loop:
state->listen_error_code = -1;
fsmonitor_set_listen_error_code(state, -1);
ipc_server_stop_async(state->ipc_server_data);
return;
}
5 changes: 2 additions & 3 deletions compat/fsmonitor/fsm-listen-win32.c
Original file line number Diff line number Diff line change
Expand Up @@ -732,14 +732,13 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
DWORD dwWait;
int result;

state->listen_error_code = 0;

if (start_rdcw_watch(data->watch_worktree) == -1)
goto force_error_stop;

if (data->watch_gitdir &&
start_rdcw_watch(data->watch_gitdir) == -1)
goto force_error_stop;
fsmonitor_set_listen_error_code(state, 1);

for (;;) {
dwWait = WaitForMultipleObjects(data->nr_listener_handles,
Expand Down Expand Up @@ -797,7 +796,7 @@ void fsm_listen__loop(struct fsmonitor_daemon_state *state)
}

force_error_stop:
state->listen_error_code = -1;
fsmonitor_set_listen_error_code(state, -1);

force_shutdown:
/*
Expand Down
4 changes: 4 additions & 0 deletions fsmonitor--daemon.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct fsmonitor_daemon_state {
int cookie_seq;
struct hashmap cookies;

pthread_mutex_t listen_lock;
int listen_error_code;
int health_error_code;
struct fsm_listen_data *listen_data;
Expand Down Expand Up @@ -167,5 +168,8 @@ void fsmonitor_publish(struct fsmonitor_daemon_state *state,
*/
void fsmonitor_force_resync(struct fsmonitor_daemon_state *state);

int fsmonitor_get_listen_error_code(struct fsmonitor_daemon_state *state);
void fsmonitor_set_listen_error_code(struct fsmonitor_daemon_state *state, int error_code);

#endif /* HAVE_FSMONITOR_DAEMON_BACKEND */
#endif /* FSMONITOR_DAEMON_H */

0 comments on commit 83c6040

Please sign in to comment.