[PATCH v8 01/18] perf tpebs: Fix concurrent stop races and PID reuse hazards in tpebs_stop
From: Ian Rogers
Date: Tue Jun 02 2026 - 13:53:25 EST
Parallel verbose test execution can trigger a race condition in tpebs_stop
if called concurrently or when PID reuse occurs, causing finish_command()
to block or reap the wrong process.
Introduce a `tpebs_stopping` flag inside intel-tpebs.c to prevent
redundant stop execution paths, and safely restore the `cmd.pid`
temporarily only during `finish_command()` to ensure it is properly reaped,
while preventing other threads from referencing it.
Assisted-by: Gemini-CLI:Google Gemini 3
Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
---
tools/perf/util/intel-tpebs.c | 92 ++++++++++++++++++++++++++++++-----
1 file changed, 80 insertions(+), 12 deletions(-)
diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c
index ed8cfe2ba2fa..bc3b79bfa01a 100644
--- a/tools/perf/util/intel-tpebs.c
+++ b/tools/perf/util/intel-tpebs.c
@@ -37,6 +37,7 @@ static pthread_t tpebs_reader_thread;
static struct child_process tpebs_cmd;
static int control_fd[2], ack_fd[2];
static struct mutex tpebs_mtx;
+static bool tpebs_stopping;
struct tpebs_retire_lat {
struct list_head nd;
@@ -52,16 +53,18 @@ struct tpebs_retire_lat {
bool started;
};
-static void tpebs_mtx_init(void)
+static void tpebs_init(void)
{
mutex_init(&tpebs_mtx);
+ control_fd[0] = control_fd[1] = -1;
+ ack_fd[0] = ack_fd[1] = -1;
}
static struct mutex *tpebs_mtx_get(void)
{
- static pthread_once_t tpebs_mtx_once = PTHREAD_ONCE_INIT;
+ static pthread_once_t tpebs_once = PTHREAD_ONCE_INIT;
- pthread_once(&tpebs_mtx_once, tpebs_mtx_init);
+ pthread_once(&tpebs_once, tpebs_init);
return &tpebs_mtx;
}
@@ -111,6 +114,7 @@ static int evsel__tpebs_start_perf_record(struct evsel *evsel)
/* Note, no workload given so system wide is implied. */
assert(tpebs_cmd.pid == 0);
+ memset(&tpebs_cmd, 0, sizeof(tpebs_cmd));
tpebs_cmd.argv = record_argv;
tpebs_cmd.out = -1;
ret = start_command(&tpebs_cmd);
@@ -320,20 +324,43 @@ static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get())
{
int ret = 0;
+ if (tpebs_stopping)
+ return 0;
+
/* Like tpebs_start, we should only run tpebs_end once. */
if (tpebs_cmd.pid != 0) {
+ pid_t actual_pid = tpebs_cmd.pid;
+
+ tpebs_stopping = true;
tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG);
tpebs_cmd.pid = 0;
mutex_unlock(tpebs_mtx_get());
pthread_join(tpebs_reader_thread, NULL);
mutex_lock(tpebs_mtx_get());
- close(control_fd[0]);
- close(control_fd[1]);
- close(ack_fd[0]);
- close(ack_fd[1]);
- close(tpebs_cmd.out);
+ if (control_fd[0] >= 0) {
+ close(control_fd[0]);
+ control_fd[0] = -1;
+ }
+ if (control_fd[1] >= 0) {
+ close(control_fd[1]);
+ control_fd[1] = -1;
+ }
+ if (ack_fd[0] >= 0) {
+ close(ack_fd[0]);
+ ack_fd[0] = -1;
+ }
+ if (ack_fd[1] >= 0) {
+ close(ack_fd[1]);
+ ack_fd[1] = -1;
+ }
+ if (tpebs_cmd.out >= 0) {
+ close(tpebs_cmd.out);
+ tpebs_cmd.out = -1;
+ }
+ tpebs_cmd.pid = actual_pid;
ret = finish_command(&tpebs_cmd);
tpebs_cmd.pid = 0;
+ tpebs_stopping = false;
if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL)
ret = 0;
}
@@ -486,30 +513,42 @@ int evsel__tpebs_open(struct evsel *evsel)
{
int ret;
bool tpebs_empty;
+ bool started_process = false;
/* We should only run tpebs_start when tpebs_recording is enabled. */
if (!tpebs_recording)
return 0;
+
+ mutex_lock(tpebs_mtx_get());
+ if (tpebs_stopping) {
+ mutex_unlock(tpebs_mtx_get());
+ return -EBUSY;
+ }
/* Only start the events once. */
if (tpebs_cmd.pid != 0) {
struct tpebs_retire_lat *t;
bool valid;
- mutex_lock(tpebs_mtx_get());
t = tpebs_retire_lat__find(evsel);
valid = t && t->started;
mutex_unlock(tpebs_mtx_get());
/* May fail as the event wasn't started. */
return valid ? 0 : -EBUSY;
}
+ mutex_unlock(tpebs_mtx_get());
ret = evsel__tpebs_prepare(evsel);
if (ret)
return ret;
mutex_lock(tpebs_mtx_get());
+ if (tpebs_stopping || tpebs_cmd.pid != 0) {
+ ret = -EBUSY;
+ goto out;
+ }
tpebs_empty = list_empty(&tpebs_results);
if (!tpebs_empty) {
+ started_process = true;
/*Create control and ack fd for --control*/
if (pipe(control_fd) < 0) {
pr_err("tpebs: Failed to create control fifo");
@@ -529,7 +568,6 @@ int evsel__tpebs_open(struct evsel *evsel)
if (pthread_create(&tpebs_reader_thread, /*attr=*/NULL, __sample_reader,
/*arg=*/NULL)) {
kill(tpebs_cmd.pid, SIGTERM);
- close(tpebs_cmd.out);
pr_err("Could not create thread to process sample data.\n");
ret = -1;
goto out;
@@ -540,8 +578,38 @@ int evsel__tpebs_open(struct evsel *evsel)
if (ret) {
struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel);
- list_del_init(&t->nd);
- tpebs_retire_lat__delete(t);
+ if (t) {
+ list_del_init(&t->nd);
+ tpebs_retire_lat__delete(t);
+ }
+
+ if (started_process) {
+ if (tpebs_cmd.pid > 0) {
+ kill(tpebs_cmd.pid, SIGTERM);
+ finish_command(&tpebs_cmd);
+ tpebs_cmd.pid = 0;
+ }
+ if (tpebs_cmd.out >= 0) {
+ close(tpebs_cmd.out);
+ tpebs_cmd.out = -1;
+ }
+ if (control_fd[0] >= 0) {
+ close(control_fd[0]);
+ control_fd[0] = -1;
+ }
+ if (control_fd[1] >= 0) {
+ close(control_fd[1]);
+ control_fd[1] = -1;
+ }
+ if (ack_fd[0] >= 0) {
+ close(ack_fd[0]);
+ ack_fd[0] = -1;
+ }
+ if (ack_fd[1] >= 0) {
+ close(ack_fd[1]);
+ ack_fd[1] = -1;
+ }
+ }
}
mutex_unlock(tpebs_mtx_get());
return ret;
--
2.54.0.1013.g208068f2d8-goog