From: Ian Rogers <irogers@google.com>
To: John Garry <john.g.garry@oracle.com>,
Will Deacon <will@kernel.org>, James Clark <james.clark@arm.com>,
Mike Leach <mike.leach@linaro.org>, Leo Yan <leo.yan@linaro.org>,
Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Mark Rutland <mark.rutland@arm.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
Ian Rogers <irogers@google.com>,
Adrian Hunter <adrian.hunter@intel.com>,
Suzuki K Poulose <suzuki.poulose@arm.com>,
"Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>,
Kan Liang <kan.liang@linux.intel.com>,
German Gomez <german.gomez@arm.com>,
Ali Saidi <alisaidi@amazon.com>,
Jing Zhang <renyu.zj@linux.alibaba.com>,
Athira Rajeev <atrajeev@linux.vnet.ibm.com>,
Miguel Ojeda <ojeda@kernel.org>,
ye xingchen <ye.xingchen@zte.com.cn>,
Liam Howlett <liam.howlett@oracle.com>,
Dmitrii Dolgov <9erthalion6@gmail.com>,
Yang Jihong <yangjihong1@huawei.com>,
K Prateek Nayak <kprateek.nayak@amd.com>,
Changbin Du <changbin.du@huawei.com>,
Ravi Bangoria <ravi.bangoria@amd.com>,
Sean Christopherson <seanjc@google.com>,
Andi Kleen <ak@linux.intel.com>,
"Steinar H. Gunderson" <sesse@google.com>,
Yuan Can <yuancan@huawei.com>,
Brian Robbins <brianrob@linux.microsoft.com>,
liuwenyu <liuwenyu7@huawei.com>,
Ivan Babrou <ivan@cloudflare.com>,
Fangrui Song <maskray@google.com>,
linux-kernel@vger.kernel.org,
linux-arm-kernel@lists.infradead.org,
linux-perf-users@vger.kernel.org, coresight@lists.linaro.org
Subject: [PATCH v2 24/26] perf callchain: Use pthread keys for tls callchain_cursor
Date: Thu, 8 Jun 2023 16:28:21 -0700 [thread overview]
Message-ID: <20230608232823.4027869-25-irogers@google.com> (raw)
In-Reply-To: <20230608232823.4027869-1-irogers@google.com>
Pthread keys are more portable than __thread and allow the association
of a destructor with the key. Use the destructor to clean up TLS
callchain cursors to aid understanding memory leaks.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/builtin-c2c.c | 4 +-
tools/perf/builtin-script.c | 24 ++++++-----
tools/perf/util/callchain.c | 40 ++++++++++++++++++-
tools/perf/util/callchain.h | 4 +-
tools/perf/util/db-export.c | 10 +++--
tools/perf/util/hist.c | 29 ++++++++------
.../scripting-engines/trace-event-python.c | 10 +++--
7 files changed, 86 insertions(+), 35 deletions(-)
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 530a44a59f41..a4cf9de7a7b5 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -284,6 +284,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct hist_entry *he;
struct addr_location al;
struct mem_info *mi, *mi_dup;
+ struct callchain_cursor *cursor;
int ret;
addr_location__init(&al);
@@ -297,7 +298,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
if (c2c.stitch_lbr)
thread__set_lbr_stitch_enable(al.thread, true);
- ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
+ cursor = get_tls_callchain_cursor();
+ ret = sample__resolve_callchain(sample, cursor, NULL,
evsel, &al, sysctl_perf_event_max_stack);
if (ret)
goto out;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 784d478c2e05..e3f435e6a7d0 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1557,11 +1557,13 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
unsigned int print_opts = output[type].print_ip_opts;
struct callchain_cursor *cursor = NULL;
- if (symbol_conf.use_callchain && sample->callchain &&
- thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
- sample, NULL, NULL, scripting_max_stack) == 0)
- cursor = &callchain_cursor;
-
+ if (symbol_conf.use_callchain && sample->callchain) {
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack))
+ cursor = NULL;
+ }
if (cursor == NULL) {
printed += fprintf(fp, " ");
if (print_opts & EVSEL__PRINT_SRCLINE) {
@@ -2203,11 +2205,13 @@ static void process_event(struct perf_script *script,
if (script->stitch_lbr)
thread__set_lbr_stitch_enable(al->thread, true);
- if (symbol_conf.use_callchain && sample->callchain &&
- thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
- sample, NULL, NULL, scripting_max_stack) == 0)
- cursor = &callchain_cursor;
-
+ if (symbol_conf.use_callchain && sample->callchain) {
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack))
+ cursor = NULL;
+ }
fputc(cursor ? '\n' : ' ', fp);
sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor,
symbol_conf.bt_stop_list, fp);
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 909f62b3b266..7e9bd3b6be9f 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -58,7 +58,8 @@ struct callchain_param callchain_param_default = {
CALLCHAIN_PARAM_DEFAULT
};
-__thread struct callchain_cursor callchain_cursor;
+/* Used for thread-local struct callchain_cursor. */
+static pthread_key_t callchain_cursor;
int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
{
@@ -1116,7 +1117,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
!symbol_conf.show_branchflag_count)
return 0;
- return callchain_append(he->callchain, &callchain_cursor, sample->period);
+ return callchain_append(he->callchain, get_tls_callchain_cursor(), sample->period);
}
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
@@ -1570,6 +1571,41 @@ int callchain_node__make_parent_list(struct callchain_node *node)
return -ENOMEM;
}
+static void callchain_cursor__delete(void *vcursor)
+{
+ struct callchain_cursor *cursor = vcursor;
+ struct callchain_cursor_node *node, *next;
+
+ callchain_cursor_reset(cursor);
+ for (node = cursor->first; node != NULL; node = next) {
+ next = node->next;
+ free(node);
+ }
+ free(cursor);
+}
+
+static void init_callchain_cursor_key(void)
+{
+ if (pthread_key_create(&callchain_cursor, callchain_cursor__delete)) {
+ pr_err("callchain cursor creation failed");
+ abort();
+ }
+}
+
+struct callchain_cursor *get_tls_callchain_cursor(void)
+{
+ static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+ struct callchain_cursor *cursor;
+
+ pthread_once(&once_control, init_callchain_cursor_key);
+ cursor = pthread_getspecific(callchain_cursor);
+ if (!cursor) {
+ cursor = zalloc(sizeof(*cursor));
+ pthread_setspecific(callchain_cursor, cursor);
+ }
+ return cursor;
+}
+
int callchain_cursor__copy(struct callchain_cursor *dst,
struct callchain_cursor *src)
{
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index d95615daed73..ce9410018cf7 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -168,8 +168,6 @@ struct callchain_cursor {
struct callchain_cursor_node *curr;
};
-extern __thread struct callchain_cursor callchain_cursor;
-
static inline void callchain_init(struct callchain_root *root)
{
INIT_LIST_HEAD(&root->node.val);
@@ -231,6 +229,8 @@ static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
cursor->pos++;
}
+struct callchain_cursor *get_tls_callchain_cursor(void);
+
int callchain_cursor__copy(struct callchain_cursor *dst,
struct callchain_cursor *src);
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 6184696dc266..b9fb71ab7a73 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -215,6 +215,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
u64 kernel_start = machine__kernel_start(machine);
struct call_path *current = &dbe->cpr->call_path;
enum chain_order saved_order = callchain_param.order;
+ struct callchain_cursor *cursor;
int err;
if (!symbol_conf.use_callchain || !sample->callchain)
@@ -226,13 +227,14 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
* the callchain starting with the root node and ending with the leaf.
*/
callchain_param.order = ORDER_CALLER;
- err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+ cursor = get_tls_callchain_cursor();
+ err = thread__resolve_callchain(thread, cursor, evsel,
sample, NULL, NULL, PERF_MAX_STACK_DEPTH);
if (err) {
callchain_param.order = saved_order;
return NULL;
}
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(cursor);
while (1) {
struct callchain_cursor_node *node;
@@ -240,7 +242,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(cursor);
if (!node)
break;
@@ -265,7 +267,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe,
al.sym, node->ip,
kernel_start);
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(cursor);
addr_location__exit(&al);
}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index fb218b3e8a7c..4004c0915e4f 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1029,15 +1029,16 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
struct hist_entry **he_cache;
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(cursor);
/*
* This is for detecting cycles or recursions so that they're
* cumulated only one time to prevent entries more than 100%
* overhead.
*/
- he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
+ he_cache = malloc(sizeof(*he_cache) * (cursor->nr + 1));
if (he_cache == NULL)
return -ENOMEM;
@@ -1072,7 +1073,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
* We need to re-initialize the cursor since callchain_append()
* advanced the cursor to the end.
*/
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(get_tls_callchain_cursor());
hists__inc_nr_samples(hists, he->filtered);
@@ -1085,7 +1086,7 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter,
{
struct callchain_cursor_node *node;
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(get_tls_callchain_cursor());
if (node == NULL)
return 0;
@@ -1131,12 +1132,12 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
.raw_size = sample->raw_size,
};
int i;
- struct callchain_cursor cursor;
+ struct callchain_cursor cursor, *tls_cursor = get_tls_callchain_cursor();
bool fast = hists__has(he_tmp.hists, sym);
- callchain_cursor_snapshot(&cursor, &callchain_cursor);
+ callchain_cursor_snapshot(&cursor, tls_cursor);
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(tls_cursor);
/*
* Check if there's duplicate entries in the callchain.
@@ -1222,7 +1223,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
if (al)
alm = map__get(al->map);
- err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
+ err = sample__resolve_callchain(iter->sample, get_tls_callchain_cursor(), &iter->parent,
iter->evsel, al, max_stack_depth);
if (err) {
map__put(alm);
@@ -1568,8 +1569,10 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
if (hist_entry__has_callchains(new_he) &&
symbol_conf.use_callchain) {
- callchain_cursor_reset(&callchain_cursor);
- if (callchain_merge(&callchain_cursor,
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ callchain_cursor_reset(cursor);
+ if (callchain_merge(cursor,
new_he->callchain,
he->callchain) < 0)
ret = -1;
@@ -1610,8 +1613,10 @@ static int hists__collapse_insert_entry(struct hists *hists,
he_stat__add_stat(iter->stat_acc, he->stat_acc);
if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
- callchain_cursor_reset(&callchain_cursor);
- if (callchain_merge(&callchain_cursor,
+ struct callchain_cursor *cursor = get_tls_callchain_cursor();
+
+ callchain_cursor_reset(cursor);
+ if (callchain_merge(cursor,
iter->callchain,
he->callchain) < 0)
ret = -1;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index d96e5c0fef45..59063ec98619 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -417,6 +417,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
struct addr_location *al)
{
PyObject *pylist;
+ struct callchain_cursor *cursor;
pylist = PyList_New(0);
if (!pylist)
@@ -425,19 +426,20 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
if (!symbol_conf.use_callchain || !sample->callchain)
goto exit;
- if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al->thread, cursor, evsel,
sample, NULL, NULL,
scripting_max_stack) != 0) {
pr_err("Failed to resolve callchain. Skipping\n");
goto exit;
}
- callchain_cursor_commit(&callchain_cursor);
+ callchain_cursor_commit(cursor);
while (1) {
PyObject *pyelem;
struct callchain_cursor_node *node;
- node = callchain_cursor_current(&callchain_cursor);
+ node = callchain_cursor_current(cursor);
if (!node)
break;
@@ -493,7 +495,7 @@ static PyObject *python_process_callchain(struct perf_sample *sample,
_PyUnicode_FromString(dsoname));
}
- callchain_cursor_advance(&callchain_cursor);
+ callchain_cursor_advance(cursor);
PyList_Append(pylist, pyelem);
Py_DECREF(pyelem);
}
--
2.41.0.162.gfafddb0af9-goog
next prev parent reply other threads:[~2023-06-08 23:31 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-08 23:27 [PATCH v2 00/26] Fix memory leaks (was reference count checking for thread) Ian Rogers
2023-06-08 23:27 ` [PATCH v2 01/26] perf thread: Remove notion of dead threads Ian Rogers
2023-06-08 23:27 ` [PATCH v2 02/26] perf thread: Make threads rbtree non-invasive Ian Rogers
2023-06-09 14:13 ` Arnaldo Carvalho de Melo
2023-06-08 23:28 ` [PATCH v2 03/26] perf thread: Add accessor functions for thread Ian Rogers
2023-06-09 14:15 ` Arnaldo Carvalho de Melo
2023-06-09 14:50 ` Arnaldo Carvalho de Melo
2023-06-08 23:28 ` [PATCH v2 04/26] perf maps: Make delete static, always use put Ian Rogers
2023-06-09 14:17 ` Arnaldo Carvalho de Melo
2023-06-08 23:28 ` [PATCH v2 05/26] perf addr_location: Move to its own header Ian Rogers
2023-06-09 14:18 ` Arnaldo Carvalho de Melo
2023-06-08 23:28 ` [PATCH v2 06/26] perf addr_location: Add init/exit/copy functions Ian Rogers
2023-06-09 19:48 ` Arnaldo Carvalho de Melo
2023-06-08 23:28 ` [PATCH v2 07/26] perf thread: Add reference count checking Ian Rogers
2023-06-08 23:28 ` [PATCH v2 08/26] perf machine: Make delete_threads part of machine__exit Ian Rogers
2023-06-08 23:28 ` [PATCH v2 09/26] perf report: Avoid thread leak Ian Rogers
2023-06-08 23:28 ` [PATCH v2 10/26] perf header: Ensure bitmaps are freed Ian Rogers
2023-06-08 23:28 ` [PATCH v2 11/26] perf stat: Avoid evlist leak Ian Rogers
2023-06-08 23:28 ` [PATCH v2 12/26] perf intel-pt: Fix missed put and leak Ian Rogers
2023-06-08 23:28 ` [PATCH v2 13/26] perf evlist: Free stats in all evlist destruction Ian Rogers
2023-06-08 23:28 ` [PATCH v2 14/26] perf python: Avoid 2 leak sanitizer issues Ian Rogers
2023-06-08 23:28 ` [PATCH v2 15/26] perf jit: Fix two thread leaks Ian Rogers
2023-06-08 23:28 ` [PATCH v2 16/26] perf symbol-elf: Correct holding a reference Ian Rogers
2023-06-08 23:28 ` [PATCH v2 17/26] perf maps: Fix overlapping memory leak Ian Rogers
2023-06-08 23:28 ` [PATCH v2 18/26] perf machine: Fix leak of kernel dso Ian Rogers
2023-06-08 23:28 ` [PATCH v2 19/26] perf machine: Don't leak module maps Ian Rogers
2023-06-08 23:28 ` [PATCH v2 20/26] perf map/maps/thread: Changes to reference counting Ian Rogers
2023-06-08 23:28 ` [PATCH v2 21/26] perf annotate: Fix parse_objdump_line memory leak Ian Rogers
2023-06-08 23:28 ` [PATCH v2 22/26] perf top: Add exit routine for main thread Ian Rogers
2023-06-08 23:28 ` [PATCH v2 23/26] perf header: Avoid out-of-bounds read Ian Rogers
2023-06-08 23:28 ` Ian Rogers [this message]
2023-06-09 19:49 ` [PATCH v2 24/26] perf callchain: Use pthread keys for tls callchain_cursor Arnaldo Carvalho de Melo
2023-06-08 23:28 ` [PATCH v2 25/26] perf srcline: Change free_srcline to zfree_srcline Ian Rogers
2023-06-08 23:28 ` [PATCH v2 26/26] perf hist: Fix srcline memory leak Ian Rogers
2023-06-12 14:13 ` Arnaldo Carvalho de Melo
2023-06-12 14:16 ` Arnaldo Carvalho de Melo
2023-06-12 14:46 ` Ian Rogers
2023-06-12 17:23 ` Arnaldo Carvalho de Melo
2023-06-12 21:16 ` Andi Kleen
2023-06-12 21:30 ` Arnaldo Carvalho de Melo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230608232823.4027869-25-irogers@google.com \
--to=irogers@google.com \
--cc=9erthalion6@gmail.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=alisaidi@amazon.com \
--cc=atrajeev@linux.vnet.ibm.com \
--cc=brianrob@linux.microsoft.com \
--cc=changbin.du@huawei.com \
--cc=coresight@lists.linaro.org \
--cc=german.gomez@arm.com \
--cc=ivan@cloudflare.com \
--cc=james.clark@arm.com \
--cc=john.g.garry@oracle.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=kprateek.nayak@amd.com \
--cc=leo.yan@linaro.org \
--cc=liam.howlett@oracle.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=liuwenyu7@huawei.com \
--cc=mark.rutland@arm.com \
--cc=maskray@google.com \
--cc=mike.leach@linaro.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=naveen.n.rao@linux.vnet.ibm.com \
--cc=ojeda@kernel.org \
--cc=peterz@infradead.org \
--cc=ravi.bangoria@amd.com \
--cc=renyu.zj@linux.alibaba.com \
--cc=seanjc@google.com \
--cc=sesse@google.com \
--cc=suzuki.poulose@arm.com \
--cc=will@kernel.org \
--cc=yangjihong1@huawei.com \
--cc=ye.xingchen@zte.com.cn \
--cc=yuancan@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).