All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] Optimize pid filters and add --no-filter option
@ 2019-04-15 16:47 Slavomir Kaslev
  2019-04-15 16:47 ` [PATCH v2 1/2] trace-cmd: Optimize how pid filters are expressed Slavomir Kaslev
  2019-04-15 16:47 ` [PATCH v2 2/2] trace-cmd: Add --no-filter option to not filter recording processes Slavomir Kaslev
  0 siblings, 2 replies; 5+ messages in thread
From: Slavomir Kaslev @ 2019-04-15 16:47 UTC (permalink / raw
  To: rostedt
  Cc: linux-trace-devel, pauld, ykaradzhov, jbacik, tstoyanov,
	slavomir.kaslev

This patchset optimizes how pid filters are expressed and makes it less likely
that we overflow ftrace filters' size limit of one page.

Changes since v1:

Add missing tags
Fix append_filter_pid_range() callers to pass valid range as [min,max]

Slavomir Kaslev (2):
  trace-cmd: Optimize how pid filters are expressed
  trace-cmd: Add --no-filter option to not filter recording processes

 tracecmd/trace-record.c | 129 +++++++++++++++++++++++++++-------------
 tracecmd/trace-usage.c  |   1 +
 2 files changed, 90 insertions(+), 40 deletions(-)

-- 
2.19.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 1/2] trace-cmd: Optimize how pid filters are expressed
  2019-04-15 16:47 [PATCH v2 0/2] Optimize pid filters and add --no-filter option Slavomir Kaslev
@ 2019-04-15 16:47 ` Slavomir Kaslev
  2019-04-15 21:59   ` Steven Rostedt
  2019-04-15 16:47 ` [PATCH v2 2/2] trace-cmd: Add --no-filter option to not filter recording processes Slavomir Kaslev
  1 sibling, 1 reply; 5+ messages in thread
From: Slavomir Kaslev @ 2019-04-15 16:47 UTC (permalink / raw
  To: rostedt
  Cc: linux-trace-devel, pauld, ykaradzhov, jbacik, tstoyanov,
	slavomir.kaslev

Express pid filters as allowed/disallowed filter ranges

  (pid>=100&&pid<=103)

instead of specifying them per pid

  (pid==100||pid==101||pid==102||pid==103)

This makes the size of the resulting filter smaller (and faster) and avoids
overflowing the filter size limit of one page which we can hit on bigger
machines (say >160 CPUs).

Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com>
Reported-by: Phil Auld <pauld@redhat.com>
Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tracecmd/trace-record.c | 115 +++++++++++++++++++++++++++-------------
 1 file changed, 78 insertions(+), 37 deletions(-)

diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index 76ca92d..eeee5e9 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -950,10 +950,61 @@ static void update_ftrace_pids(int reset)
 static void update_event_filters(struct buffer_instance *instance);
 static void update_pid_event_filters(struct buffer_instance *instance);
 
+static void append_filter_pid_range(char **filter, int *curr_len,
+				    const char *field,
+				    int start_pid, int end_pid, bool exclude)
+{
+	char *op, *op1, *op2, *op3;
+	int len;
+
+	op = *filter && **filter ? "||" : "";
+
+	// Handle thus case explicitly so that we get `pid==3` instead of
+	// `pid>=3&&pid<=3` for singleton ranges
+	if (start_pid == end_pid) {
+#define FMT	"%s(%s%s%d)"
+		len = snprintf(NULL, 0, FMT, op,
+			       field, exclude ? "!=" : "==", start_pid);
+		*filter = realloc(*filter, *curr_len + len + 1);
+		if (!*filter)
+			die("realloc");
+
+		len = snprintf(*filter + *curr_len, len + 1, FMT, op,
+			       field, exclude ? "!=" : "==", start_pid);
+		*curr_len += len;
+
+		return;
+#undef FMT
+	}
+
+	if (exclude) {
+		op1 = "<";
+		op2 = "||";
+		op3 = ">";
+	} else {
+		op1 = ">=";
+		op2 = "&&";
+		op3 = "<=";
+	}
+
+#define FMT	"%s(%s%s%d%s%s%s%d)"
+	len = snprintf(NULL, 0, FMT, op,
+		       field, op1, start_pid, op2,
+		       field, op3, end_pid);
+	*filter = realloc(*filter, *curr_len + len + 1);
+	if (!*filter)
+		die("realloc");
+
+	len = snprintf(*filter + *curr_len, len + 1, FMT, op,
+		       field, op1, start_pid, op2,
+		       field, op3, end_pid);
+	*curr_len += len;
+}
+
 /**
  * make_pid_filter - create a filter string to all pids against @field
  * @curr_filter: Append to a previous filter (may realloc). Can be NULL
- * @field: The fild to compare the pids against
+ * @field: The field to compare the pids against
  *
  * Creates a new string or appends to an existing one if @curr_filter
  * is not NULL. The new string will contain a filter with all pids
@@ -963,54 +1014,44 @@ static void update_pid_event_filters(struct buffer_instance *instance);
  */
 static char *make_pid_filter(char *curr_filter, const char *field)
 {
+	int curr_len = 0, last_exclude = -1;
+	int start_pid = -1, last_pid = -1;
+	char *filter = NULL, *save;
 	struct filter_pids *p;
-	char *filter;
-	char *orit;
-	char *match;
-	char *str;
-	int curr_len = 0;
-	int len;
 
 	/* Use the new method if possible */
 	if (have_set_event_pid)
 		return NULL;
 
-	len = len_filter_pids + (strlen(field) + strlen("(==)||")) * nr_filter_pids;
-
-	if (curr_filter) {
-		curr_len = strlen(curr_filter);
-		filter = realloc(curr_filter, curr_len + len + strlen("(&&())"));
-		if (!filter)
-			die("realloc");
-		memmove(filter+1, curr_filter, curr_len);
-		filter[0] = '(';
-		strcat(filter, ")&&(");
-		curr_len = strlen(filter);
-	} else
-		filter = malloc(len);
-	if (!filter)
-		die("Failed to allocate pid filter");
-
-	/* Last '||' that is not used will cover the \0 */
-	str = filter + curr_len;
+	if (!filter_pids)
+		return curr_filter;
 
 	for (p = filter_pids; p; p = p->next) {
-		if (p->exclude) {
-			match = "!=";
-			orit = "&&";
-		} else {
-			match = "==";
-			orit = "||";
+		/* PIDs are inserted in `filter_pids` from the front and that's
+		 * why we expect them in descending order here.
+		 */
+		if (p->pid == last_pid - 1 && p->exclude == last_exclude) {
+			last_pid = p->pid;
+			continue;
 		}
-		if (p == filter_pids)
-			orit = "";
 
-		len = sprintf(str, "%s(%s%s%d)", orit, field, match, p->pid);
-		str += len;
+		if (start_pid != -1)
+			append_filter_pid_range(&filter, &curr_len, field,
+						last_pid, start_pid,
+						last_exclude);
+
+		start_pid = last_pid = p->pid;
+		last_exclude = p->exclude;
+
 	}
+	append_filter_pid_range(&filter, &curr_len, field,
+				last_pid, start_pid, last_exclude);
 
-	if (curr_len)
-		sprintf(str, ")");
+	if (curr_filter) {
+		save = filter;
+		asprintf(&filter, "(%s)&&(%s)", curr_filter, filter);
+		free(save);
+	}
 
 	return filter;
 }
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 2/2] trace-cmd: Add --no-filter option to not filter recording processes
  2019-04-15 16:47 [PATCH v2 0/2] Optimize pid filters and add --no-filter option Slavomir Kaslev
  2019-04-15 16:47 ` [PATCH v2 1/2] trace-cmd: Optimize how pid filters are expressed Slavomir Kaslev
@ 2019-04-15 16:47 ` Slavomir Kaslev
  1 sibling, 0 replies; 5+ messages in thread
From: Slavomir Kaslev @ 2019-04-15 16:47 UTC (permalink / raw
  To: rostedt
  Cc: linux-trace-devel, pauld, ykaradzhov, jbacik, tstoyanov,
	slavomir.kaslev

Add --no-filter option which doesn't install filters for the trace-cmd recording
processes pids.

Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com>
Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tracecmd/trace-record.c | 14 +++++++++++---
 tracecmd/trace-usage.c  |  1 +
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
index eeee5e9..133dfdf 100644
--- a/tracecmd/trace-record.c
+++ b/tracecmd/trace-record.c
@@ -86,6 +86,7 @@ static int do_ptrace;
 
 static int filter_task;
 static int filter_pid = -1;
+static bool no_filter = false;
 
 static int local_cpu_count;
 
@@ -1061,6 +1062,9 @@ static void update_task_filter(void)
 	struct buffer_instance *instance;
 	int pid = getpid();
 
+	if (no_filter)
+		return;
+
 	if (filter_task)
 		add_filter_pid(pid, 0);
 
@@ -4375,9 +4379,9 @@ void update_first_instance(struct buffer_instance *instance, int topt)
 }
 
 enum {
-
-	OPT_quiet		= 246,
-	OPT_debug		= 247,
+	OPT_quiet		= 245,
+	OPT_debug		= 246,
+	OPT_no_filter		= 247,
 	OPT_max_graph_depth	= 248,
 	OPT_tsoffset		= 249,
 	OPT_bycomm		= 250,
@@ -4601,6 +4605,7 @@ static void parse_record_options(int argc,
 			{"by-comm", no_argument, NULL, OPT_bycomm},
 			{"ts-offset", required_argument, NULL, OPT_tsoffset},
 			{"max-graph-depth", required_argument, NULL, OPT_max_graph_depth},
+			{"no-filter", no_argument, NULL, OPT_no_filter},
 			{"debug", no_argument, NULL, OPT_debug},
 			{"quiet", no_argument, NULL, OPT_quiet},
 			{"help", no_argument, NULL, '?'},
@@ -4875,6 +4880,9 @@ static void parse_record_options(int argc,
 			if (!ctx->max_graph_depth)
 				die("Could not allocate option");
 			break;
+		case OPT_no_filter:
+			no_filter = true;
+			break;
 		case OPT_debug:
 			debug = 1;
 			break;
diff --git a/tracecmd/trace-usage.c b/tracecmd/trace-usage.c
index 9ea1906..29a7081 100644
--- a/tracecmd/trace-usage.c
+++ b/tracecmd/trace-usage.c
@@ -56,6 +56,7 @@ static struct usage_help usage_help[] = {
 		"          --func-stack perform a stack trace for function tracer\n"
 		"             (use with caution)\n"
 		"          --max-graph-depth limit function_graph depth\n"
+		"          --no-filter do not set any event filters\n"
 	},
 	{
 		"start",
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] trace-cmd: Optimize how pid filters are expressed
  2019-04-15 16:47 ` [PATCH v2 1/2] trace-cmd: Optimize how pid filters are expressed Slavomir Kaslev
@ 2019-04-15 21:59   ` Steven Rostedt
  2019-04-15 22:55     ` Slavomir Kaslev
  0 siblings, 1 reply; 5+ messages in thread
From: Steven Rostedt @ 2019-04-15 21:59 UTC (permalink / raw
  To: Slavomir Kaslev
  Cc: linux-trace-devel, pauld, ykaradzhov, jbacik, tstoyanov,
	slavomir.kaslev

On Mon, 15 Apr 2019 19:47:38 +0300
Slavomir Kaslev <kaslevs@vmware.com> wrote:

> Express pid filters as allowed/disallowed filter ranges
> 
>   (pid>=100&&pid<=103)  
> 
> instead of specifying them per pid
> 
>   (pid==100||pid==101||pid==102||pid==103)
> 
> This makes the size of the resulting filter smaller (and faster) and avoids
> overflowing the filter size limit of one page which we can hit on bigger
> machines (say >160 CPUs).

Except it breaks if we have a split.

I ran this:

 hackbench 10 &
 tracecmd/trace-cmd record -e sched_switch cat /sys/kernel/debug/tracing/events/sched/sched_switch/filter


Time: 0.093
(common_pid<6959||common_pid>6969)||(common_pid<6945||common_pid>6957)||(next_pid<6959||next_pid>6969)||(next_pid<6945||next_pid>6957)

This was the output. Showing that we had common_pid from 6959 - 6969
and 6945 - 6957 (a 6958 was missing), and because of this, we now trace
all processes because (common_pid < 6959 || common_pid > 6957) is
always true.

We need an "&&" there somewhere. That should have been:

((common_pid<6959||common_pid>6969)&&(common_pid<6945||common_pid>6957))||((next_pid<6959||next_pid>6969)&&(next_pid<6945||next_pid>6957))

-- Steve



> 
> Signed-off-by: Slavomir Kaslev <kaslevs@vmware.com>
> Reported-by: Phil Auld <pauld@redhat.com>
> Suggested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> ---
>  tracecmd/trace-record.c | 115 +++++++++++++++++++++++++++-------------
>  1 file changed, 78 insertions(+), 37 deletions(-)
> 
> diff --git a/tracecmd/trace-record.c b/tracecmd/trace-record.c
> index 76ca92d..eeee5e9 100644
> --- a/tracecmd/trace-record.c
> +++ b/tracecmd/trace-record.c
> @@ -950,10 +950,61 @@ static void update_ftrace_pids(int reset)
>  static void update_event_filters(struct buffer_instance *instance);
>  static void update_pid_event_filters(struct buffer_instance *instance);
>  
> +static void append_filter_pid_range(char **filter, int *curr_len,
> +				    const char *field,
> +				    int start_pid, int end_pid, bool exclude)
> +{
> +	char *op, *op1, *op2, *op3;
> +	int len;
> +
> +	op = *filter && **filter ? "||" : "";
> +
> +	// Handle thus case explicitly so that we get `pid==3` instead of
> +	// `pid>=3&&pid<=3` for singleton ranges
> +	if (start_pid == end_pid) {
> +#define FMT	"%s(%s%s%d)"
> +		len = snprintf(NULL, 0, FMT, op,
> +			       field, exclude ? "!=" : "==", start_pid);
> +		*filter = realloc(*filter, *curr_len + len + 1);
> +		if (!*filter)
> +			die("realloc");
> +
> +		len = snprintf(*filter + *curr_len, len + 1, FMT, op,
> +			       field, exclude ? "!=" : "==", start_pid);
> +		*curr_len += len;
> +
> +		return;
> +#undef FMT
> +	}
> +
> +	if (exclude) {
> +		op1 = "<";
> +		op2 = "||";
> +		op3 = ">";
> +	} else {
> +		op1 = ">=";
> +		op2 = "&&";
> +		op3 = "<=";
> +	}
> +
> +#define FMT	"%s(%s%s%d%s%s%s%d)"
> +	len = snprintf(NULL, 0, FMT, op,
> +		       field, op1, start_pid, op2,
> +		       field, op3, end_pid);
> +	*filter = realloc(*filter, *curr_len + len + 1);
> +	if (!*filter)
> +		die("realloc");
> +
> +	len = snprintf(*filter + *curr_len, len + 1, FMT, op,
> +		       field, op1, start_pid, op2,
> +		       field, op3, end_pid);
> +	*curr_len += len;
> +}
> +
>  /**
>   * make_pid_filter - create a filter string to all pids against @field
>   * @curr_filter: Append to a previous filter (may realloc). Can be NULL
> - * @field: The fild to compare the pids against
> + * @field: The field to compare the pids against
>   *
>   * Creates a new string or appends to an existing one if @curr_filter
>   * is not NULL. The new string will contain a filter with all pids
> @@ -963,54 +1014,44 @@ static void update_pid_event_filters(struct buffer_instance *instance);
>   */
>  static char *make_pid_filter(char *curr_filter, const char *field)
>  {
> +	int curr_len = 0, last_exclude = -1;
> +	int start_pid = -1, last_pid = -1;
> +	char *filter = NULL, *save;
>  	struct filter_pids *p;
> -	char *filter;
> -	char *orit;
> -	char *match;
> -	char *str;
> -	int curr_len = 0;
> -	int len;
>  
>  	/* Use the new method if possible */
>  	if (have_set_event_pid)
>  		return NULL;
>  
> -	len = len_filter_pids + (strlen(field) + strlen("(==)||")) * nr_filter_pids;
> -
> -	if (curr_filter) {
> -		curr_len = strlen(curr_filter);
> -		filter = realloc(curr_filter, curr_len + len + strlen("(&&())"));
> -		if (!filter)
> -			die("realloc");
> -		memmove(filter+1, curr_filter, curr_len);
> -		filter[0] = '(';
> -		strcat(filter, ")&&(");
> -		curr_len = strlen(filter);
> -	} else
> -		filter = malloc(len);
> -	if (!filter)
> -		die("Failed to allocate pid filter");
> -
> -	/* Last '||' that is not used will cover the \0 */
> -	str = filter + curr_len;
> +	if (!filter_pids)
> +		return curr_filter;
>  
>  	for (p = filter_pids; p; p = p->next) {
> -		if (p->exclude) {
> -			match = "!=";
> -			orit = "&&";
> -		} else {
> -			match = "==";
> -			orit = "||";
> +		/* PIDs are inserted in `filter_pids` from the front and that's
> +		 * why we expect them in descending order here.
> +		 */
> +		if (p->pid == last_pid - 1 && p->exclude == last_exclude) {
> +			last_pid = p->pid;
> +			continue;
>  		}
> -		if (p == filter_pids)
> -			orit = "";
>  
> -		len = sprintf(str, "%s(%s%s%d)", orit, field, match, p->pid);
> -		str += len;
> +		if (start_pid != -1)
> +			append_filter_pid_range(&filter, &curr_len, field,
> +						last_pid, start_pid,
> +						last_exclude);
> +
> +		start_pid = last_pid = p->pid;
> +		last_exclude = p->exclude;
> +
>  	}
> +	append_filter_pid_range(&filter, &curr_len, field,
> +				last_pid, start_pid, last_exclude);
>  
> -	if (curr_len)
> -		sprintf(str, ")");
> +	if (curr_filter) {
> +		save = filter;
> +		asprintf(&filter, "(%s)&&(%s)", curr_filter, filter);
> +		free(save);
> +	}
>  
>  	return filter;
>  }


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 1/2] trace-cmd: Optimize how pid filters are expressed
  2019-04-15 21:59   ` Steven Rostedt
@ 2019-04-15 22:55     ` Slavomir Kaslev
  0 siblings, 0 replies; 5+ messages in thread
From: Slavomir Kaslev @ 2019-04-15 22:55 UTC (permalink / raw
  To: rostedt@goodmis.org
  Cc: pauld@redhat.com, jbacik@fb.com, Yordan Karadzhov,
	linux-trace-devel@vger.kernel.org, Tzvetomir Stoyanov

On Mon, 2019-04-15 at 17:59 -0400, Steven Rostedt wrote:
> On Mon, 15 Apr 2019 19:47:38 +0300
> Slavomir Kaslev <kaslevs@vmware.com> wrote:
> 
> > Express pid filters as allowed/disallowed filter ranges
> > 
> >   (pid>=100&&pid<=103)  
> > 
> > instead of specifying them per pid
> > 
> >   (pid==100||pid==101||pid==102||pid==103)
> > 
> > This makes the size of the resulting filter smaller (and faster)
> > and avoids
> > overflowing the filter size limit of one page which we can hit on
> > bigger
> > machines (say >160 CPUs).
> 
> Except it breaks if we have a split.
> 
> I ran this:
> 
>  hackbench 10 &
>  tracecmd/trace-cmd record -e sched_switch cat
> /sys/kernel/debug/tracing/events/sched/sched_switch/filter
> 
> 
> Time: 0.093
> (common_pid<6959||common_pid>6969)||(common_pid<6945||common_pid>6957
> )||(next_pid<6959||next_pid>6969)||(next_pid<6945||next_pid>6957)
> 
> This was the output. Showing that we had common_pid from 6959 - 6969
> and 6945 - 6957 (a 6958 was missing), and because of this, we now
> trace
> all processes because (common_pid < 6959 || common_pid > 6957) is
> always true.
> 
> We need an "&&" there somewhere. That should have been:
> 
> ((common_pid<6959||common_pid>6969)&&(common_pid<6945||common_pid>695
> 7))||((next_pid<6959||next_pid>6969)&&(next_pid<6945||next_pid>6957))

Good catch. I've misread how the original code worked. It appends
exclude filters with && and non-exclude with ||. I'll send a fix in v3.

-- Slavi


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-04-15 22:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-04-15 16:47 [PATCH v2 0/2] Optimize pid filters and add --no-filter option Slavomir Kaslev
2019-04-15 16:47 ` [PATCH v2 1/2] trace-cmd: Optimize how pid filters are expressed Slavomir Kaslev
2019-04-15 21:59   ` Steven Rostedt
2019-04-15 22:55     ` Slavomir Kaslev
2019-04-15 16:47 ` [PATCH v2 2/2] trace-cmd: Add --no-filter option to not filter recording processes Slavomir Kaslev

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.