[RFC PATCH v1 05/11] mm: Add soft_limit_top_tier tree for mem cgroup

LKML Archive mirror
 help / color / mirror / Atom feed

From: Tim Chen <tim.c.chen@linux.intel.com>
To: Michal Hocko <mhocko@suse.cz>
Cc: Tim Chen <tim.c.chen@linux.intel.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dave Hansen <dave.hansen@intel.com>,
	Ying Huang <ying.huang@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	David Rientjes <rientjes@google.com>,
	Shakeel Butt <shakeelb@google.com>,
	linux-mm@kvack.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v1 05/11] mm: Add soft_limit_top_tier tree for mem cgroup
Date: Mon,  5 Apr 2021 10:08:29 -0700	[thread overview]
Message-ID: <04b7c9bce901d271eae216dcfbb928aadc8d48d0.1617642417.git.tim.c.chen@linux.intel.com> (raw)
In-Reply-To: <cover.1617642417.git.tim.c.chen@linux.intel.com>

Define a per-node soft_limit_toptier_tree red-black tree that sorts and
tracks the cgroups by each group's excess over its top tier soft limit.
A cgroup is added to the tree if it has exceeded its top tier soft limit
and it has used pages on the node.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 mm/memcontrol.c | 68 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 16 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 68590f46fa76..90a78ff3fca8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -122,6 +122,7 @@ struct mem_cgroup_tree {
 };
 
 static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+static struct mem_cgroup_tree soft_limit_toptier_tree __read_mostly;
 
 /* for OOM */
 struct mem_cgroup_eventfd_list {
@@ -590,17 +591,27 @@ mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
 }
 
 static struct mem_cgroup_tree_per_node *
-soft_limit_tree_node(int nid)
-{
-	return soft_limit_tree.rb_tree_per_node[nid];
+soft_limit_tree_node(int nid, enum node_states type)
+{
+	switch (type) {
+	case N_MEMORY:
+		return soft_limit_tree.rb_tree_per_node[nid];
+	case N_TOPTIER:
+		if (node_state(nid, N_TOPTIER))
+			return soft_limit_toptier_tree.rb_tree_per_node[nid];
+		else
+			return NULL;
+	default:
+		return NULL;
+	}
 }
 
 static struct mem_cgroup_tree_per_node *
-soft_limit_tree_from_page(struct page *page)
+soft_limit_tree_from_page(struct page *page, enum node_states type)
 {
 	int nid = page_to_nid(page);
 
-	return soft_limit_tree.rb_tree_per_node[nid];
+	return soft_limit_tree_node(nid, type);
 }
 
 static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
@@ -661,12 +672,24 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
 	spin_unlock_irqrestore(&mctz->lock, flags);
 }
 
-static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
+static unsigned long soft_limit_excess(struct mem_cgroup *memcg, enum node_states type)
 {
-	unsigned long nr_pages = page_counter_read(&memcg->memory);
-	unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
+	unsigned long nr_pages;
+	unsigned long soft_limit;
 	unsigned long excess = 0;
 
+	switch (type) {
+	case N_MEMORY:
+		nr_pages = page_counter_read(&memcg->memory);
+		soft_limit = READ_ONCE(memcg->soft_limit);
+		break;
+	case N_TOPTIER:
+		nr_pages = page_counter_read(&memcg->toptier);
+		soft_limit = READ_ONCE(memcg->toptier_soft_limit);
+		break;
+	default:
+		return 0;
+	}
 	if (nr_pages > soft_limit)
 		excess = nr_pages - soft_limit;
 
@@ -679,7 +702,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 	struct mem_cgroup_per_node *mz;
 	struct mem_cgroup_tree_per_node *mctz;
 
-	mctz = soft_limit_tree_from_page(page);
+	mctz = soft_limit_tree_from_page(page, N_MEMORY);
 	if (!mctz)
 		return;
 	/*
@@ -688,7 +711,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
 	 */
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
 		mz = mem_cgroup_page_nodeinfo(memcg, page);
-		excess = soft_limit_excess(memcg);
+		excess = soft_limit_excess(memcg, N_MEMORY);
 		/*
 		 * We have to update the tree if mz is on RB-tree or
 		 * mem is over its softlimit.
@@ -718,7 +741,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
 
 	for_each_node(nid) {
 		mz = mem_cgroup_nodeinfo(memcg, nid);
-		mctz = soft_limit_tree_node(nid);
+		mctz = soft_limit_tree_node(nid, N_MEMORY);
 		if (mctz)
 			mem_cgroup_remove_exceeded(mz, mctz);
 	}
@@ -742,7 +765,7 @@ __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
 	 * position in the tree.
 	 */
 	__mem_cgroup_remove_exceeded(mz, mctz);
-	if (!soft_limit_excess(mz->memcg) ||
+	if (!soft_limit_excess(mz->memcg, N_MEMORY) ||
 	    !css_tryget(&mz->memcg->css))
 		goto retry;
 done:
@@ -1805,7 +1828,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
 		.pgdat = pgdat,
 	};
 
-	excess = soft_limit_excess(root_memcg);
+	excess = soft_limit_excess(root_memcg, N_MEMORY);
 
 	while (1) {
 		victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
@@ -1834,7 +1857,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
 		total += mem_cgroup_shrink_node(victim, gfp_mask, false,
 					pgdat, &nr_scanned);
 		*total_scanned += nr_scanned;
-		if (!soft_limit_excess(root_memcg))
+		if (!soft_limit_excess(root_memcg, N_MEMORY))
 			break;
 	}
 	mem_cgroup_iter_break(root_memcg, victim);
@@ -3457,7 +3480,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 	if (order > 0)
 		return 0;
 
-	mctz = soft_limit_tree_node(pgdat->node_id);
+	mctz = soft_limit_tree_node(pgdat->node_id, N_MEMORY);
 
 	/*
 	 * Do not even bother to check the largest node if the root
@@ -3513,7 +3536,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 		if (!reclaimed)
 			next_mz = __mem_cgroup_largest_soft_limit_node(mctz);
 
-		excess = soft_limit_excess(mz->memcg);
+		excess = soft_limit_excess(mz->memcg, N_MEMORY);
 		/*
 		 * One school of thought says that we should not add
 		 * back the node to the tree if reclaim returns 0.
@@ -7189,6 +7212,19 @@ static int __init mem_cgroup_init(void)
 		rtpn->rb_rightmost = NULL;
 		spin_lock_init(&rtpn->lock);
 		soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+		if (!node_state(node, N_TOPTIER)) {
+			soft_limit_toptier_tree.rb_tree_per_node[node] = NULL;
+			continue;
+		}
+
+		rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
+				    node_online(node) ? node : NUMA_NO_NODE);
+
+		rtpn->rb_root = RB_ROOT;
+		rtpn->rb_rightmost = NULL;
+		spin_lock_init(&rtpn->lock);
+		soft_limit_toptier_tree.rb_tree_per_node[node] = rtpn;
 	}
 
 	return 0;
-- 
2.20.1

next prev parent reply	other threads:[~2021-04-05 18:09 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05 17:08 [RFC PATCH v1 00/11] Manage the top tier memory in a tiered memory Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 01/11] mm: Define top tier memory node mask Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 02/11] mm: Add soft memory limit for mem cgroup Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 03/11] mm: Account the top tier memory usage per cgroup Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 04/11] mm: Report top tier memory usage in sysfs Tim Chen
2021-04-05 17:08 ` Tim Chen [this message]
2021-04-05 17:08 ` [RFC PATCH v1 06/11] mm: Handle top tier memory in cgroup soft limit memory tree utilities Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 07/11] mm: Account the total top tier memory in use Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 08/11] mm: Add toptier option for mem_cgroup_soft_limit_reclaim() Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 09/11] mm: Use kswapd to demote pages when toptier memory is tight Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 10/11] mm: Set toptier_scale_factor via sysctl Tim Chen
2021-04-05 17:08 ` [RFC PATCH v1 11/11] mm: Wakeup kswapd if toptier memory need soft reclaim Tim Chen
2021-04-06  9:08 ` [RFC PATCH v1 00/11] Manage the top tier memory in a tiered memory Michal Hocko
2021-04-07 22:33   ` Tim Chen
2021-04-08 11:52     ` Michal Hocko
2021-04-09 23:26       ` Tim Chen
2021-04-12 19:20         ` Shakeel Butt
2021-04-14  8:59           ` Jonathan Cameron
2021-04-15  0:42           ` Tim Chen
2021-04-13  2:15         ` Huang, Ying
2021-04-13  8:33         ` Michal Hocko
2021-04-12 14:03       ` Shakeel Butt
2021-04-08 17:18 ` Shakeel Butt
2021-04-08 18:00   ` Yang Shi
2021-04-08 20:29     ` Shakeel Butt
2021-04-08 20:50       ` Yang Shi
2021-04-12 14:03         ` Shakeel Butt
2021-04-09  7:24       ` Michal Hocko
2021-04-15 22:31         ` Tim Chen
2021-04-16  6:38           ` Michal Hocko
2021-04-14 23:22       ` Tim Chen
2021-04-09  2:58     ` Huang, Ying
2021-04-09 20:50       ` Yang Shi
2021-04-15 22:25   ` Tim Chen

find likely ancestor, descendant, or conflicting patches for this message:
dfblob:68590f46fa7 dfblob:90a78ff3fca
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=04b7c9bce901d271eae216dcfbb928aadc8d48d0.1617642417.git.tim.c.chen@linux.intel.com \
    --to=tim.c.chen@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@intel.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=rientjes@google.com \
    --cc=shakeelb@google.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).