xref: /linux/mm/hugetlb_cgroup.c (revision 3e9e952bb3139ad1e08f3e1960239c2988ab90c9)
// SPDX-License-Identifier: LGPL-2.1
/*
 *
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 */
122bc64a20SAneesh Kumar K.V 
132bc64a20SAneesh Kumar K.V #include <linux/cgroup.h>
1471f87beeSJohannes Weiner #include <linux/page_counter.h>
152bc64a20SAneesh Kumar K.V #include <linux/slab.h>
162bc64a20SAneesh Kumar K.V #include <linux/hugetlb.h>
172bc64a20SAneesh Kumar K.V #include <linux/hugetlb_cgroup.h>
182bc64a20SAneesh Kumar K.V 
/*
 * Pack two 16-bit values into one integer: @x goes into bits 16..31 and
 * @val into bits 0..15.  The readers in this file decode cft->private with
 * MEMFILE_IDX() (used as the hstate index) and MEMFILE_ATTR() (used as the
 * RES_* attribute).
 */
#define MEMFILE_PRIVATE(x, val)	(((x) << 16) | (val))
#define MEMFILE_IDX(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_ATTR(val)	((val) & 0xffff)

/*
 * Use t->m[0] to encode the offset: the byte offset of member @m0 inside
 * type @t lands in the upper 16 bits, the size of that member in the lower
 * 16 bits.  MEMFILE_OFFSET0() and MEMFILE_FIELD_SIZE() extract the halves.
 */
#define MEMFILE_OFFSET(t, m0)	((offsetof(t, m0) << 16) | sizeof_field(t, m0))
#define MEMFILE_OFFSET0(val)	(((val) >> 16) & 0xffff)
#define MEMFILE_FIELD_SIZE(val)	((val) & 0xffff)

/* Element counts of the cftype template arrays (defined outside this view). */
#define DFL_TMPL_SIZE		ARRAY_SIZE(hugetlb_dfl_tmpl)
#define LEGACY_TMPL_SIZE	ARRAY_SIZE(hugetlb_legacy_tmpl)
3047179fe0SXiu Jianfeng 
312bc64a20SAneesh Kumar K.V static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
3247179fe0SXiu Jianfeng static struct cftype *dfl_files;
3347179fe0SXiu Jianfeng static struct cftype *legacy_files;
342bc64a20SAneesh Kumar K.V 
35cdc2fcfeSMina Almasry static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup * h_cg,int idx,bool rsvd)361adc4d41SMina Almasry __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
37cdc2fcfeSMina Almasry 				     bool rsvd)
38cdc2fcfeSMina Almasry {
39cdc2fcfeSMina Almasry 	if (rsvd)
40cdc2fcfeSMina Almasry 		return &h_cg->rsvd_hugepage[idx];
41cdc2fcfeSMina Almasry 	return &h_cg->hugepage[idx];
42cdc2fcfeSMina Almasry }
43cdc2fcfeSMina Almasry 
441adc4d41SMina Almasry static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup * h_cg,int idx)451adc4d41SMina Almasry hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
461adc4d41SMina Almasry {
471adc4d41SMina Almasry 	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, false);
481adc4d41SMina Almasry }
491adc4d41SMina Almasry 
501adc4d41SMina Almasry static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup * h_cg,int idx)511adc4d41SMina Almasry hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
521adc4d41SMina Almasry {
531adc4d41SMina Almasry 	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, true);
541adc4d41SMina Almasry }
551adc4d41SMina Almasry 
562bc64a20SAneesh Kumar K.V static inline
hugetlb_cgroup_from_css(struct cgroup_subsys_state * s)572bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
582bc64a20SAneesh Kumar K.V {
59a7c6d554STejun Heo 	return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
602bc64a20SAneesh Kumar K.V }
612bc64a20SAneesh Kumar K.V 
622bc64a20SAneesh Kumar K.V static inline
hugetlb_cgroup_from_task(struct task_struct * task)632bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
642bc64a20SAneesh Kumar K.V {
65073219e9STejun Heo 	return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
662bc64a20SAneesh Kumar K.V }
672bc64a20SAneesh Kumar K.V 
hugetlb_cgroup_is_root(struct hugetlb_cgroup * h_cg)682bc64a20SAneesh Kumar K.V static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
692bc64a20SAneesh Kumar K.V {
702bc64a20SAneesh Kumar K.V 	return (h_cg == root_h_cgroup);
712bc64a20SAneesh Kumar K.V }
722bc64a20SAneesh Kumar K.V 
733f798518STejun Heo static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup * h_cg)743f798518STejun Heo parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
752bc64a20SAneesh Kumar K.V {
765c9d535bSTejun Heo 	return hugetlb_cgroup_from_css(h_cg->css.parent);
772bc64a20SAneesh Kumar K.V }
782bc64a20SAneesh Kumar K.V 
hugetlb_cgroup_have_usage(struct hugetlb_cgroup * h_cg)793f798518STejun Heo static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
802bc64a20SAneesh Kumar K.V {
81c37213c5SMiaohe Lin 	struct hstate *h;
822bc64a20SAneesh Kumar K.V 
83c37213c5SMiaohe Lin 	for_each_hstate(h) {
841adc4d41SMina Almasry 		if (page_counter_read(
85c37213c5SMiaohe Lin 		    hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h))))
862bc64a20SAneesh Kumar K.V 			return true;
872bc64a20SAneesh Kumar K.V 	}
882bc64a20SAneesh Kumar K.V 	return false;
892bc64a20SAneesh Kumar K.V }
902bc64a20SAneesh Kumar K.V 
hugetlb_cgroup_init(struct hugetlb_cgroup * h_cgroup,struct hugetlb_cgroup * parent_h_cgroup)91297880f4SDavid Rientjes static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
92297880f4SDavid Rientjes 				struct hugetlb_cgroup *parent_h_cgroup)
93297880f4SDavid Rientjes {
94297880f4SDavid Rientjes 	int idx;
95297880f4SDavid Rientjes 
96297880f4SDavid Rientjes 	for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
970e2759afSShakeel Butt 		struct page_counter *fault, *fault_parent = NULL;
980e2759afSShakeel Butt 		struct page_counter *rsvd, *rsvd_parent = NULL;
99297880f4SDavid Rientjes 		unsigned long limit;
100297880f4SDavid Rientjes 
1011adc4d41SMina Almasry 		if (parent_h_cgroup) {
1021adc4d41SMina Almasry 			fault_parent = hugetlb_cgroup_counter_from_cgroup(
1031adc4d41SMina Almasry 				parent_h_cgroup, idx);
1041adc4d41SMina Almasry 			rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
1051adc4d41SMina Almasry 				parent_h_cgroup, idx);
1061adc4d41SMina Almasry 		}
1070e2759afSShakeel Butt 		fault = hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx);
1080e2759afSShakeel Butt 		rsvd = hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx);
1090e2759afSShakeel Butt 
1100e2759afSShakeel Butt 		page_counter_init(fault, fault_parent, false);
1110e2759afSShakeel Butt 		page_counter_init(rsvd, rsvd_parent, false);
1120e2759afSShakeel Butt 
1130e2759afSShakeel Butt 		if (!cgroup_subsys_on_dfl(hugetlb_cgrp_subsys)) {
1140e2759afSShakeel Butt 			fault->track_failcnt = true;
1150e2759afSShakeel Butt 			rsvd->track_failcnt = true;
1160e2759afSShakeel Butt 		}
117297880f4SDavid Rientjes 
118297880f4SDavid Rientjes 		limit = round_down(PAGE_COUNTER_MAX,
1198938494cSMiaohe Lin 				   pages_per_huge_page(&hstates[idx]));
1201adc4d41SMina Almasry 
1210e2759afSShakeel Butt 		VM_BUG_ON(page_counter_set_max(fault, limit));
1220e2759afSShakeel Butt 		VM_BUG_ON(page_counter_set_max(rsvd, limit));
123297880f4SDavid Rientjes 	}
124297880f4SDavid Rientjes }
125297880f4SDavid Rientjes 
hugetlb_cgroup_free(struct hugetlb_cgroup * h_cgroup)126f4776199SMina Almasry static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
127f4776199SMina Almasry {
128f4776199SMina Almasry 	int node;
129f4776199SMina Almasry 
130f4776199SMina Almasry 	for_each_node(node)
131f4776199SMina Almasry 		kfree(h_cgroup->nodeinfo[node]);
132f4776199SMina Almasry 	kfree(h_cgroup);
133f4776199SMina Almasry }
134f4776199SMina Almasry 
135eb95419bSTejun Heo static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state * parent_css)136eb95419bSTejun Heo hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
1372bc64a20SAneesh Kumar K.V {
138eb95419bSTejun Heo 	struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
139eb95419bSTejun Heo 	struct hugetlb_cgroup *h_cgroup;
140f4776199SMina Almasry 	int node;
1412bc64a20SAneesh Kumar K.V 
142*323bbfcfSLinus Torvalds 	h_cgroup = kzalloc_flex(*h_cgroup, nodeinfo, nr_node_ids);
143f4776199SMina Almasry 
1442bc64a20SAneesh Kumar K.V 	if (!h_cgroup)
1452bc64a20SAneesh Kumar K.V 		return ERR_PTR(-ENOMEM);
1462bc64a20SAneesh Kumar K.V 
147297880f4SDavid Rientjes 	if (!parent_h_cgroup)
1482bc64a20SAneesh Kumar K.V 		root_h_cgroup = h_cgroup;
149297880f4SDavid Rientjes 
150f4776199SMina Almasry 	/*
151f4776199SMina Almasry 	 * TODO: this routine can waste much memory for nodes which will
152f4776199SMina Almasry 	 * never be onlined. It's better to use memory hotplug callback
153f4776199SMina Almasry 	 * function.
154f4776199SMina Almasry 	 */
155f4776199SMina Almasry 	for_each_node(node) {
15699249387SMiaohe Lin 		/* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
157f4776199SMina Almasry 		int node_to_alloc =
15899249387SMiaohe Lin 			node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
159f4776199SMina Almasry 		h_cgroup->nodeinfo[node] =
160f4776199SMina Almasry 			kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
161f4776199SMina Almasry 				     GFP_KERNEL, node_to_alloc);
162f4776199SMina Almasry 		if (!h_cgroup->nodeinfo[node])
163f4776199SMina Almasry 			goto fail_alloc_nodeinfo;
164f4776199SMina Almasry 	}
165f4776199SMina Almasry 
166297880f4SDavid Rientjes 	hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
1672bc64a20SAneesh Kumar K.V 	return &h_cgroup->css;
168f4776199SMina Almasry 
169f4776199SMina Almasry fail_alloc_nodeinfo:
170f4776199SMina Almasry 	hugetlb_cgroup_free(h_cgroup);
171f4776199SMina Almasry 	return ERR_PTR(-ENOMEM);
1722bc64a20SAneesh Kumar K.V }
1732bc64a20SAneesh Kumar K.V 
/* Free the hugetlb cgroup that embeds @css, including its per-node data. */
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}
1782bc64a20SAneesh Kumar K.V 
179da1def55SAneesh Kumar K.V /*
180da1def55SAneesh Kumar K.V  * Should be called with hugetlb_lock held.
181da1def55SAneesh Kumar K.V  * Since we are holding hugetlb_lock, pages cannot get moved from
182da1def55SAneesh Kumar K.V  * active list or uncharged from the cgroup, So no need to get
183da1def55SAneesh Kumar K.V  * page reference and test for page active here. This function
184da1def55SAneesh Kumar K.V  * cannot fail.
185da1def55SAneesh Kumar K.V  */
hugetlb_cgroup_move_parent(int idx,struct hugetlb_cgroup * h_cg,struct folio * folio)1863f798518STejun Heo static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
1873f982b9bSDavid Hildenbrand 				       struct folio *folio)
188da1def55SAneesh Kumar K.V {
18971f87beeSJohannes Weiner 	unsigned int nr_pages;
19071f87beeSJohannes Weiner 	struct page_counter *counter;
1913f982b9bSDavid Hildenbrand 	struct hugetlb_cgroup *hcg;
1923f798518STejun Heo 	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
193da1def55SAneesh Kumar K.V 
1943f982b9bSDavid Hildenbrand 	hcg = hugetlb_cgroup_from_folio(folio);
195da1def55SAneesh Kumar K.V 	/*
196da1def55SAneesh Kumar K.V 	 * We can have pages in active list without any cgroup
197da1def55SAneesh Kumar K.V 	 * ie, hugepage with less than 3 pages. We can safely
198da1def55SAneesh Kumar K.V 	 * ignore those pages.
199da1def55SAneesh Kumar K.V 	 */
2003f982b9bSDavid Hildenbrand 	if (!hcg || hcg != h_cg)
201da1def55SAneesh Kumar K.V 		goto out;
202da1def55SAneesh Kumar K.V 
2033f982b9bSDavid Hildenbrand 	nr_pages = folio_nr_pages(folio);
204da1def55SAneesh Kumar K.V 	if (!parent) {
205da1def55SAneesh Kumar K.V 		parent = root_h_cgroup;
206da1def55SAneesh Kumar K.V 		/* root has no limit */
20771f87beeSJohannes Weiner 		page_counter_charge(&parent->hugepage[idx], nr_pages);
208da1def55SAneesh Kumar K.V 	}
209da1def55SAneesh Kumar K.V 	counter = &h_cg->hugepage[idx];
21071f87beeSJohannes Weiner 	/* Take the pages off the local counter */
21171f87beeSJohannes Weiner 	page_counter_cancel(counter, nr_pages);
212da1def55SAneesh Kumar K.V 
213de656ed3SSidhartha Kumar 	set_hugetlb_cgroup(folio, parent);
214da1def55SAneesh Kumar K.V out:
215da1def55SAneesh Kumar K.V 	return;
216da1def55SAneesh Kumar K.V }
217da1def55SAneesh Kumar K.V 
218da1def55SAneesh Kumar K.V /*
219da1def55SAneesh Kumar K.V  * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
220da1def55SAneesh Kumar K.V  * the parent cgroup.
221da1def55SAneesh Kumar K.V  */
hugetlb_cgroup_css_offline(struct cgroup_subsys_state * css)222eb95419bSTejun Heo static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
2232bc64a20SAneesh Kumar K.V {
224eb95419bSTejun Heo 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
225da1def55SAneesh Kumar K.V 	struct hstate *h;
2263f982b9bSDavid Hildenbrand 	struct folio *folio;
227da1def55SAneesh Kumar K.V 
228da1def55SAneesh Kumar K.V 	do {
229da1def55SAneesh Kumar K.V 		for_each_hstate(h) {
230db71ef79SMike Kravetz 			spin_lock_irq(&hugetlb_lock);
2313f982b9bSDavid Hildenbrand 			list_for_each_entry(folio, &h->hugepage_activelist, lru)
2323f982b9bSDavid Hildenbrand 				hugetlb_cgroup_move_parent(hstate_index(h), h_cg, folio);
233da1def55SAneesh Kumar K.V 
234db71ef79SMike Kravetz 			spin_unlock_irq(&hugetlb_lock);
235da1def55SAneesh Kumar K.V 		}
236da1def55SAneesh Kumar K.V 		cond_resched();
2373f798518STejun Heo 	} while (hugetlb_cgroup_have_usage(h_cg));
2382bc64a20SAneesh Kumar K.V }
2392bc64a20SAneesh Kumar K.V 
hugetlb_event(struct hugetlb_cgroup * hugetlb,int idx,enum hugetlb_memory_event event)240faced7e0SGiuseppe Scrivano static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
241faced7e0SGiuseppe Scrivano 				 enum hugetlb_memory_event event)
242faced7e0SGiuseppe Scrivano {
243faced7e0SGiuseppe Scrivano 	atomic_long_inc(&hugetlb->events_local[idx][event]);
244faced7e0SGiuseppe Scrivano 	cgroup_file_notify(&hugetlb->events_local_file[idx]);
245faced7e0SGiuseppe Scrivano 
246faced7e0SGiuseppe Scrivano 	do {
247faced7e0SGiuseppe Scrivano 		atomic_long_inc(&hugetlb->events[idx][event]);
248faced7e0SGiuseppe Scrivano 		cgroup_file_notify(&hugetlb->events_file[idx]);
249faced7e0SGiuseppe Scrivano 	} while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
250faced7e0SGiuseppe Scrivano 		 !hugetlb_cgroup_is_root(hugetlb));
251faced7e0SGiuseppe Scrivano }
252faced7e0SGiuseppe Scrivano 
/*
 * Try to charge @nr_pages for hstate index @idx to the current task's
 * hugetlb cgroup, against the reservation counter when @rsvd is true and
 * the regular counter otherwise.
 *
 * *@ptr is always written: NULL when the controller is disabled, otherwise
 * the cgroup that was looked up, even when the charge failed.
 *
 * Returns 0 on success (or when the controller is disabled) and -ENOMEM
 * when the counter refuses the charge, in which case a HUGETLB_MAX event
 * is recorded.
 */
static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
					  struct hugetlb_cgroup **ptr,
					  bool rsvd)
{
	int ret = 0;
	struct page_counter *counter;
	struct hugetlb_cgroup *h_cg = NULL;

	if (hugetlb_cgroup_disabled())
		goto done;
again:
	rcu_read_lock();
	h_cg = hugetlb_cgroup_from_task(current);
	/* Retry the lookup until a css reference can be taken. */
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/*
	 * Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}
2896d76dcf4SAneesh Kumar K.V 
hugetlb_cgroup_charge_cgroup(int idx,unsigned long nr_pages,struct hugetlb_cgroup ** ptr)2901adc4d41SMina Almasry int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
2911adc4d41SMina Almasry 				 struct hugetlb_cgroup **ptr)
2921adc4d41SMina Almasry {
2931adc4d41SMina Almasry 	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, false);
2941adc4d41SMina Almasry }
2951adc4d41SMina Almasry 
hugetlb_cgroup_charge_cgroup_rsvd(int idx,unsigned long nr_pages,struct hugetlb_cgroup ** ptr)2961adc4d41SMina Almasry int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
2971adc4d41SMina Almasry 				      struct hugetlb_cgroup **ptr)
2981adc4d41SMina Almasry {
2991adc4d41SMina Almasry 	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, true);
3001adc4d41SMina Almasry }
3011adc4d41SMina Almasry 
30294ae8ba7SAneesh Kumar K.V /* Should be called with hugetlb_lock held */
__hugetlb_cgroup_commit_charge(int idx,unsigned long nr_pages,struct hugetlb_cgroup * h_cg,struct folio * folio,bool rsvd)3031adc4d41SMina Almasry static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
3046d76dcf4SAneesh Kumar K.V 					   struct hugetlb_cgroup *h_cg,
305541b7c7bSSidhartha Kumar 					   struct folio *folio, bool rsvd)
3066d76dcf4SAneesh Kumar K.V {
3076d76dcf4SAneesh Kumar K.V 	if (hugetlb_cgroup_disabled() || !h_cg)
3086d76dcf4SAneesh Kumar K.V 		return;
3093ccae1dcSPeter Xu 	lockdep_assert_held(&hugetlb_lock);
310541b7c7bSSidhartha Kumar 	__set_hugetlb_cgroup(folio, h_cg, rsvd);
311f4776199SMina Almasry 	if (!rsvd) {
312f4776199SMina Almasry 		unsigned long usage =
313541b7c7bSSidhartha Kumar 			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
314f4776199SMina Almasry 		/*
315f4776199SMina Almasry 		 * This write is not atomic due to fetching usage and writing
316f4776199SMina Almasry 		 * to it, but that's fine because we call this with
317f4776199SMina Almasry 		 * hugetlb_lock held anyway.
318f4776199SMina Almasry 		 */
319541b7c7bSSidhartha Kumar 		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
320f4776199SMina Almasry 			   usage + nr_pages);
321f4776199SMina Almasry 	}
3226d76dcf4SAneesh Kumar K.V }
3236d76dcf4SAneesh Kumar K.V 
hugetlb_cgroup_commit_charge(int idx,unsigned long nr_pages,struct hugetlb_cgroup * h_cg,struct folio * folio)3241adc4d41SMina Almasry void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
3251adc4d41SMina Almasry 				  struct hugetlb_cgroup *h_cg,
326ff7d853bSSidhartha Kumar 				  struct folio *folio)
3271adc4d41SMina Almasry {
328541b7c7bSSidhartha Kumar 	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
3291adc4d41SMina Almasry }
3301adc4d41SMina Almasry 
hugetlb_cgroup_commit_charge_rsvd(int idx,unsigned long nr_pages,struct hugetlb_cgroup * h_cg,struct folio * folio)3311adc4d41SMina Almasry void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
3321adc4d41SMina Almasry 				       struct hugetlb_cgroup *h_cg,
333ff7d853bSSidhartha Kumar 				       struct folio *folio)
3341adc4d41SMina Almasry {
335541b7c7bSSidhartha Kumar 	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
3361adc4d41SMina Almasry }
3371adc4d41SMina Almasry 
3386d76dcf4SAneesh Kumar K.V /*
3396d76dcf4SAneesh Kumar K.V  * Should be called with hugetlb_lock held
3406d76dcf4SAneesh Kumar K.V  */
__hugetlb_cgroup_uncharge_folio(int idx,unsigned long nr_pages,struct folio * folio,bool rsvd)341d4ab0316SSidhartha Kumar static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
342d4ab0316SSidhartha Kumar 					   struct folio *folio, bool rsvd)
3436d76dcf4SAneesh Kumar K.V {
3446d76dcf4SAneesh Kumar K.V 	struct hugetlb_cgroup *h_cg;
3456d76dcf4SAneesh Kumar K.V 
3466d76dcf4SAneesh Kumar K.V 	if (hugetlb_cgroup_disabled())
3476d76dcf4SAneesh Kumar K.V 		return;
3487ea8574eSMichal Hocko 	lockdep_assert_held(&hugetlb_lock);
349f074732dSSidhartha Kumar 	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
3506d76dcf4SAneesh Kumar K.V 	if (unlikely(!h_cg))
3516d76dcf4SAneesh Kumar K.V 		return;
352f074732dSSidhartha Kumar 	__set_hugetlb_cgroup(folio, NULL, rsvd);
3531adc4d41SMina Almasry 
3541adc4d41SMina Almasry 	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
3551adc4d41SMina Almasry 								   rsvd),
3561adc4d41SMina Almasry 			      nr_pages);
3571adc4d41SMina Almasry 
3581adc4d41SMina Almasry 	if (rsvd)
3591adc4d41SMina Almasry 		css_put(&h_cg->css);
360f4776199SMina Almasry 	else {
361f4776199SMina Almasry 		unsigned long usage =
362d4ab0316SSidhartha Kumar 			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
363f4776199SMina Almasry 		/*
364f4776199SMina Almasry 		 * This write is not atomic due to fetching usage and writing
365f4776199SMina Almasry 		 * to it, but that's fine because we call this with
366f4776199SMina Almasry 		 * hugetlb_lock held anyway.
367f4776199SMina Almasry 		 */
368d4ab0316SSidhartha Kumar 		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
369f4776199SMina Almasry 			   usage - nr_pages);
370f4776199SMina Almasry 	}
3716d76dcf4SAneesh Kumar K.V }
3726d76dcf4SAneesh Kumar K.V 
hugetlb_cgroup_uncharge_folio(int idx,unsigned long nr_pages,struct folio * folio)373d4ab0316SSidhartha Kumar void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
374d4ab0316SSidhartha Kumar 				  struct folio *folio)
3751adc4d41SMina Almasry {
376d4ab0316SSidhartha Kumar 	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false);
3771adc4d41SMina Almasry }
3781adc4d41SMina Almasry 
hugetlb_cgroup_uncharge_folio_rsvd(int idx,unsigned long nr_pages,struct folio * folio)379d4ab0316SSidhartha Kumar void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
380d4ab0316SSidhartha Kumar 				       struct folio *folio)
3811adc4d41SMina Almasry {
382d4ab0316SSidhartha Kumar 	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true);
3831adc4d41SMina Almasry }
3841adc4d41SMina Almasry 
/*
 * Uncharge @nr_pages for hstate index @idx directly from @h_cg, without
 * touching any folio.  Does nothing when the controller is disabled or
 * @h_cg is NULL.  A reservation uncharge also drops a css reference.
 */
static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
					     struct hugetlb_cgroup *h_cg,
					     bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;

	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
								   rsvd),
			      nr_pages);

	if (rsvd)
		css_put(&h_cg->css);
}
3991adc4d41SMina Almasry 
hugetlb_cgroup_uncharge_cgroup(int idx,unsigned long nr_pages,struct hugetlb_cgroup * h_cg)4001adc4d41SMina Almasry void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
4011adc4d41SMina Almasry 				    struct hugetlb_cgroup *h_cg)
4021adc4d41SMina Almasry {
4031adc4d41SMina Almasry 	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, false);
4041adc4d41SMina Almasry }
4051adc4d41SMina Almasry 
hugetlb_cgroup_uncharge_cgroup_rsvd(int idx,unsigned long nr_pages,struct hugetlb_cgroup * h_cg)4061adc4d41SMina Almasry void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
4071adc4d41SMina Almasry 					 struct hugetlb_cgroup *h_cg)
4081adc4d41SMina Almasry {
4091adc4d41SMina Almasry 	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, true);
4101adc4d41SMina Almasry }
4111adc4d41SMina Almasry 
hugetlb_cgroup_uncharge_counter(struct resv_map * resv,unsigned long start,unsigned long end)412e9fe92aeSMina Almasry void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
413e9fe92aeSMina Almasry 				     unsigned long end)
4141adc4d41SMina Almasry {
415e9fe92aeSMina Almasry 	if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
416e9fe92aeSMina Almasry 	    !resv->css)
4176d76dcf4SAneesh Kumar K.V 		return;
4181adc4d41SMina Almasry 
419e9fe92aeSMina Almasry 	page_counter_uncharge(resv->reservation_counter,
420e9fe92aeSMina Almasry 			      (end - start) * resv->pages_per_hpage);
421e9fe92aeSMina Almasry 	css_put(resv->css);
4226d76dcf4SAneesh Kumar K.V }
4236d76dcf4SAneesh Kumar K.V 
/*
 * Uncharge @nr_pages huge pages of file region @rg from the counter stored
 * in the region.
 *
 * Only acts when the region carries a counter, resv->pages_per_hpage is
 * non-zero and the resv_map itself holds no counter.  The amount uncharged
 * is @nr_pages * resv->pages_per_hpage.  @region_del tells whether the
 * whole region is going away, which is the only case in which the region's
 * css reference is dropped.
 */
void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
					 struct file_region *rg,
					 unsigned long nr_pages,
					 bool region_del)
{
	if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
		return;

	if (rg->reservation_counter && resv->pages_per_hpage &&
	    !resv->reservation_counter) {
		page_counter_uncharge(rg->reservation_counter,
				      nr_pages * resv->pages_per_hpage);
		/*
		 * Only do css_put(rg->css) when we delete the entire region
		 * because one file_region must hold exactly one css reference.
		 */
		if (region_del)
			css_put(rg->css);
	}
}
444075a61d0SMina Almasry 
/*
 * Attribute selectors decoded from cft->private with MEMFILE_ATTR().
 * Each regular attribute is followed by its reservation (RSVD) variant.
 */
enum {
	RES_USAGE,		/* current usage */
	RES_RSVD_USAGE,
	RES_LIMIT,		/* counter->max */
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,		/* counter->watermark */
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,		/* counter->failcnt */
	RES_RSVD_FAILCNT,
};
45571f87beeSJohannes Weiner 
hugetlb_cgroup_read_numa_stat(struct seq_file * seq,void * dummy)456f4776199SMina Almasry static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
457f4776199SMina Almasry {
458f4776199SMina Almasry 	int nid;
459f4776199SMina Almasry 	struct cftype *cft = seq_cft(seq);
460f4776199SMina Almasry 	int idx = MEMFILE_IDX(cft->private);
461520de595SXiu Jianfeng 	bool legacy = !cgroup_subsys_on_dfl(hugetlb_cgrp_subsys);
462f4776199SMina Almasry 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
463f4776199SMina Almasry 	struct cgroup_subsys_state *css;
464f4776199SMina Almasry 	unsigned long usage;
465f4776199SMina Almasry 
466f4776199SMina Almasry 	if (legacy) {
467f4776199SMina Almasry 		/* Add up usage across all nodes for the non-hierarchical total. */
468f4776199SMina Almasry 		usage = 0;
469f4776199SMina Almasry 		for_each_node_state(nid, N_MEMORY)
470f4776199SMina Almasry 			usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
471f4776199SMina Almasry 		seq_printf(seq, "total=%lu", usage * PAGE_SIZE);
472f4776199SMina Almasry 
473f4776199SMina Almasry 		/* Simply print the per-node usage for the non-hierarchical total. */
474f4776199SMina Almasry 		for_each_node_state(nid, N_MEMORY)
475f4776199SMina Almasry 			seq_printf(seq, " N%d=%lu", nid,
476f4776199SMina Almasry 				   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
477f4776199SMina Almasry 					   PAGE_SIZE);
478f4776199SMina Almasry 		seq_putc(seq, '\n');
479f4776199SMina Almasry 	}
480f4776199SMina Almasry 
481f4776199SMina Almasry 	/*
482f4776199SMina Almasry 	 * The hierarchical total is pretty much the value recorded by the
483f4776199SMina Almasry 	 * counter, so use that.
484f4776199SMina Almasry 	 */
485f4776199SMina Almasry 	seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
486f4776199SMina Almasry 		   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);
487f4776199SMina Almasry 
488f4776199SMina Almasry 	/*
489f4776199SMina Almasry 	 * For each node, transverse the css tree to obtain the hierarchical
490f4776199SMina Almasry 	 * node usage.
491f4776199SMina Almasry 	 */
492f4776199SMina Almasry 	for_each_node_state(nid, N_MEMORY) {
493f4776199SMina Almasry 		usage = 0;
494f4776199SMina Almasry 		rcu_read_lock();
495f4776199SMina Almasry 		css_for_each_descendant_pre(css, &h_cg->css) {
496f4776199SMina Almasry 			usage += READ_ONCE(hugetlb_cgroup_from_css(css)
497f4776199SMina Almasry 						   ->nodeinfo[nid]
498f4776199SMina Almasry 						   ->usage[idx]);
499f4776199SMina Almasry 		}
500f4776199SMina Almasry 		rcu_read_unlock();
501f4776199SMina Almasry 		seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
502f4776199SMina Almasry 	}
503f4776199SMina Almasry 
504f4776199SMina Almasry 	seq_putc(seq, '\n');
505f4776199SMina Almasry 
506f4776199SMina Almasry 	return 0;
507f4776199SMina Almasry }
508f4776199SMina Almasry 
hugetlb_cgroup_read_u64(struct cgroup_subsys_state * css,struct cftype * cft)509716f479dSTejun Heo static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
510716f479dSTejun Heo 				   struct cftype *cft)
511abb8206cSAneesh Kumar K.V {
51271f87beeSJohannes Weiner 	struct page_counter *counter;
513cdc2fcfeSMina Almasry 	struct page_counter *rsvd_counter;
514182446d0STejun Heo 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
515abb8206cSAneesh Kumar K.V 
51671f87beeSJohannes Weiner 	counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
517cdc2fcfeSMina Almasry 	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];
518abb8206cSAneesh Kumar K.V 
51971f87beeSJohannes Weiner 	switch (MEMFILE_ATTR(cft->private)) {
52071f87beeSJohannes Weiner 	case RES_USAGE:
52171f87beeSJohannes Weiner 		return (u64)page_counter_read(counter) * PAGE_SIZE;
522cdc2fcfeSMina Almasry 	case RES_RSVD_USAGE:
523cdc2fcfeSMina Almasry 		return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
52471f87beeSJohannes Weiner 	case RES_LIMIT:
525bbec2e15SRoman Gushchin 		return (u64)counter->max * PAGE_SIZE;
526cdc2fcfeSMina Almasry 	case RES_RSVD_LIMIT:
527cdc2fcfeSMina Almasry 		return (u64)rsvd_counter->max * PAGE_SIZE;
52871f87beeSJohannes Weiner 	case RES_MAX_USAGE:
52971f87beeSJohannes Weiner 		return (u64)counter->watermark * PAGE_SIZE;
530cdc2fcfeSMina Almasry 	case RES_RSVD_MAX_USAGE:
531cdc2fcfeSMina Almasry 		return (u64)rsvd_counter->watermark * PAGE_SIZE;
53271f87beeSJohannes Weiner 	case RES_FAILCNT:
53371f87beeSJohannes Weiner 		return counter->failcnt;
534cdc2fcfeSMina Almasry 	case RES_RSVD_FAILCNT:
535cdc2fcfeSMina Almasry 		return rsvd_counter->failcnt;
53671f87beeSJohannes Weiner 	default:
53771f87beeSJohannes Weiner 		BUG();
538abb8206cSAneesh Kumar K.V 	}
53971f87beeSJohannes Weiner }
54071f87beeSJohannes Weiner 
hugetlb_cgroup_read_u64_max(struct seq_file * seq,void * v)541faced7e0SGiuseppe Scrivano static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
542faced7e0SGiuseppe Scrivano {
543faced7e0SGiuseppe Scrivano 	int idx;
544faced7e0SGiuseppe Scrivano 	u64 val;
545faced7e0SGiuseppe Scrivano 	struct cftype *cft = seq_cft(seq);
546faced7e0SGiuseppe Scrivano 	unsigned long limit;
547faced7e0SGiuseppe Scrivano 	struct page_counter *counter;
548faced7e0SGiuseppe Scrivano 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
549faced7e0SGiuseppe Scrivano 
550faced7e0SGiuseppe Scrivano 	idx = MEMFILE_IDX(cft->private);
551faced7e0SGiuseppe Scrivano 	counter = &h_cg->hugepage[idx];
552faced7e0SGiuseppe Scrivano 
553faced7e0SGiuseppe Scrivano 	limit = round_down(PAGE_COUNTER_MAX,
5548938494cSMiaohe Lin 			   pages_per_huge_page(&hstates[idx]));
555faced7e0SGiuseppe Scrivano 
556faced7e0SGiuseppe Scrivano 	switch (MEMFILE_ATTR(cft->private)) {
557cdc2fcfeSMina Almasry 	case RES_RSVD_USAGE:
558cdc2fcfeSMina Almasry 		counter = &h_cg->rsvd_hugepage[idx];
559e4a9bc58SJoe Perches 		fallthrough;
560faced7e0SGiuseppe Scrivano 	case RES_USAGE:
561faced7e0SGiuseppe Scrivano 		val = (u64)page_counter_read(counter);
562faced7e0SGiuseppe Scrivano 		seq_printf(seq, "%llu\n", val * PAGE_SIZE);
563faced7e0SGiuseppe Scrivano 		break;
564cdc2fcfeSMina Almasry 	case RES_RSVD_LIMIT:
565cdc2fcfeSMina Almasry 		counter = &h_cg->rsvd_hugepage[idx];
566e4a9bc58SJoe Perches 		fallthrough;
567faced7e0SGiuseppe Scrivano 	case RES_LIMIT:
568faced7e0SGiuseppe Scrivano 		val = (u64)counter->max;
569faced7e0SGiuseppe Scrivano 		if (val == limit)
570faced7e0SGiuseppe Scrivano 			seq_puts(seq, "max\n");
571faced7e0SGiuseppe Scrivano 		else
572faced7e0SGiuseppe Scrivano 			seq_printf(seq, "%llu\n", val * PAGE_SIZE);
573faced7e0SGiuseppe Scrivano 		break;
574faced7e0SGiuseppe Scrivano 	default:
575faced7e0SGiuseppe Scrivano 		BUG();
576faced7e0SGiuseppe Scrivano 	}
577faced7e0SGiuseppe Scrivano 
578faced7e0SGiuseppe Scrivano 	return 0;
579faced7e0SGiuseppe Scrivano }
580faced7e0SGiuseppe Scrivano 
58171f87beeSJohannes Weiner static DEFINE_MUTEX(hugetlb_limit_mutex);
582abb8206cSAneesh Kumar K.V 
hugetlb_cgroup_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off,const char * max)583451af504STejun Heo static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
584faced7e0SGiuseppe Scrivano 				    char *buf, size_t nbytes, loff_t off,
585faced7e0SGiuseppe Scrivano 				    const char *max)
586abb8206cSAneesh Kumar K.V {
58771f87beeSJohannes Weiner 	int ret, idx;
58871f87beeSJohannes Weiner 	unsigned long nr_pages;
589451af504STejun Heo 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
590cdc2fcfeSMina Almasry 	bool rsvd = false;
591abb8206cSAneesh Kumar K.V 
59271f87beeSJohannes Weiner 	if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
59371f87beeSJohannes Weiner 		return -EINVAL;
594abb8206cSAneesh Kumar K.V 
59571f87beeSJohannes Weiner 	buf = strstrip(buf);
596faced7e0SGiuseppe Scrivano 	ret = page_counter_memparse(buf, max, &nr_pages);
597abb8206cSAneesh Kumar K.V 	if (ret)
59871f87beeSJohannes Weiner 		return ret;
59971f87beeSJohannes Weiner 
60071f87beeSJohannes Weiner 	idx = MEMFILE_IDX(of_cft(of)->private);
6018938494cSMiaohe Lin 	nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));
60271f87beeSJohannes Weiner 
60371f87beeSJohannes Weiner 	switch (MEMFILE_ATTR(of_cft(of)->private)) {
604cdc2fcfeSMina Almasry 	case RES_RSVD_LIMIT:
605cdc2fcfeSMina Almasry 		rsvd = true;
606e4a9bc58SJoe Perches 		fallthrough;
60771f87beeSJohannes Weiner 	case RES_LIMIT:
60871f87beeSJohannes Weiner 		mutex_lock(&hugetlb_limit_mutex);
609cdc2fcfeSMina Almasry 		ret = page_counter_set_max(
6101adc4d41SMina Almasry 			__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
611cdc2fcfeSMina Almasry 			nr_pages);
61271f87beeSJohannes Weiner 		mutex_unlock(&hugetlb_limit_mutex);
613abb8206cSAneesh Kumar K.V 		break;
614abb8206cSAneesh Kumar K.V 	default:
615abb8206cSAneesh Kumar K.V 		ret = -EINVAL;
616abb8206cSAneesh Kumar K.V 		break;
617abb8206cSAneesh Kumar K.V 	}
618451af504STejun Heo 	return ret ?: nbytes;
619abb8206cSAneesh Kumar K.V }
620abb8206cSAneesh Kumar K.V 
hugetlb_cgroup_write_legacy(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)621faced7e0SGiuseppe Scrivano static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
622faced7e0SGiuseppe Scrivano 					   char *buf, size_t nbytes, loff_t off)
623faced7e0SGiuseppe Scrivano {
624faced7e0SGiuseppe Scrivano 	return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
625faced7e0SGiuseppe Scrivano }
626faced7e0SGiuseppe Scrivano 
hugetlb_cgroup_write_dfl(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)627faced7e0SGiuseppe Scrivano static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
628faced7e0SGiuseppe Scrivano 					char *buf, size_t nbytes, loff_t off)
629faced7e0SGiuseppe Scrivano {
630faced7e0SGiuseppe Scrivano 	return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
631faced7e0SGiuseppe Scrivano }
632faced7e0SGiuseppe Scrivano 
hugetlb_cgroup_reset(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)6336770c64eSTejun Heo static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
6346770c64eSTejun Heo 				    char *buf, size_t nbytes, loff_t off)
635abb8206cSAneesh Kumar K.V {
63671f87beeSJohannes Weiner 	int ret = 0;
637cdc2fcfeSMina Almasry 	struct page_counter *counter, *rsvd_counter;
6386770c64eSTejun Heo 	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
639abb8206cSAneesh Kumar K.V 
64071f87beeSJohannes Weiner 	counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
641cdc2fcfeSMina Almasry 	rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];
642abb8206cSAneesh Kumar K.V 
64371f87beeSJohannes Weiner 	switch (MEMFILE_ATTR(of_cft(of)->private)) {
644abb8206cSAneesh Kumar K.V 	case RES_MAX_USAGE:
64571f87beeSJohannes Weiner 		page_counter_reset_watermark(counter);
646abb8206cSAneesh Kumar K.V 		break;
647cdc2fcfeSMina Almasry 	case RES_RSVD_MAX_USAGE:
648cdc2fcfeSMina Almasry 		page_counter_reset_watermark(rsvd_counter);
649cdc2fcfeSMina Almasry 		break;
650abb8206cSAneesh Kumar K.V 	case RES_FAILCNT:
65171f87beeSJohannes Weiner 		counter->failcnt = 0;
652abb8206cSAneesh Kumar K.V 		break;
653cdc2fcfeSMina Almasry 	case RES_RSVD_FAILCNT:
654cdc2fcfeSMina Almasry 		rsvd_counter->failcnt = 0;
655cdc2fcfeSMina Almasry 		break;
656abb8206cSAneesh Kumar K.V 	default:
657abb8206cSAneesh Kumar K.V 		ret = -EINVAL;
658abb8206cSAneesh Kumar K.V 		break;
659abb8206cSAneesh Kumar K.V 	}
6606770c64eSTejun Heo 	return ret ?: nbytes;
661abb8206cSAneesh Kumar K.V }
662abb8206cSAneesh Kumar K.V 
/*
 * Format a huge page size as a short string with a GB, MB or KB suffix.
 *
 * @buf:   destination buffer
 * @size:  size of @buf in bytes
 * @hsize: huge page size in bytes
 *
 * The largest unit not exceeding @hsize is used and the quotient is
 * truncated by integer division.  Returns @buf.
 */
static char *mem_fmt(char *buf, int size, unsigned long hsize)
{
	if (hsize >= SZ_1G) {
		snprintf(buf, size, "%luGB", hsize / SZ_1G);
		return buf;
	}

	if (hsize >= SZ_1M) {
		snprintf(buf, size, "%luMB", hsize / SZ_1M);
		return buf;
	}

	snprintf(buf, size, "%luKB", hsize / SZ_1K);
	return buf;
}
673abb8206cSAneesh Kumar K.V 
/*
 * Print the HUGETLB_MAX event counter for the file's huge page size index
 * as a single "max <count>" line.
 *
 * @seq:   seq_file being filled; also supplies the cgroup and the cftype
 * @local: true to report h_cg->events_local[], false for h_cg->events[]
 *
 * Always returns 0.
 */
static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	int idx;
	/* Unsigned so the argument type matches the %lu specifier below. */
	unsigned long max;
	struct cftype *cft = seq_cft(seq);
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));

	idx = MEMFILE_IDX(cft->private);

	if (local)
		max = atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX]);
	else
		max = atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}
692faced7e0SGiuseppe Scrivano 
hugetlb_events_show(struct seq_file * seq,void * v)693faced7e0SGiuseppe Scrivano static int hugetlb_events_show(struct seq_file *seq, void *v)
694faced7e0SGiuseppe Scrivano {
695faced7e0SGiuseppe Scrivano 	return __hugetlb_events_show(seq, false);
696faced7e0SGiuseppe Scrivano }
697faced7e0SGiuseppe Scrivano 
hugetlb_events_local_show(struct seq_file * seq,void * v)698faced7e0SGiuseppe Scrivano static int hugetlb_events_local_show(struct seq_file *seq, void *v)
699faced7e0SGiuseppe Scrivano {
700faced7e0SGiuseppe Scrivano 	return __hugetlb_events_show(seq, true);
701faced7e0SGiuseppe Scrivano }
702faced7e0SGiuseppe Scrivano 
70347179fe0SXiu Jianfeng static struct cftype hugetlb_dfl_tmpl[] = {
70447179fe0SXiu Jianfeng 	{
70547179fe0SXiu Jianfeng 		.name = "max",
70647179fe0SXiu Jianfeng 		.private = RES_LIMIT,
70747179fe0SXiu Jianfeng 		.seq_show = hugetlb_cgroup_read_u64_max,
70847179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_write_dfl,
70947179fe0SXiu Jianfeng 		.flags = CFTYPE_NOT_ON_ROOT,
71047179fe0SXiu Jianfeng 	},
71147179fe0SXiu Jianfeng 	{
71247179fe0SXiu Jianfeng 		.name = "rsvd.max",
71347179fe0SXiu Jianfeng 		.private = RES_RSVD_LIMIT,
71447179fe0SXiu Jianfeng 		.seq_show = hugetlb_cgroup_read_u64_max,
71547179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_write_dfl,
71647179fe0SXiu Jianfeng 		.flags = CFTYPE_NOT_ON_ROOT,
71747179fe0SXiu Jianfeng 	},
71847179fe0SXiu Jianfeng 	{
71947179fe0SXiu Jianfeng 		.name = "current",
72047179fe0SXiu Jianfeng 		.private = RES_USAGE,
72147179fe0SXiu Jianfeng 		.seq_show = hugetlb_cgroup_read_u64_max,
72247179fe0SXiu Jianfeng 		.flags = CFTYPE_NOT_ON_ROOT,
72347179fe0SXiu Jianfeng 	},
72447179fe0SXiu Jianfeng 	{
72547179fe0SXiu Jianfeng 		.name = "rsvd.current",
72647179fe0SXiu Jianfeng 		.private = RES_RSVD_USAGE,
72747179fe0SXiu Jianfeng 		.seq_show = hugetlb_cgroup_read_u64_max,
72847179fe0SXiu Jianfeng 		.flags = CFTYPE_NOT_ON_ROOT,
72947179fe0SXiu Jianfeng 	},
73047179fe0SXiu Jianfeng 	{
73147179fe0SXiu Jianfeng 		.name = "events",
73247179fe0SXiu Jianfeng 		.seq_show = hugetlb_events_show,
73347179fe0SXiu Jianfeng 		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]),
73447179fe0SXiu Jianfeng 		.flags = CFTYPE_NOT_ON_ROOT,
73547179fe0SXiu Jianfeng 	},
73647179fe0SXiu Jianfeng 	{
73747179fe0SXiu Jianfeng 		.name = "events.local",
73847179fe0SXiu Jianfeng 		.seq_show = hugetlb_events_local_show,
73947179fe0SXiu Jianfeng 		.file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]),
74047179fe0SXiu Jianfeng 		.flags = CFTYPE_NOT_ON_ROOT,
74147179fe0SXiu Jianfeng 	},
74247179fe0SXiu Jianfeng 	{
74347179fe0SXiu Jianfeng 		.name = "numa_stat",
74447179fe0SXiu Jianfeng 		.seq_show = hugetlb_cgroup_read_numa_stat,
74547179fe0SXiu Jianfeng 		.flags = CFTYPE_NOT_ON_ROOT,
74647179fe0SXiu Jianfeng 	},
74747179fe0SXiu Jianfeng 	/* don't need terminator here */
74847179fe0SXiu Jianfeng };
74947179fe0SXiu Jianfeng 
75047179fe0SXiu Jianfeng static struct cftype hugetlb_legacy_tmpl[] = {
75147179fe0SXiu Jianfeng 	{
75247179fe0SXiu Jianfeng 		.name = "limit_in_bytes",
75347179fe0SXiu Jianfeng 		.private = RES_LIMIT,
75447179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
75547179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_write_legacy,
75647179fe0SXiu Jianfeng 	},
75747179fe0SXiu Jianfeng 	{
75847179fe0SXiu Jianfeng 		.name = "rsvd.limit_in_bytes",
75947179fe0SXiu Jianfeng 		.private = RES_RSVD_LIMIT,
76047179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
76147179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_write_legacy,
76247179fe0SXiu Jianfeng 	},
76347179fe0SXiu Jianfeng 	{
76447179fe0SXiu Jianfeng 		.name = "usage_in_bytes",
76547179fe0SXiu Jianfeng 		.private = RES_USAGE,
76647179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
76747179fe0SXiu Jianfeng 	},
76847179fe0SXiu Jianfeng 	{
76947179fe0SXiu Jianfeng 		.name = "rsvd.usage_in_bytes",
77047179fe0SXiu Jianfeng 		.private = RES_RSVD_USAGE,
77147179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
77247179fe0SXiu Jianfeng 	},
77347179fe0SXiu Jianfeng 	{
77447179fe0SXiu Jianfeng 		.name = "max_usage_in_bytes",
77547179fe0SXiu Jianfeng 		.private = RES_MAX_USAGE,
77647179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_reset,
77747179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
77847179fe0SXiu Jianfeng 	},
77947179fe0SXiu Jianfeng 	{
78047179fe0SXiu Jianfeng 		.name = "rsvd.max_usage_in_bytes",
78147179fe0SXiu Jianfeng 		.private = RES_RSVD_MAX_USAGE,
78247179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_reset,
78347179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
78447179fe0SXiu Jianfeng 	},
78547179fe0SXiu Jianfeng 	{
78647179fe0SXiu Jianfeng 		.name = "failcnt",
78747179fe0SXiu Jianfeng 		.private = RES_FAILCNT,
78847179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_reset,
78947179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
79047179fe0SXiu Jianfeng 	},
79147179fe0SXiu Jianfeng 	{
79247179fe0SXiu Jianfeng 		.name = "rsvd.failcnt",
79347179fe0SXiu Jianfeng 		.private = RES_RSVD_FAILCNT,
79447179fe0SXiu Jianfeng 		.write = hugetlb_cgroup_reset,
79547179fe0SXiu Jianfeng 		.read_u64 = hugetlb_cgroup_read_u64,
79647179fe0SXiu Jianfeng 	},
79747179fe0SXiu Jianfeng 	{
79847179fe0SXiu Jianfeng 		.name = "numa_stat",
79947179fe0SXiu Jianfeng 		.seq_show = hugetlb_cgroup_read_numa_stat,
80047179fe0SXiu Jianfeng 	},
80147179fe0SXiu Jianfeng 	/* don't need terminator here */
80247179fe0SXiu Jianfeng };
80347179fe0SXiu Jianfeng 
80447179fe0SXiu Jianfeng static void __init
hugetlb_cgroup_cfttypes_init(struct hstate * h,struct cftype * cft,struct cftype * tmpl,int tmpl_size)80547179fe0SXiu Jianfeng hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft,
80647179fe0SXiu Jianfeng 			     struct cftype *tmpl, int tmpl_size)
80747179fe0SXiu Jianfeng {
80847179fe0SXiu Jianfeng 	char buf[32];
80947179fe0SXiu Jianfeng 	int i, idx = hstate_index(h);
81047179fe0SXiu Jianfeng 
81147179fe0SXiu Jianfeng 	/* format the size */
81247179fe0SXiu Jianfeng 	mem_fmt(buf, sizeof(buf), huge_page_size(h));
81347179fe0SXiu Jianfeng 
81447179fe0SXiu Jianfeng 	for (i = 0; i < tmpl_size; cft++, tmpl++, i++) {
81547179fe0SXiu Jianfeng 		*cft = *tmpl;
81647179fe0SXiu Jianfeng 		/* rebuild the name */
817ed60c8e2SXiu Jianfeng 		scnprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name);
81847179fe0SXiu Jianfeng 		/* rebuild the private */
81947179fe0SXiu Jianfeng 		cft->private = MEMFILE_PRIVATE(idx, tmpl->private);
82047179fe0SXiu Jianfeng 		/* rebuild the file_offset */
82147179fe0SXiu Jianfeng 		if (tmpl->file_offset) {
82247179fe0SXiu Jianfeng 			unsigned int offset = tmpl->file_offset;
82347179fe0SXiu Jianfeng 
82447179fe0SXiu Jianfeng 			cft->file_offset = MEMFILE_OFFSET0(offset) +
82547179fe0SXiu Jianfeng 					   MEMFILE_FIELD_SIZE(offset) * idx;
82647179fe0SXiu Jianfeng 		}
82747179fe0SXiu Jianfeng 
82847179fe0SXiu Jianfeng 		lockdep_register_key(&cft->lockdep_key);
82947179fe0SXiu Jianfeng 	}
83047179fe0SXiu Jianfeng }
83147179fe0SXiu Jianfeng 
__hugetlb_cgroup_file_dfl_init(struct hstate * h)832b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_dfl_init(struct hstate *h)
833abb8206cSAneesh Kumar K.V {
834b79d715cSXiu Jianfeng 	int idx = hstate_index(h);
835abb8206cSAneesh Kumar K.V 
83647179fe0SXiu Jianfeng 	hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE,
83747179fe0SXiu Jianfeng 				     hugetlb_dfl_tmpl, DFL_TMPL_SIZE);
838faced7e0SGiuseppe Scrivano }
839faced7e0SGiuseppe Scrivano 
__hugetlb_cgroup_file_legacy_init(struct hstate * h)840b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_legacy_init(struct hstate *h)
841faced7e0SGiuseppe Scrivano {
842b79d715cSXiu Jianfeng 	int idx = hstate_index(h);
843faced7e0SGiuseppe Scrivano 
84447179fe0SXiu Jianfeng 	hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE,
84547179fe0SXiu Jianfeng 				     hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE);
846faced7e0SGiuseppe Scrivano }
847faced7e0SGiuseppe Scrivano 
__hugetlb_cgroup_file_init(struct hstate * h)848b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_init(struct hstate *h)
849faced7e0SGiuseppe Scrivano {
850b79d715cSXiu Jianfeng 	__hugetlb_cgroup_file_dfl_init(h);
851b79d715cSXiu Jianfeng 	__hugetlb_cgroup_file_legacy_init(h);
8527179e7bfSJianguo Wu }
8537179e7bfSJianguo Wu 
__hugetlb_cgroup_file_pre_init(void)85447179fe0SXiu Jianfeng static void __init __hugetlb_cgroup_file_pre_init(void)
85547179fe0SXiu Jianfeng {
85647179fe0SXiu Jianfeng 	int cft_count;
85747179fe0SXiu Jianfeng 
85847179fe0SXiu Jianfeng 	cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */
859bf4afc53SLinus Torvalds 	dfl_files = kzalloc_objs(struct cftype, cft_count);
86047179fe0SXiu Jianfeng 	BUG_ON(!dfl_files);
86147179fe0SXiu Jianfeng 	cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */
862bf4afc53SLinus Torvalds 	legacy_files = kzalloc_objs(struct cftype, cft_count);
86347179fe0SXiu Jianfeng 	BUG_ON(!legacy_files);
86447179fe0SXiu Jianfeng }
86547179fe0SXiu Jianfeng 
__hugetlb_cgroup_file_post_init(void)866b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_post_init(void)
867b79d715cSXiu Jianfeng {
868b79d715cSXiu Jianfeng 	WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
869b79d715cSXiu Jianfeng 				       dfl_files));
870b79d715cSXiu Jianfeng 	WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
871b79d715cSXiu Jianfeng 					  legacy_files));
872b79d715cSXiu Jianfeng }
873b79d715cSXiu Jianfeng 
hugetlb_cgroup_file_init(void)8747179e7bfSJianguo Wu void __init hugetlb_cgroup_file_init(void)
8757179e7bfSJianguo Wu {
8767179e7bfSJianguo Wu 	struct hstate *h;
8777179e7bfSJianguo Wu 
87847179fe0SXiu Jianfeng 	__hugetlb_cgroup_file_pre_init();
87959838b25SFrank van der Linden 	for_each_hstate(h)
880b79d715cSXiu Jianfeng 		__hugetlb_cgroup_file_init(h);
881b79d715cSXiu Jianfeng 	__hugetlb_cgroup_file_post_init();
8827179e7bfSJianguo Wu }
883abb8206cSAneesh Kumar K.V 
88475754681SAneesh Kumar K.V /*
88575754681SAneesh Kumar K.V  * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
88675754681SAneesh Kumar K.V  * when we migrate hugepages
88775754681SAneesh Kumar K.V  */
hugetlb_cgroup_migrate(struct folio * old_folio,struct folio * new_folio)88829f39430SSidhartha Kumar void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
8898e6ac7faSAneesh Kumar K.V {
8908e6ac7faSAneesh Kumar K.V 	struct hugetlb_cgroup *h_cg;
8911adc4d41SMina Almasry 	struct hugetlb_cgroup *h_cg_rsvd;
89229f39430SSidhartha Kumar 	struct hstate *h = folio_hstate(old_folio);
8938e6ac7faSAneesh Kumar K.V 
8948e6ac7faSAneesh Kumar K.V 	if (hugetlb_cgroup_disabled())
8958e6ac7faSAneesh Kumar K.V 		return;
8968e6ac7faSAneesh Kumar K.V 
897db71ef79SMike Kravetz 	spin_lock_irq(&hugetlb_lock);
898f074732dSSidhartha Kumar 	h_cg = hugetlb_cgroup_from_folio(old_folio);
899f074732dSSidhartha Kumar 	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
900de656ed3SSidhartha Kumar 	set_hugetlb_cgroup(old_folio, NULL);
901de656ed3SSidhartha Kumar 	set_hugetlb_cgroup_rsvd(old_folio, NULL);
9028e6ac7faSAneesh Kumar K.V 
9038e6ac7faSAneesh Kumar K.V 	/* move the h_cg details to new cgroup */
904de656ed3SSidhartha Kumar 	set_hugetlb_cgroup(new_folio, h_cg);
905de656ed3SSidhartha Kumar 	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
90629f39430SSidhartha Kumar 	list_move(&new_folio->lru, &h->hugepage_activelist);
907db71ef79SMike Kravetz 	spin_unlock_irq(&hugetlb_lock);
9088e6ac7faSAneesh Kumar K.V }
9098e6ac7faSAneesh Kumar K.V 
910faced7e0SGiuseppe Scrivano static struct cftype hugetlb_files[] = {
911faced7e0SGiuseppe Scrivano 	{} /* terminate */
912faced7e0SGiuseppe Scrivano };
913faced7e0SGiuseppe Scrivano 
914073219e9STejun Heo struct cgroup_subsys hugetlb_cgrp_subsys = {
91592fb9748STejun Heo 	.css_alloc	= hugetlb_cgroup_css_alloc,
91692fb9748STejun Heo 	.css_offline	= hugetlb_cgroup_css_offline,
91792fb9748STejun Heo 	.css_free	= hugetlb_cgroup_css_free,
918faced7e0SGiuseppe Scrivano 	.dfl_cftypes	= hugetlb_files,
919faced7e0SGiuseppe Scrivano 	.legacy_cftypes	= hugetlb_files,
9202bc64a20SAneesh Kumar K.V };
921