1ef24e0aaSTim Bird // SPDX-License-Identifier: LGPL-2.1
22bc64a20SAneesh Kumar K.V /*
32bc64a20SAneesh Kumar K.V *
42bc64a20SAneesh Kumar K.V * Copyright IBM Corporation, 2012
52bc64a20SAneesh Kumar K.V * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
62bc64a20SAneesh Kumar K.V *
7faced7e0SGiuseppe Scrivano * Cgroup v2
8faced7e0SGiuseppe Scrivano * Copyright (C) 2019 Red Hat, Inc.
9faced7e0SGiuseppe Scrivano * Author: Giuseppe Scrivano <gscrivan@redhat.com>
10faced7e0SGiuseppe Scrivano *
112bc64a20SAneesh Kumar K.V */
122bc64a20SAneesh Kumar K.V
132bc64a20SAneesh Kumar K.V #include <linux/cgroup.h>
1471f87beeSJohannes Weiner #include <linux/page_counter.h>
152bc64a20SAneesh Kumar K.V #include <linux/slab.h>
162bc64a20SAneesh Kumar K.V #include <linux/hugetlb.h>
172bc64a20SAneesh Kumar K.V #include <linux/hugetlb_cgroup.h>
182bc64a20SAneesh Kumar K.V
/*
 * cftype->private packing: an index lives in the upper 16 bits and an
 * attribute selector in the lower 16 bits.
 */
#define MEMFILE_PRIVATE(idx, attr)	(((idx) << 16) | (attr))
#define MEMFILE_IDX(priv)		(((priv) >> 16) & 0xffff)
#define MEMFILE_ATTR(priv)		((priv) & 0xffff)

/*
 * Use t->m[0] to encode the offset: the member offset goes into the upper
 * 16 bits, the member size into the lower 16 bits.
 */
#define MEMFILE_OFFSET(t, m0)	((offsetof(t, m0) << 16) | sizeof_field(t, m0))
#define MEMFILE_OFFSET0(enc)		(((enc) >> 16) & 0xffff)
#define MEMFILE_FIELD_SIZE(enc)		((enc) & 0xffff)

/* Number of entries in the default/legacy cftype template arrays. */
#define DFL_TMPL_SIZE		ARRAY_SIZE(hugetlb_dfl_tmpl)
#define LEGACY_TMPL_SIZE	ARRAY_SIZE(hugetlb_legacy_tmpl)
3047179fe0SXiu Jianfeng
/* The hugetlb cgroup that was allocated without a parent (the root). */
static struct hugetlb_cgroup *root_h_cgroup __read_mostly;
/* cftype arrays for the default and the legacy hierarchy. */
static struct cftype *dfl_files;
static struct cftype *legacy_files;
342bc64a20SAneesh Kumar K.V
35cdc2fcfeSMina Almasry static inline struct page_counter *
__hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup * h_cg,int idx,bool rsvd)361adc4d41SMina Almasry __hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx,
37cdc2fcfeSMina Almasry bool rsvd)
38cdc2fcfeSMina Almasry {
39cdc2fcfeSMina Almasry if (rsvd)
40cdc2fcfeSMina Almasry return &h_cg->rsvd_hugepage[idx];
41cdc2fcfeSMina Almasry return &h_cg->hugepage[idx];
42cdc2fcfeSMina Almasry }
43cdc2fcfeSMina Almasry
/* Return the regular (non-reservation) page counter of @h_cg for @idx. */
static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup(struct hugetlb_cgroup *h_cg, int idx)
{
	const bool rsvd = false;

	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd);
}
491adc4d41SMina Almasry
/* Return the reservation page counter of @h_cg for @idx. */
static inline struct page_counter *
hugetlb_cgroup_counter_from_cgroup_rsvd(struct hugetlb_cgroup *h_cg, int idx)
{
	const bool rsvd = true;

	return __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd);
}
551adc4d41SMina Almasry
562bc64a20SAneesh Kumar K.V static inline
hugetlb_cgroup_from_css(struct cgroup_subsys_state * s)572bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_css(struct cgroup_subsys_state *s)
582bc64a20SAneesh Kumar K.V {
59a7c6d554STejun Heo return s ? container_of(s, struct hugetlb_cgroup, css) : NULL;
602bc64a20SAneesh Kumar K.V }
612bc64a20SAneesh Kumar K.V
622bc64a20SAneesh Kumar K.V static inline
hugetlb_cgroup_from_task(struct task_struct * task)632bc64a20SAneesh Kumar K.V struct hugetlb_cgroup *hugetlb_cgroup_from_task(struct task_struct *task)
642bc64a20SAneesh Kumar K.V {
65073219e9STejun Heo return hugetlb_cgroup_from_css(task_css(task, hugetlb_cgrp_id));
662bc64a20SAneesh Kumar K.V }
672bc64a20SAneesh Kumar K.V
hugetlb_cgroup_is_root(struct hugetlb_cgroup * h_cg)682bc64a20SAneesh Kumar K.V static inline bool hugetlb_cgroup_is_root(struct hugetlb_cgroup *h_cg)
692bc64a20SAneesh Kumar K.V {
702bc64a20SAneesh Kumar K.V return (h_cg == root_h_cgroup);
712bc64a20SAneesh Kumar K.V }
722bc64a20SAneesh Kumar K.V
733f798518STejun Heo static inline struct hugetlb_cgroup *
parent_hugetlb_cgroup(struct hugetlb_cgroup * h_cg)743f798518STejun Heo parent_hugetlb_cgroup(struct hugetlb_cgroup *h_cg)
752bc64a20SAneesh Kumar K.V {
765c9d535bSTejun Heo return hugetlb_cgroup_from_css(h_cg->css.parent);
772bc64a20SAneesh Kumar K.V }
782bc64a20SAneesh Kumar K.V
hugetlb_cgroup_have_usage(struct hugetlb_cgroup * h_cg)793f798518STejun Heo static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
802bc64a20SAneesh Kumar K.V {
81c37213c5SMiaohe Lin struct hstate *h;
822bc64a20SAneesh Kumar K.V
83c37213c5SMiaohe Lin for_each_hstate(h) {
841adc4d41SMina Almasry if (page_counter_read(
85c37213c5SMiaohe Lin hugetlb_cgroup_counter_from_cgroup(h_cg, hstate_index(h))))
862bc64a20SAneesh Kumar K.V return true;
872bc64a20SAneesh Kumar K.V }
882bc64a20SAneesh Kumar K.V return false;
892bc64a20SAneesh Kumar K.V }
902bc64a20SAneesh Kumar K.V
hugetlb_cgroup_init(struct hugetlb_cgroup * h_cgroup,struct hugetlb_cgroup * parent_h_cgroup)91297880f4SDavid Rientjes static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
92297880f4SDavid Rientjes struct hugetlb_cgroup *parent_h_cgroup)
93297880f4SDavid Rientjes {
94297880f4SDavid Rientjes int idx;
95297880f4SDavid Rientjes
96297880f4SDavid Rientjes for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) {
970e2759afSShakeel Butt struct page_counter *fault, *fault_parent = NULL;
980e2759afSShakeel Butt struct page_counter *rsvd, *rsvd_parent = NULL;
99297880f4SDavid Rientjes unsigned long limit;
100297880f4SDavid Rientjes
1011adc4d41SMina Almasry if (parent_h_cgroup) {
1021adc4d41SMina Almasry fault_parent = hugetlb_cgroup_counter_from_cgroup(
1031adc4d41SMina Almasry parent_h_cgroup, idx);
1041adc4d41SMina Almasry rsvd_parent = hugetlb_cgroup_counter_from_cgroup_rsvd(
1051adc4d41SMina Almasry parent_h_cgroup, idx);
1061adc4d41SMina Almasry }
1070e2759afSShakeel Butt fault = hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx);
1080e2759afSShakeel Butt rsvd = hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx);
1090e2759afSShakeel Butt
1100e2759afSShakeel Butt page_counter_init(fault, fault_parent, false);
1110e2759afSShakeel Butt page_counter_init(rsvd, rsvd_parent, false);
1120e2759afSShakeel Butt
1130e2759afSShakeel Butt if (!cgroup_subsys_on_dfl(hugetlb_cgrp_subsys)) {
1140e2759afSShakeel Butt fault->track_failcnt = true;
1150e2759afSShakeel Butt rsvd->track_failcnt = true;
1160e2759afSShakeel Butt }
117297880f4SDavid Rientjes
118297880f4SDavid Rientjes limit = round_down(PAGE_COUNTER_MAX,
1198938494cSMiaohe Lin pages_per_huge_page(&hstates[idx]));
1201adc4d41SMina Almasry
1210e2759afSShakeel Butt VM_BUG_ON(page_counter_set_max(fault, limit));
1220e2759afSShakeel Butt VM_BUG_ON(page_counter_set_max(rsvd, limit));
123297880f4SDavid Rientjes }
124297880f4SDavid Rientjes }
125297880f4SDavid Rientjes
hugetlb_cgroup_free(struct hugetlb_cgroup * h_cgroup)126f4776199SMina Almasry static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
127f4776199SMina Almasry {
128f4776199SMina Almasry int node;
129f4776199SMina Almasry
130f4776199SMina Almasry for_each_node(node)
131f4776199SMina Almasry kfree(h_cgroup->nodeinfo[node]);
132f4776199SMina Almasry kfree(h_cgroup);
133f4776199SMina Almasry }
134f4776199SMina Almasry
135eb95419bSTejun Heo static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state * parent_css)136eb95419bSTejun Heo hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
1372bc64a20SAneesh Kumar K.V {
138eb95419bSTejun Heo struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
139eb95419bSTejun Heo struct hugetlb_cgroup *h_cgroup;
140f4776199SMina Almasry int node;
1412bc64a20SAneesh Kumar K.V
142*323bbfcfSLinus Torvalds h_cgroup = kzalloc_flex(*h_cgroup, nodeinfo, nr_node_ids);
143f4776199SMina Almasry
1442bc64a20SAneesh Kumar K.V if (!h_cgroup)
1452bc64a20SAneesh Kumar K.V return ERR_PTR(-ENOMEM);
1462bc64a20SAneesh Kumar K.V
147297880f4SDavid Rientjes if (!parent_h_cgroup)
1482bc64a20SAneesh Kumar K.V root_h_cgroup = h_cgroup;
149297880f4SDavid Rientjes
150f4776199SMina Almasry /*
151f4776199SMina Almasry * TODO: this routine can waste much memory for nodes which will
152f4776199SMina Almasry * never be onlined. It's better to use memory hotplug callback
153f4776199SMina Almasry * function.
154f4776199SMina Almasry */
155f4776199SMina Almasry for_each_node(node) {
15699249387SMiaohe Lin /* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
157f4776199SMina Almasry int node_to_alloc =
15899249387SMiaohe Lin node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
159f4776199SMina Almasry h_cgroup->nodeinfo[node] =
160f4776199SMina Almasry kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
161f4776199SMina Almasry GFP_KERNEL, node_to_alloc);
162f4776199SMina Almasry if (!h_cgroup->nodeinfo[node])
163f4776199SMina Almasry goto fail_alloc_nodeinfo;
164f4776199SMina Almasry }
165f4776199SMina Almasry
166297880f4SDavid Rientjes hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
1672bc64a20SAneesh Kumar K.V return &h_cgroup->css;
168f4776199SMina Almasry
169f4776199SMina Almasry fail_alloc_nodeinfo:
170f4776199SMina Almasry hugetlb_cgroup_free(h_cgroup);
171f4776199SMina Almasry return ERR_PTR(-ENOMEM);
1722bc64a20SAneesh Kumar K.V }
1732bc64a20SAneesh Kumar K.V
/* Free the hugetlb cgroup that embeds @css. */
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cgroup = hugetlb_cgroup_from_css(css);

	hugetlb_cgroup_free(h_cgroup);
}
1782bc64a20SAneesh Kumar K.V
179da1def55SAneesh Kumar K.V /*
180da1def55SAneesh Kumar K.V * Should be called with hugetlb_lock held.
181da1def55SAneesh Kumar K.V * Since we are holding hugetlb_lock, pages cannot get moved from
182da1def55SAneesh Kumar K.V * active list or uncharged from the cgroup, So no need to get
183da1def55SAneesh Kumar K.V * page reference and test for page active here. This function
184da1def55SAneesh Kumar K.V * cannot fail.
185da1def55SAneesh Kumar K.V */
hugetlb_cgroup_move_parent(int idx,struct hugetlb_cgroup * h_cg,struct folio * folio)1863f798518STejun Heo static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
1873f982b9bSDavid Hildenbrand struct folio *folio)
188da1def55SAneesh Kumar K.V {
18971f87beeSJohannes Weiner unsigned int nr_pages;
19071f87beeSJohannes Weiner struct page_counter *counter;
1913f982b9bSDavid Hildenbrand struct hugetlb_cgroup *hcg;
1923f798518STejun Heo struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
193da1def55SAneesh Kumar K.V
1943f982b9bSDavid Hildenbrand hcg = hugetlb_cgroup_from_folio(folio);
195da1def55SAneesh Kumar K.V /*
196da1def55SAneesh Kumar K.V * We can have pages in active list without any cgroup
197da1def55SAneesh Kumar K.V * ie, hugepage with less than 3 pages. We can safely
198da1def55SAneesh Kumar K.V * ignore those pages.
199da1def55SAneesh Kumar K.V */
2003f982b9bSDavid Hildenbrand if (!hcg || hcg != h_cg)
201da1def55SAneesh Kumar K.V goto out;
202da1def55SAneesh Kumar K.V
2033f982b9bSDavid Hildenbrand nr_pages = folio_nr_pages(folio);
204da1def55SAneesh Kumar K.V if (!parent) {
205da1def55SAneesh Kumar K.V parent = root_h_cgroup;
206da1def55SAneesh Kumar K.V /* root has no limit */
20771f87beeSJohannes Weiner page_counter_charge(&parent->hugepage[idx], nr_pages);
208da1def55SAneesh Kumar K.V }
209da1def55SAneesh Kumar K.V counter = &h_cg->hugepage[idx];
21071f87beeSJohannes Weiner /* Take the pages off the local counter */
21171f87beeSJohannes Weiner page_counter_cancel(counter, nr_pages);
212da1def55SAneesh Kumar K.V
213de656ed3SSidhartha Kumar set_hugetlb_cgroup(folio, parent);
214da1def55SAneesh Kumar K.V out:
215da1def55SAneesh Kumar K.V return;
216da1def55SAneesh Kumar K.V }
217da1def55SAneesh Kumar K.V
218da1def55SAneesh Kumar K.V /*
219da1def55SAneesh Kumar K.V * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
220da1def55SAneesh Kumar K.V * the parent cgroup.
221da1def55SAneesh Kumar K.V */
hugetlb_cgroup_css_offline(struct cgroup_subsys_state * css)222eb95419bSTejun Heo static void hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
2232bc64a20SAneesh Kumar K.V {
224eb95419bSTejun Heo struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
225da1def55SAneesh Kumar K.V struct hstate *h;
2263f982b9bSDavid Hildenbrand struct folio *folio;
227da1def55SAneesh Kumar K.V
228da1def55SAneesh Kumar K.V do {
229da1def55SAneesh Kumar K.V for_each_hstate(h) {
230db71ef79SMike Kravetz spin_lock_irq(&hugetlb_lock);
2313f982b9bSDavid Hildenbrand list_for_each_entry(folio, &h->hugepage_activelist, lru)
2323f982b9bSDavid Hildenbrand hugetlb_cgroup_move_parent(hstate_index(h), h_cg, folio);
233da1def55SAneesh Kumar K.V
234db71ef79SMike Kravetz spin_unlock_irq(&hugetlb_lock);
235da1def55SAneesh Kumar K.V }
236da1def55SAneesh Kumar K.V cond_resched();
2373f798518STejun Heo } while (hugetlb_cgroup_have_usage(h_cg));
2382bc64a20SAneesh Kumar K.V }
2392bc64a20SAneesh Kumar K.V
hugetlb_event(struct hugetlb_cgroup * hugetlb,int idx,enum hugetlb_memory_event event)240faced7e0SGiuseppe Scrivano static inline void hugetlb_event(struct hugetlb_cgroup *hugetlb, int idx,
241faced7e0SGiuseppe Scrivano enum hugetlb_memory_event event)
242faced7e0SGiuseppe Scrivano {
243faced7e0SGiuseppe Scrivano atomic_long_inc(&hugetlb->events_local[idx][event]);
244faced7e0SGiuseppe Scrivano cgroup_file_notify(&hugetlb->events_local_file[idx]);
245faced7e0SGiuseppe Scrivano
246faced7e0SGiuseppe Scrivano do {
247faced7e0SGiuseppe Scrivano atomic_long_inc(&hugetlb->events[idx][event]);
248faced7e0SGiuseppe Scrivano cgroup_file_notify(&hugetlb->events_file[idx]);
249faced7e0SGiuseppe Scrivano } while ((hugetlb = parent_hugetlb_cgroup(hugetlb)) &&
250faced7e0SGiuseppe Scrivano !hugetlb_cgroup_is_root(hugetlb));
251faced7e0SGiuseppe Scrivano }
252faced7e0SGiuseppe Scrivano
__hugetlb_cgroup_charge_cgroup(int idx,unsigned long nr_pages,struct hugetlb_cgroup ** ptr,bool rsvd)2531adc4d41SMina Almasry static int __hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
2541adc4d41SMina Almasry struct hugetlb_cgroup **ptr,
2551adc4d41SMina Almasry bool rsvd)
2566d76dcf4SAneesh Kumar K.V {
2576d76dcf4SAneesh Kumar K.V int ret = 0;
25871f87beeSJohannes Weiner struct page_counter *counter;
2596d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup *h_cg = NULL;
2606d76dcf4SAneesh Kumar K.V
2616d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled())
2626d76dcf4SAneesh Kumar K.V goto done;
2636d76dcf4SAneesh Kumar K.V again:
2646d76dcf4SAneesh Kumar K.V rcu_read_lock();
2656d76dcf4SAneesh Kumar K.V h_cg = hugetlb_cgroup_from_task(current);
2660362f326SRoman Gushchin if (!css_tryget(&h_cg->css)) {
2676d76dcf4SAneesh Kumar K.V rcu_read_unlock();
2686d76dcf4SAneesh Kumar K.V goto again;
2696d76dcf4SAneesh Kumar K.V }
2706d76dcf4SAneesh Kumar K.V rcu_read_unlock();
2716d76dcf4SAneesh Kumar K.V
2721adc4d41SMina Almasry if (!page_counter_try_charge(
2731adc4d41SMina Almasry __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
2741adc4d41SMina Almasry nr_pages, &counter)) {
2756071ca52SJohannes Weiner ret = -ENOMEM;
276726b7bbeSMina Almasry hugetlb_event(h_cg, idx, HUGETLB_MAX);
2771adc4d41SMina Almasry css_put(&h_cg->css);
2781adc4d41SMina Almasry goto done;
279faced7e0SGiuseppe Scrivano }
2801adc4d41SMina Almasry /* Reservations take a reference to the css because they do not get
2811adc4d41SMina Almasry * reparented.
2821adc4d41SMina Almasry */
2831adc4d41SMina Almasry if (!rsvd)
2846d76dcf4SAneesh Kumar K.V css_put(&h_cg->css);
2856d76dcf4SAneesh Kumar K.V done:
2866d76dcf4SAneesh Kumar K.V *ptr = h_cg;
2876d76dcf4SAneesh Kumar K.V return ret;
2886d76dcf4SAneesh Kumar K.V }
2896d76dcf4SAneesh Kumar K.V
/* Charge @nr_pages to the regular counter; see __hugetlb_cgroup_charge_cgroup(). */
int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
				 struct hugetlb_cgroup **ptr)
{
	const bool rsvd = false;

	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, rsvd);
}
2951adc4d41SMina Almasry
/* Charge @nr_pages to the reservation counter; see __hugetlb_cgroup_charge_cgroup(). */
int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
				      struct hugetlb_cgroup **ptr)
{
	const bool rsvd = true;

	return __hugetlb_cgroup_charge_cgroup(idx, nr_pages, ptr, rsvd);
}
3011adc4d41SMina Almasry
30294ae8ba7SAneesh Kumar K.V /* Should be called with hugetlb_lock held */
__hugetlb_cgroup_commit_charge(int idx,unsigned long nr_pages,struct hugetlb_cgroup * h_cg,struct folio * folio,bool rsvd)3031adc4d41SMina Almasry static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
3046d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup *h_cg,
305541b7c7bSSidhartha Kumar struct folio *folio, bool rsvd)
3066d76dcf4SAneesh Kumar K.V {
3076d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled() || !h_cg)
3086d76dcf4SAneesh Kumar K.V return;
3093ccae1dcSPeter Xu lockdep_assert_held(&hugetlb_lock);
310541b7c7bSSidhartha Kumar __set_hugetlb_cgroup(folio, h_cg, rsvd);
311f4776199SMina Almasry if (!rsvd) {
312f4776199SMina Almasry unsigned long usage =
313541b7c7bSSidhartha Kumar h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
314f4776199SMina Almasry /*
315f4776199SMina Almasry * This write is not atomic due to fetching usage and writing
316f4776199SMina Almasry * to it, but that's fine because we call this with
317f4776199SMina Almasry * hugetlb_lock held anyway.
318f4776199SMina Almasry */
319541b7c7bSSidhartha Kumar WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
320f4776199SMina Almasry usage + nr_pages);
321f4776199SMina Almasry }
3226d76dcf4SAneesh Kumar K.V }
3236d76dcf4SAneesh Kumar K.V
/* Commit a regular charge; see __hugetlb_cgroup_commit_charge(). */
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
				  struct hugetlb_cgroup *h_cg,
				  struct folio *folio)
{
	const bool rsvd = false;

	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, rsvd);
}
3301adc4d41SMina Almasry
/* Commit a reservation charge; see __hugetlb_cgroup_commit_charge(). */
void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
				       struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	const bool rsvd = true;

	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, rsvd);
}
3371adc4d41SMina Almasry
3386d76dcf4SAneesh Kumar K.V /*
3396d76dcf4SAneesh Kumar K.V * Should be called with hugetlb_lock held
3406d76dcf4SAneesh Kumar K.V */
__hugetlb_cgroup_uncharge_folio(int idx,unsigned long nr_pages,struct folio * folio,bool rsvd)341d4ab0316SSidhartha Kumar static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
342d4ab0316SSidhartha Kumar struct folio *folio, bool rsvd)
3436d76dcf4SAneesh Kumar K.V {
3446d76dcf4SAneesh Kumar K.V struct hugetlb_cgroup *h_cg;
3456d76dcf4SAneesh Kumar K.V
3466d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled())
3476d76dcf4SAneesh Kumar K.V return;
3487ea8574eSMichal Hocko lockdep_assert_held(&hugetlb_lock);
349f074732dSSidhartha Kumar h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
3506d76dcf4SAneesh Kumar K.V if (unlikely(!h_cg))
3516d76dcf4SAneesh Kumar K.V return;
352f074732dSSidhartha Kumar __set_hugetlb_cgroup(folio, NULL, rsvd);
3531adc4d41SMina Almasry
3541adc4d41SMina Almasry page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
3551adc4d41SMina Almasry rsvd),
3561adc4d41SMina Almasry nr_pages);
3571adc4d41SMina Almasry
3581adc4d41SMina Almasry if (rsvd)
3591adc4d41SMina Almasry css_put(&h_cg->css);
360f4776199SMina Almasry else {
361f4776199SMina Almasry unsigned long usage =
362d4ab0316SSidhartha Kumar h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
363f4776199SMina Almasry /*
364f4776199SMina Almasry * This write is not atomic due to fetching usage and writing
365f4776199SMina Almasry * to it, but that's fine because we call this with
366f4776199SMina Almasry * hugetlb_lock held anyway.
367f4776199SMina Almasry */
368d4ab0316SSidhartha Kumar WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
369f4776199SMina Almasry usage - nr_pages);
370f4776199SMina Almasry }
3716d76dcf4SAneesh Kumar K.V }
3726d76dcf4SAneesh Kumar K.V
/* Undo a regular charge of @folio; see __hugetlb_cgroup_uncharge_folio(). */
void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
				   struct folio *folio)
{
	const bool rsvd = false;

	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, rsvd);
}
3781adc4d41SMina Almasry
/* Undo a reservation charge of @folio; see __hugetlb_cgroup_uncharge_folio(). */
void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
					struct folio *folio)
{
	const bool rsvd = true;

	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, rsvd);
}
3841adc4d41SMina Almasry
__hugetlb_cgroup_uncharge_cgroup(int idx,unsigned long nr_pages,struct hugetlb_cgroup * h_cg,bool rsvd)3851adc4d41SMina Almasry static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
3861adc4d41SMina Almasry struct hugetlb_cgroup *h_cg,
3871adc4d41SMina Almasry bool rsvd)
3886d76dcf4SAneesh Kumar K.V {
3896d76dcf4SAneesh Kumar K.V if (hugetlb_cgroup_disabled() || !h_cg)
3906d76dcf4SAneesh Kumar K.V return;
3916d76dcf4SAneesh Kumar K.V
3921adc4d41SMina Almasry page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
3931adc4d41SMina Almasry rsvd),
3941adc4d41SMina Almasry nr_pages);
3951adc4d41SMina Almasry
3961adc4d41SMina Almasry if (rsvd)
3971adc4d41SMina Almasry css_put(&h_cg->css);
3981adc4d41SMina Almasry }
3991adc4d41SMina Almasry
/* Uncharge the regular counter; see __hugetlb_cgroup_uncharge_cgroup(). */
void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
				    struct hugetlb_cgroup *h_cg)
{
	const bool rsvd = false;

	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, rsvd);
}
4051adc4d41SMina Almasry
/* Uncharge the reservation counter; see __hugetlb_cgroup_uncharge_cgroup(). */
void hugetlb_cgroup_uncharge_cgroup_rsvd(int idx, unsigned long nr_pages,
					 struct hugetlb_cgroup *h_cg)
{
	const bool rsvd = true;

	__hugetlb_cgroup_uncharge_cgroup(idx, nr_pages, h_cg, rsvd);
}
4111adc4d41SMina Almasry
hugetlb_cgroup_uncharge_counter(struct resv_map * resv,unsigned long start,unsigned long end)412e9fe92aeSMina Almasry void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
413e9fe92aeSMina Almasry unsigned long end)
4141adc4d41SMina Almasry {
415e9fe92aeSMina Almasry if (hugetlb_cgroup_disabled() || !resv || !resv->reservation_counter ||
416e9fe92aeSMina Almasry !resv->css)
4176d76dcf4SAneesh Kumar K.V return;
4181adc4d41SMina Almasry
419e9fe92aeSMina Almasry page_counter_uncharge(resv->reservation_counter,
420e9fe92aeSMina Almasry (end - start) * resv->pages_per_hpage);
421e9fe92aeSMina Almasry css_put(resv->css);
4226d76dcf4SAneesh Kumar K.V }
4236d76dcf4SAneesh Kumar K.V
hugetlb_cgroup_uncharge_file_region(struct resv_map * resv,struct file_region * rg,unsigned long nr_pages,bool region_del)424075a61d0SMina Almasry void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
425075a61d0SMina Almasry struct file_region *rg,
426d85aecf2SMiaohe Lin unsigned long nr_pages,
427d85aecf2SMiaohe Lin bool region_del)
428075a61d0SMina Almasry {
429075a61d0SMina Almasry if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
430075a61d0SMina Almasry return;
431075a61d0SMina Almasry
432862f7f65SMiaohe Lin if (rg->reservation_counter && resv->pages_per_hpage &&
433075a61d0SMina Almasry !resv->reservation_counter) {
434075a61d0SMina Almasry page_counter_uncharge(rg->reservation_counter,
435075a61d0SMina Almasry nr_pages * resv->pages_per_hpage);
436d85aecf2SMiaohe Lin /*
437d85aecf2SMiaohe Lin * Only do css_put(rg->css) when we delete the entire region
438d85aecf2SMiaohe Lin * because one file_region must hold exactly one css reference.
439d85aecf2SMiaohe Lin */
440d85aecf2SMiaohe Lin if (region_del)
441075a61d0SMina Almasry css_put(rg->css);
442075a61d0SMina Almasry }
443075a61d0SMina Almasry }
444075a61d0SMina Almasry
/*
 * Attribute selectors extracted from cftype->private with MEMFILE_ATTR().
 * The plain values select the hugepage[] counter, the RSVD values the
 * rsvd_hugepage[] counter.
 */
enum {
	RES_USAGE,		/* current counter value */
	RES_RSVD_USAGE,
	RES_LIMIT,		/* counter->max */
	RES_RSVD_LIMIT,
	RES_MAX_USAGE,		/* counter->watermark */
	RES_RSVD_MAX_USAGE,
	RES_FAILCNT,		/* counter->failcnt */
	RES_RSVD_FAILCNT,
};
45571f87beeSJohannes Weiner
hugetlb_cgroup_read_numa_stat(struct seq_file * seq,void * dummy)456f4776199SMina Almasry static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
457f4776199SMina Almasry {
458f4776199SMina Almasry int nid;
459f4776199SMina Almasry struct cftype *cft = seq_cft(seq);
460f4776199SMina Almasry int idx = MEMFILE_IDX(cft->private);
461520de595SXiu Jianfeng bool legacy = !cgroup_subsys_on_dfl(hugetlb_cgrp_subsys);
462f4776199SMina Almasry struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
463f4776199SMina Almasry struct cgroup_subsys_state *css;
464f4776199SMina Almasry unsigned long usage;
465f4776199SMina Almasry
466f4776199SMina Almasry if (legacy) {
467f4776199SMina Almasry /* Add up usage across all nodes for the non-hierarchical total. */
468f4776199SMina Almasry usage = 0;
469f4776199SMina Almasry for_each_node_state(nid, N_MEMORY)
470f4776199SMina Almasry usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
471f4776199SMina Almasry seq_printf(seq, "total=%lu", usage * PAGE_SIZE);
472f4776199SMina Almasry
473f4776199SMina Almasry /* Simply print the per-node usage for the non-hierarchical total. */
474f4776199SMina Almasry for_each_node_state(nid, N_MEMORY)
475f4776199SMina Almasry seq_printf(seq, " N%d=%lu", nid,
476f4776199SMina Almasry READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
477f4776199SMina Almasry PAGE_SIZE);
478f4776199SMina Almasry seq_putc(seq, '\n');
479f4776199SMina Almasry }
480f4776199SMina Almasry
481f4776199SMina Almasry /*
482f4776199SMina Almasry * The hierarchical total is pretty much the value recorded by the
483f4776199SMina Almasry * counter, so use that.
484f4776199SMina Almasry */
485f4776199SMina Almasry seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
486f4776199SMina Almasry page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);
487f4776199SMina Almasry
488f4776199SMina Almasry /*
489f4776199SMina Almasry * For each node, transverse the css tree to obtain the hierarchical
490f4776199SMina Almasry * node usage.
491f4776199SMina Almasry */
492f4776199SMina Almasry for_each_node_state(nid, N_MEMORY) {
493f4776199SMina Almasry usage = 0;
494f4776199SMina Almasry rcu_read_lock();
495f4776199SMina Almasry css_for_each_descendant_pre(css, &h_cg->css) {
496f4776199SMina Almasry usage += READ_ONCE(hugetlb_cgroup_from_css(css)
497f4776199SMina Almasry ->nodeinfo[nid]
498f4776199SMina Almasry ->usage[idx]);
499f4776199SMina Almasry }
500f4776199SMina Almasry rcu_read_unlock();
501f4776199SMina Almasry seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
502f4776199SMina Almasry }
503f4776199SMina Almasry
504f4776199SMina Almasry seq_putc(seq, '\n');
505f4776199SMina Almasry
506f4776199SMina Almasry return 0;
507f4776199SMina Almasry }
508f4776199SMina Almasry
hugetlb_cgroup_read_u64(struct cgroup_subsys_state * css,struct cftype * cft)509716f479dSTejun Heo static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
510716f479dSTejun Heo struct cftype *cft)
511abb8206cSAneesh Kumar K.V {
51271f87beeSJohannes Weiner struct page_counter *counter;
513cdc2fcfeSMina Almasry struct page_counter *rsvd_counter;
514182446d0STejun Heo struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
515abb8206cSAneesh Kumar K.V
51671f87beeSJohannes Weiner counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
517cdc2fcfeSMina Almasry rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(cft->private)];
518abb8206cSAneesh Kumar K.V
51971f87beeSJohannes Weiner switch (MEMFILE_ATTR(cft->private)) {
52071f87beeSJohannes Weiner case RES_USAGE:
52171f87beeSJohannes Weiner return (u64)page_counter_read(counter) * PAGE_SIZE;
522cdc2fcfeSMina Almasry case RES_RSVD_USAGE:
523cdc2fcfeSMina Almasry return (u64)page_counter_read(rsvd_counter) * PAGE_SIZE;
52471f87beeSJohannes Weiner case RES_LIMIT:
525bbec2e15SRoman Gushchin return (u64)counter->max * PAGE_SIZE;
526cdc2fcfeSMina Almasry case RES_RSVD_LIMIT:
527cdc2fcfeSMina Almasry return (u64)rsvd_counter->max * PAGE_SIZE;
52871f87beeSJohannes Weiner case RES_MAX_USAGE:
52971f87beeSJohannes Weiner return (u64)counter->watermark * PAGE_SIZE;
530cdc2fcfeSMina Almasry case RES_RSVD_MAX_USAGE:
531cdc2fcfeSMina Almasry return (u64)rsvd_counter->watermark * PAGE_SIZE;
53271f87beeSJohannes Weiner case RES_FAILCNT:
53371f87beeSJohannes Weiner return counter->failcnt;
534cdc2fcfeSMina Almasry case RES_RSVD_FAILCNT:
535cdc2fcfeSMina Almasry return rsvd_counter->failcnt;
53671f87beeSJohannes Weiner default:
53771f87beeSJohannes Weiner BUG();
538abb8206cSAneesh Kumar K.V }
53971f87beeSJohannes Weiner }
54071f87beeSJohannes Weiner
hugetlb_cgroup_read_u64_max(struct seq_file * seq,void * v)541faced7e0SGiuseppe Scrivano static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
542faced7e0SGiuseppe Scrivano {
543faced7e0SGiuseppe Scrivano int idx;
544faced7e0SGiuseppe Scrivano u64 val;
545faced7e0SGiuseppe Scrivano struct cftype *cft = seq_cft(seq);
546faced7e0SGiuseppe Scrivano unsigned long limit;
547faced7e0SGiuseppe Scrivano struct page_counter *counter;
548faced7e0SGiuseppe Scrivano struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
549faced7e0SGiuseppe Scrivano
550faced7e0SGiuseppe Scrivano idx = MEMFILE_IDX(cft->private);
551faced7e0SGiuseppe Scrivano counter = &h_cg->hugepage[idx];
552faced7e0SGiuseppe Scrivano
553faced7e0SGiuseppe Scrivano limit = round_down(PAGE_COUNTER_MAX,
5548938494cSMiaohe Lin pages_per_huge_page(&hstates[idx]));
555faced7e0SGiuseppe Scrivano
556faced7e0SGiuseppe Scrivano switch (MEMFILE_ATTR(cft->private)) {
557cdc2fcfeSMina Almasry case RES_RSVD_USAGE:
558cdc2fcfeSMina Almasry counter = &h_cg->rsvd_hugepage[idx];
559e4a9bc58SJoe Perches fallthrough;
560faced7e0SGiuseppe Scrivano case RES_USAGE:
561faced7e0SGiuseppe Scrivano val = (u64)page_counter_read(counter);
562faced7e0SGiuseppe Scrivano seq_printf(seq, "%llu\n", val * PAGE_SIZE);
563faced7e0SGiuseppe Scrivano break;
564cdc2fcfeSMina Almasry case RES_RSVD_LIMIT:
565cdc2fcfeSMina Almasry counter = &h_cg->rsvd_hugepage[idx];
566e4a9bc58SJoe Perches fallthrough;
567faced7e0SGiuseppe Scrivano case RES_LIMIT:
568faced7e0SGiuseppe Scrivano val = (u64)counter->max;
569faced7e0SGiuseppe Scrivano if (val == limit)
570faced7e0SGiuseppe Scrivano seq_puts(seq, "max\n");
571faced7e0SGiuseppe Scrivano else
572faced7e0SGiuseppe Scrivano seq_printf(seq, "%llu\n", val * PAGE_SIZE);
573faced7e0SGiuseppe Scrivano break;
574faced7e0SGiuseppe Scrivano default:
575faced7e0SGiuseppe Scrivano BUG();
576faced7e0SGiuseppe Scrivano }
577faced7e0SGiuseppe Scrivano
578faced7e0SGiuseppe Scrivano return 0;
579faced7e0SGiuseppe Scrivano }
580faced7e0SGiuseppe Scrivano
58171f87beeSJohannes Weiner static DEFINE_MUTEX(hugetlb_limit_mutex);
582abb8206cSAneesh Kumar K.V
hugetlb_cgroup_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off,const char * max)583451af504STejun Heo static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
584faced7e0SGiuseppe Scrivano char *buf, size_t nbytes, loff_t off,
585faced7e0SGiuseppe Scrivano const char *max)
586abb8206cSAneesh Kumar K.V {
58771f87beeSJohannes Weiner int ret, idx;
58871f87beeSJohannes Weiner unsigned long nr_pages;
589451af504STejun Heo struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
590cdc2fcfeSMina Almasry bool rsvd = false;
591abb8206cSAneesh Kumar K.V
59271f87beeSJohannes Weiner if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
59371f87beeSJohannes Weiner return -EINVAL;
594abb8206cSAneesh Kumar K.V
59571f87beeSJohannes Weiner buf = strstrip(buf);
596faced7e0SGiuseppe Scrivano ret = page_counter_memparse(buf, max, &nr_pages);
597abb8206cSAneesh Kumar K.V if (ret)
59871f87beeSJohannes Weiner return ret;
59971f87beeSJohannes Weiner
60071f87beeSJohannes Weiner idx = MEMFILE_IDX(of_cft(of)->private);
6018938494cSMiaohe Lin nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));
60271f87beeSJohannes Weiner
60371f87beeSJohannes Weiner switch (MEMFILE_ATTR(of_cft(of)->private)) {
604cdc2fcfeSMina Almasry case RES_RSVD_LIMIT:
605cdc2fcfeSMina Almasry rsvd = true;
606e4a9bc58SJoe Perches fallthrough;
60771f87beeSJohannes Weiner case RES_LIMIT:
60871f87beeSJohannes Weiner mutex_lock(&hugetlb_limit_mutex);
609cdc2fcfeSMina Almasry ret = page_counter_set_max(
6101adc4d41SMina Almasry __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
611cdc2fcfeSMina Almasry nr_pages);
61271f87beeSJohannes Weiner mutex_unlock(&hugetlb_limit_mutex);
613abb8206cSAneesh Kumar K.V break;
614abb8206cSAneesh Kumar K.V default:
615abb8206cSAneesh Kumar K.V ret = -EINVAL;
616abb8206cSAneesh Kumar K.V break;
617abb8206cSAneesh Kumar K.V }
618451af504STejun Heo return ret ?: nbytes;
619abb8206cSAneesh Kumar K.V }
620abb8206cSAneesh Kumar K.V
hugetlb_cgroup_write_legacy(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)621faced7e0SGiuseppe Scrivano static ssize_t hugetlb_cgroup_write_legacy(struct kernfs_open_file *of,
622faced7e0SGiuseppe Scrivano char *buf, size_t nbytes, loff_t off)
623faced7e0SGiuseppe Scrivano {
624faced7e0SGiuseppe Scrivano return hugetlb_cgroup_write(of, buf, nbytes, off, "-1");
625faced7e0SGiuseppe Scrivano }
626faced7e0SGiuseppe Scrivano
hugetlb_cgroup_write_dfl(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)627faced7e0SGiuseppe Scrivano static ssize_t hugetlb_cgroup_write_dfl(struct kernfs_open_file *of,
628faced7e0SGiuseppe Scrivano char *buf, size_t nbytes, loff_t off)
629faced7e0SGiuseppe Scrivano {
630faced7e0SGiuseppe Scrivano return hugetlb_cgroup_write(of, buf, nbytes, off, "max");
631faced7e0SGiuseppe Scrivano }
632faced7e0SGiuseppe Scrivano
hugetlb_cgroup_reset(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)6336770c64eSTejun Heo static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
6346770c64eSTejun Heo char *buf, size_t nbytes, loff_t off)
635abb8206cSAneesh Kumar K.V {
63671f87beeSJohannes Weiner int ret = 0;
637cdc2fcfeSMina Almasry struct page_counter *counter, *rsvd_counter;
6386770c64eSTejun Heo struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
639abb8206cSAneesh Kumar K.V
64071f87beeSJohannes Weiner counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
641cdc2fcfeSMina Almasry rsvd_counter = &h_cg->rsvd_hugepage[MEMFILE_IDX(of_cft(of)->private)];
642abb8206cSAneesh Kumar K.V
64371f87beeSJohannes Weiner switch (MEMFILE_ATTR(of_cft(of)->private)) {
644abb8206cSAneesh Kumar K.V case RES_MAX_USAGE:
64571f87beeSJohannes Weiner page_counter_reset_watermark(counter);
646abb8206cSAneesh Kumar K.V break;
647cdc2fcfeSMina Almasry case RES_RSVD_MAX_USAGE:
648cdc2fcfeSMina Almasry page_counter_reset_watermark(rsvd_counter);
649cdc2fcfeSMina Almasry break;
650abb8206cSAneesh Kumar K.V case RES_FAILCNT:
65171f87beeSJohannes Weiner counter->failcnt = 0;
652abb8206cSAneesh Kumar K.V break;
653cdc2fcfeSMina Almasry case RES_RSVD_FAILCNT:
654cdc2fcfeSMina Almasry rsvd_counter->failcnt = 0;
655cdc2fcfeSMina Almasry break;
656abb8206cSAneesh Kumar K.V default:
657abb8206cSAneesh Kumar K.V ret = -EINVAL;
658abb8206cSAneesh Kumar K.V break;
659abb8206cSAneesh Kumar K.V }
6606770c64eSTejun Heo return ret ?: nbytes;
661abb8206cSAneesh Kumar K.V }
662abb8206cSAneesh Kumar K.V
mem_fmt(char * buf,int size,unsigned long hsize)663abb8206cSAneesh Kumar K.V static char *mem_fmt(char *buf, int size, unsigned long hsize)
664abb8206cSAneesh Kumar K.V {
665abfb09e2SMiaohe Lin if (hsize >= SZ_1G)
666abfb09e2SMiaohe Lin snprintf(buf, size, "%luGB", hsize / SZ_1G);
667abfb09e2SMiaohe Lin else if (hsize >= SZ_1M)
668abfb09e2SMiaohe Lin snprintf(buf, size, "%luMB", hsize / SZ_1M);
669abb8206cSAneesh Kumar K.V else
670abfb09e2SMiaohe Lin snprintf(buf, size, "%luKB", hsize / SZ_1K);
671abb8206cSAneesh Kumar K.V return buf;
672abb8206cSAneesh Kumar K.V }
673abb8206cSAneesh Kumar K.V
/*
 * Print the HUGETLB_MAX event count of the hstate selected by the file's
 * private word as "max <count>".  @local chooses the events_local[] array
 * over events[].  Always returns 0.
 */
static int __hugetlb_events_show(struct seq_file *seq, bool local)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
	int idx = MEMFILE_IDX(seq_cft(seq)->private);
	long max;

	max = local ? atomic_long_read(&h_cg->events_local[idx][HUGETLB_MAX])
		    : atomic_long_read(&h_cg->events[idx][HUGETLB_MAX]);

	seq_printf(seq, "max %lu\n", max);

	return 0;
}
692faced7e0SGiuseppe Scrivano
hugetlb_events_show(struct seq_file * seq,void * v)693faced7e0SGiuseppe Scrivano static int hugetlb_events_show(struct seq_file *seq, void *v)
694faced7e0SGiuseppe Scrivano {
695faced7e0SGiuseppe Scrivano return __hugetlb_events_show(seq, false);
696faced7e0SGiuseppe Scrivano }
697faced7e0SGiuseppe Scrivano
hugetlb_events_local_show(struct seq_file * seq,void * v)698faced7e0SGiuseppe Scrivano static int hugetlb_events_local_show(struct seq_file *seq, void *v)
699faced7e0SGiuseppe Scrivano {
700faced7e0SGiuseppe Scrivano return __hugetlb_events_show(seq, true);
701faced7e0SGiuseppe Scrivano }
702faced7e0SGiuseppe Scrivano
70347179fe0SXiu Jianfeng static struct cftype hugetlb_dfl_tmpl[] = {
70447179fe0SXiu Jianfeng {
70547179fe0SXiu Jianfeng .name = "max",
70647179fe0SXiu Jianfeng .private = RES_LIMIT,
70747179fe0SXiu Jianfeng .seq_show = hugetlb_cgroup_read_u64_max,
70847179fe0SXiu Jianfeng .write = hugetlb_cgroup_write_dfl,
70947179fe0SXiu Jianfeng .flags = CFTYPE_NOT_ON_ROOT,
71047179fe0SXiu Jianfeng },
71147179fe0SXiu Jianfeng {
71247179fe0SXiu Jianfeng .name = "rsvd.max",
71347179fe0SXiu Jianfeng .private = RES_RSVD_LIMIT,
71447179fe0SXiu Jianfeng .seq_show = hugetlb_cgroup_read_u64_max,
71547179fe0SXiu Jianfeng .write = hugetlb_cgroup_write_dfl,
71647179fe0SXiu Jianfeng .flags = CFTYPE_NOT_ON_ROOT,
71747179fe0SXiu Jianfeng },
71847179fe0SXiu Jianfeng {
71947179fe0SXiu Jianfeng .name = "current",
72047179fe0SXiu Jianfeng .private = RES_USAGE,
72147179fe0SXiu Jianfeng .seq_show = hugetlb_cgroup_read_u64_max,
72247179fe0SXiu Jianfeng .flags = CFTYPE_NOT_ON_ROOT,
72347179fe0SXiu Jianfeng },
72447179fe0SXiu Jianfeng {
72547179fe0SXiu Jianfeng .name = "rsvd.current",
72647179fe0SXiu Jianfeng .private = RES_RSVD_USAGE,
72747179fe0SXiu Jianfeng .seq_show = hugetlb_cgroup_read_u64_max,
72847179fe0SXiu Jianfeng .flags = CFTYPE_NOT_ON_ROOT,
72947179fe0SXiu Jianfeng },
73047179fe0SXiu Jianfeng {
73147179fe0SXiu Jianfeng .name = "events",
73247179fe0SXiu Jianfeng .seq_show = hugetlb_events_show,
73347179fe0SXiu Jianfeng .file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_file[0]),
73447179fe0SXiu Jianfeng .flags = CFTYPE_NOT_ON_ROOT,
73547179fe0SXiu Jianfeng },
73647179fe0SXiu Jianfeng {
73747179fe0SXiu Jianfeng .name = "events.local",
73847179fe0SXiu Jianfeng .seq_show = hugetlb_events_local_show,
73947179fe0SXiu Jianfeng .file_offset = MEMFILE_OFFSET(struct hugetlb_cgroup, events_local_file[0]),
74047179fe0SXiu Jianfeng .flags = CFTYPE_NOT_ON_ROOT,
74147179fe0SXiu Jianfeng },
74247179fe0SXiu Jianfeng {
74347179fe0SXiu Jianfeng .name = "numa_stat",
74447179fe0SXiu Jianfeng .seq_show = hugetlb_cgroup_read_numa_stat,
74547179fe0SXiu Jianfeng .flags = CFTYPE_NOT_ON_ROOT,
74647179fe0SXiu Jianfeng },
74747179fe0SXiu Jianfeng /* don't need terminator here */
74847179fe0SXiu Jianfeng };
74947179fe0SXiu Jianfeng
75047179fe0SXiu Jianfeng static struct cftype hugetlb_legacy_tmpl[] = {
75147179fe0SXiu Jianfeng {
75247179fe0SXiu Jianfeng .name = "limit_in_bytes",
75347179fe0SXiu Jianfeng .private = RES_LIMIT,
75447179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
75547179fe0SXiu Jianfeng .write = hugetlb_cgroup_write_legacy,
75647179fe0SXiu Jianfeng },
75747179fe0SXiu Jianfeng {
75847179fe0SXiu Jianfeng .name = "rsvd.limit_in_bytes",
75947179fe0SXiu Jianfeng .private = RES_RSVD_LIMIT,
76047179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
76147179fe0SXiu Jianfeng .write = hugetlb_cgroup_write_legacy,
76247179fe0SXiu Jianfeng },
76347179fe0SXiu Jianfeng {
76447179fe0SXiu Jianfeng .name = "usage_in_bytes",
76547179fe0SXiu Jianfeng .private = RES_USAGE,
76647179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
76747179fe0SXiu Jianfeng },
76847179fe0SXiu Jianfeng {
76947179fe0SXiu Jianfeng .name = "rsvd.usage_in_bytes",
77047179fe0SXiu Jianfeng .private = RES_RSVD_USAGE,
77147179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
77247179fe0SXiu Jianfeng },
77347179fe0SXiu Jianfeng {
77447179fe0SXiu Jianfeng .name = "max_usage_in_bytes",
77547179fe0SXiu Jianfeng .private = RES_MAX_USAGE,
77647179fe0SXiu Jianfeng .write = hugetlb_cgroup_reset,
77747179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
77847179fe0SXiu Jianfeng },
77947179fe0SXiu Jianfeng {
78047179fe0SXiu Jianfeng .name = "rsvd.max_usage_in_bytes",
78147179fe0SXiu Jianfeng .private = RES_RSVD_MAX_USAGE,
78247179fe0SXiu Jianfeng .write = hugetlb_cgroup_reset,
78347179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
78447179fe0SXiu Jianfeng },
78547179fe0SXiu Jianfeng {
78647179fe0SXiu Jianfeng .name = "failcnt",
78747179fe0SXiu Jianfeng .private = RES_FAILCNT,
78847179fe0SXiu Jianfeng .write = hugetlb_cgroup_reset,
78947179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
79047179fe0SXiu Jianfeng },
79147179fe0SXiu Jianfeng {
79247179fe0SXiu Jianfeng .name = "rsvd.failcnt",
79347179fe0SXiu Jianfeng .private = RES_RSVD_FAILCNT,
79447179fe0SXiu Jianfeng .write = hugetlb_cgroup_reset,
79547179fe0SXiu Jianfeng .read_u64 = hugetlb_cgroup_read_u64,
79647179fe0SXiu Jianfeng },
79747179fe0SXiu Jianfeng {
79847179fe0SXiu Jianfeng .name = "numa_stat",
79947179fe0SXiu Jianfeng .seq_show = hugetlb_cgroup_read_numa_stat,
80047179fe0SXiu Jianfeng },
80147179fe0SXiu Jianfeng /* don't need terminator here */
80247179fe0SXiu Jianfeng };
80347179fe0SXiu Jianfeng
80447179fe0SXiu Jianfeng static void __init
hugetlb_cgroup_cfttypes_init(struct hstate * h,struct cftype * cft,struct cftype * tmpl,int tmpl_size)80547179fe0SXiu Jianfeng hugetlb_cgroup_cfttypes_init(struct hstate *h, struct cftype *cft,
80647179fe0SXiu Jianfeng struct cftype *tmpl, int tmpl_size)
80747179fe0SXiu Jianfeng {
80847179fe0SXiu Jianfeng char buf[32];
80947179fe0SXiu Jianfeng int i, idx = hstate_index(h);
81047179fe0SXiu Jianfeng
81147179fe0SXiu Jianfeng /* format the size */
81247179fe0SXiu Jianfeng mem_fmt(buf, sizeof(buf), huge_page_size(h));
81347179fe0SXiu Jianfeng
81447179fe0SXiu Jianfeng for (i = 0; i < tmpl_size; cft++, tmpl++, i++) {
81547179fe0SXiu Jianfeng *cft = *tmpl;
81647179fe0SXiu Jianfeng /* rebuild the name */
817ed60c8e2SXiu Jianfeng scnprintf(cft->name, MAX_CFTYPE_NAME, "%s.%s", buf, tmpl->name);
81847179fe0SXiu Jianfeng /* rebuild the private */
81947179fe0SXiu Jianfeng cft->private = MEMFILE_PRIVATE(idx, tmpl->private);
82047179fe0SXiu Jianfeng /* rebuild the file_offset */
82147179fe0SXiu Jianfeng if (tmpl->file_offset) {
82247179fe0SXiu Jianfeng unsigned int offset = tmpl->file_offset;
82347179fe0SXiu Jianfeng
82447179fe0SXiu Jianfeng cft->file_offset = MEMFILE_OFFSET0(offset) +
82547179fe0SXiu Jianfeng MEMFILE_FIELD_SIZE(offset) * idx;
82647179fe0SXiu Jianfeng }
82747179fe0SXiu Jianfeng
82847179fe0SXiu Jianfeng lockdep_register_key(&cft->lockdep_key);
82947179fe0SXiu Jianfeng }
83047179fe0SXiu Jianfeng }
83147179fe0SXiu Jianfeng
__hugetlb_cgroup_file_dfl_init(struct hstate * h)832b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_dfl_init(struct hstate *h)
833abb8206cSAneesh Kumar K.V {
834b79d715cSXiu Jianfeng int idx = hstate_index(h);
835abb8206cSAneesh Kumar K.V
83647179fe0SXiu Jianfeng hugetlb_cgroup_cfttypes_init(h, dfl_files + idx * DFL_TMPL_SIZE,
83747179fe0SXiu Jianfeng hugetlb_dfl_tmpl, DFL_TMPL_SIZE);
838faced7e0SGiuseppe Scrivano }
839faced7e0SGiuseppe Scrivano
__hugetlb_cgroup_file_legacy_init(struct hstate * h)840b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_legacy_init(struct hstate *h)
841faced7e0SGiuseppe Scrivano {
842b79d715cSXiu Jianfeng int idx = hstate_index(h);
843faced7e0SGiuseppe Scrivano
84447179fe0SXiu Jianfeng hugetlb_cgroup_cfttypes_init(h, legacy_files + idx * LEGACY_TMPL_SIZE,
84547179fe0SXiu Jianfeng hugetlb_legacy_tmpl, LEGACY_TMPL_SIZE);
846faced7e0SGiuseppe Scrivano }
847faced7e0SGiuseppe Scrivano
__hugetlb_cgroup_file_init(struct hstate * h)848b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_init(struct hstate *h)
849faced7e0SGiuseppe Scrivano {
850b79d715cSXiu Jianfeng __hugetlb_cgroup_file_dfl_init(h);
851b79d715cSXiu Jianfeng __hugetlb_cgroup_file_legacy_init(h);
8527179e7bfSJianguo Wu }
8537179e7bfSJianguo Wu
__hugetlb_cgroup_file_pre_init(void)85447179fe0SXiu Jianfeng static void __init __hugetlb_cgroup_file_pre_init(void)
85547179fe0SXiu Jianfeng {
85647179fe0SXiu Jianfeng int cft_count;
85747179fe0SXiu Jianfeng
85847179fe0SXiu Jianfeng cft_count = hugetlb_max_hstate * DFL_TMPL_SIZE + 1; /* add terminator */
859bf4afc53SLinus Torvalds dfl_files = kzalloc_objs(struct cftype, cft_count);
86047179fe0SXiu Jianfeng BUG_ON(!dfl_files);
86147179fe0SXiu Jianfeng cft_count = hugetlb_max_hstate * LEGACY_TMPL_SIZE + 1; /* add terminator */
862bf4afc53SLinus Torvalds legacy_files = kzalloc_objs(struct cftype, cft_count);
86347179fe0SXiu Jianfeng BUG_ON(!legacy_files);
86447179fe0SXiu Jianfeng }
86547179fe0SXiu Jianfeng
__hugetlb_cgroup_file_post_init(void)866b79d715cSXiu Jianfeng static void __init __hugetlb_cgroup_file_post_init(void)
867b79d715cSXiu Jianfeng {
868b79d715cSXiu Jianfeng WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
869b79d715cSXiu Jianfeng dfl_files));
870b79d715cSXiu Jianfeng WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,
871b79d715cSXiu Jianfeng legacy_files));
872b79d715cSXiu Jianfeng }
873b79d715cSXiu Jianfeng
hugetlb_cgroup_file_init(void)8747179e7bfSJianguo Wu void __init hugetlb_cgroup_file_init(void)
8757179e7bfSJianguo Wu {
8767179e7bfSJianguo Wu struct hstate *h;
8777179e7bfSJianguo Wu
87847179fe0SXiu Jianfeng __hugetlb_cgroup_file_pre_init();
87959838b25SFrank van der Linden for_each_hstate(h)
880b79d715cSXiu Jianfeng __hugetlb_cgroup_file_init(h);
881b79d715cSXiu Jianfeng __hugetlb_cgroup_file_post_init();
8827179e7bfSJianguo Wu }
883abb8206cSAneesh Kumar K.V
88475754681SAneesh Kumar K.V /*
88575754681SAneesh Kumar K.V * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
88675754681SAneesh Kumar K.V * when we migrate hugepages
88775754681SAneesh Kumar K.V */
hugetlb_cgroup_migrate(struct folio * old_folio,struct folio * new_folio)88829f39430SSidhartha Kumar void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
8898e6ac7faSAneesh Kumar K.V {
8908e6ac7faSAneesh Kumar K.V struct hugetlb_cgroup *h_cg;
8911adc4d41SMina Almasry struct hugetlb_cgroup *h_cg_rsvd;
89229f39430SSidhartha Kumar struct hstate *h = folio_hstate(old_folio);
8938e6ac7faSAneesh Kumar K.V
8948e6ac7faSAneesh Kumar K.V if (hugetlb_cgroup_disabled())
8958e6ac7faSAneesh Kumar K.V return;
8968e6ac7faSAneesh Kumar K.V
897db71ef79SMike Kravetz spin_lock_irq(&hugetlb_lock);
898f074732dSSidhartha Kumar h_cg = hugetlb_cgroup_from_folio(old_folio);
899f074732dSSidhartha Kumar h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
900de656ed3SSidhartha Kumar set_hugetlb_cgroup(old_folio, NULL);
901de656ed3SSidhartha Kumar set_hugetlb_cgroup_rsvd(old_folio, NULL);
9028e6ac7faSAneesh Kumar K.V
9038e6ac7faSAneesh Kumar K.V /* move the h_cg details to new cgroup */
904de656ed3SSidhartha Kumar set_hugetlb_cgroup(new_folio, h_cg);
905de656ed3SSidhartha Kumar set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
90629f39430SSidhartha Kumar list_move(&new_folio->lru, &h->hugepage_activelist);
907db71ef79SMike Kravetz spin_unlock_irq(&hugetlb_lock);
9088e6ac7faSAneesh Kumar K.V }
9098e6ac7faSAneesh Kumar K.V
910faced7e0SGiuseppe Scrivano static struct cftype hugetlb_files[] = {
911faced7e0SGiuseppe Scrivano {} /* terminate */
912faced7e0SGiuseppe Scrivano };
913faced7e0SGiuseppe Scrivano
914073219e9STejun Heo struct cgroup_subsys hugetlb_cgrp_subsys = {
91592fb9748STejun Heo .css_alloc = hugetlb_cgroup_css_alloc,
91692fb9748STejun Heo .css_offline = hugetlb_cgroup_css_offline,
91792fb9748STejun Heo .css_free = hugetlb_cgroup_css_free,
918faced7e0SGiuseppe Scrivano .dfl_cftypes = hugetlb_files,
919faced7e0SGiuseppe Scrivano .legacy_cftypes = hugetlb_files,
9202bc64a20SAneesh Kumar K.V };
921