@@ -902,7 +902,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* to keep reservation accounting consistent.
*/
hugetlb_set_vma_policy(&pseudo_vma, inode, index);
- folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
+ folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0, true);
hugetlb_drop_vma_policy(&pseudo_vma);
if (IS_ERR(folio)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -713,7 +713,8 @@ struct huge_bootmem_page {
int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
- unsigned long addr, int avoid_reserve);
+ unsigned long addr, int avoid_reserve,
+ bool restore_reserve_on_memcg_failure);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask);
struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
@@ -1016,7 +1017,8 @@ static inline int isolate_or_dissolve_huge_page(struct page *page,
static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
unsigned long addr,
- int avoid_reserve)
+ int avoid_reserve,
+ bool restore_reserve_on_memcg_failure)
{
return NULL;
}
@@ -677,6 +677,8 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
return __mem_cgroup_charge(folio, mm, gfp);
}
+int mem_cgroup_hugetlb_charge_folio(struct folio *folio, gfp_t gfp);
+
int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm,
gfp_t gfp, swp_entry_t entry);
void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
@@ -1251,6 +1253,12 @@ static inline int mem_cgroup_charge(struct folio *folio,
return 0;
}
+static inline int mem_cgroup_hugetlb_charge_folio(struct folio *folio,
+ gfp_t gfp)
+{
+ return 0;
+}
+
static inline int mem_cgroup_swapin_charge_folio(struct folio *folio,
struct mm_struct *mm, gfp_t gfp, swp_entry_t entry)
{
@@ -1902,6 +1902,7 @@ void free_huge_folio(struct folio *folio)
pages_per_huge_page(h), folio);
hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
pages_per_huge_page(h), folio);
+ mem_cgroup_uncharge(folio);
if (restore_reserve)
h->resv_huge_pages++;
@@ -3004,7 +3005,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
}
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
- unsigned long addr, int avoid_reserve)
+ unsigned long addr, int avoid_reserve,
+ bool restore_reserve_on_memcg_failure)
{
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
@@ -3119,6 +3121,15 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
pages_per_huge_page(h), folio);
}
+
+ /* undo allocation if memory controller disallows it. */
+ if (mem_cgroup_hugetlb_charge_folio(folio, GFP_KERNEL)) {
+ if (restore_reserve_on_memcg_failure)
+ restore_reserve_on_error(h, vma, addr, folio);
+ folio_put(folio);
+ return ERR_PTR(-ENOMEM);
+ }
+
return folio;
out_uncharge_cgroup:
@@ -5179,7 +5190,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
/* Do not use reserve as it's private owned */
- new_folio = alloc_hugetlb_folio(dst_vma, addr, 1);
+ new_folio = alloc_hugetlb_folio(dst_vma, addr, 1, false);
if (IS_ERR(new_folio)) {
folio_put(pte_folio);
ret = PTR_ERR(new_folio);
@@ -5656,7 +5667,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
* be acquired again before returning to the caller, as expected.
*/
spin_unlock(ptl);
- new_folio = alloc_hugetlb_folio(vma, haddr, outside_reserve);
+ new_folio = alloc_hugetlb_folio(vma, haddr, outside_reserve, true);
if (IS_ERR(new_folio)) {
/*
@@ -5930,7 +5941,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
VM_UFFD_MISSING);
}
- folio = alloc_hugetlb_folio(vma, haddr, 0);
+ folio = alloc_hugetlb_folio(vma, haddr, 0, true);
if (IS_ERR(folio)) {
/*
* Returning error will result in faulting task being
@@ -6352,7 +6363,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
goto out;
}
- folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0);
+ folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0, true);
if (IS_ERR(folio)) {
ret = -ENOMEM;
goto out;
@@ -6394,7 +6405,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
goto out;
}
- folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0);
+ folio = alloc_hugetlb_folio(dst_vma, dst_addr, 0, false);
if (IS_ERR(folio)) {
folio_put(*foliop);
ret = -ENOMEM;
@@ -7050,6 +7050,46 @@ int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp)
return ret;
}
+static struct mem_cgroup *get_mem_cgroup_from_current(void)
+{
+ struct mem_cgroup *memcg;
+
+again:
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+ if (!css_tryget(&memcg->css)) {
+ rcu_read_unlock();
+ goto again;
+ }
+ rcu_read_unlock();
+ return memcg;
+}
+
+/**
+ * mem_cgroup_hugetlb_charge_folio - Charge a newly allocated hugetlb folio.
+ * @folio: folio to charge.
+ * @gfp: reclaim mode
+ *
+ * This function charges an allocated hugetlbf folio to the memcg of the
+ * current task.
+ *
+ * Returns 0 on success. Otherwise, an error code is returned.
+ */
+int mem_cgroup_hugetlb_charge_folio(struct folio *folio, gfp_t gfp)
+{
+ struct mem_cgroup *memcg;
+ int ret;
+
+ if (mem_cgroup_disabled())
+ return 0;
+
+ memcg = get_mem_cgroup_from_current();
+ ret = charge_memcg(folio, memcg, gfp);
+ mem_cgroup_put(memcg);
+
+ return ret;
+}
+
/**
* mem_cgroup_swapin_charge_folio - Charge a newly allocated folio for swapin.
* @folio: folio to charge.