mm/madvise: Fix madvise_pageout for private file mappings
When MADV_PAGEOUT is called on a private file-mapping VMA region, we bail
out early if the calling process is neither the owner of the file nor
write-capable on it.  However, such a VMA may contain both private/shared
clean pages and private dirty (anonymous CoW) pages, and the opportunity
to page out those private dirty pages is missed.  Fix this by letting
private file mappings proceed with pageout and performing the file access
check together with PageAnon() during the page walk.

We observe a ~10% improvement in zram usage, leaving more memory
available on a 4GB RAM system running Android.

[[email protected]: v2]
  Link: https://lkml.kernel.org/r/[email protected]
Link: https://lkml.kernel.org/r/[email protected]
Change-Id: I94666e8d78b9c6eacbd4c60daa48e757d4f5a7f0
Signed-off-by: Pavankumar Kondeti <[email protected]>
Cc: Charan Teja Kalla <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Suren Baghdasaryan <[email protected]>
Cc: David Hildenbrand <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Juhyung Park <[email protected]>
Pavankumar Kondeti authored and saikiran2001 committed Jan 31, 2023
1 parent 0b4523d commit 3ba1afc
Showing 1 changed file with 34 additions and 17 deletions.
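
The scenario the commit message describes, a MAP_PRIVATE file mapping whose written pages become private anonymous (CoW) copies, can be exercised from userspace roughly as follows. This is an illustrative sketch rather than part of the patch: the library path is a placeholder, MADV_PAGEOUT is defined by hand in case the libc headers predate it, and error handling is omitted.

#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21                 /* reclaim these pages; Linux 5.4+ */
#endif

int main(void)
{
        size_t len = 16 * 4096;
        /* A file the caller can read but not write (placeholder path). */
        int fd = open("/system/lib64/libplaceholder.so", O_RDONLY);

        /* PROT_WRITE on a read-only fd is fine for a MAP_PRIVATE mapping. */
        char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);

        /* Writing to the private mapping creates dirty anonymous CoW pages. */
        memset(p, 0xaa, len);

        /*
         * Before the fix this call returned without reclaiming anything,
         * because the caller is neither owner nor write capable of the file.
         * After the fix the dirty CoW pages above are paged out, while clean
         * pagecache pages in the range are skipped by the PageAnon() check.
         */
        madvise(p, len, MADV_PAGEOUT);

        munmap(p, len);
        close(fd);
        return 0;
}
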
mm/madvise.c
@@ -305,6 +305,20 @@ static long madvise_willneed(struct vm_area_struct *vma,
         return 0;
 }
 
+static inline bool can_do_file_pageout(struct vm_area_struct *vma)
+{
+        if (!vma->vm_file)
+                return false;
+        /*
+         * paging out pagecache only for non-anonymous mappings that correspond
+         * to the files the calling process could (if tried) open for writing;
+         * otherwise we'd be including shared non-exclusive mappings, which
+         * opens a side channel.
+         */
+        return inode_owner_or_capable(file_inode(vma->vm_file)) ||
+               inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
+}
+
 static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                                 unsigned long addr, unsigned long end,
                                 struct mm_walk *walk)
@@ -318,6 +332,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
         spinlock_t *ptl;
         struct page *page = NULL;
         LIST_HEAD(page_list);
+        bool pageout_anon_only_filter;
 
         if (fatal_signal_pending(current))
                 return -EINTR;
@@ -326,6 +341,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
             fatal_signal_pending(private->target_task))
                 return -EINTR;
 
+        pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) &&
+                                        !can_do_file_pageout(vma);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
         if (pmd_trans_huge(*pmd)) {
                 pmd_t orig_pmd;
@@ -352,6 +370,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                 if (page_mapcount(page) != 1)
                         goto huge_unlock;
 
+                if (pageout_anon_only_filter && !PageAnon(page))
+                        goto huge_unlock;
+
                 if (next - addr != HPAGE_PMD_SIZE) {
                         int err;
 
@@ -420,6 +441,8 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                 if (PageTransCompound(page)) {
                         if (page_mapcount(page) != 1)
                                 break;
+                        if (pageout_anon_only_filter && !PageAnon(page))
+                                break;
                         get_page(page);
                         if (!trylock_page(page)) {
                                 put_page(page);
@@ -447,6 +470,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                 if (!PageLRU(page) || page_mapcount(page) != 1)
                         continue;
 
+                if (pageout_anon_only_filter && !PageAnon(page))
+                        continue;
+
                 VM_BUG_ON_PAGE(PageTransCompound(page), page);
 
                 if (pte_young(ptent)) {
@@ -545,22 +571,6 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
         vm_write_end(vma);
 }
 
-static inline bool can_do_pageout(struct vm_area_struct *vma)
-{
-        if (vma_is_anonymous(vma))
-                return true;
-        if (!vma->vm_file)
-                return false;
-        /*
-         * paging out pagecache only for non-anonymous mappings that correspond
-         * to the files the calling process could (if tried) open for writing;
-         * otherwise we'd be including shared non-exclusive mappings, which
-         * opens a side channel.
-         */
-        return inode_owner_or_capable(file_inode(vma->vm_file)) ||
-               inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
-}
-
 static long madvise_pageout(struct task_struct *task,
                             struct vm_area_struct *vma,
                             struct vm_area_struct **prev,
@@ -573,7 +583,14 @@ static long madvise_pageout(struct task_struct *task,
         if (!can_madv_lru_vma(vma))
                 return -EINVAL;
 
-        if (!can_do_pageout(vma))
+        /*
+         * If the VMA belongs to a private file mapping, there can be private
+         * dirty pages which can be paged out even if this process is neither
+         * owner nor write capable of the file.  We allow private file mappings
+         * further to pageout dirty anon pages.
+         */
+        if (!vma_is_anonymous(vma) && (!can_do_file_pageout(vma) &&
+                        (vma->vm_flags & VM_MAYSHARE)))
                 return 0;
 
         lru_add_drain();
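
Taken together, the change splits the old all-or-nothing permission check into a VMA-level gate in madvise_pageout() and a per-page filter in madvise_cold_or_pageout_pte_range(). The sketch below restates the resulting policy in standalone form; the helper names are illustrative only and are not kernel symbols.

#include <stdbool.h>

/* VMA-level gate: should MADV_PAGEOUT skip this VMA entirely? */
static bool skip_vma(bool anon_vma, bool may_share, bool file_writable)
{
        if (anon_vma)
                return false;           /* anonymous VMAs are always eligible */
        if (file_writable)
                return false;           /* caller may also page out pagecache */
        return may_share;               /* shared mapping, no permission: bail */
}

/* Per-page filter: without file permission, only anon (CoW) pages go out. */
static bool skip_page(bool pageout_anon_only, bool page_is_anon)
{
        return pageout_anon_only && !page_is_anon;
}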
