Merge bd1264c37c ("mm/vmalloc: extend find_vmap_lowest_match_check with extra arguments") into android-mainline

Steps on the way to 6.1-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ie7d9250fea0211ee371a70d50b4d126ee59d2e78
Greg Kroah-Hartman 2022-10-20 11:56:09 +02:00
commit aa321be667
32 changed files with 316 additions and 273 deletions

View file

@ -1469,6 +1469,14 @@
Permit 'security.evm' to be updated regardless of
current integrity status.
early_page_ext [KNL] Forces page_ext initialization to an earlier
boot stage so that more early boot allocations are covered.
Please note that, as a side effect, some optimizations
might be disabled to achieve that (e.g. parallelized
memory initialization is disabled), so the boot process
might take longer, especially on systems with a lot of
memory. Available with CONFIG_PAGE_EXTENSION=y.
failslab=
fail_usercopy=
fail_page_alloc=

View file

@ -237,16 +237,6 @@ int pud_huge(pud_t pud)
return pud_large(pud);
}
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
pud_t *pud, int flags)
{
if (flags & FOLL_GET)
return NULL;
return pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}
bool __init arch_hugetlb_valid_size(unsigned long size)
{
if (MACHINE_HAS_EDAT1 && size == PMD_SIZE)

View file

@ -869,12 +869,6 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
}
}
/* return true if the memory block is offlined, otherwise, return false */
bool is_memblock_offlined(struct memory_block *mem)
{
return mem->state == MEM_OFFLINE;
}
static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
&dev_attr_probe.attr,

View file

@ -1410,9 +1410,19 @@ compress_again:
handle = zs_malloc(zram->mem_pool, comp_len,
GFP_NOIO | __GFP_HIGHMEM |
__GFP_MOVABLE);
if (!IS_ERR((void *)handle))
if (IS_ERR((void *)handle))
return PTR_ERR((void *)handle);
if (comp_len != PAGE_SIZE)
goto compress_again;
return PTR_ERR((void *)handle);
/*
* If the page is not compressible, we need to acquire the lock and
* execute the code below. The zcomp_stream_get() call is needed to
* disable CPU hotplug and to grab the zstrm buffer back, so that the
* dereference of the zstrm variable below is valid.
*/
zstrm = zcomp_stream_get(zram->comp);
}
alloced_pages = zs_get_total_pages(zram->mem_pool);
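Because the removed and added lines are interleaved above, here is a condensed sketch of the control flow this hunk produces (same identifiers as in the zram_drv.c code; the enclosing function, its locals and the compress_again label are as in the hunk and not reproduced here):

    handle = zs_malloc(zram->mem_pool, comp_len,
                       GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE);
    /* A failure of the second allocation is now reported immediately. */
    if (IS_ERR((void *)handle))
        return PTR_ERR((void *)handle);

    /* Only pages that actually compressed are worth compressing again. */
    if (comp_len != PAGE_SIZE)
        goto compress_again;

    /*
     * Incompressible page: just re-acquire the per-CPU compression stream
     * (which also disables CPU hotplug) so the zstrm dereference that
     * follows is valid, and store the page uncompressed.
     */
    zstrm = zcomp_stream_get(zram->comp);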

View file

@ -8,6 +8,7 @@
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
#include <linux/time.h>
@ -219,8 +220,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
unsigned long index = cb->start >> PAGE_SHIFT;
unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct page *pages[16];
unsigned long nr_pages = end_index - index + 1;
struct folio_batch fbatch;
const int errno = blk_status_to_errno(cb->status);
int i;
int ret;
@ -228,24 +228,23 @@ static noinline void end_compressed_writeback(struct inode *inode,
if (errno)
mapping_set_error(inode->i_mapping, errno);
while (nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
min_t(unsigned long,
nr_pages, ARRAY_SIZE(pages)), pages);
if (ret == 0) {
nr_pages -= 1;
index += 1;
continue;
}
folio_batch_init(&fbatch);
while (index <= end_index) {
ret = filemap_get_folios(inode->i_mapping, &index, end_index,
&fbatch);
if (ret == 0)
return;
for (i = 0; i < ret; i++) {
struct folio *folio = fbatch.folios[i];
if (errno)
SetPageError(pages[i]);
btrfs_page_clamp_clear_writeback(fs_info, pages[i],
folio_set_error(folio);
btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
cb->start, cb->len);
put_page(pages[i]);
}
nr_pages -= ret;
index += ret;
folio_batch_release(&fbatch);
}
/* the inode may be gone now */
}
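The conversion above follows the standard folio_batch walk. For reference, a minimal self-contained sketch of that pattern (the helper name visit_range() is made up for illustration; only the filemap_get_folios()/folio_batch calls are the real API):

    #include <linux/pagemap.h>
    #include <linux/pagevec.h>
    #include <linux/sched.h>

    /* Sketch: visit every cached folio in [index, end_index] of a mapping. */
    static void visit_range(struct address_space *mapping,
                            pgoff_t index, pgoff_t end_index)
    {
        struct folio_batch fbatch;
        unsigned int i, found;

        folio_batch_init(&fbatch);
        while (index <= end_index) {
            /* Fills the batch and advances 'index' past the last folio found. */
            found = filemap_get_folios(mapping, &index, end_index, &fbatch);
            if (found == 0)
                break;
            for (i = 0; i < found; i++) {
                struct folio *folio = fbatch.folios[i];
                /* ... per-folio work on 'folio' goes here ... */
            }
            /* Drops the references that filemap_get_folios() took. */
            folio_batch_release(&fbatch);
            cond_resched();
        }
    }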

View file

@ -270,9 +270,8 @@ static int __process_pages_contig(struct address_space *mapping,
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
pgoff_t index = start_index;
unsigned long nr_pages = end_index - start_index + 1;
unsigned long pages_processed = 0;
struct page *pages[16];
struct folio_batch fbatch;
int err = 0;
int i;
@ -281,16 +280,17 @@ static int __process_pages_contig(struct address_space *mapping,
ASSERT(processed_end && *processed_end == start);
}
if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
if ((page_ops & PAGE_SET_ERROR) && start_index <= end_index)
mapping_set_error(mapping, -EIO);
while (nr_pages > 0) {
int found_pages;
folio_batch_init(&fbatch);
while (index <= end_index) {
int found_folios;
found_pages = find_get_pages_contig(mapping, index,
min_t(unsigned long,
nr_pages, ARRAY_SIZE(pages)), pages);
if (found_pages == 0) {
found_folios = filemap_get_folios_contig(mapping, &index,
end_index, &fbatch);
if (found_folios == 0) {
/*
* Only if we're going to lock these pages, we can find
* nothing at @index.
@ -300,23 +300,20 @@ static int __process_pages_contig(struct address_space *mapping,
goto out;
}
for (i = 0; i < found_pages; i++) {
for (i = 0; i < found_folios; i++) {
int process_ret;
struct folio *folio = fbatch.folios[i];
process_ret = process_one_page(fs_info, mapping,
pages[i], locked_page, page_ops,
&folio->page, locked_page, page_ops,
start, end);
if (process_ret < 0) {
for (; i < found_pages; i++)
put_page(pages[i]);
err = -EAGAIN;
folio_batch_release(&fbatch);
goto out;
}
put_page(pages[i]);
pages_processed++;
pages_processed += folio_nr_pages(folio);
}
nr_pages -= found_pages;
index += found_pages;
folio_batch_release(&fbatch);
cond_resched();
}
out:
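A detail worth noting in this hunk: on a per-folio failure the old code had to drop the remaining page references one by one, whereas folio_batch_release() now drops every reference the batch still holds, and progress is counted in base pages via folio_nr_pages(). Schematically (process_one() stands in for process_one_page() with its arguments elided):

    for (i = 0; i < found_folios; i++) {
        struct folio *folio = fbatch.folios[i];

        if (process_one(folio) < 0) {
            /* Drops the batch's reference on every folio it still holds. */
            folio_batch_release(&fbatch);
            err = -EAGAIN;
            goto out;
        }
        /* A large folio accounts for more than one base page. */
        pages_processed += folio_nr_pages(folio);
    }
    folio_batch_release(&fbatch);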

View file

@ -337,7 +337,7 @@ bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
*
* Even with 0 returned, the page still needs an extra check to make sure
* it's really the correct page, as the caller is using
* find_get_pages_contig(), which can race with page invalidating.
* filemap_get_folios_contig(), which can race with page invalidating.
*/
int btrfs_page_start_writer_lock(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)

View file

@ -4,6 +4,7 @@
*/
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sizes.h>
@ -20,39 +21,40 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
unsigned long flags)
{
int ret;
struct page *pages[16];
struct folio_batch fbatch;
unsigned long index = start >> PAGE_SHIFT;
unsigned long end_index = end >> PAGE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
int count = 0;
int loops = 0;
while (nr_pages > 0) {
ret = find_get_pages_contig(inode->i_mapping, index,
min_t(unsigned long, nr_pages,
ARRAY_SIZE(pages)), pages);
folio_batch_init(&fbatch);
while (index <= end_index) {
ret = filemap_get_folios_contig(inode->i_mapping, &index,
end_index, &fbatch);
for (i = 0; i < ret; i++) {
struct folio *folio = fbatch.folios[i];
if (flags & PROCESS_TEST_LOCKED &&
!PageLocked(pages[i]))
!folio_test_locked(folio))
count++;
if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
unlock_page(pages[i]);
put_page(pages[i]);
if (flags & PROCESS_UNLOCK && folio_test_locked(folio))
folio_unlock(folio);
if (flags & PROCESS_RELEASE)
put_page(pages[i]);
folio_put(folio);
}
nr_pages -= ret;
index += ret;
folio_batch_release(&fbatch);
cond_resched();
loops++;
if (loops > 100000) {
printk(KERN_ERR
"stuck in a loop, start %llu, end %llu, nr_pages %lu, ret %d\n",
start, end, nr_pages, ret);
"stuck in a loop, start %llu, end %llu, ret %d\n",
start, end, ret);
break;
}
}
return count;
}

View file

@ -480,41 +480,36 @@ unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
sector_t start_blk,
sector_t *blkoff)
{
unsigned int i;
unsigned int i, nr_folios;
pgoff_t index;
unsigned int nblocks_in_page;
unsigned long length = 0;
sector_t b;
struct pagevec pvec;
struct page *page;
struct folio_batch fbatch;
struct folio *folio;
if (inode->i_mapping->nrpages == 0)
return 0;
index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits);
pagevec_init(&pvec);
folio_batch_init(&fbatch);
repeat:
pvec.nr = find_get_pages_contig(inode->i_mapping, index, PAGEVEC_SIZE,
pvec.pages);
if (pvec.nr == 0)
nr_folios = filemap_get_folios_contig(inode->i_mapping, &index, ULONG_MAX,
&fbatch);
if (nr_folios == 0)
return length;
if (length > 0 && pvec.pages[0]->index > index)
goto out;
b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits);
i = 0;
do {
page = pvec.pages[i];
folio = fbatch.folios[i];
lock_page(page);
if (page_has_buffers(page)) {
folio_lock(folio);
if (folio_buffers(folio)) {
struct buffer_head *bh, *head;
sector_t b;
bh = head = page_buffers(page);
b = folio->index << (PAGE_SHIFT - inode->i_blkbits);
bh = head = folio_buffers(folio);
do {
if (b < start_blk)
continue;
@ -529,21 +524,17 @@ repeat:
} else {
if (length > 0)
goto out_locked;
b += nblocks_in_page;
}
unlock_page(page);
folio_unlock(folio);
} while (++i < pagevec_count(&pvec));
} while (++i < nr_folios);
index = page->index + 1;
pagevec_release(&pvec);
folio_batch_release(&fbatch);
cond_resched();
goto repeat;
out_locked:
unlock_page(page);
out:
pagevec_release(&pvec);
folio_unlock(folio);
folio_batch_release(&fbatch);
return length;
}

View file

@ -203,9 +203,9 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
unsigned long maxpages, lpages, nr, loop, ret;
unsigned long maxpages, lpages, nr_folios, loop, ret, nr_pages, pfn;
struct inode *inode = file_inode(file);
struct page **pages = NULL, **ptr, *page;
struct folio_batch fbatch;
loff_t isize;
/* the mapping mustn't extend beyond the EOF */
@ -221,31 +221,39 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
goto out;
/* gang-find the pages */
pages = kcalloc(lpages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out_free;
nr = find_get_pages_contig(inode->i_mapping, pgoff, lpages, pages);
if (nr != lpages)
goto out_free_pages; /* leave if some pages were missing */
folio_batch_init(&fbatch);
nr_pages = 0;
repeat:
nr_folios = filemap_get_folios_contig(inode->i_mapping, &pgoff,
ULONG_MAX, &fbatch);
if (!nr_folios) {
ret = -ENOSYS;
return ret;
}
if (ret == -ENOSYS) {
ret = (unsigned long) folio_address(fbatch.folios[0]);
pfn = folio_pfn(fbatch.folios[0]);
}
/* check the pages for physical adjacency */
ptr = pages;
page = *ptr++;
page++;
for (loop = lpages; loop > 1; loop--)
if (*ptr++ != page++)
goto out_free_pages;
for (loop = 0; loop < nr_folios; loop++) {
if (pfn + nr_pages != folio_pfn(fbatch.folios[loop])) {
ret = -ENOSYS;
goto out_free; /* leave if not physical adjacent */
}
nr_pages += folio_nr_pages(fbatch.folios[loop]);
if (nr_pages >= lpages)
goto out_free; /* successfully found desired pages*/
}
if (nr_pages < lpages) {
folio_batch_release(&fbatch);
goto repeat; /* loop if pages are missing */
}
/* okay - all conditions fulfilled */
ret = (unsigned long) page_address(pages[0]);
out_free_pages:
ptr = pages;
for (loop = nr; loop > 0; loop--)
put_page(*ptr++);
out_free:
kfree(pages);
folio_batch_release(&fbatch);
out:
return ret;
}
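In the nommu case the folios must also be physically contiguous, which is now established with pfn arithmetic instead of comparing struct page pointers. A hedged, self-contained sketch of that check (the helper below is hypothetical, not part of the patch):

    #include <linux/mm.h>
    #include <linux/pagevec.h>

    /*
     * Return true if the folios in @fbatch form one physically contiguous
     * run starting at page frame number @pfn.
     */
    static bool fbatch_physically_contiguous(struct folio_batch *fbatch,
                                             unsigned long pfn)
    {
        unsigned long nr_pages = 0;
        unsigned int i;

        for (i = 0; i < folio_batch_count(fbatch); i++) {
            /* Each folio must begin right where the previous one ended. */
            if (folio_pfn(fbatch->folios[i]) != pfn + nr_pages)
                return false;
            nr_pages += folio_nr_pages(fbatch->folios[i]);
        }
        return true;
    }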

View file

@ -354,10 +354,11 @@ struct mem_cgroup {
};
/*
* size of first charge trial. "32" comes from vmscan.c's magic value.
* TODO: maybe necessary to use big numbers in big irons.
* size of first charge trial.
* TODO: maybe necessary to use big numbers on big irons, or to make this
* dynamic based on the workload.
*/
#define MEMCG_CHARGE_BATCH 32U
#define MEMCG_CHARGE_BATCH 64U
extern struct mem_cgroup *root_mem_cgroup;

View file

@ -11,7 +11,6 @@ struct page;
struct zone;
struct pglist_data;
struct mem_section;
struct memory_block;
struct memory_group;
struct resource;
struct vmem_altmap;
@ -216,6 +215,22 @@ void put_online_mems(void);
void mem_hotplug_begin(void);
void mem_hotplug_done(void);
/* See kswapd_is_running() */
static inline void pgdat_kswapd_lock(pg_data_t *pgdat)
{
mutex_lock(&pgdat->kswapd_lock);
}
static inline void pgdat_kswapd_unlock(pg_data_t *pgdat)
{
mutex_unlock(&pgdat->kswapd_lock);
}
static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat)
{
mutex_init(&pgdat->kswapd_lock);
}
#else /* ! CONFIG_MEMORY_HOTPLUG */
#define pfn_to_online_page(pfn) \
({ \
@ -252,6 +267,10 @@ static inline bool movable_node_is_enabled(void)
{
return false;
}
static inline void pgdat_kswapd_lock(pg_data_t *pgdat) {}
static inline void pgdat_kswapd_unlock(pg_data_t *pgdat) {}
static inline void pgdat_kswapd_lock_init(pg_data_t *pgdat) {}
#endif /* ! CONFIG_MEMORY_HOTPLUG */
/*
@ -333,7 +352,6 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
extern void remove_pfn_range_from_zone(struct zone *zone,
unsigned long start_pfn,
unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern int sparse_add_section(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap,
struct dev_pagemap *pgmap);

View file

@ -955,8 +955,10 @@ typedef struct pglist_data {
atomic_t nr_writeback_throttled;/* nr of writeback-throttled tasks */
unsigned long nr_reclaim_start; /* nr pages written while throttled
* when throttling started. */
struct task_struct *kswapd; /* Protected by
mem_hotplug_begin/done() */
#ifdef CONFIG_MEMORY_HOTPLUG
struct mutex kswapd_lock;
#endif
struct task_struct *kswapd; /* Protected by kswapd_lock */
int kswapd_order;
enum zone_type kswapd_highest_zoneidx;

View file

@ -3,15 +3,26 @@
#define _LINUX_PAGE_COUNTER_H
#include <linux/atomic.h>
#include <linux/cache.h>
#include <linux/kernel.h>
#include <asm/page.h>
#if defined(CONFIG_SMP)
struct pc_padding {
char x[0];
} ____cacheline_internodealigned_in_smp;
#define PC_PADDING(name) struct pc_padding name
#else
#define PC_PADDING(name)
#endif
struct page_counter {
/*
* Make sure 'usage' does not share a cacheline with any other field. The
* memcg->memory.usage is a hot member of struct mem_cgroup.
*/
atomic_long_t usage;
unsigned long min;
unsigned long low;
unsigned long high;
unsigned long max;
PC_PADDING(_pad1_);
/* effective memory.min and memory.min usage tracking */
unsigned long emin;
@ -23,18 +34,18 @@ struct page_counter {
atomic_long_t low_usage;
atomic_long_t children_low_usage;
/* legacy */
unsigned long watermark;
unsigned long failcnt;
/*
* 'parent' is placed here to be far from 'usage' to reduce
* cache false sharing, as 'usage' is written mostly while
* parent is frequently read for cgroup's hierarchical
* counting nature.
*/
/* Keep all the read-mostly fields in a separate cacheline. */
PC_PADDING(_pad2_);
unsigned long min;
unsigned long low;
unsigned long high;
unsigned long max;
struct page_counter *parent;
};
} ____cacheline_internodealigned_in_smp;
#if BITS_PER_LONG == 32
#define PAGE_COUNTER_MAX LONG_MAX

View file

@ -36,9 +36,15 @@ struct page_ext {
unsigned long flags;
};
extern bool early_page_ext;
extern unsigned long page_ext_size;
extern void pgdat_page_ext_init(struct pglist_data *pgdat);
static inline bool early_page_ext_enabled(void)
{
return early_page_ext;
}
#ifdef CONFIG_SPARSEMEM
static inline void page_ext_init_flatmem(void)
{
@ -68,6 +74,11 @@ static inline struct page_ext *page_ext_next(struct page_ext *curr)
#else /* !CONFIG_PAGE_EXTENSION */
struct page_ext;
static inline bool early_page_ext_enabled(void)
{
return false;
}
static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
{
}

View file

@ -718,8 +718,8 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch);
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
unsigned int nr_pages, struct page **pages);
unsigned filemap_get_folios_contig(struct address_space *mapping,
pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
struct page **pages);

View file

@ -1276,8 +1276,7 @@ static inline int pgd_devmap(pgd_t pgd)
#endif
#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \
(defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD))
!defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline int pud_trans_huge(pud_t pud)
{
return 0;
@ -1598,11 +1597,7 @@ typedef unsigned int pgtbl_mod_mask;
#endif
#ifndef has_transparent_hugepage
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define has_transparent_hugepage() 1
#else
#define has_transparent_hugepage() 0
#endif
#define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)
#endif
/*
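IS_BUILTIN() comes from <linux/kconfig.h> and evaluates to 1 only when the option is built in (=y), and to 0 otherwise, so the old #ifdef/#else pair collapses into a single definition with the same result:

    /* CONFIG_TRANSPARENT_HUGEPAGE=y     -> has_transparent_hugepage() is 1 */
    /* CONFIG_TRANSPARENT_HUGEPAGE unset -> has_transparent_hugepage() is 0 */
    #define has_transparent_hugepage() IS_BUILTIN(CONFIG_TRANSPARENT_HUGEPAGE)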

View file

@ -847,6 +847,9 @@ static void __init mm_init(void)
pgtable_init();
debug_objects_mem_init();
vmalloc_init();
/* Should be run after vmap initialization */
if (early_page_ext_enabled())
page_ext_init();
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
@ -1615,7 +1618,8 @@ static noinline void __init kernel_init_freeable(void)
padata_init();
page_alloc_init_late();
/* Initialize page ext after all struct pages are initialized. */
page_ext_init();
if (!early_page_ext_enabled())
page_ext_init();
do_basic_setup();

View file

@ -23,7 +23,7 @@ menuconfig SWAP
in your computer. If unsure say Y.
config ZSWAP
bool "Compressed cache for swap pages (EXPERIMENTAL)"
bool "Compressed cache for swap pages"
depends on SWAP
select FRONTSWAP
select CRYPTO
@ -36,12 +36,6 @@ config ZSWAP
in the case where decompressing from RAM is faster than swap device
reads, can also improve workload performance.
This is marked experimental because it is a new feature (as of
v3.11) that interacts heavily with memory reclaim. While these
interactions don't cause any known issues on simple memory setups,
they have not be fully explored on the large set of potential
configurations and workloads that exist.
config ZSWAP_DEFAULT_ON
bool "Enable the compressed cache for swap pages by default"
depends on ZSWAP

View file

@ -776,8 +776,6 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb)
int bdi_init(struct backing_dev_info *bdi)
{
int ret;
bdi->dev = NULL;
kref_init(&bdi->refcnt);
@ -788,9 +786,7 @@ int bdi_init(struct backing_dev_info *bdi)
INIT_LIST_HEAD(&bdi->wb_list);
init_waitqueue_head(&bdi->wb_waitq);
ret = cgwb_bdi_init(bdi);
return ret;
return cgwb_bdi_init(bdi);
}
struct backing_dev_info *bdi_alloc(int node_id)

View file

@ -1977,9 +1977,21 @@ static inline bool is_via_compact_memory(int order)
return order == -1;
}
/*
* Determine whether kswapd is (or recently was!) running on this node.
*
* pgdat_kswapd_lock() pins pgdat->kswapd, so a concurrent kswapd_stop() can't
* zero it.
*/
static bool kswapd_is_running(pg_data_t *pgdat)
{
return pgdat->kswapd && task_is_running(pgdat->kswapd);
bool running;
pgdat_kswapd_lock(pgdat);
running = pgdat->kswapd && task_is_running(pgdat->kswapd);
pgdat_kswapd_unlock(pgdat);
return running;
}
/*

View file

@ -88,49 +88,6 @@ void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr)
#define DAMON_MAX_SUBSCORE (100)
#define DAMON_MAX_AGE_IN_LOG (32)
int damon_pageout_score(struct damon_ctx *c, struct damon_region *r,
struct damos *s)
{
unsigned int max_nr_accesses;
int freq_subscore;
unsigned int age_in_sec;
int age_in_log, age_subscore;
unsigned int freq_weight = s->quota.weight_nr_accesses;
unsigned int age_weight = s->quota.weight_age;
int hotness;
max_nr_accesses = c->aggr_interval / c->sample_interval;
freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses;
age_in_sec = (unsigned long)r->age * c->aggr_interval / 1000000;
for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec;
age_in_log++, age_in_sec >>= 1)
;
/* If frequency is 0, higher age means it's colder */
if (freq_subscore == 0)
age_in_log *= -1;
/*
* Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG].
* Scale it to be in [0, 100] and set it as age subscore.
*/
age_in_log += DAMON_MAX_AGE_IN_LOG;
age_subscore = age_in_log * DAMON_MAX_SUBSCORE /
DAMON_MAX_AGE_IN_LOG / 2;
hotness = (freq_weight * freq_subscore + age_weight * age_subscore);
if (freq_weight + age_weight)
hotness /= freq_weight + age_weight;
/*
* Transform it to fit in [0, DAMOS_MAX_SCORE]
*/
hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE;
/* Return coldness of the region */
return DAMOS_MAX_SCORE - hotness;
}
int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
struct damos *s)
{
@ -172,3 +129,12 @@ int damon_hot_score(struct damon_ctx *c, struct damon_region *r,
return hotness;
}
int damon_pageout_score(struct damon_ctx *c, struct damon_region *r,
struct damos *s)
{
int hotness = damon_hot_score(c, r, s);
/* Return coldness of the region */
return DAMOS_MAX_SCORE - hotness;
}

View file

@ -166,8 +166,7 @@ out:
return result.accessed;
}
static void __damon_pa_check_access(struct damon_ctx *ctx,
struct damon_region *r)
static void __damon_pa_check_access(struct damon_region *r)
{
static unsigned long last_addr;
static unsigned long last_page_sz = PAGE_SIZE;
@ -196,7 +195,7 @@ static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx)
damon_for_each_target(t, ctx) {
damon_for_each_region(r, t) {
__damon_pa_check_access(ctx, r);
__damon_pa_check_access(r);
max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
}
}

View file

@ -542,16 +542,15 @@ static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
* mm 'mm_struct' for the given virtual address space
* r the region to be checked
*/
static void __damon_va_check_access(struct damon_ctx *ctx,
struct mm_struct *mm, struct damon_region *r)
static void __damon_va_check_access(struct mm_struct *mm,
struct damon_region *r, bool same_target)
{
static struct mm_struct *last_mm;
static unsigned long last_addr;
static unsigned long last_page_sz = PAGE_SIZE;
static bool last_accessed;
/* If the region is in the last checked page, reuse the result */
if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) ==
if (same_target && (ALIGN_DOWN(last_addr, last_page_sz) ==
ALIGN_DOWN(r->sampling_addr, last_page_sz))) {
if (last_accessed)
r->nr_accesses++;
@ -562,7 +561,6 @@ static void __damon_va_check_access(struct damon_ctx *ctx,
if (last_accessed)
r->nr_accesses++;
last_mm = mm;
last_addr = r->sampling_addr;
}
@ -572,14 +570,17 @@ static unsigned int damon_va_check_accesses(struct damon_ctx *ctx)
struct mm_struct *mm;
struct damon_region *r;
unsigned int max_nr_accesses = 0;
bool same_target;
damon_for_each_target(t, ctx) {
mm = damon_get_mm(t);
if (!mm)
continue;
same_target = false;
damon_for_each_region(r, t) {
__damon_va_check_access(ctx, mm, r);
__damon_va_check_access(mm, r, same_target);
max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
same_target = true;
}
mmput(mm);
}

View file

@ -2197,30 +2197,31 @@ bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
}
/**
* find_get_pages_contig - gang contiguous pagecache lookup
* filemap_get_folios_contig - Get a batch of contiguous folios
* @mapping: The address_space to search
* @index: The starting page index
* @nr_pages: The maximum number of pages
* @pages: Where the resulting pages are placed
* @start: The starting page index
* @end: The final page index (inclusive)
* @fbatch: The batch to fill
*
* find_get_pages_contig() works exactly like find_get_pages_range(),
* except that the returned number of pages are guaranteed to be
* contiguous.
* filemap_get_folios_contig() works exactly like filemap_get_folios(),
* except the returned folios are guaranteed to be contiguous. This may
* not return all contiguous folios if the batch gets filled up.
*
* Return: the number of pages which were found.
* Return: The number of folios found.
* Also update @start to be positioned for traversal of the next folio.
*/
unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
unsigned int nr_pages, struct page **pages)
{
XA_STATE(xas, &mapping->i_pages, index);
struct folio *folio;
unsigned int ret = 0;
if (unlikely(!nr_pages))
return 0;
unsigned filemap_get_folios_contig(struct address_space *mapping,
pgoff_t *start, pgoff_t end, struct folio_batch *fbatch)
{
XA_STATE(xas, &mapping->i_pages, *start);
unsigned long nr;
struct folio *folio;
rcu_read_lock();
for (folio = xas_load(&xas); folio; folio = xas_next(&xas)) {
for (folio = xas_load(&xas); folio && xas.xa_index <= end;
folio = xas_next(&xas)) {
if (xas_retry(&xas, folio))
continue;
/*
@ -2228,33 +2229,45 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
* No current caller is looking for DAX entries.
*/
if (xa_is_value(folio))
break;
goto update_start;
if (!folio_try_get_rcu(folio))
goto retry;
if (unlikely(folio != xas_reload(&xas)))
goto put_page;
goto put_folio;
again:
pages[ret] = folio_file_page(folio, xas.xa_index);
if (++ret == nr_pages)
break;
if (folio_more_pages(folio, xas.xa_index, ULONG_MAX)) {
xas.xa_index++;
folio_ref_inc(folio);
goto again;
if (!folio_batch_add(fbatch, folio)) {
nr = folio_nr_pages(folio);
if (folio_test_hugetlb(folio))
nr = 1;
*start = folio->index + nr;
goto out;
}
continue;
put_page:
put_folio:
folio_put(folio);
retry:
xas_reset(&xas);
}
update_start:
nr = folio_batch_count(fbatch);
if (nr) {
folio = fbatch->folios[nr - 1];
if (folio_test_hugetlb(folio))
*start = folio->index + 1;
else
*start = folio->index + folio_nr_pages(folio);
}
out:
rcu_read_unlock();
return ret;
return folio_batch_count(fbatch);
}
EXPORT_SYMBOL(find_get_pages_contig);
EXPORT_SYMBOL(filemap_get_folios_contig);
/**
* find_get_pages_range_tag - Find and return head pages matching @tag.
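For callers, the intended usage of the new helper looks roughly like the sketch below (count_contig_pages() is a made-up example function; note how @start is advanced by the call and how a zero return marks the first gap):

    #include <linux/mm.h>
    #include <linux/pagemap.h>
    #include <linux/pagevec.h>

    /* Sketch: count base pages cached contiguously from 'start' up to 'end'. */
    static unsigned long count_contig_pages(struct address_space *mapping,
                                            pgoff_t start, pgoff_t end)
    {
        struct folio_batch fbatch;
        unsigned long nr = 0;
        unsigned int i, found;

        folio_batch_init(&fbatch);
        while (start <= end) {
            found = filemap_get_folios_contig(mapping, &start, end, &fbatch);
            if (!found)
                break;  /* nothing at 'start': the contiguous run has ended */
            for (i = 0; i < found; i++)
                nr += folio_nr_pages(fbatch.folios[i]);
            /* Drop the references the lookup took before going around again. */
            folio_batch_release(&fbatch);
        }
        return nr;
    }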

View file

@ -1940,8 +1940,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
node_states_clear_node(node, &arg);
if (arg.status_change_nid >= 0) {
kswapd_stop(node);
kcompactd_stop(node);
kswapd_stop(node);
}
writeback_set_ratelimit();
@ -1969,11 +1969,10 @@ failed_removal:
static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
{
int ret = !is_memblock_offlined(mem);
int *nid = arg;
*nid = mem->nid;
if (unlikely(ret)) {
if (unlikely(mem->state != MEM_OFFLINE)) {
phys_addr_t beginpa, endpa;
beginpa = PFN_PHYS(section_nr_to_pfn(mem->start_section_nr));

View file

@ -186,9 +186,16 @@ again:
get_page(page);
/*
* Optimize for the common case where page is only mapped once
* in one process. If we can lock the page, then we can safely
* set up a special migration page table entry now.
* We rely on trylock_page() to avoid deadlock between
* concurrent migrations where each is waiting on the other's
* page lock. If we can't immediately lock the page, we fail this
* migration as it is only best effort anyway.
*
* If we can lock the page it's safe to set up a migration entry
* now. In the common case where the page is mapped once in a
* single process setting up the migration entry now is an
* optimisation to avoid walking the rmap later with
* try_to_migrate().
*/
if (trylock_page(page)) {
bool anon_exclusive;

View file

@ -482,6 +482,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
{
static unsigned long prev_end_pfn, nr_initialised;
if (early_page_ext_enabled())
return false;
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
@ -3777,8 +3779,7 @@ struct page *__rmqueue_pcplist(struct zone *zone, unsigned int order,
/* Lock and remove page from the per-cpu list */
static struct page *rmqueue_pcplist(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
gfp_t gfp_flags, int migratetype,
unsigned int alloc_flags)
int migratetype, unsigned int alloc_flags)
{
struct per_cpu_pages *pcp;
struct list_head *list;
@ -3839,7 +3840,7 @@ struct page *rmqueue(struct zone *preferred_zone,
if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
migratetype != MIGRATE_MOVABLE) {
page = rmqueue_pcplist(preferred_zone, zone, order,
gfp_flags, migratetype, alloc_flags);
migratetype, alloc_flags);
if (likely(page))
goto out;
}
@ -7663,6 +7664,7 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
int i;
pgdat_resize_init(pgdat);
pgdat_kswapd_lock_init(pgdat);
pgdat_init_split_queue(pgdat);
pgdat_init_kcompactd(pgdat);

View file

@ -17,24 +17,23 @@ static void propagate_protected_usage(struct page_counter *c,
unsigned long usage)
{
unsigned long protected, old_protected;
unsigned long low, min;
long delta;
if (!c->parent)
return;
min = READ_ONCE(c->min);
if (min || atomic_long_read(&c->min_usage)) {
protected = min(usage, min);
protected = min(usage, READ_ONCE(c->min));
old_protected = atomic_long_read(&c->min_usage);
if (protected != old_protected) {
old_protected = atomic_long_xchg(&c->min_usage, protected);
delta = protected - old_protected;
if (delta)
atomic_long_add(delta, &c->parent->children_min_usage);
}
low = READ_ONCE(c->low);
if (low || atomic_long_read(&c->low_usage)) {
protected = min(usage, low);
protected = min(usage, READ_ONCE(c->low));
old_protected = atomic_long_read(&c->low_usage);
if (protected != old_protected) {
old_protected = atomic_long_xchg(&c->low_usage, protected);
delta = protected - old_protected;
if (delta)

View file

@ -91,6 +91,14 @@ unsigned long page_ext_size = sizeof(struct page_ext);
static unsigned long total_usage;
static struct page_ext *lookup_page_ext(const struct page *page);
bool early_page_ext;
static int __init setup_early_page_ext(char *str)
{
early_page_ext = true;
return 0;
}
early_param("early_page_ext", setup_early_page_ext);
static bool __init invoke_need_callbacks(void)
{
int i;
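Taken together with the init/main.c and mm/page_alloc.c hunks above, booting with early_page_ext on the command line changes the init ordering roughly as follows (condensed sketch, not literal kernel code):

    /* mm_init(), right after vmalloc_init(): */
    if (early_page_ext_enabled())
        page_ext_init();        /* early: covers early boot allocations */

    /* defer_init() in mm/page_alloc.c: */
    if (early_page_ext_enabled())
        return false;           /* no deferred struct page initialization */

    /* kernel_init_freeable(), late in boot (the previous default): */
    if (!early_page_ext_enabled())
        page_ext_init();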

View file

@ -1300,12 +1300,12 @@ find_vmap_lowest_match(struct rb_root *root, unsigned long size,
#include <linux/random.h>
static struct vmap_area *
find_vmap_lowest_linear_match(unsigned long size,
find_vmap_lowest_linear_match(struct list_head *head, unsigned long size,
unsigned long align, unsigned long vstart)
{
struct vmap_area *va;
list_for_each_entry(va, &free_vmap_area_list, list) {
list_for_each_entry(va, head, list) {
if (!is_within_this_va(va, size, align, vstart))
continue;
@ -1316,7 +1316,8 @@ find_vmap_lowest_linear_match(unsigned long size,
}
static void
find_vmap_lowest_match_check(unsigned long size, unsigned long align)
find_vmap_lowest_match_check(struct rb_root *root, struct list_head *head,
unsigned long size, unsigned long align)
{
struct vmap_area *va_1, *va_2;
unsigned long vstart;
@ -1325,8 +1326,8 @@ find_vmap_lowest_match_check(unsigned long size, unsigned long align)
get_random_bytes(&rnd, sizeof(rnd));
vstart = VMALLOC_START + rnd;
va_1 = find_vmap_lowest_match(size, align, vstart, false);
va_2 = find_vmap_lowest_linear_match(size, align, vstart);
va_1 = find_vmap_lowest_match(root, size, align, vstart, false);
va_2 = find_vmap_lowest_linear_match(head, size, align, vstart);
if (va_1 != va_2)
pr_emerg("not lowest: t: 0x%p, l: 0x%p, v: 0x%lx\n",
@ -1513,7 +1514,7 @@ __alloc_vmap_area(struct rb_root *root, struct list_head *head,
return vend;
#if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
find_vmap_lowest_match_check(size, align);
find_vmap_lowest_match_check(root, head, size, align);
#endif
return nva_start_addr;
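The cross-check only compiles in when the self-test knob near the top of mm/vmalloc.c is flipped on; with the new root/head parameters it can be pointed at whichever free-vmap tree and list __alloc_vmap_area() is currently operating on, rather than being hard-wired to the globals. Enabling it looks like this (debug builds only):

    /* mm/vmalloc.c: the knob defaults to 0 */
    #define DEBUG_AUGMENT_LOWEST_MATCH_CHECK 1

    /* __alloc_vmap_area() then verifies every successful allocation: */
    #if DEBUG_AUGMENT_LOWEST_MATCH_CHECK
        find_vmap_lowest_match_check(root, head, size, align);
    #endif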

View file

@ -4651,16 +4651,17 @@ void kswapd_run(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
if (pgdat->kswapd)
return;
pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
if (IS_ERR(pgdat->kswapd)) {
/* failure at boot is fatal */
BUG_ON(system_state < SYSTEM_RUNNING);
pr_err("Failed to start kswapd on node %d\n", nid);
pgdat->kswapd = NULL;
pgdat_kswapd_lock(pgdat);
if (!pgdat->kswapd) {
pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid);
if (IS_ERR(pgdat->kswapd)) {
/* failure at boot is fatal */
BUG_ON(system_state < SYSTEM_RUNNING);
pr_err("Failed to start kswapd on node %d\n", nid);
pgdat->kswapd = NULL;
}
}
pgdat_kswapd_unlock(pgdat);
}
/*
@ -4669,12 +4670,16 @@ void kswapd_run(int nid)
*/
void kswapd_stop(int nid)
{
struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
pg_data_t *pgdat = NODE_DATA(nid);
struct task_struct *kswapd;
pgdat_kswapd_lock(pgdat);
kswapd = pgdat->kswapd;
if (kswapd) {
kthread_stop(kswapd);
NODE_DATA(nid)->kswapd = NULL;
pgdat->kswapd = NULL;
}
pgdat_kswapd_unlock(pgdat);
}
static int __init kswapd_init(void)