Commit 1bb0e85

Author: CKI KWF Bot (committed)
Merge: MM: revert POSIX_FADV_NOREUSE
MR: https://gitlab.com/redhat/rhel/src/kernel/rhel-9/-/merge_requests/3517
JIRA: https://issues.redhat.com/browse/RHEL-80655
Upstream Status: RHEL-only

This series reverts the introduction of POSIX_FADV_NOREUSE in 9.6. As documented in the JIRA ticket, downstream commit d908e31 appears to be responsible for a 10-20x slowdown in OCP's etcd compaction operation, which may be due to increased OCP API latency. We believe that d908e31 regresses MADV_RANDOM in a way that degrades performance for applications using this hint: after that commit, the pages backing VMAs marked for random access no longer receive a second chance to be re-activated once they are on the LRU inactive list.

Signed-off-by: Luiz Capitulino <[email protected]>
Approved-by: Rafael Aquini <[email protected]>
Approved-by: Aristeu Rozanski <[email protected]>
Approved-by: Waiman Long <[email protected]>
Approved-by: CKI KWF Bot <[email protected]>
Merged-by: CKI KWF Bot <[email protected]>
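For context, here is a minimal userspace sketch (not part of this commit; the file name and access pattern are assumptions) of how an application might apply the MADV_RANDOM hint discussed above. Under d908e31, the pages backing such a mapping stopped getting a second chance on the inactive LRU list, which is the behavior this merge restores.

/* Hypothetical example, not from this commit: mapping a file and
 * hinting random access, which sets VM_RAND_READ on the backing VMA. */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "data.db"; /* assumed name */
	struct stat st;
	int fd = open(path, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) < 0) {
		perror(path);
		return EXIT_FAILURE;
	}

	void *map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}

	/* Tell the kernel we will touch this range in random order. */
	if (madvise(map, st.st_size, MADV_RANDOM) < 0)
		perror("madvise");

	/* ... perform random reads through map ... */

	munmap(map, st.st_size);
	close(fd);
	return EXIT_SUCCESS;
}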
2 parents: f1a9fd9 + 7535c3f

File tree: 6 files changed, 30 insertions(+), 43 deletions(-)

include/linux/fs.h

Lines changed: 0 additions & 2 deletions
@@ -163,8 +163,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File supports DIRECT IO */
 #define FMODE_CAN_ODIRECT	((__force fmode_t)0x400000)
 
-#define FMODE_NOREUSE		((__force fmode_t)0x800000)
-
 /* File supports non-exclusive O_DIRECT writes from multiple threads */
 #define FMODE_DIO_PARALLEL_WRITE	((__force fmode_t)0x1000000)

include/linux/mm_inline.h

Lines changed: 0 additions & 11 deletions
@@ -579,15 +579,4 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr,
 #endif
 }
 
-static inline bool vma_has_recency(struct vm_area_struct *vma)
-{
-	if (vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))
-		return false;
-
-	if (vma->vm_file && (vma->vm_file->f_mode & FMODE_NOREUSE))
-		return false;
-
-	return true;
-}
-
 #endif

mm/fadvise.c

Lines changed: 1 addition & 4 deletions
@@ -79,7 +79,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 	case POSIX_FADV_NORMAL:
 		file->f_ra.ra_pages = bdi->ra_pages;
 		spin_lock(&file->f_lock);
-		file->f_mode &= ~(FMODE_RANDOM | FMODE_NOREUSE);
+		file->f_mode &= ~FMODE_RANDOM;
 		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_RANDOM:
@@ -106,9 +106,6 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 		force_page_cache_readahead(mapping, file, start_index, nrpages);
 		break;
 	case POSIX_FADV_NOREUSE:
-		spin_lock(&file->f_lock);
-		file->f_mode |= FMODE_NOREUSE;
-		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_DONTNEED:
 		__filemap_fdatawrite_range(mapping, offset, endbyte,
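A hedged usage note (caller and file name are hypothetical): with this revert applied, POSIX_FADV_NOREUSE falls through to a bare break in generic_fadvise(), so the hint is still accepted but is a no-op again, as it was before 9.6:

/* Hypothetical caller sketch: after the revert the call below still
 * returns 0, but no FMODE_NOREUSE flag is set (the flag is gone). */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	int fd = open("data.db", O_RDONLY);	/* assumed file name */

	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/* len == 0 means "from offset to end of file". */
	if (posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE) != 0)
		fprintf(stderr, "posix_fadvise failed\n");

	close(fd);
	return EXIT_SUCCESS;
}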

mm/memory.c

Lines changed: 4 additions & 3 deletions
@@ -1460,7 +1460,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					force_flush = 1;
 				}
 			}
-			if (pte_young(ptent) && likely(vma_has_recency(vma)))
+			if (pte_young(ptent) &&
+			    likely(!(vma->vm_flags & VM_SEQ_READ)))
 				mark_page_accessed(page);
 		}
 		rss[mm_counter(page)]--;
@@ -5252,8 +5253,8 @@ static inline void mm_account_fault(struct mm_struct *mm, struct pt_regs *regs,
 #ifdef CONFIG_LRU_GEN
 static void lru_gen_enter_fault(struct vm_area_struct *vma)
 {
-	/* the LRU algorithm only applies to accesses with recency */
-	current->in_lru_fault = vma_has_recency(vma);
+	/* the LRU algorithm doesn't apply to sequential or random reads */
+	current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ));
 }
 
 static void lru_gen_exit_fault(void)

mm/rmap.c

Lines changed: 24 additions & 19 deletions
@@ -821,15 +821,25 @@ static bool folio_referenced_one(struct folio *folio,
 		}
 
 		if (pvmw.pte) {
-			if (lru_gen_enabled() &&
-			    pte_young(ptep_get(pvmw.pte))) {
+			if (lru_gen_enabled() && pte_young(ptep_get(pvmw.pte)) &&
+			    !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) {
 				lru_gen_look_around(&pvmw);
 				referenced++;
 			}
 
 			if (ptep_clear_flush_young_notify(vma, address,
-						pvmw.pte))
-				referenced++;
+						pvmw.pte)) {
+				/*
+				 * Don't treat a reference through
+				 * a sequentially read mapping as such.
+				 * If the folio has been used in another mapping,
+				 * we will catch it; if this other mapping is
+				 * already gone, the unmap path will have set
+				 * the referenced flag or activated the folio.
+				 */
+				if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+					referenced++;
+			}
 		} else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
 			if (pmdp_clear_flush_young_notify(vma, address,
 						pvmw.pmd))
@@ -863,20 +873,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
 	struct folio_referenced_arg *pra = arg;
 	struct mem_cgroup *memcg = pra->memcg;
 
-	/*
-	 * Ignore references from this mapping if it has no recency. If the
-	 * folio has been used in another mapping, we will catch it; if this
-	 * other mapping is already gone, the unmap path will have set the
-	 * referenced flag or activated the folio in zap_pte_range().
-	 */
-	if (!vma_has_recency(vma))
-		return true;
-
-	/*
-	 * If we are reclaiming on behalf of a cgroup, skip counting on behalf
-	 * of references from different cgroups.
-	 */
-	if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
+	if (!mm_match_cgroup(vma->vm_mm, memcg))
 		return true;
 
 	return false;
@@ -907,7 +904,6 @@ int folio_referenced(struct folio *folio, int is_locked,
 		.arg = (void *)&pra,
 		.anon_lock = folio_lock_anon_vma_read,
 		.try_lock = true,
-		.invalid_vma = invalid_folio_referenced_vma,
 	};
 
 	*vm_flags = 0;
@@ -923,6 +919,15 @@ int folio_referenced(struct folio *folio, int is_locked,
 		return 1;
 	}
 
+	/*
+	 * If we are reclaiming on behalf of a cgroup, skip
+	 * counting on behalf of references from different
+	 * cgroups
+	 */
+	if (memcg) {
+		rwc.invalid_vma = invalid_folio_referenced_vma;
+	}
+
 	rmap_walk(folio, &rwc);
 	*vm_flags = pra.vm_flags;

mm/vmscan.c

Lines changed: 1 addition & 4 deletions
@@ -3161,10 +3161,7 @@ static int should_skip_vma(unsigned long start, unsigned long end, struct mm_wal
 	if (is_vm_hugetlb_page(vma))
 		return true;
 
-	if (!vma_has_recency(vma))
-		return true;
-
-	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL))
+	if (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ))
 		return true;
 
 	if (vma == get_gate_vma(vma->vm_mm))
