Skip to content

Commit 71ee2fd

Browse files
committed
Merge tag 'vfs-6.15-rc1.pipe' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs pipe updates from Christian Brauner:

 - Introduce struct file_operations pipeanon_fops
 - Don't update {a,c,m}time for anonymous pipes to avoid the performance costs associated with it
 - Change pipe_write() to never add a zero-sized buffer
 - Limit the slots in pipe_resize_ring()
 - Use pipe_buf() to retrieve the pipe buffer everywhere
 - Drop an always true check in anon_pipe_write()
 - Cache 2 pages instead of 1
 - Avoid spurious calls to prepare_to_wait_event() in ___wait_event()

* tag 'vfs-6.15-rc1.pipe' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  - fs/splice: Use pipe_buf() helper to retrieve pipe buffer
  - fs/pipe: Use pipe_buf() helper to retrieve pipe buffer
  - kernel/watch_queue: Use pipe_buf() to retrieve the pipe buffer
  - fs/pipe: Limit the slots in pipe_resize_ring()
  - wait: avoid spurious calls to prepare_to_wait_event() in ___wait_event()
  - pipe: cache 2 pages instead of 1
  - pipe: drop an always true check in anon_pipe_write()
  - pipe: change pipe_write() to never add a zero-sized buffer
  - pipe: don't update {a,c,m}time for anonymous pipes
  - pipe: introduce struct file_operations pipeanon_fops
2 parents fd101da + 3732d8f commit 71ee2fd

File tree

5 files changed

+124
-109
lines changed

5 files changed

+124
-109
lines changed

fs/pipe.c

Lines changed: 103 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -112,20 +112,40 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
112112
pipe_lock(pipe2);
113113
}
114114

115+
static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
116+
{
117+
for (int i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
118+
if (pipe->tmp_page[i]) {
119+
struct page *page = pipe->tmp_page[i];
120+
pipe->tmp_page[i] = NULL;
121+
return page;
122+
}
123+
}
124+
125+
return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
126+
}
127+
128+
static void anon_pipe_put_page(struct pipe_inode_info *pipe,
129+
struct page *page)
130+
{
131+
if (page_count(page) == 1) {
132+
for (int i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
133+
if (!pipe->tmp_page[i]) {
134+
pipe->tmp_page[i] = page;
135+
return;
136+
}
137+
}
138+
}
139+
140+
put_page(page);
141+
}
142+
115143
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
116144
struct pipe_buffer *buf)
117145
{
118146
struct page *page = buf->page;
119147

120-
/*
121-
* If nobody else uses this page, and we don't already have a
122-
* temporary page, let's keep track of it as a one-deep
123-
* allocation cache. (Otherwise just release our reference to it)
124-
*/
125-
if (page_count(page) == 1 && !pipe->tmp_page)
126-
pipe->tmp_page = page;
127-
else
128-
put_page(page);
148+
anon_pipe_put_page(pipe, page);
129149
}
130150

131151
static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
@@ -247,7 +267,7 @@ static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
247267
}
248268

249269
static ssize_t
250-
pipe_read(struct kiocb *iocb, struct iov_iter *to)
270+
anon_pipe_read(struct kiocb *iocb, struct iov_iter *to)
251271
{
252272
size_t total_len = iov_iter_count(to);
253273
struct file *filp = iocb->ki_filp;
@@ -274,7 +294,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
274294
/* Read ->head with a barrier vs post_one_notification() */
275295
unsigned int head = smp_load_acquire(&pipe->head);
276296
unsigned int tail = pipe->tail;
277-
unsigned int mask = pipe->ring_size - 1;
278297

279298
#ifdef CONFIG_WATCH_QUEUE
280299
if (pipe->note_loss) {
@@ -301,7 +320,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
301320
#endif
302321

303322
if (!pipe_empty(head, tail)) {
304-
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
323+
struct pipe_buffer *buf = pipe_buf(pipe, tail);
305324
size_t chars = buf->len;
306325
size_t written;
307326
int error;
@@ -359,29 +378,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
359378
break;
360379
}
361380
mutex_unlock(&pipe->mutex);
362-
363381
/*
364382
* We only get here if we didn't actually read anything.
365383
*
366-
* However, we could have seen (and removed) a zero-sized
367-
* pipe buffer, and might have made space in the buffers
368-
* that way.
369-
*
370-
* You can't make zero-sized pipe buffers by doing an empty
371-
* write (not even in packet mode), but they can happen if
372-
* the writer gets an EFAULT when trying to fill a buffer
373-
* that already got allocated and inserted in the buffer
374-
* array.
375-
*
376-
* So we still need to wake up any pending writers in the
377-
* _very_ unlikely case that the pipe was full, but we got
378-
* no data.
379-
*/
380-
if (unlikely(wake_writer))
381-
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
382-
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
383-
384-
/*
385384
* But because we didn't read anything, at this point we can
386385
* just return directly with -ERESTARTSYS if we're interrupted,
387386
* since we've done any required wakeups and there's no need
@@ -390,7 +389,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
390389
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
391390
return -ERESTARTSYS;
392391

393-
wake_writer = false;
394392
wake_next_reader = true;
395393
mutex_lock(&pipe->mutex);
396394
}
@@ -403,8 +401,15 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
403401
if (wake_next_reader)
404402
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
405403
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
404+
return ret;
405+
}
406+
407+
static ssize_t
408+
fifo_pipe_read(struct kiocb *iocb, struct iov_iter *to)
409+
{
410+
int ret = anon_pipe_read(iocb, to);
406411
if (ret > 0)
407-
file_accessed(filp);
412+
file_accessed(iocb->ki_filp);
408413
return ret;
409414
}
410415

@@ -424,7 +429,7 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
424429
}
425430

426431
static ssize_t
427-
pipe_write(struct kiocb *iocb, struct iov_iter *from)
432+
anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
428433
{
429434
struct file *filp = iocb->ki_filp;
430435
struct pipe_inode_info *pipe = filp->private_data;
@@ -471,8 +476,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
471476
was_empty = pipe_empty(head, pipe->tail);
472477
chars = total_len & (PAGE_SIZE-1);
473478
if (chars && !was_empty) {
474-
unsigned int mask = pipe->ring_size - 1;
475-
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
479+
struct pipe_buffer *buf = pipe_buf(pipe, head - 1);
476480
int offset = buf->offset + buf->len;
477481

478482
if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
@@ -503,54 +507,44 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
503507

504508
head = pipe->head;
505509
if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
506-
unsigned int mask = pipe->ring_size - 1;
507510
struct pipe_buffer *buf;
508-
struct page *page = pipe->tmp_page;
511+
struct page *page;
509512
int copied;
510513

511-
if (!page) {
512-
page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
513-
if (unlikely(!page)) {
514-
ret = ret ? : -ENOMEM;
515-
break;
516-
}
517-
pipe->tmp_page = page;
514+
page = anon_pipe_get_page(pipe);
515+
if (unlikely(!page)) {
516+
if (!ret)
517+
ret = -ENOMEM;
518+
break;
518519
}
519520

520-
/* Allocate a slot in the ring in advance and attach an
521-
* empty buffer. If we fault or otherwise fail to use
522-
* it, either the reader will consume it or it'll still
523-
* be there for the next write.
524-
*/
525-
pipe->head = head + 1;
521+
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
522+
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
523+
anon_pipe_put_page(pipe, page);
524+
if (!ret)
525+
ret = -EFAULT;
526+
break;
527+
}
526528

529+
pipe->head = head + 1;
527530
/* Insert it into the buffer array */
528-
buf = &pipe->bufs[head & mask];
531+
buf = pipe_buf(pipe, head);
529532
buf->page = page;
530533
buf->ops = &anon_pipe_buf_ops;
531534
buf->offset = 0;
532-
buf->len = 0;
533535
if (is_packetized(filp))
534536
buf->flags = PIPE_BUF_FLAG_PACKET;
535537
else
536538
buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
537-
pipe->tmp_page = NULL;
538539

539-
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
540-
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
541-
if (!ret)
542-
ret = -EFAULT;
543-
break;
544-
}
545-
ret += copied;
546540
buf->len = copied;
541+
ret += copied;
547542

548543
if (!iov_iter_count(from))
549544
break;
550-
}
551545

552-
if (!pipe_full(head, pipe->tail, pipe->max_usage))
553546
continue;
547+
}
554548

555549
/* Wait for buffer space to become available. */
556550
if ((filp->f_flags & O_NONBLOCK) ||
@@ -602,11 +596,21 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
602596
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
603597
if (wake_next_writer)
604598
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
605-
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
606-
int err = file_update_time(filp);
607-
if (err)
608-
ret = err;
609-
sb_end_write(file_inode(filp)->i_sb);
599+
return ret;
600+
}
601+
602+
static ssize_t
603+
fifo_pipe_write(struct kiocb *iocb, struct iov_iter *from)
604+
{
605+
int ret = anon_pipe_write(iocb, from);
606+
if (ret > 0) {
607+
struct file *filp = iocb->ki_filp;
608+
if (sb_start_write_trylock(file_inode(filp)->i_sb)) {
609+
int err = file_update_time(filp);
610+
if (err)
611+
ret = err;
612+
sb_end_write(file_inode(filp)->i_sb);
613+
}
610614
}
611615
return ret;
612616
}
@@ -853,8 +857,10 @@ void free_pipe_info(struct pipe_inode_info *pipe)
853857
if (pipe->watch_queue)
854858
put_watch_queue(pipe->watch_queue);
855859
#endif
856-
if (pipe->tmp_page)
857-
__free_page(pipe->tmp_page);
860+
for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
861+
if (pipe->tmp_page[i])
862+
__free_page(pipe->tmp_page[i]);
863+
}
858864
kfree(pipe->bufs);
859865
kfree(pipe);
860866
}
@@ -874,6 +880,8 @@ static const struct dentry_operations pipefs_dentry_operations = {
874880
.d_dname = pipefs_dname,
875881
};
876882

883+
static const struct file_operations pipeanon_fops;
884+
877885
static struct inode * get_pipe_inode(void)
878886
{
879887
struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
@@ -891,7 +899,7 @@ static struct inode * get_pipe_inode(void)
891899
inode->i_pipe = pipe;
892900
pipe->files = 2;
893901
pipe->readers = pipe->writers = 1;
894-
inode->i_fop = &pipefifo_fops;
902+
inode->i_fop = &pipeanon_fops;
895903

896904
/*
897905
* Mark the inode dirty from the very beginning,
@@ -934,7 +942,7 @@ int create_pipe_files(struct file **res, int flags)
934942

935943
f = alloc_file_pseudo(inode, pipe_mnt, "",
936944
O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
937-
&pipefifo_fops);
945+
&pipeanon_fops);
938946
if (IS_ERR(f)) {
939947
free_pipe_info(inode->i_pipe);
940948
iput(inode);
@@ -945,7 +953,7 @@ int create_pipe_files(struct file **res, int flags)
945953
f->f_pipe = 0;
946954

947955
res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
948-
&pipefifo_fops);
956+
&pipeanon_fops);
949957
if (IS_ERR(res[0])) {
950958
put_pipe_info(inode, inode->i_pipe);
951959
fput(f);
@@ -1109,8 +1117,8 @@ static void wake_up_partner(struct pipe_inode_info *pipe)
11091117

11101118
static int fifo_open(struct inode *inode, struct file *filp)
11111119
{
1120+
bool is_pipe = inode->i_fop == &pipeanon_fops;
11121121
struct pipe_inode_info *pipe;
1113-
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
11141122
int ret;
11151123

11161124
filp->f_pipe = 0;
@@ -1234,8 +1242,19 @@ static int fifo_open(struct inode *inode, struct file *filp)
12341242

12351243
const struct file_operations pipefifo_fops = {
12361244
.open = fifo_open,
1237-
.read_iter = pipe_read,
1238-
.write_iter = pipe_write,
1245+
.read_iter = fifo_pipe_read,
1246+
.write_iter = fifo_pipe_write,
1247+
.poll = pipe_poll,
1248+
.unlocked_ioctl = pipe_ioctl,
1249+
.release = pipe_release,
1250+
.fasync = pipe_fasync,
1251+
.splice_write = iter_file_splice_write,
1252+
};
1253+
1254+
static const struct file_operations pipeanon_fops = {
1255+
.open = fifo_open,
1256+
.read_iter = anon_pipe_read,
1257+
.write_iter = anon_pipe_write,
12391258
.poll = pipe_poll,
12401259
.unlocked_ioctl = pipe_ioctl,
12411260
.release = pipe_release,
@@ -1271,6 +1290,10 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
12711290
struct pipe_buffer *bufs;
12721291
unsigned int head, tail, mask, n;
12731292

1293+
/* nr_slots larger than limits of pipe->{head,tail} */
1294+
if (unlikely(nr_slots > (pipe_index_t)-1u))
1295+
return -EINVAL;
1296+
12741297
bufs = kcalloc(nr_slots, sizeof(*bufs),
12751298
GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
12761299
if (unlikely(!bufs))
@@ -1390,7 +1413,9 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
13901413
{
13911414
struct pipe_inode_info *pipe = file->private_data;
13921415

1393-
if (file->f_op != &pipefifo_fops || !pipe)
1416+
if (!pipe)
1417+
return NULL;
1418+
if (file->f_op != &pipefifo_fops && file->f_op != &pipeanon_fops)
13941419
return NULL;
13951420
if (for_splice && pipe_has_watch_queue(pipe))
13961421
return NULL;

0 commit comments

Comments
 (0)