331 lines
9.2 KiB
Diff
331 lines
9.2 KiB
Diff
From: Nick Piggin <npiggin@suse.de>
|
|
Subject: SLAB_DESTROY_BY_RCU for file slab
|
|
Patch-mainline: not yet
|
|
|
|
Use SLAB_DESTROY_BY_RCU for file slab cache. Ensure we have the correct
|
|
object by using a spinlock to protect the refcount rather than having it
|
|
atomic (problem with it being atomic is having to release the last ref
|
|
on a file we have incorrectly picked up a reference to).
|
|
|
|
This improves single threaded repeated open/close performance by 28% by
|
|
avoiding the full RCU cycle.
|
|
|
|
Signed-off-by: Nick Piggin <npiggin@suse.de>
|
|
---
|
|
drivers/net/ppp_generic.c | 4 +-
|
|
drivers/scsi/osst.c | 2 -
|
|
drivers/scsi/st.c | 2 -
|
|
fs/aio.c | 4 +-
|
|
fs/file_table.c | 70 +++++++++++++++++++++++-----------------------
|
|
fs/open.c | 2 -
|
|
include/linux/fs.h | 25 +++++++++++++---
|
|
kernel/perf_event.c | 4 +-
|
|
net/sched/sch_atm.c | 4 +-
|
|
net/unix/garbage.c | 2 -
|
|
10 files changed, 69 insertions(+), 50 deletions(-)
|
|
|
|
--- a/drivers/net/ppp_generic.c
|
|
+++ b/drivers/net/ppp_generic.c
|
|
@@ -590,12 +590,12 @@ static long ppp_ioctl(struct file *file,
|
|
if (file == ppp->owner)
|
|
ppp_shutdown_interface(ppp);
|
|
}
|
|
- if (atomic_long_read(&file->f_count) <= 2) {
|
|
+ if (file->f_count <= 2) {
|
|
ppp_release(NULL, file);
|
|
err = 0;
|
|
} else
|
|
printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%ld\n",
|
|
- atomic_long_read(&file->f_count));
|
|
+ file->f_count);
|
|
unlock_kernel();
|
|
return err;
|
|
}
|
|
--- a/drivers/scsi/osst.c
|
|
+++ b/drivers/scsi/osst.c
|
|
@@ -4824,7 +4824,7 @@ static int os_scsi_tape_flush(struct fil
|
|
struct osst_request * SRpnt = NULL;
|
|
char * name = tape_name(STp);
|
|
|
|
- if (file_count(filp) > 1)
|
|
+ if (filp->f_count > 1)
|
|
return 0;
|
|
|
|
if ((STps->rw == ST_WRITING || STp->dirty) && !STp->pos_unknown) {
|
|
--- a/drivers/scsi/st.c
|
|
+++ b/drivers/scsi/st.c
|
|
@@ -1272,7 +1272,7 @@ static int st_flush(struct file *filp, f
|
|
struct st_partstat *STps = &(STp->ps[STp->partition]);
|
|
char *name = tape_name(STp);
|
|
|
|
- if (file_count(filp) > 1)
|
|
+ if (filp->f_count > 1)
|
|
return 0;
|
|
|
|
if (STps->rw == ST_WRITING && !STp->pos_unknown) {
|
|
--- a/fs/aio.c
|
|
+++ b/fs/aio.c
|
|
@@ -545,7 +545,7 @@ static void aio_fput_routine(struct work
|
|
static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
|
|
{
|
|
dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
|
|
- req, atomic_long_read(&req->ki_filp->f_count));
|
|
+ req, req->ki_filp->f_count);
|
|
|
|
assert_spin_locked(&ctx->ctx_lock);
|
|
|
|
@@ -563,7 +563,7 @@ static int __aio_put_req(struct kioctx *
|
|
* we would not be holding the last reference to the file*, so
|
|
* this function will be executed w/out any aio kthread wakeup.
|
|
*/
|
|
- if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
|
|
+ if (unlikely(file_dec_and_test(req->ki_filp))) {
|
|
get_ioctx(ctx);
|
|
spin_lock(&fput_lock);
|
|
list_add(&req->ki_list, &fput_head);
|
|
--- a/fs/file_table.c
|
|
+++ b/fs/file_table.c
|
|
@@ -40,19 +40,12 @@ static struct kmem_cache *filp_cachep __
|
|
|
|
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
|
|
|
|
-static inline void file_free_rcu(struct rcu_head *head)
|
|
-{
|
|
- struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
|
|
-
|
|
- put_cred(f->f_cred);
|
|
- kmem_cache_free(filp_cachep, f);
|
|
-}
|
|
-
|
|
static inline void file_free(struct file *f)
|
|
{
|
|
percpu_counter_dec(&nr_files);
|
|
file_check_state(f);
|
|
- call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
|
|
+ put_cred(f->f_cred);
|
|
+ kmem_cache_free(filp_cachep, f);
|
|
}
|
|
|
|
/*
|
|
@@ -127,7 +120,7 @@ struct file *get_empty_filp(void)
|
|
goto fail_sec;
|
|
|
|
INIT_LIST_HEAD(&f->f_u.fu_list);
|
|
- atomic_long_set(&f->f_count, 1);
|
|
+ f->f_count = 1;
|
|
rwlock_init(&f->f_owner.lock);
|
|
f->f_cred = get_cred(cred);
|
|
spin_lock_init(&f->f_lock);
|
|
@@ -196,7 +189,7 @@ EXPORT_SYMBOL(alloc_file);
|
|
|
|
void fput(struct file *file)
|
|
{
|
|
- if (atomic_long_dec_and_test(&file->f_count))
|
|
+ if (unlikely(file_dec_and_test(file)))
|
|
__fput(file);
|
|
}
|
|
|
|
@@ -267,21 +260,38 @@ void __fput(struct file *file)
|
|
mntput(mnt);
|
|
}
|
|
|
|
-struct file *fget(unsigned int fd)
|
|
+static inline struct file *get_stable_file(struct files_struct *files, unsigned int fd)
|
|
{
|
|
+ struct fdtable *fdt;
|
|
struct file *file;
|
|
- struct files_struct *files = current->files;
|
|
|
|
rcu_read_lock();
|
|
- file = fcheck_files(files, fd);
|
|
- if (file) {
|
|
- if (!atomic_long_inc_not_zero(&file->f_count)) {
|
|
- /* File object ref couldn't be taken */
|
|
- rcu_read_unlock();
|
|
- return NULL;
|
|
+ fdt = files_fdtable(files);
|
|
+ if (likely(fd < fdt->max_fds)) {
|
|
+ file = rcu_dereference(fdt->fd[fd]);
|
|
+ if (file) {
|
|
+ spin_lock(&file->f_lock);
|
|
+ if (unlikely(file != fdt->fd[fd] || !file->f_count)) {
|
|
+ spin_unlock(&file->f_lock);
|
|
+ file = NULL;
|
|
+ goto out;
|
|
+ }
|
|
+ file->f_count++;
|
|
+ spin_unlock(&file->f_lock);
|
|
}
|
|
- }
|
|
+ } else
|
|
+ file = NULL;
|
|
+out:
|
|
rcu_read_unlock();
|
|
+ return file;
|
|
+}
|
|
+
|
|
+struct file *fget(unsigned int fd)
|
|
+{
|
|
+ struct file *file;
|
|
+ struct files_struct *files = current->files;
|
|
+
|
|
+ file = get_stable_file(files, fd);
|
|
|
|
return file;
|
|
}
|
|
@@ -300,20 +310,12 @@ struct file *fget_light(unsigned int fd,
|
|
struct file *file;
|
|
struct files_struct *files = current->files;
|
|
|
|
- *fput_needed = 0;
|
|
if (likely((atomic_read(&files->count) == 1))) {
|
|
+ *fput_needed = 0;
|
|
file = fcheck_files(files, fd);
|
|
} else {
|
|
- rcu_read_lock();
|
|
- file = fcheck_files(files, fd);
|
|
- if (file) {
|
|
- if (atomic_long_inc_not_zero(&file->f_count))
|
|
- *fput_needed = 1;
|
|
- else
|
|
- /* Didn't get the reference, someone's freed */
|
|
- file = NULL;
|
|
- }
|
|
- rcu_read_unlock();
|
|
+ *fput_needed = 1;
|
|
+ file = get_stable_file(files, fd);
|
|
}
|
|
|
|
return file;
|
|
@@ -322,7 +324,7 @@ struct file *fget_light(unsigned int fd,
|
|
|
|
void put_filp(struct file *file)
|
|
{
|
|
- if (atomic_long_dec_and_test(&file->f_count)) {
|
|
+ if (unlikely(file_dec_and_test(file))) {
|
|
security_file_free(file);
|
|
file_kill(file);
|
|
file_free(file);
|
|
@@ -388,7 +390,7 @@ retry:
|
|
struct vfsmount *mnt;
|
|
if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
|
|
continue;
|
|
- if (!file_count(f))
|
|
+ if (!f->f_count)
|
|
continue;
|
|
if (!(f->f_mode & FMODE_WRITE))
|
|
continue;
|
|
@@ -414,7 +416,7 @@ void __init files_init(unsigned long mem
|
|
int n;
|
|
|
|
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
|
|
- SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
|
|
+ SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU | SLAB_PANIC, NULL);
|
|
|
|
/*
|
|
* One file with associated inode and dcache is very roughly 1K.
|
|
--- a/fs/open.c
|
|
+++ b/fs/open.c
|
|
@@ -1114,7 +1114,7 @@ int filp_close(struct file *filp, fl_own
|
|
{
|
|
int retval = 0;
|
|
|
|
- if (!file_count(filp)) {
|
|
+ if (unlikely(!filp->f_count)) {
|
|
printk(KERN_ERR "VFS: Close: file count is 0\n");
|
|
return 0;
|
|
}
|
|
--- a/include/linux/fs.h
|
|
+++ b/include/linux/fs.h
|
|
@@ -920,9 +920,10 @@ struct file {
|
|
#define f_dentry f_path.dentry
|
|
#define f_vfsmnt f_path.mnt
|
|
const struct file_operations *f_op;
|
|
- spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
|
|
- atomic_long_t f_count;
|
|
+ /* f_lock protects f_count, f_ep_links, f_flags, no IRQ */
|
|
+ spinlock_t f_lock;
|
|
unsigned int f_flags;
|
|
+ long f_count;
|
|
fmode_t f_mode;
|
|
loff_t f_pos;
|
|
struct fown_struct f_owner;
|
|
@@ -949,8 +950,24 @@ extern spinlock_t files_lock;
|
|
#define file_list_lock() spin_lock(&files_lock);
|
|
#define file_list_unlock() spin_unlock(&files_lock);
|
|
|
|
-#define get_file(x) atomic_long_inc(&(x)->f_count)
|
|
-#define file_count(x) atomic_long_read(&(x)->f_count)
|
|
+static inline void get_file(struct file *f)
|
|
+{
|
|
+ spin_lock(&f->f_lock);
|
|
+ f->f_count++;
|
|
+ spin_unlock(&f->f_lock);
|
|
+}
|
|
+
|
|
+static inline int file_dec_and_test(struct file *f)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ spin_lock(&f->f_lock);
|
|
+ f->f_count--;
|
|
+ ret = (f->f_count == 0);
|
|
+ spin_unlock(&f->f_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
|
|
#ifdef CONFIG_DEBUG_WRITECOUNT
|
|
static inline void file_take_write(struct file *f)
|
|
--- a/kernel/perf_event.c
|
|
+++ b/kernel/perf_event.c
|
|
@@ -4617,7 +4617,7 @@ static int perf_event_set_output(struct
|
|
if (event->data)
|
|
goto out;
|
|
|
|
- atomic_long_inc(&output_file->f_count);
|
|
+ get_file(output_file);
|
|
|
|
set:
|
|
mutex_lock(&event->mmap_mutex);
|
|
@@ -4878,7 +4878,7 @@ inherit_event(struct perf_event *parent_
|
|
* we are in the parent and we know that the filp still
|
|
* exists and has a nonzero count:
|
|
*/
|
|
- atomic_long_inc(&parent_event->filp->f_count);
|
|
+ get_file(parent_event->filp);
|
|
|
|
/*
|
|
* Link this into the parent event's child list
|
|
--- a/net/sched/sch_atm.c
|
|
+++ b/net/sched/sch_atm.c
|
|
@@ -164,7 +164,7 @@ static void atm_tc_put(struct Qdisc *sch
|
|
tcf_destroy_chain(&flow->filter_list);
|
|
if (flow->sock) {
|
|
pr_debug("atm_tc_put: f_count %ld\n",
|
|
- file_count(flow->sock->file));
|
|
+ flow->sock->file->f_count);
|
|
flow->vcc->pop = flow->old_pop;
|
|
sockfd_put(flow->sock);
|
|
}
|
|
@@ -260,7 +260,7 @@ static int atm_tc_change(struct Qdisc *s
|
|
sock = sockfd_lookup(fd, &error);
|
|
if (!sock)
|
|
return error; /* f_count++ */
|
|
- pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
|
|
+ pr_debug("atm_tc_change: f_count %ld\n", sock->file->f_count);
|
|
if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
|
|
error = -EPROTOTYPE;
|
|
goto err_out;
|
|
--- a/net/unix/garbage.c
|
|
+++ b/net/unix/garbage.c
|
|
@@ -311,7 +311,7 @@ void unix_gc(void)
|
|
long total_refs;
|
|
long inflight_refs;
|
|
|
|
- total_refs = file_count(u->sk.sk_socket->file);
|
|
+ total_refs = u->sk.sk_socket->file->f_count;
|
|
inflight_refs = atomic_long_read(&u->inflight);
|
|
|
|
BUG_ON(inflight_refs < 1);
|