340 lines
9.6 KiB
Diff
340 lines
9.6 KiB
Diff
|
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||
|
Subject: [PATCH 27/31] nfs: enable swap on NFS
|
||
|
Patch-mainline: not yet
|
||
|
|
||
|
Implement all the new swapfile a_ops for NFS. This will set the NFS socket to
|
||
|
SOCK_MEMALLOC and run socket reconnect under PF_MEMALLOC, as well as re-set
|
||
|
SOCK_MEMALLOC before engaging the protocol ->connect() method.
|
||
|
|
||
|
PF_MEMALLOC should allow the allocation of struct socket and related objects
|
||
|
and the early (re)setting of SOCK_MEMALLOC should allow us to receive the
|
||
|
packets required for the TCP connection buildup.
|
||
|
|
||
|
(swapping continues over a server reset during heavy network traffic)
|
||
|
|
||
|
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
|
||
|
Signed-off-by: Suresh Jayaraman <sjayaraman@suse.de>
|
||
|
---
|
||
|
fs/nfs/Kconfig | 10 ++++++
|
||
|
fs/nfs/file.c | 18 +++++++++++
|
||
|
fs/nfs/write.c | 22 +++++++++++++
|
||
|
include/linux/nfs_fs.h | 2 +
|
||
|
include/linux/sunrpc/xprt.h | 5 ++-
|
||
|
net/sunrpc/Kconfig | 5 +++
|
||
|
net/sunrpc/sched.c | 9 ++++-
|
||
|
net/sunrpc/xprtsock.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
|
||
|
8 files changed, 138 insertions(+), 3 deletions(-)
|
||
|
|
||
|
--- a/fs/nfs/Kconfig
|
||
|
+++ b/fs/nfs/Kconfig
|
||
|
@@ -74,6 +74,16 @@ config NFS_V4
|
||
|
|
||
|
If unsure, say N.
|
||
|
|
||
|
+config NFS_SWAP
|
||
|
+ bool "Provide swap over NFS support"
|
||
|
+ default n
|
||
|
+ depends on NFS_FS
|
||
|
+ select SUNRPC_SWAP
|
||
|
+ help
|
||
|
+ This option enables swapon to work on files located on NFS mounts.
|
||
|
+
|
||
|
+ For more details, see Documentation/network-swap.txt
|
||
|
+
|
||
|
config NFS_V4_1
|
||
|
bool "NFS client support for NFSv4.1 (DEVELOPER ONLY)"
|
||
|
depends on NFS_V4 && EXPERIMENTAL
|
||
|
--- a/fs/nfs/file.c
|
||
|
+++ b/fs/nfs/file.c
|
||
|
@@ -519,6 +519,18 @@ static int nfs_launder_page(struct page
|
||
|
return nfs_wb_page(inode, page);
|
||
|
}
|
||
|
|
||
|
+#ifdef CONFIG_NFS_SWAP
|
||
|
+static int nfs_swapon(struct file *file)
|
||
|
+{
|
||
|
+ return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1);
|
||
|
+}
|
||
|
+
|
||
|
+static int nfs_swapoff(struct file *file)
|
||
|
+{
|
||
|
+ return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
const struct address_space_operations nfs_file_aops = {
|
||
|
.readpage = nfs_readpage,
|
||
|
.readpages = nfs_readpages,
|
||
|
@@ -533,6 +545,12 @@ const struct address_space_operations nf
|
||
|
.migratepage = nfs_migrate_page,
|
||
|
.launder_page = nfs_launder_page,
|
||
|
.error_remove_page = generic_error_remove_page,
|
||
|
+#ifdef CONFIG_NFS_SWAP
|
||
|
+ .swapon = nfs_swapon,
|
||
|
+ .swapoff = nfs_swapoff,
|
||
|
+ .swap_out = nfs_swap_out,
|
||
|
+ .swap_in = nfs_readpage,
|
||
|
+#endif
|
||
|
};
|
||
|
|
||
|
/*
|
||
|
--- a/fs/nfs/write.c
|
||
|
+++ b/fs/nfs/write.c
|
||
|
@@ -356,6 +356,28 @@ int nfs_writepage(struct page *page, str
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
+static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
|
||
|
+ unsigned int offset, unsigned int count);
|
||
|
+
|
||
|
+int nfs_swap_out(struct file *file, struct page *page,
|
||
|
+ struct writeback_control *wbc)
|
||
|
+{
|
||
|
+ struct nfs_open_context *ctx = nfs_file_open_context(file);
|
||
|
+ int status;
|
||
|
+
|
||
|
+ status = nfs_writepage_setup(ctx, page, 0, nfs_page_length(page));
|
||
|
+ if (status < 0) {
|
||
|
+ nfs_set_pageerror(page);
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ status = nfs_writepage_locked(page, wbc);
|
||
|
+
|
||
|
+out:
|
||
|
+ unlock_page(page);
|
||
|
+ return status;
|
||
|
+}
|
||
|
+
|
||
|
static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
|
||
|
{
|
||
|
int ret;
|
||
|
--- a/include/linux/nfs_fs.h
|
||
|
+++ b/include/linux/nfs_fs.h
|
||
|
@@ -469,6 +469,8 @@ extern int nfs_writepages(struct addres
|
||
|
extern int nfs_flush_incompatible(struct file *file, struct page *page);
|
||
|
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
|
||
|
extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
|
||
|
+extern int nfs_swap_out(struct file *file, struct page *page,
|
||
|
+ struct writeback_control *wbc);
|
||
|
|
||
|
/*
|
||
|
* Try to write back everything synchronously (but check the
|
||
|
--- a/include/linux/sunrpc/xprt.h
|
||
|
+++ b/include/linux/sunrpc/xprt.h
|
||
|
@@ -168,7 +168,9 @@ struct rpc_xprt {
|
||
|
unsigned int max_reqs; /* total slots */
|
||
|
unsigned long state; /* transport state */
|
||
|
unsigned char shutdown : 1, /* being shut down */
|
||
|
- resvport : 1; /* use a reserved port */
|
||
|
+ resvport : 1, /* use a reserved port */
|
||
|
+ swapper : 1; /* we're swapping over this
|
||
|
+ transport */
|
||
|
unsigned int bind_index; /* bind function index */
|
||
|
|
||
|
/*
|
||
|
@@ -302,6 +304,7 @@ void xprt_release_rqst_cong(struct rpc
|
||
|
void xprt_disconnect_done(struct rpc_xprt *xprt);
|
||
|
void xprt_force_disconnect(struct rpc_xprt *xprt);
|
||
|
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
|
||
|
+int xs_swapper(struct rpc_xprt *xprt, int enable);
|
||
|
|
||
|
/*
|
||
|
* Reserved bit positions in xprt->state
|
||
|
--- a/net/sunrpc/Kconfig
|
||
|
+++ b/net/sunrpc/Kconfig
|
||
|
@@ -17,6 +17,11 @@ config SUNRPC_XPRT_RDMA
|
||
|
|
||
|
If unsure, say N.
|
||
|
|
||
|
+config SUNRPC_SWAP
|
||
|
+ def_bool n
|
||
|
+ depends on SUNRPC
|
||
|
+ select NETVM
|
||
|
+
|
||
|
config RPCSEC_GSS_KRB5
|
||
|
tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
|
||
|
depends on SUNRPC && EXPERIMENTAL
|
||
|
--- a/net/sunrpc/sched.c
|
||
|
+++ b/net/sunrpc/sched.c
|
||
|
@@ -747,7 +747,10 @@ static void rpc_async_schedule(struct wo
|
||
|
void *rpc_malloc(struct rpc_task *task, size_t size)
|
||
|
{
|
||
|
struct rpc_buffer *buf;
|
||
|
- gfp_t gfp = RPC_IS_SWAPPER(task) ? GFP_ATOMIC : GFP_NOWAIT;
|
||
|
+ gfp_t gfp = GFP_NOWAIT;
|
||
|
+
|
||
|
+ if (RPC_IS_SWAPPER(task))
|
||
|
+ gfp |= __GFP_MEMALLOC;
|
||
|
|
||
|
size += sizeof(struct rpc_buffer);
|
||
|
if (size <= RPC_BUFFER_MAXSIZE)
|
||
|
@@ -818,6 +821,8 @@ static void rpc_init_task(struct rpc_tas
|
||
|
kref_get(&task->tk_client->cl_kref);
|
||
|
if (task->tk_client->cl_softrtry)
|
||
|
task->tk_flags |= RPC_TASK_SOFT;
|
||
|
+ if (task->tk_client->cl_xprt->swapper)
|
||
|
+ task->tk_flags |= RPC_TASK_SWAPPER;
|
||
|
}
|
||
|
|
||
|
if (task->tk_ops->rpc_call_prepare != NULL)
|
||
|
@@ -843,7 +848,7 @@ static void rpc_init_task(struct rpc_tas
|
||
|
static struct rpc_task *
|
||
|
rpc_alloc_task(void)
|
||
|
{
|
||
|
- return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
|
||
|
+ return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOIO);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
--- a/net/sunrpc/xprtsock.c
|
||
|
+++ b/net/sunrpc/xprtsock.c
|
||
|
@@ -1642,6 +1642,57 @@ static inline void xs_reclassify_socket6
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
+#ifdef CONFIG_SUNRPC_SWAP
|
||
|
+static void xs_set_memalloc(struct rpc_xprt *xprt)
|
||
|
+{
|
||
|
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
|
||
|
+ xprt);
|
||
|
+
|
||
|
+ if (xprt->swapper)
|
||
|
+ sk_set_memalloc(transport->inet);
|
||
|
+}
|
||
|
+
|
||
|
+#define RPC_BUF_RESERVE_PAGES \
|
||
|
+ kmalloc_estimate_objs(sizeof(struct rpc_rqst), GFP_KERNEL, RPC_MAX_SLOT_TABLE)
|
||
|
+#define RPC_RESERVE_PAGES (RPC_BUF_RESERVE_PAGES + TX_RESERVE_PAGES)
|
||
|
+
|
||
|
+/**
|
||
|
+ * xs_swapper - Tag this transport as being used for swap.
|
||
|
+ * @xprt: transport to tag
|
||
|
+ * @enable: enable/disable
|
||
|
+ *
|
||
|
+ */
|
||
|
+int xs_swapper(struct rpc_xprt *xprt, int enable)
|
||
|
+{
|
||
|
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
|
||
|
+ xprt);
|
||
|
+ int err = 0;
|
||
|
+
|
||
|
+ if (enable) {
|
||
|
+ /*
|
||
|
+ * keep one extra sock reference so the reserve won't dip
|
||
|
+ * when the socket gets reconnected.
|
||
|
+ */
|
||
|
+ err = sk_adjust_memalloc(1, RPC_RESERVE_PAGES);
|
||
|
+ if (!err) {
|
||
|
+ xprt->swapper = 1;
|
||
|
+ xs_set_memalloc(xprt);
|
||
|
+ }
|
||
|
+ } else if (xprt->swapper) {
|
||
|
+ xprt->swapper = 0;
|
||
|
+ sk_clear_memalloc(transport->inet);
|
||
|
+ sk_adjust_memalloc(-1, -RPC_RESERVE_PAGES);
|
||
|
+ }
|
||
|
+
|
||
|
+ return err;
|
||
|
+}
|
||
|
+EXPORT_SYMBOL_GPL(xs_swapper);
|
||
|
+#else
|
||
|
+static void xs_set_memalloc(struct rpc_xprt *xprt)
|
||
|
+{
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
|
||
|
{
|
||
|
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
|
||
|
@@ -1666,6 +1717,8 @@ static void xs_udp_finish_connecting(str
|
||
|
transport->sock = sock;
|
||
|
transport->inet = sk;
|
||
|
|
||
|
+ xs_set_memalloc(xprt);
|
||
|
+
|
||
|
write_unlock_bh(&sk->sk_callback_lock);
|
||
|
}
|
||
|
xs_udp_do_set_buffer_size(xprt);
|
||
|
@@ -1683,11 +1736,15 @@ static void xs_udp_connect_worker4(struc
|
||
|
container_of(work, struct sock_xprt, connect_worker.work);
|
||
|
struct rpc_xprt *xprt = &transport->xprt;
|
||
|
struct socket *sock = transport->sock;
|
||
|
+ unsigned long pflags = current->flags;
|
||
|
int err, status = -EIO;
|
||
|
|
||
|
if (xprt->shutdown)
|
||
|
goto out;
|
||
|
|
||
|
+ if (xprt->swapper)
|
||
|
+ current->flags |= PF_MEMALLOC;
|
||
|
+
|
||
|
/* Start by resetting any existing state */
|
||
|
xs_reset_transport(transport);
|
||
|
|
||
|
@@ -1714,6 +1771,7 @@ static void xs_udp_connect_worker4(struc
|
||
|
out:
|
||
|
xprt_clear_connecting(xprt);
|
||
|
xprt_wake_pending_tasks(xprt, status);
|
||
|
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
@@ -1728,11 +1786,15 @@ static void xs_udp_connect_worker6(struc
|
||
|
container_of(work, struct sock_xprt, connect_worker.work);
|
||
|
struct rpc_xprt *xprt = &transport->xprt;
|
||
|
struct socket *sock = transport->sock;
|
||
|
+ unsigned long pflags = current->flags;
|
||
|
int err, status = -EIO;
|
||
|
|
||
|
if (xprt->shutdown)
|
||
|
goto out;
|
||
|
|
||
|
+ if (xprt->swapper)
|
||
|
+ current->flags |= PF_MEMALLOC;
|
||
|
+
|
||
|
/* Start by resetting any existing state */
|
||
|
xs_reset_transport(transport);
|
||
|
|
||
|
@@ -1759,6 +1821,7 @@ static void xs_udp_connect_worker6(struc
|
||
|
out:
|
||
|
xprt_clear_connecting(xprt);
|
||
|
xprt_wake_pending_tasks(xprt, status);
|
||
|
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
@@ -1833,6 +1896,8 @@ static int xs_tcp_finish_connecting(stru
|
||
|
if (!xprt_bound(xprt))
|
||
|
return -ENOTCONN;
|
||
|
|
||
|
+ xs_set_memalloc(xprt);
|
||
|
+
|
||
|
/* Tell the socket layer to start connecting... */
|
||
|
xprt->stat.connect_count++;
|
||
|
xprt->stat.connect_start = jiffies;
|
||
|
@@ -1853,11 +1918,15 @@ static void xs_tcp_setup_socket(struct r
|
||
|
struct sock_xprt *))
|
||
|
{
|
||
|
struct socket *sock = transport->sock;
|
||
|
+ unsigned long pflags = current->flags;
|
||
|
int status = -EIO;
|
||
|
|
||
|
if (xprt->shutdown)
|
||
|
goto out;
|
||
|
|
||
|
+ if (xprt->swapper)
|
||
|
+ current->flags |= PF_MEMALLOC;
|
||
|
+
|
||
|
if (!sock) {
|
||
|
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
|
||
|
sock = create_sock(xprt, transport);
|
||
|
@@ -1918,6 +1987,7 @@ out_eagain:
|
||
|
out:
|
||
|
xprt_clear_connecting(xprt);
|
||
|
xprt_wake_pending_tasks(xprt, status);
|
||
|
+ tsk_restore_flags(current, pflags, PF_MEMALLOC);
|
||
|
}
|
||
|
|
||
|
static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
|