1244 lines
34 KiB
Diff
1244 lines
34 KiB
Diff
From: Mark Fasheh <mfasheh@suse.com>
|
|
Date: Thu, 19 Nov 2009 15:15:38 -0800
|
|
Subject: ocfs2: allocation reservations
|
|
Patch-mainline: 2.6.33?
|
|
References: bnc#501563 FATE#307247
|
|
|
|
ocfs2: allocation reservations
|
|
|
|
This patch improves Ocfs2 allocation policy by allowing an inode to
|
|
reserve a portion of the local alloc bitmap for itself. Allocation windows
|
|
are advisory in that they won't block use of that portion of the bitmap.
|
|
This makes dealing with corner cases much easier - we can always fall back
|
|
to previous policy.
|
|
|
|
Reservation windows are represented internally by a red-black tree. Within
|
|
that tree, each node represents the reservation window of one inode. When
|
|
new data is written, we try to allocate from the window first. If that
|
|
allocation fails, we fall back to our old heuristics and a new window is
|
|
computed from the results. Allocation windows will also be extended if
|
|
allocation from them succeeds.
|
|
|
|
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
|
|
|
|
---
|
|
Documentation/filesystems/ocfs2.txt | 3
|
|
fs/ocfs2/Makefile | 1
|
|
fs/ocfs2/aops.c | 2
|
|
fs/ocfs2/cluster/masklog.c | 1
|
|
fs/ocfs2/cluster/masklog.h | 1
|
|
fs/ocfs2/dir.c | 2
|
|
fs/ocfs2/file.c | 19 +
|
|
fs/ocfs2/inode.c | 4
|
|
fs/ocfs2/inode.h | 2
|
|
fs/ocfs2/localalloc.c | 39 +-
|
|
fs/ocfs2/ocfs2.h | 5
|
|
fs/ocfs2/reservations.c | 668 ++++++++++++++++++++++++++++++++++++
|
|
fs/ocfs2/reservations.h | 151 ++++++++
|
|
fs/ocfs2/suballoc.c | 1
|
|
fs/ocfs2/suballoc.h | 2
|
|
fs/ocfs2/super.c | 27 +
|
|
16 files changed, 922 insertions(+), 6 deletions(-)
|
|
|
|
--- a/Documentation/filesystems/ocfs2.txt
|
|
+++ b/Documentation/filesystems/ocfs2.txt
|
|
@@ -80,3 +80,6 @@ user_xattr (*) Enables Extended User Att
|
|
nouser_xattr Disables Extended User Attributes.
|
|
acl Enables POSIX Access Control Lists support.
|
|
noacl (*) Disables POSIX Access Control Lists support.
|
|
+resv_level=3 (*) Set how aggressive allocation reservations will be.
|
|
+ Valid values range from 0 (reservations off) to 6
|
|
+ (maximum space for reservations).
|
|
--- a/fs/ocfs2/Makefile
|
|
+++ b/fs/ocfs2/Makefile
|
|
@@ -29,6 +29,7 @@ ocfs2-objs := \
|
|
mmap.o \
|
|
namei.o \
|
|
refcounttree.o \
|
|
+ reservations.o \
|
|
resize.o \
|
|
slot_map.o \
|
|
suballoc.o \
|
|
--- a/fs/ocfs2/aops.c
|
|
+++ b/fs/ocfs2/aops.c
|
|
@@ -1735,6 +1735,8 @@ int ocfs2_write_begin_nolock(struct addr
|
|
goto out;
|
|
}
|
|
|
|
+ data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
|
|
+
|
|
credits = ocfs2_calc_extend_credits(inode->i_sb,
|
|
&di->id2.i_list,
|
|
clusters_to_alloc);
|
|
--- a/fs/ocfs2/cluster/masklog.c
|
|
+++ b/fs/ocfs2/cluster/masklog.c
|
|
@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[
|
|
define_mask(ERROR),
|
|
define_mask(NOTICE),
|
|
define_mask(KTHREAD),
|
|
+ define_mask(RESERVATIONS),
|
|
};
|
|
|
|
static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
|
|
--- a/fs/ocfs2/cluster/masklog.h
|
|
+++ b/fs/ocfs2/cluster/masklog.h
|
|
@@ -119,6 +119,7 @@
|
|
#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
|
|
#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
|
|
#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
|
|
+#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
|
|
|
|
#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
|
|
#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
|
|
--- a/fs/ocfs2/dir.c
|
|
+++ b/fs/ocfs2/dir.c
|
|
@@ -2991,6 +2991,7 @@ static int ocfs2_expand_inline_dir(struc
|
|
* if we only get one now, that's enough to continue. The rest
|
|
* will be claimed after the conversion to extents.
|
|
*/
|
|
+ data_ac->ac_resv = &oi->ip_la_data_resv;
|
|
ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
|
|
if (ret) {
|
|
mlog_errno(ret);
|
|
@@ -3368,6 +3369,7 @@ static int ocfs2_extend_dir(struct ocfs2
|
|
mlog_errno(status);
|
|
goto bail;
|
|
}
|
|
+ data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
|
|
|
|
credits = ocfs2_calc_extend_credits(sb, el, 1);
|
|
} else {
|
|
--- a/fs/ocfs2/file.c
|
|
+++ b/fs/ocfs2/file.c
|
|
@@ -147,6 +147,7 @@ leave:
|
|
static int ocfs2_file_release(struct inode *inode, struct file *file)
|
|
{
|
|
struct ocfs2_inode_info *oi = OCFS2_I(inode);
|
|
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
|
|
|
mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file,
|
|
file->f_path.dentry->d_name.len,
|
|
@@ -157,6 +158,21 @@ static int ocfs2_file_release(struct ino
|
|
oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT;
|
|
spin_unlock(&oi->ip_lock);
|
|
|
|
+#if 0
|
|
+ /*
|
|
+ * Disable this for now. Keeping the reservation around a bit
|
|
+ * longer gives an improvement for workloads which rapidly do
|
|
+ * open()/write()/close() against a file.
|
|
+ */
|
|
+ if ((file->f_mode & FMODE_WRITE) &&
|
|
+ (atomic_read(&inode->i_writecount) == 1)) {
|
|
+ down_write(&oi->ip_alloc_sem);
|
|
+ ocfs2_resv_discard(&osb->osb_la_resmap,
|
|
+ &oi->ip_la_data_resv);
|
|
+ up_write(&oi->ip_alloc_sem);
|
|
+ }
|
|
+#endif
|
|
+
|
|
ocfs2_free_file_private(inode, file);
|
|
|
|
mlog_exit(0);
|
|
@@ -488,6 +504,9 @@ static int ocfs2_truncate_file(struct in
|
|
|
|
down_write(&OCFS2_I(inode)->ip_alloc_sem);
|
|
|
|
+ ocfs2_resv_discard(&osb->osb_la_resmap,
|
|
+ &OCFS2_I(inode)->ip_la_data_resv);
|
|
+
|
|
/*
|
|
* The inode lock forced other nodes to sync and drop their
|
|
* pages, which (correctly) happens even if we have a truncate
|
|
--- a/fs/ocfs2/inode.c
|
|
+++ b/fs/ocfs2/inode.c
|
|
@@ -1101,6 +1101,10 @@ void ocfs2_clear_inode(struct inode *ino
|
|
ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
|
|
ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
|
|
|
|
+ ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
|
|
+ &oi->ip_la_data_resv);
|
|
+ ocfs2_resv_init_once(&oi->ip_la_data_resv);
|
|
+
|
|
/* We very well may get a clear_inode before all an inodes
|
|
* metadata has hit disk. Of course, we can't drop any cluster
|
|
* locks until the journal has finished with it. The only
|
|
--- a/fs/ocfs2/inode.h
|
|
+++ b/fs/ocfs2/inode.h
|
|
@@ -70,6 +70,8 @@ struct ocfs2_inode_info
|
|
/* Only valid if the inode is the dir. */
|
|
u32 ip_last_used_slot;
|
|
u64 ip_last_used_group;
|
|
+
|
|
+ struct ocfs2_alloc_reservation ip_la_data_resv;
|
|
};
|
|
|
|
/*
|
|
--- a/fs/ocfs2/localalloc.c
|
|
+++ b/fs/ocfs2/localalloc.c
|
|
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(
|
|
|
|
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
|
|
struct ocfs2_dinode *alloc,
|
|
- u32 numbits);
|
|
+ u32 numbits,
|
|
+ struct ocfs2_alloc_reservation *resv);
|
|
|
|
static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
|
|
|
|
@@ -262,6 +263,8 @@ void ocfs2_shutdown_local_alloc(struct o
|
|
|
|
osb->local_alloc_state = OCFS2_LA_DISABLED;
|
|
|
|
+ ocfs2_resmap_uninit(&osb->osb_la_resmap);
|
|
+
|
|
main_bm_inode = ocfs2_get_system_file_inode(osb,
|
|
GLOBAL_BITMAP_SYSTEM_INODE,
|
|
OCFS2_INVALID_SLOT);
|
|
@@ -498,7 +501,7 @@ static int ocfs2_local_alloc_in_range(st
|
|
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
|
|
la = OCFS2_LOCAL_ALLOC(alloc);
|
|
|
|
- start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
|
|
+ start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted, NULL);
|
|
if (start == -1) {
|
|
mlog_errno(-ENOSPC);
|
|
return 0;
|
|
@@ -664,7 +667,8 @@ int ocfs2_claim_local_alloc_bits(struct
|
|
alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
|
|
la = OCFS2_LOCAL_ALLOC(alloc);
|
|
|
|
- start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
|
|
+ start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted,
|
|
+ ac->ac_resv);
|
|
if (start == -1) {
|
|
/* TODO: Shouldn't we just BUG here? */
|
|
status = -ENOSPC;
|
|
@@ -687,6 +691,9 @@ int ocfs2_claim_local_alloc_bits(struct
|
|
goto bail;
|
|
}
|
|
|
|
+ ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
|
|
+ bits_wanted);
|
|
+
|
|
while(bits_wanted--)
|
|
ocfs2_set_bit(start++, bitmap);
|
|
|
|
@@ -722,11 +729,13 @@ static u32 ocfs2_local_alloc_count_bits(
|
|
}
|
|
|
|
static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
|
|
- struct ocfs2_dinode *alloc,
|
|
- u32 numbits)
|
|
+ struct ocfs2_dinode *alloc,
|
|
+ u32 numbits,
|
|
+ struct ocfs2_alloc_reservation *resv)
|
|
{
|
|
int numfound, bitoff, left, startoff, lastzero;
|
|
void *bitmap = NULL;
|
|
+ struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
|
|
|
|
mlog_entry("(numbits wanted = %u)\n", numbits);
|
|
|
|
@@ -738,6 +747,20 @@ static int ocfs2_local_alloc_find_clear_
|
|
|
|
bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
|
|
|
|
+ /*
|
|
+ * Ask the reservations code first whether this request can be
|
|
+ * easily fulfilled. No errors here are fatal - if we didn't
|
|
+ * find the number of bits needed, we'll just take the slow
|
|
+ * path.
|
|
+ */
|
|
+ if (ocfs2_resmap_resv_bits(resmap, resv, bitmap, &bitoff, &numfound)
|
|
+ == 0) {
|
|
+ if (numfound >= numbits) {
|
|
+ numfound = numbits;
|
|
+ goto bail;
|
|
+ }
|
|
+ }
|
|
+
|
|
numfound = bitoff = startoff = 0;
|
|
lastzero = -1;
|
|
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
|
|
@@ -772,8 +795,10 @@ static int ocfs2_local_alloc_find_clear_
|
|
|
|
if (numfound == numbits)
|
|
bitoff = startoff - numfound;
|
|
- else
|
|
+ else {
|
|
+ numfound = 0;
|
|
bitoff = -1;
|
|
+ }
|
|
|
|
bail:
|
|
mlog_exit(bitoff);
|
|
@@ -1096,6 +1121,8 @@ retry_enospc:
|
|
memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
|
|
le16_to_cpu(la->la_size));
|
|
|
|
+ ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count);
|
|
+
|
|
mlog(0, "New window allocated:\n");
|
|
mlog(0, "window la_bm_off = %u\n",
|
|
OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
|
|
--- a/fs/ocfs2/ocfs2.h
|
|
+++ b/fs/ocfs2/ocfs2.h
|
|
@@ -47,6 +47,7 @@
|
|
/* For struct ocfs2_blockcheck_stats */
|
|
#include "blockcheck.h"
|
|
|
|
+#include "reservations.h"
|
|
|
|
/* Caching of metadata buffers */
|
|
|
|
@@ -349,6 +350,10 @@ struct ocfs2_super
|
|
|
|
u64 la_last_gd;
|
|
|
|
+ struct ocfs2_reservation_map osb_la_resmap;
|
|
+
|
|
+ unsigned int osb_resv_level;
|
|
+
|
|
/* Next three fields are for local node slot recovery during
|
|
* mount. */
|
|
int dirty;
|
|
--- /dev/null
|
|
+++ b/fs/ocfs2/reservations.c
|
|
@@ -0,0 +1,668 @@
|
|
+/* -*- mode: c; c-basic-offset: 8; -*-
|
|
+ * vim: noexpandtab sw=8 ts=8 sts=0:
|
|
+ *
|
|
+ * reservations.c
|
|
+ *
|
|
+ * Allocation reservations implementation
|
|
+ *
|
|
+ * Some code borrowed from fs/ext3/balloc.c and is:
|
|
+ *
|
|
+ * Copyright (C) 1992, 1993, 1994, 1995
|
|
+ * Remy Card (card@masi.ibp.fr)
|
|
+ * Laboratoire MASI - Institut Blaise Pascal
|
|
+ * Universite Pierre et Marie Curie (Paris VI)
|
|
+ *
|
|
+ * The rest is copyright (C) 2009 Novell. All rights reserved.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public
|
|
+ * License as published by the Free Software Foundation; either
|
|
+ * version 2 of the License, or (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ */
|
|
+
|
|
+#include <linux/fs.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/highmem.h>
|
|
+#include <linux/bitops.h>
|
|
+
|
|
+#define MLOG_MASK_PREFIX ML_RESERVATIONS
|
|
+#include <cluster/masklog.h>
|
|
+
|
|
+#include "ocfs2.h"
|
|
+
|
|
+#ifdef CONFIG_OCFS2_DEBUG_FS
|
|
+#define OCFS2_CHECK_RESERVATIONS
|
|
+#endif
|
|
+
|
|
+#define OCFS2_CHECK_RESERVATIONS
|
|
+
|
|
+
|
|
+DEFINE_SPINLOCK(resv_lock);
|
|
+
|
|
+#define OCFS2_MIN_RESV_WINDOW_BITS 8
|
|
+#define OCFS2_MAX_RESV_WINDOW_BITS 1024
|
|
+
|
|
+static unsigned int ocfs2_resv_window_bits(struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+ struct ocfs2_super *osb = resmap->m_osb;
|
|
+
|
|
+ mlog(0, "resv_level: %u\n", osb->osb_resv_level);
|
|
+
|
|
+ switch (osb->osb_resv_level) {
|
|
+ case 6:
|
|
+ return OCFS2_MAX_RESV_WINDOW_BITS;
|
|
+ case 5:
|
|
+ return 512;
|
|
+ case 4:
|
|
+ return 256;
|
|
+ case 3:
|
|
+ return 128;
|
|
+ case 2:
|
|
+ return 64;
|
|
+ }
|
|
+
|
|
+ return OCFS2_MIN_RESV_WINDOW_BITS;
|
|
+}
|
|
+
|
|
+static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ if (resv->r_len)
|
|
+ return resv->r_start + resv->r_len - 1;
|
|
+ return resv->r_start;
|
|
+}
|
|
+
|
|
+static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ return !!(resv->r_len == 0);
|
|
+}
|
|
+
|
|
+static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+ if (resmap->m_osb->osb_resv_level == 0)
|
|
+ return 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+ struct ocfs2_super *osb = resmap->m_osb;
|
|
+ struct rb_node *node;
|
|
+ struct ocfs2_alloc_reservation *resv;
|
|
+ int i = 0;
|
|
+
|
|
+ mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n",
|
|
+ osb->dev_str, resmap->m_bitmap_len);
|
|
+
|
|
+ node = rb_first(&resmap->m_reservations);
|
|
+ while (node) {
|
|
+ resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
|
|
+
|
|
+ mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u"
|
|
+ "\tlast_len: %u\tallocated: %u\n", resv->r_start,
|
|
+ ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
|
|
+ resv->r_last_len, resv->r_allocated);
|
|
+
|
|
+ node = rb_next(node);
|
|
+ i++;
|
|
+ }
|
|
+
|
|
+ mlog(ML_NOTICE, "%d reservations found\n", i);
|
|
+}
|
|
+
|
|
+#ifdef OCFS2_CHECK_RESERVATIONS
|
|
+static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+ unsigned int off = 0;
|
|
+ int i = 0;
|
|
+ struct rb_node *node;
|
|
+ struct ocfs2_alloc_reservation *resv;
|
|
+
|
|
+ node = rb_first(&resmap->m_reservations);
|
|
+ while (node) {
|
|
+ resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
|
|
+
|
|
+ if (i > 0 && resv->r_start <= off) {
|
|
+ mlog(ML_ERROR, "reservation %d has bad start off!\n",
|
|
+ i);
|
|
+ goto bad;
|
|
+ }
|
|
+
|
|
+ if (resv->r_len == 0) {
|
|
+ mlog(ML_ERROR, "reservation %d has no length!\n",
|
|
+ i);
|
|
+ goto bad;
|
|
+ }
|
|
+
|
|
+ if (resv->r_start > ocfs2_resv_end(resv)) {
|
|
+ mlog(ML_ERROR, "reservation %d has invalid range!\n",
|
|
+ i);
|
|
+ goto bad;
|
|
+ }
|
|
+
|
|
+ if (ocfs2_resv_end(resv) > resmap->m_bitmap_len) {
|
|
+ mlog(ML_ERROR, "reservation %d extends past bitmap!\n",
|
|
+ i);
|
|
+ goto bad;
|
|
+ }
|
|
+
|
|
+ off = ocfs2_resv_end(resv);
|
|
+ node = rb_next(node);
|
|
+
|
|
+ i++;
|
|
+ }
|
|
+ return;
|
|
+
|
|
+bad:
|
|
+ ocfs2_dump_resv(resmap);
|
|
+ BUG();
|
|
+}
|
|
+#else
|
|
+static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+
|
|
+}
|
|
+#endif
|
|
+
|
|
+void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ memset(resv, 0, sizeof(*resv));
|
|
+}
|
|
+
|
|
+int ocfs2_resmap_init(struct ocfs2_super *osb,
|
|
+ struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+ memset(resmap, 0, sizeof(*resmap));
|
|
+
|
|
+ resmap->m_osb = osb;
|
|
+ resmap->m_reservations = RB_ROOT;
|
|
+ /* m_bitmap_len is initialized to zero by the above memset. */
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ resv->r_len = 0;
|
|
+ resv->r_allocated = 0;
|
|
+}
|
|
+
|
|
+static void ocfs2_resv_remove(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ if (resv->r_inuse) {
|
|
+ rb_erase(&resv->r_node, &resmap->m_reservations);
|
|
+ resv->r_inuse = 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ assert_spin_locked(&resv_lock);
|
|
+
|
|
+ __ocfs2_resv_trunc(resv);
|
|
+ ocfs2_resv_remove(resmap, resv);
|
|
+}
|
|
+
|
|
+/* does nothing if 'resv' is null */
|
|
+void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ if (resv) {
|
|
+ spin_lock(&resv_lock);
|
|
+ __ocfs2_resv_discard(resmap, resv);
|
|
+ spin_unlock(&resv_lock);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+ struct rb_node *node;
|
|
+ struct ocfs2_alloc_reservation *resv;
|
|
+
|
|
+ assert_spin_locked(&resv_lock);
|
|
+
|
|
+ while ((node = rb_last(&resmap->m_reservations)) != NULL) {
|
|
+ resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
|
|
+
|
|
+ __ocfs2_resv_discard(resmap, resv);
|
|
+ /*
|
|
+ * last_len and last_start no longer make sense if
|
|
+ * we're changing the range of our allocations.
|
|
+ */
|
|
+ resv->r_last_len = resv->r_last_start = 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Discards every existing reservation and records clen as the new
|
|
+ * bitmap length. Does nothing when reservations are disabled. */
|
|
+void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
|
|
+ unsigned int clen)
|
|
+{
|
|
+ if (ocfs2_resmap_disabled(resmap))
|
|
+ return;
|
|
+
|
|
+ spin_lock(&resv_lock);
|
|
+
|
|
+ ocfs2_resmap_clear_all_resv(resmap);
|
|
+ resmap->m_bitmap_len = clen;
|
|
+
|
|
+ spin_unlock(&resv_lock);
|
|
+}
|
|
+
|
|
+void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap)
|
|
+{
|
|
+ /* Does nothing for now. Keep this around for API symmetry */
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Determine the number of available bits between my_resv and the next
|
|
+ * window and extend my_resv accordingly.
|
|
+ */
|
|
+static int ocfs2_try_to_extend_resv(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *my_resv)
|
|
+{
|
|
+ unsigned int available, avail_end;
|
|
+ struct rb_node *next, *node = &my_resv->r_node;
|
|
+ struct ocfs2_alloc_reservation *next_resv;
|
|
+ unsigned int bits = ocfs2_resv_window_bits(resmap);
|
|
+
|
|
+ next = rb_next(node);
|
|
+
|
|
+ if (next) {
|
|
+ next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
|
|
+ r_node);
|
|
+ avail_end = next_resv->r_start;
|
|
+ } else {
|
|
+ avail_end = resmap->m_bitmap_len - 1;
|
|
+ }
|
|
+
|
|
+ if (ocfs2_resv_end(my_resv) == avail_end)
|
|
+ return -ENOENT;
|
|
+
|
|
+ available = avail_end - ocfs2_resv_end(my_resv) - 1;
|
|
+
|
|
+ my_resv->r_len += available;
|
|
+ if (my_resv->r_len > bits)
|
|
+ my_resv->r_len = bits;
|
|
+
|
|
+ ocfs2_check_resmap(resmap);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *new)
|
|
+{
|
|
+ struct rb_root *root = &resmap->m_reservations;
|
|
+ struct rb_node *parent = NULL;
|
|
+ struct rb_node **p = &root->rb_node;
|
|
+ struct ocfs2_alloc_reservation *tmp;
|
|
+
|
|
+ assert_spin_locked(&resv_lock);
|
|
+
|
|
+ mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
|
|
+ new->r_len);
|
|
+
|
|
+ while(*p) {
|
|
+ parent = *p;
|
|
+
|
|
+ tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node);
|
|
+
|
|
+ if (new->r_start < tmp->r_start)
|
|
+ p = &(*p)->rb_left;
|
|
+ else if (new->r_start > ocfs2_resv_end(tmp))
|
|
+ p = &(*p)->rb_right;
|
|
+ else {
|
|
+ /* This should never happen! */
|
|
+ mlog(ML_ERROR, "Duplicate reservation window!\n");
|
|
+ BUG();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ rb_link_node(&new->r_node, parent, p);
|
|
+ rb_insert_color(&new->r_node, root);
|
|
+ new->r_inuse = 1;
|
|
+
|
|
+ ocfs2_check_resmap(resmap);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * ocfs2_find_resv() - find the window which contains goal
|
|
+ * @resmap: reservation map to search
|
|
+ * @goal: which bit to search for
|
|
+ *
|
|
+ * If a window containing that goal is not found, we return the window
|
|
+ * which comes before goal. Returns NULL on empty rbtree or no window
|
|
+ * before goal.
|
|
+ */
|
|
+static struct ocfs2_alloc_reservation *
|
|
+ocfs2_find_resv(struct ocfs2_reservation_map *resmap, unsigned int goal)
|
|
+{
|
|
+ struct ocfs2_alloc_reservation *resv;
|
|
+ struct rb_node *n = resmap->m_reservations.rb_node;
|
|
+
|
|
+ assert_spin_locked(&resv_lock);
|
|
+
|
|
+ if (!n)
|
|
+ return NULL;
|
|
+
|
|
+ do {
|
|
+ resv = rb_entry(n, struct ocfs2_alloc_reservation, r_node);
|
|
+
|
|
+ if (goal < resv->r_start)
|
|
+ n = n->rb_left;
|
|
+ else if (goal > ocfs2_resv_end(resv))
|
|
+ n = n->rb_right;
|
|
+ else
|
|
+ return resv;
|
|
+ } while (n);
|
|
+
|
|
+ /*
|
|
+ * The goal sits on one end of the tree. If it's the leftmost
|
|
+ * end, we return NULL.
|
|
+ */
|
|
+ if (resv->r_start > goal)
|
|
+ return NULL;
|
|
+
|
|
+ return resv;
|
|
+}
|
|
+
|
|
+static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv)
|
|
+{
|
|
+ struct rb_root *root = &resmap->m_reservations;
|
|
+ unsigned int last_start = resv->r_last_start;
|
|
+ unsigned int goal = 0;
|
|
+ unsigned int len = ocfs2_resv_window_bits(resmap);
|
|
+ unsigned int gap_start, gap_end, gap_len;
|
|
+ struct ocfs2_alloc_reservation *prev_resv, *next_resv;
|
|
+ struct rb_node *prev, *next;
|
|
+
|
|
+ if (resv->r_last_len) {
|
|
+ unsigned int last_end = last_start + resv->r_last_len - 1;
|
|
+
|
|
+ goal = last_end + 1;
|
|
+
|
|
+ if (goal >= resmap->m_bitmap_len)
|
|
+ goal = 0;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Nasty cases to consider:
|
|
+ *
|
|
+ * - rbtree is empty
|
|
+ * - our window should be first in all reservations
|
|
+ * - our window should be last in all reservations
|
|
+ * - need to make sure we don't go past end of bitmap
|
|
+ */
|
|
+
|
|
+ assert_spin_locked(&resv_lock);
|
|
+
|
|
+ if (RB_EMPTY_ROOT(root)) {
|
|
+ /*
|
|
+ * Easiest case - empty tree. We can just take
|
|
+ * whatever window we want.
|
|
+ */
|
|
+
|
|
+ mlog(0, "Empty root\n");
|
|
+
|
|
+ resv->r_start = goal;
|
|
+ resv->r_len = len;
|
|
+ if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len)
|
|
+ resv->r_len = resmap->m_bitmap_len - resv->r_start;
|
|
+
|
|
+ ocfs2_resv_insert(resmap, resv);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ prev_resv = ocfs2_find_resv(resmap, goal);
|
|
+
|
|
+ if (prev_resv == NULL) {
|
|
+ mlog(0, "Farthest left window\n");
|
|
+
|
|
+ /* Ok, we're the farthest left window. */
|
|
+ next = rb_first(root);
|
|
+ next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
|
|
+ r_node);
|
|
+
|
|
+ /*
|
|
+ * Try to allocate at far left of tree. If that
|
|
+ * doesn't fit, we just start our linear search from
|
|
+ * next_resv
|
|
+ */
|
|
+ if (next_resv->r_start > (goal + len - 1)) {
|
|
+ resv->r_start = goal;
|
|
+ resv->r_len = len;
|
|
+
|
|
+ ocfs2_resv_insert(resmap, resv);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ prev_resv = next_resv;
|
|
+ next_resv = NULL;
|
|
+ }
|
|
+
|
|
+ prev = &prev_resv->r_node;
|
|
+
|
|
+ /* Now we do a linear search for a window, starting at 'prev_resv' */
|
|
+ while (1) {
|
|
+ next = rb_next(prev);
|
|
+ if (next) {
|
|
+ mlog(0, "One more resv found in linear search\n");
|
|
+ next_resv = rb_entry(next,
|
|
+ struct ocfs2_alloc_reservation,
|
|
+ r_node);
|
|
+
|
|
+ gap_start = ocfs2_resv_end(prev_resv) + 1;
|
|
+ gap_end = next_resv->r_start - 1;
|
|
+ gap_len = gap_end - gap_start + 1;
|
|
+ } else {
|
|
+ mlog(0, "No next node\n");
|
|
+ /*
|
|
+ * We're at the rightmost edge of the
|
|
+ * tree. See if a reservation between this
|
|
+ * window and the end of the bitmap will work.
|
|
+ */
|
|
+ gap_start = ocfs2_resv_end(prev_resv) + 1;
|
|
+ gap_end = resmap->m_bitmap_len - 1;
|
|
+ gap_len = gap_end - gap_start + 1;
|
|
+ }
|
|
+
|
|
+ if (gap_start <= gap_end
|
|
+ && gap_start >= goal
|
|
+ && gap_len >= len) {
|
|
+ resv->r_start = gap_start;
|
|
+ resv->r_len = len;
|
|
+
|
|
+ ocfs2_resv_insert(resmap, resv);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (!next)
|
|
+ break;
|
|
+
|
|
+ prev = next;
|
|
+ prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation,
|
|
+ r_node);
|
|
+ }
|
|
+}
|
|
+
|
|
+void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv,
|
|
+ u32 cstart, u32 clen)
|
|
+{
|
|
+ unsigned int cend = cstart + clen - 1;
|
|
+
|
|
+ if (resmap == NULL || ocfs2_resmap_disabled(resmap))
|
|
+ return;
|
|
+
|
|
+ if (resv == NULL)
|
|
+ return;
|
|
+
|
|
+ spin_lock(&resv_lock);
|
|
+
|
|
+ mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
|
|
+ "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
|
|
+ cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
|
|
+ resv->r_len, resv->r_last_start, resv->r_last_len);
|
|
+
|
|
+ resv->r_last_len = clen;
|
|
+ resv->r_last_start = cstart;
|
|
+
|
|
+ if (ocfs2_resv_empty(resv)) {
|
|
+ mlog(0, "Empty reservation, find a new window.\n");
|
|
+ /*
|
|
+ * Allocation occurred without a window. We find an
|
|
+ * initial reservation for this inode, based on what
|
|
+ * was allocated already.
|
|
+ */
|
|
+ ocfs2_resv_find_window(resmap, resv);
|
|
+ goto out_unlock;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Did the allocation occur completely outside our
|
|
+ * reservation? Clear it then. Otherwise, try to extend our
|
|
+ * reservation or alloc a new one, if we've used all the bits.
|
|
+ */
|
|
+ if (cend < resv->r_start ||
|
|
+ cstart > ocfs2_resv_end(resv)) {
|
|
+ mlog(0, "Allocated outside reservation\n");
|
|
+
|
|
+ /* Truncate and remove reservation */
|
|
+ __ocfs2_resv_discard(resmap, resv);
|
|
+
|
|
+ if (cend < resv->r_start) {
|
|
+ /*
|
|
+ * The window wasn't used for some reason. We
|
|
+ * should start our search *past* it to give a
|
|
+ * better chance the next window will be
|
|
+ * used. Best way to do this right now is to
|
|
+ * fool the search code...
|
|
+ */
|
|
+ resv->r_last_start = ocfs2_resv_end(resv) + 1;
|
|
+ resv->r_last_len = 1;
|
|
+ }
|
|
+
|
|
+ ocfs2_resv_find_window(resmap, resv);
|
|
+ goto out_unlock;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * We allocated at least partially from our
|
|
+ * reservation. Adjust it and try to extend. Otherwise, we
|
|
+ * search for a new window.
|
|
+ */
|
|
+
|
|
+ resv->r_allocated += clen;
|
|
+
|
|
+ if (cend < ocfs2_resv_end(resv)) {
|
|
+ u32 old_end;
|
|
+
|
|
+ mlog(0, "Allocation left at end\n");
|
|
+
|
|
+ /*
|
|
+ * Partial allocation, leaving some bits free at
|
|
+ * end. We move over the start of the window to take
|
|
+ * this into account and try to extend it.
|
|
+ */
|
|
+ old_end = ocfs2_resv_end(resv);
|
|
+ resv->r_start = cend + 1; /* Start just past last allocation */
|
|
+ resv->r_len = old_end - resv->r_start + 1;
|
|
+
|
|
+ if (ocfs2_try_to_extend_resv(resmap, resv) == 0)
|
|
+ goto out_unlock;
|
|
+ }
|
|
+
|
|
+ mlog(0, "discard reservation\n");
|
|
+
|
|
+ /*
|
|
+ * No free bits at end or extend failed above. Truncate and
|
|
+ * re-search for a new window.
|
|
+ */
|
|
+
|
|
+ __ocfs2_resv_discard(resmap, resv);
|
|
+
|
|
+ ocfs2_resv_find_window(resmap, resv);
|
|
+
|
|
+out_unlock:
|
|
+ mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
|
|
+ "r_len: %u r_last_start: %u r_last_len: %u\n",
|
|
+ resv->r_start, ocfs2_resv_end(resv), resv->r_len,
|
|
+ resv->r_last_start, resv->r_last_len);
|
|
+
|
|
+ spin_unlock(&resv_lock);
|
|
+}
|
|
+
|
|
+int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv,
|
|
+ char *disk_bitmap, int *cstart, int *clen)
|
|
+{
|
|
+ int ret = -ENOSPC;
|
|
+ unsigned int start, len, best_start = 0, best_len = 0;
|
|
+
|
|
+ if (resv == NULL || ocfs2_resmap_disabled(resmap))
|
|
+ return -ENOSPC;
|
|
+
|
|
+ spin_lock(&resv_lock);
|
|
+
|
|
+ if (ocfs2_resv_empty(resv)) {
|
|
+ mlog(0, "empty reservation, find new window\n");
|
|
+
|
|
+ ocfs2_resv_find_window(resmap, resv);
|
|
+
|
|
+ if (ocfs2_resv_empty(resv)) {
|
|
+ /*
|
|
+ * If resv is still empty, we return zero
|
|
+ * bits and allow ocfs2_resmap_claimed_bits()
|
|
+ * to start our new reservation after the
|
|
+ * allocator has done its work.
|
|
+ */
|
|
+ *cstart = *clen = 0;
|
|
+ ret = 0;
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ start = resv->r_start;
|
|
+ len = 0;
|
|
+
|
|
+ while (start <= ocfs2_resv_end(resv)) {
|
|
+ if (ocfs2_test_bit(start, disk_bitmap)) {
|
|
+ mlog(0,
|
|
+ "Reservation was taken at bit %d\n",
|
|
+ start + len);
|
|
+ best_len = 0;
|
|
+ goto next;
|
|
+ }
|
|
+
|
|
+ /* This is basic, but since the local alloc is
|
|
+ * used very predictably, I think we're ok. */
|
|
+ if (!best_len) {
|
|
+ best_start = start;
|
|
+ best_len = 1;
|
|
+ } else {
|
|
+ best_len++;
|
|
+ }
|
|
+
|
|
+next:
|
|
+ start++;
|
|
+ }
|
|
+
|
|
+ if (best_len) {
|
|
+ ret = 0;
|
|
+ *cstart = best_start;
|
|
+ *clen = best_len;
|
|
+ }
|
|
+out:
|
|
+ spin_unlock(&resv_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
--- /dev/null
|
|
+++ b/fs/ocfs2/reservations.h
|
|
@@ -0,0 +1,151 @@
|
|
+/* -*- mode: c; c-basic-offset: 8; -*-
|
|
+ * vim: noexpandtab sw=8 ts=8 sts=0:
|
|
+ *
|
|
+ * reservations.h
|
|
+ *
|
|
+ * Allocation reservations function prototypes and structures.
|
|
+ *
|
|
+ * Copyright (C) 2009 Novell. All rights reserved.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public
|
|
+ * License as published by the Free Software Foundation; either
|
|
+ * version 2 of the License, or (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public
|
|
+ * License along with this program; if not, write to the
|
|
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
+ * Boston, MA 02111-1307, USA.
|
|
+ */
|
|
+
|
|
+#ifndef OCFS2_RESERVATIONS_H
|
|
+#define OCFS2_RESERVATIONS_H
|
|
+
|
|
+#include <linux/rbtree.h>
|
|
+
|
|
+struct ocfs2_bitmap_resv_ops;
|
|
+
|
|
+#define OCFS2_DEFAULT_RESV_LEVEL 3
|
|
+#define OCFS2_MAX_RESV_LEVEL 7
|
|
+#define OCFS2_MIN_RESV_LEVEL 0
|
|
+
|
|
+struct ocfs2_alloc_reservation {
|
|
+ struct rb_node r_node;
|
|
+
|
|
+ unsigned int r_start;
|
|
+ unsigned int r_len;
|
|
+
|
|
+ unsigned int r_last_len;
|
|
+ unsigned int r_last_start;
|
|
+
|
|
+ unsigned int r_allocated;
|
|
+
|
|
+ int r_inuse;
|
|
+};
|
|
+
|
|
+struct ocfs2_reservation_map {
|
|
+ struct rb_root m_reservations;
|
|
+
|
|
+ struct ocfs2_super *m_osb;
|
|
+
|
|
+ /* The following are not initialized to meaningful values until a disk
|
|
+ * bitmap is provided. */
|
|
+ u32 m_bitmap_len; /* Number of valid
|
|
+ * bits available */
|
|
+};
|
|
+
|
|
+void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
|
|
+
|
|
+/**
|
|
+ * ocfs2_resv_discard() - truncate a reservation
|
|
+ * @resmap: reservation map whose rbtree may hold @resv
|
|
+ * @resv: the reservation to truncate.
|
|
+ *
|
|
+ * After this function is called, the reservation will be empty, and
|
|
+ * unlinked from the rbtree.
|
|
+ */
|
|
+void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv);
|
|
+
|
|
+
|
|
+/**
|
|
+ * ocfs2_resmap_init() - Initialize fields of a reservations bitmap
|
|
+ * @osb: struct ocfs2_super, stored in resmap->m_osb
|
|
+ * @resmap: struct ocfs2_reservation_map to initialize
|
|
+ *
|
|
+ * Zeroes resmap and starts it with an empty reservation tree; the
|
|
+ * bitmap length stays zero until ocfs2_resmap_restart() sets it.
|
|
+ *
|
|
+ * This version cannot fail and always returns 0.
|
|
+ */
|
|
+int ocfs2_resmap_init(struct ocfs2_super *osb,
|
|
+ struct ocfs2_reservation_map *resmap);
|
|
+
|
|
+/**
|
|
+ * ocfs2_resmap_restart() - "restart" a reservation bitmap
|
|
+ * @resmap: reservations bitmap
|
|
+ * @clen: Number of valid bits in the bitmap
|
|
+ *
|
|
+ * Re-initialize the parameters of a reservation bitmap. This is
|
|
+ * useful for local alloc window slides.
|
|
+ *
|
|
+ * Unless reservations are disabled, this function discards all
|
|
+ * existing reservations and records the new length. A future
|
|
+ * version will recalculate existing reservations based on the new
|
|
+ * bitmap.
|
|
+ */
|
|
+void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
|
|
+ unsigned int clen);
|
|
+
|
|
+/**
|
|
+ * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
|
|
+ * @resmap: the struct ocfs2_reservation_map to uninitialize
|
|
+ */
|
|
+void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
|
|
+
|
|
+/**
|
|
+ * ocfs2_resmap_resv_bits() - Return still-valid reservation bits
|
|
+ * @resmap: reservations bitmap
|
|
+ * @resv: reservation to base search from
|
|
+ * @disk_bitmap: up to date (from disk) allocation bitmap
|
|
+ * @cstart: start of proposed allocation
|
|
+ * @clen: length (in clusters) of proposed allocation
|
|
+ *
|
|
+ * Using the reservation data from resv, this function will compare
|
|
+ * resmap and disk_bitmap to determine what part (if any) of the
|
|
+ * reservation window is still clear to use. An empty resv first gets a
|
|
+ * new window; if none can be found, no allocation is returned.
|
|
+ *
|
|
+ * On success, zero is returned and the valid allocation area is set in cstart
|
|
+ * and clen. If no allocation is found, they are set to zero.
|
|
+ *
|
|
+ * Returns nonzero on error.
|
|
+ */
|
|
+int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv,
|
|
+ char *disk_bitmap, int *cstart, int *clen);
|
|
+
|
|
+/**
|
|
+ * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
|
|
+ * @resmap: reservations bitmap
|
|
+ * @resv: optional reservation to recalculate based on new bitmap
|
|
+ * @cstart: start of allocation in clusters
|
|
+ * @clen: length of allocation in clusters.
|
|
+ *
|
|
+ * Tell the reservation code that bits were used to fulfill allocation in
|
|
+ * resmap. The bits don't have to have been part of any existing
|
|
+ * reservation. But we must always call this function when bits are claimed.
|
|
+ * Internally, the reservations code will use this information to mark the
|
|
+ * reservations bitmap. If resv is passed, its next allocation window will be
|
|
+ * calculated.
|
|
+ */
|
|
+void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
|
|
+ struct ocfs2_alloc_reservation *resv,
|
|
+ u32 cstart, u32 clen);
|
|
+
|
|
+#endif /* OCFS2_RESERVATIONS_H */
|
|
--- a/fs/ocfs2/suballoc.c
|
|
+++ b/fs/ocfs2/suballoc.c
|
|
@@ -137,6 +137,7 @@ void ocfs2_free_ac_resource(struct ocfs2
|
|
}
|
|
brelse(ac->ac_bh);
|
|
ac->ac_bh = NULL;
|
|
+ ac->ac_resv = NULL;
|
|
}
|
|
|
|
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
|
|
--- a/fs/ocfs2/suballoc.h
|
|
+++ b/fs/ocfs2/suballoc.h
|
|
@@ -54,6 +54,8 @@ struct ocfs2_alloc_context {
|
|
u64 ac_last_group;
|
|
u64 ac_max_block; /* Highest block number to allocate. 0 is
|
|
is the same as ~0 - unlimited */
|
|
+
|
|
+ struct ocfs2_alloc_reservation *ac_resv;
|
|
};
|
|
|
|
void ocfs2_init_steal_slots(struct ocfs2_super *osb);
|
|
--- a/fs/ocfs2/super.c
|
|
+++ b/fs/ocfs2/super.c
|
|
@@ -95,6 +95,7 @@ struct mount_options
|
|
unsigned int atime_quantum;
|
|
signed short slot;
|
|
unsigned int localalloc_opt;
|
|
+ unsigned int resv_level;
|
|
char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
|
|
};
|
|
|
|
@@ -176,6 +177,7 @@ enum {
|
|
Opt_noacl,
|
|
Opt_usrquota,
|
|
Opt_grpquota,
|
|
+ Opt_resv_level,
|
|
Opt_err,
|
|
};
|
|
|
|
@@ -202,6 +204,7 @@ static const match_table_t tokens = {
|
|
{Opt_noacl, "noacl"},
|
|
{Opt_usrquota, "usrquota"},
|
|
{Opt_grpquota, "grpquota"},
|
|
+ {Opt_resv_level, "resv_level=%u"},
|
|
{Opt_err, NULL}
|
|
};
|
|
|
|
@@ -1030,6 +1033,7 @@ static int ocfs2_fill_super(struct super
|
|
osb->osb_commit_interval = parsed_options.commit_interval;
|
|
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
|
|
osb->local_alloc_bits = osb->local_alloc_default_bits;
|
|
+ osb->osb_resv_level = parsed_options.resv_level;
|
|
|
|
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
|
|
if (status)
|
|
@@ -1290,6 +1294,7 @@ static int ocfs2_parse_options(struct su
|
|
mopt->slot = OCFS2_INVALID_SLOT;
|
|
mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE;
|
|
mopt->cluster_stack[0] = '\0';
|
|
+ mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
|
|
|
|
if (!options) {
|
|
status = 1;
|
|
@@ -1433,6 +1438,17 @@ static int ocfs2_parse_options(struct su
|
|
mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
|
|
mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
|
|
break;
|
|
+ case Opt_resv_level:
|
|
+ if (is_remount)
|
|
+ break;
|
|
+ if (match_int(&args[0], &option)) {
|
|
+ status = 0;
|
|
+ goto bail;
|
|
+ }
|
|
+ if (option >= OCFS2_MIN_RESV_LEVEL &&
|
|
+ option < OCFS2_MAX_RESV_LEVEL)
|
|
+ mopt->resv_level = option;
|
|
+ break;
|
|
default:
|
|
mlog(ML_ERROR,
|
|
"Unrecognized mount option \"%s\" "
|
|
@@ -1514,6 +1530,9 @@ static int ocfs2_show_options(struct seq
|
|
else
|
|
seq_printf(s, ",noacl");
|
|
|
|
+ if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
|
|
+ seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -1688,6 +1707,8 @@ static void ocfs2_inode_init_once(void *
|
|
oi->ip_blkno = 0ULL;
|
|
oi->ip_clusters = 0;
|
|
|
|
+ ocfs2_resv_init_once(&oi->ip_la_data_resv);
|
|
+
|
|
ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
|
|
ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
|
|
ocfs2_lock_res_init_once(&oi->ip_open_lockres);
|
|
@@ -2042,6 +2063,12 @@ static int ocfs2_initialize_super(struct
|
|
|
|
init_waitqueue_head(&osb->osb_mount_event);
|
|
|
|
+ status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
|
|
+ if (status) {
|
|
+ mlog_errno(status);
|
|
+ goto bail;
|
|
+ }
|
|
+
|
|
osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
|
|
if (!osb->vol_label) {
|
|
mlog(ML_ERROR, "unable to alloc vol label\n");
|