Date: Thu, 09 Oct 2008 17:11:53 +1100
From: Donald Douwsma
Subject: DMAPI support for xfs
Patch-mainline: Not yet
References: bnc#450658
Acked-by: Jan Kara

---
 fs/xfs/Kconfig               |   13
 fs/xfs/Makefile              |    5
 fs/xfs/dmapi/Makefile        |   28
 fs/xfs/dmapi/xfs_dm.c        | 3327 +++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/dmapi/xfs_dm.h        |   23
 fs/xfs/linux-2.6/xfs_file.c  |   76
 fs/xfs/linux-2.6/xfs_ksyms.c |   92 +
 fs/xfs/linux-2.6/xfs_linux.h |    4
 fs/xfs/linux-2.6/xfs_super.c |   13
 fs/xfs/xfs_dmops.c           |   20
 fs/xfs/xfs_itable.c          |    2
 fs/xfs/xfs_itable.h          |    5
 fs/xfs/xfs_mount.h           |    1
 fs/xfs/xfs_rw.c              |    1
 fs/xfs/xfs_rw.h              |    5
 fs/xfs/xfs_vnodeops.c        |    2
 16 files changed, 3609 insertions(+), 8 deletions(-)

--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -36,6 +36,19 @@ config XFS_QUOTA
 	  with or without the generic quota support enabled (CONFIG_QUOTA) -
 	  they are completely independent subsystems.
 
+config XFS_DMAPI
+	tristate "XFS DMAPI support"
+	depends on XFS_FS
+	select DMAPI
+	help
+	  The Data Management API is a system interface used to implement
+	  the interface defined in the X/Open document:
+	  "Systems Management: Data Storage Management (XDSM) API",
+	  dated February 1997.  This interface is used by hierarchical
+	  storage management systems.
+
+	  If unsure, say N.
+
 config XFS_POSIX_ACL
 	bool "XFS POSIX ACL support"
 	depends on XFS_FS
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -41,6 +41,8 @@ ifeq ($(CONFIG_XFS_QUOTA),y)
 xfs-$(CONFIG_PROC_FS)		+= quota/xfs_qm_stats.o
 endif
 
+obj-$(CONFIG_XFS_DMAPI)		+= dmapi/
+
 xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o
 xfs-$(CONFIG_XFS_POSIX_ACL)	+= $(XFS_LINUX)/xfs_acl.o
 xfs-$(CONFIG_PROC_FS)		+= $(XFS_LINUX)/xfs_stats.o
@@ -107,7 +109,8 @@ xfs-y				+= $(addprefix $(XFS_LINUX)/, \
 				   xfs_iops.o \
 				   xfs_super.o \
 				   xfs_sync.o \
-				   xfs_xattr.o)
+				   xfs_xattr.o \
+				   xfs_ksyms.o)
 
 # Objects in support/
 xfs-y				+= $(addprefix support/, \
--- /dev/null
+++ b/fs/xfs/dmapi/Makefile
@@ -0,0 +1,28 @@
+#
+# Copyright (c) 2006 Silicon Graphics, Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+
+EXTRA_CFLAGS += -I$(src)/.. -I$(src)/../linux-2.6
+EXTRA_CFLAGS += -I$(srctree)/fs/dmapi
+
+ifeq ($(CONFIG_XFS_DEBUG),y)
+	EXTRA_CFLAGS += -g -DDEBUG
+endif
+
+obj-$(CONFIG_XFS_DMAPI)		+= xfs_dmapi.o
+
+xfs_dmapi-y			+= xfs_dm.o
--- /dev/null
+++ b/fs/xfs/dmapi/xfs_dm.c
@@ -0,0 +1,3327 @@
+/*
+ * Copyright (c) 2000-2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_types.h"
+#include "xfs_bit.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_itable.h"
+#include "xfs_bmap.h"
+#include "xfs_rw.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_inode_item.h"
+#include "xfs_vnodeops.h"
+#include <dmapi.h>
+#include <dmapi_kern.h>
+#include "xfs_dm.h"
+
+#include <linux/mount.h>
+
+#define MAXNAMLEN	MAXNAMELEN
+
+#define MIN_DIO_SIZE(mp)	((mp)->m_sb.sb_sectsize)
+#define MAX_DIO_SIZE(mp)	(INT_MAX & ~(MIN_DIO_SIZE(mp) - 1))
+
+static void up_rw_sems(struct inode *ip, int flags)
+{
+	if (flags & DM_FLAGS_IALLOCSEM_WR)
+		up_write(&ip->i_alloc_sem);
+	if (flags & DM_FLAGS_IMUX)
+		mutex_unlock(&ip->i_mutex);
+}
+
+static void down_rw_sems(struct inode *ip, int flags)
+{
+	if (flags & DM_FLAGS_IMUX)
+		mutex_lock(&ip->i_mutex);
+	if (flags & DM_FLAGS_IALLOCSEM_WR)
+		down_write(&ip->i_alloc_sem);
+}
+
+
+/* Structure used to hold the on-disk version of a dm_attrname_t.  All
+   on-disk attribute names start with the 8-byte string "SGI_DMI_".
+*/
+
+typedef struct {
+	char	dan_chars[DMATTR_PREFIXLEN + DM_ATTR_NAME_SIZE + 1];
+} dm_dkattrname_t;
+
+/* Structure used by xfs_dm_get_bulkall(), used as the "private_data"
+ * that we want xfs_bulkstat to send to our formatter.
+ */
+typedef struct {
+	dm_fsid_t	fsid;
+	void __user	*laststruct;
+	dm_dkattrname_t	attrname;
+} dm_bulkstat_one_t;
+
+/* In the on-disk inode, DMAPI attribute names consist of the user-provided
+   name with the DMATTR_PREFIXSTRING pre-pended.  This string must NEVER be
+   changed!
+*/
+
+static const char dmattr_prefix[DMATTR_PREFIXLEN + 1] = DMATTR_PREFIXSTRING;
+
+static dm_size_t dm_min_dio_xfer = 0;	/* direct I/O disabled for now */
+
+
+/* See xfs_dm_get_dmattr() for a description of why this is needed. */
+
+#define XFS_BUG_KLUDGE	256	/* max size of an in-inode attribute value */
+
+#define DM_MAX_ATTR_BYTES_ON_DESTROY	256
+
+#define DM_STAT_SIZE(dmtype,namelen)	\
+	(sizeof(dmtype) + sizeof(dm_handle_t) + namelen)
+
+#define DM_STAT_ALIGN		(sizeof(__uint64_t))
+
+/* DMAPI's E2BIG == EA's ERANGE */
+#define DM_EA_XLATE_ERR(err) { if (err == ERANGE) err = E2BIG; }
+
+static inline size_t dm_stat_align(size_t size)
+{
+	return (size + (DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1);
+}
+
+static inline size_t dm_stat_size(size_t namelen)
+{
+	return dm_stat_align(sizeof(dm_stat_t) + sizeof(dm_handle_t) + namelen);
+}
+
+/*
+ *	xfs_dm_send_data_event()
+ *
+ *	Send data event to DMAPI.  Drop IO lock (if specified) before
+ *	the dm_send_data_event() call and reacquire it afterwards.
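+ *	The event is re-sent if the file's DMAPI state (di_dmstate)
+ *	changed while the locks were dropped, since a DMAPI application
+ *	may have changed the file in the meantime.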
+ */ +int +xfs_dm_send_data_event( + dm_eventtype_t event, + xfs_inode_t *ip, + xfs_off_t offset, + size_t length, + int flags, + int *lock_flags) +{ + struct inode *inode = &ip->i_vnode; + int error; + uint16_t dmstate; + + /* Returns positive errors to XFS */ + + do { + dmstate = ip->i_d.di_dmstate; + if (lock_flags) + xfs_iunlock(ip, *lock_flags); + + up_rw_sems(inode, flags); + + error = dm_send_data_event(event, inode, DM_RIGHT_NULL, + offset, length, flags); + error = -error; /* DMAPI returns negative errors */ + + down_rw_sems(inode, flags); + + if (lock_flags) + xfs_ilock(ip, *lock_flags); + } while (!error && (ip->i_d.di_dmstate != dmstate)); + + return error; +} + +/* prohibited_mr_events + * + * Return event bits representing any events which cannot have managed + * region events set due to memory mapping of the file. If the maximum + * protection allowed in any pregion includes PROT_WRITE, and the region + * is shared and not text, then neither READ nor WRITE events can be set. + * Otherwise if the file is memory mapped, no READ event can be set. + * + */ +STATIC int +prohibited_mr_events( + struct address_space *mapping) +{ + int prohibited = (1 << DM_EVENT_READ); + + if (!mapping_mapped(mapping)) + return 0; + + spin_lock(&mapping->i_mmap_lock); + if (mapping_writably_mapped(mapping)) + prohibited |= (1 << DM_EVENT_WRITE); + spin_unlock(&mapping->i_mmap_lock); + + return prohibited; +} + +#ifdef DEBUG_RIGHTS +STATIC int +xfs_vp_to_hexhandle( + struct inode *inode, + u_int type, + char *buffer) +{ + dm_handle_t handle; + u_char *ip; + int length; + int error; + int i; + + /* + * XXX: dm_vp_to_handle doesn't exist. + * Looks like this debug code is rather dead. + */ + if ((error = dm_vp_to_handle(inode, &handle))) + return(error); + + if (type == DM_FSYS_OBJ) { /* a filesystem handle */ + length = DM_FSHSIZE; + } else { + length = DM_HSIZE(handle); + } + for (ip = (u_char *)&handle, i = 0; i < length; i++) { + *buffer++ = "0123456789abcdef"[ip[i] >> 4]; + *buffer++ = "0123456789abcdef"[ip[i] & 0xf]; + } + *buffer = '\0'; + return(0); +} +#endif /* DEBUG_RIGHTS */ + + + + +/* Copy in and validate an attribute name from user space. It should be a + string of at least one and at most DM_ATTR_NAME_SIZE characters. Because + the dm_attrname_t structure doesn't provide room for the trailing NULL + byte, we just copy in one extra character and then zero it if it + happens to be non-NULL. 
+*/ + +STATIC int +xfs_copyin_attrname( + dm_attrname_t __user *from, /* dm_attrname_t in user space */ + dm_dkattrname_t *to) /* name buffer in kernel space */ +{ + int error = 0; + size_t len; + + strcpy(to->dan_chars, dmattr_prefix); + + len = strnlen_user((char __user *)from, DM_ATTR_NAME_SIZE); + if (len == 0) + error = EFAULT; + else { + if (copy_from_user(&to->dan_chars[DMATTR_PREFIXLEN], from, len)) + to->dan_chars[sizeof(to->dan_chars) - 1] = '\0'; + else if (to->dan_chars[DMATTR_PREFIXLEN] == '\0') + error = EINVAL; + else + to->dan_chars[DMATTR_PREFIXLEN + len - 1] = '\0'; + } + + return error; +} + + +/* + * Convert the XFS flags into their DMAPI flag equivalent for export + */ +STATIC uint +_xfs_dic2dmflags( + __uint16_t di_flags) +{ + uint flags = 0; + + if (di_flags & XFS_DIFLAG_ANY) { + if (di_flags & XFS_DIFLAG_REALTIME) + flags |= DM_XFLAG_REALTIME; + if (di_flags & XFS_DIFLAG_PREALLOC) + flags |= DM_XFLAG_PREALLOC; + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= DM_XFLAG_IMMUTABLE; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= DM_XFLAG_APPEND; + if (di_flags & XFS_DIFLAG_SYNC) + flags |= DM_XFLAG_SYNC; + if (di_flags & XFS_DIFLAG_NOATIME) + flags |= DM_XFLAG_NOATIME; + if (di_flags & XFS_DIFLAG_NODUMP) + flags |= DM_XFLAG_NODUMP; + } + return flags; +} + +STATIC uint +xfs_ip2dmflags( + xfs_inode_t *ip) +{ + return _xfs_dic2dmflags(ip->i_d.di_flags) | + (XFS_IFORK_Q(ip) ? DM_XFLAG_HASATTR : 0); +} + +STATIC uint +xfs_dic2dmflags( + xfs_dinode_t *dip) +{ + return _xfs_dic2dmflags(be16_to_cpu(dip->di_flags)) | + (XFS_DFORK_Q(dip) ? DM_XFLAG_HASATTR : 0); +} + +/* + * This copies selected fields in an inode into a dm_stat structure. Because + * these fields must return the same values as they would in stat(), the + * majority of this code was copied directly from xfs_getattr(). Any future + * changes to xfs_gettattr() must also be reflected here. + */ +STATIC void +xfs_dip_to_stat( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + dm_stat_t *buf) +{ + xfs_dinode_t *dic = dip; + + /* + * The inode format changed when we moved the link count and + * made it 32 bits long. If this is an old format inode, + * convert it in memory to look like a new one. If it gets + * flushed to disk we will convert back before flushing or + * logging it. We zero out the new projid field and the old link + * count field. We'll handle clearing the pad field (the remains + * of the old uuid field) when we actually convert the inode to + * the new format. We don't change the version number so that we + * can distinguish this from a real new format inode. 
+ */ + if (dic->di_version == 1) { + buf->dt_nlink = be16_to_cpu(dic->di_onlink); + /*buf->dt_xfs_projid = 0;*/ + } else { + buf->dt_nlink = be32_to_cpu(dic->di_nlink); + /*buf->dt_xfs_projid = be16_to_cpu(dic->di_projid);*/ + } + buf->dt_ino = ino; + buf->dt_dev = new_encode_dev(mp->m_ddev_targp->bt_dev); + buf->dt_mode = be16_to_cpu(dic->di_mode); + buf->dt_uid = be32_to_cpu(dic->di_uid); + buf->dt_gid = be32_to_cpu(dic->di_gid); + buf->dt_size = be64_to_cpu(dic->di_size); + buf->dt_atime = be32_to_cpu(dic->di_atime.t_sec); + buf->dt_mtime = be32_to_cpu(dic->di_mtime.t_sec); + buf->dt_ctime = be32_to_cpu(dic->di_ctime.t_sec); + buf->dt_xfs_xflags = xfs_dic2dmflags(dip); + buf->dt_xfs_extsize = + be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; + buf->dt_xfs_extents = be32_to_cpu(dic->di_nextents); + buf->dt_xfs_aextents = be16_to_cpu(dic->di_anextents); + buf->dt_xfs_igen = be32_to_cpu(dic->di_gen); + buf->dt_xfs_dmstate = be16_to_cpu(dic->di_dmstate); + + switch (dic->di_format) { + case XFS_DINODE_FMT_DEV: + buf->dt_rdev = xfs_dinode_get_rdev(dic); + buf->dt_blksize = BLKDEV_IOSIZE; + buf->dt_blocks = 0; + break; + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_UUID: + buf->dt_rdev = 0; + buf->dt_blksize = mp->m_sb.sb_blocksize; + buf->dt_blocks = 0; + break; + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + buf->dt_rdev = 0; + buf->dt_blksize = mp->m_sb.sb_blocksize; + buf->dt_blocks = + XFS_FSB_TO_BB(mp, be64_to_cpu(dic->di_nblocks)); + break; + } + + memset(&buf->dt_pad1, 0, sizeof(buf->dt_pad1)); + memset(&buf->dt_pad2, 0, sizeof(buf->dt_pad2)); + memset(&buf->dt_pad3, 0, sizeof(buf->dt_pad3)); + + /* Finally fill in the DMAPI specific fields */ + buf->dt_pers = 0; + buf->dt_change = 0; + buf->dt_nevents = DM_EVENT_MAX; + buf->dt_emask = be32_to_cpu(dic->di_dmevmask); + buf->dt_dtime = be32_to_cpu(dic->di_ctime.t_sec); + /* Set if one of READ, WRITE or TRUNCATE bits is set in emask */ + buf->dt_pmanreg = (DMEV_ISSET(DM_EVENT_READ, buf->dt_emask) || + DMEV_ISSET(DM_EVENT_WRITE, buf->dt_emask) || + DMEV_ISSET(DM_EVENT_TRUNCATE, buf->dt_emask)) ? 1 : 0; +} + +/* + * Pull out both ondisk and incore fields, incore has preference. + * The inode must be kept locked SHARED by the caller. 
+ */ +STATIC void +xfs_ip_to_stat( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_inode_t *ip, + dm_stat_t *buf) +{ + xfs_icdinode_t *dic = &ip->i_d; + + buf->dt_ino = ino; + buf->dt_nlink = dic->di_nlink; + /*buf->dt_xfs_projid = dic->di_projid;*/ + buf->dt_mode = dic->di_mode; + buf->dt_uid = dic->di_uid; + buf->dt_gid = dic->di_gid; + buf->dt_size = XFS_ISIZE(ip); + buf->dt_dev = new_encode_dev(mp->m_ddev_targp->bt_dev); + buf->dt_atime = VFS_I(ip)->i_atime.tv_sec; + buf->dt_mtime = dic->di_mtime.t_sec; + buf->dt_ctime = dic->di_ctime.t_sec; + buf->dt_xfs_xflags = xfs_ip2dmflags(ip); + buf->dt_xfs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; + buf->dt_xfs_extents = dic->di_nextents; + buf->dt_xfs_aextents = dic->di_anextents; + buf->dt_xfs_igen = dic->di_gen; + buf->dt_xfs_dmstate = dic->di_dmstate; + + switch (dic->di_format) { + case XFS_DINODE_FMT_DEV: + buf->dt_rdev = ip->i_df.if_u2.if_rdev; + buf->dt_blksize = BLKDEV_IOSIZE; + buf->dt_blocks = 0; + break; + case XFS_DINODE_FMT_LOCAL: + case XFS_DINODE_FMT_UUID: + buf->dt_rdev = 0; + buf->dt_blksize = mp->m_sb.sb_blocksize; + buf->dt_blocks = 0; + break; + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + buf->dt_rdev = 0; + buf->dt_blksize = mp->m_sb.sb_blocksize; + buf->dt_blocks = XFS_FSB_TO_BB(mp, + (dic->di_nblocks + ip->i_delayed_blks)); + break; + } + + memset(&buf->dt_pad1, 0, sizeof(buf->dt_pad1)); + memset(&buf->dt_pad2, 0, sizeof(buf->dt_pad2)); + memset(&buf->dt_pad3, 0, sizeof(buf->dt_pad3)); + + /* Finally fill in the DMAPI specific fields */ + buf->dt_pers = 0; + buf->dt_change = 0; + buf->dt_nevents = DM_EVENT_MAX; + buf->dt_emask = dic->di_dmevmask; + buf->dt_dtime = dic->di_ctime.t_sec; + /* Set if one of READ, WRITE or TRUNCATE bits is set in emask */ + buf->dt_pmanreg = (DMEV_ISSET(DM_EVENT_READ, buf->dt_emask) || + DMEV_ISSET(DM_EVENT_WRITE, buf->dt_emask) || + DMEV_ISSET(DM_EVENT_TRUNCATE, buf->dt_emask)) ? 1 : 0; +} + +/* + * Take the handle and put it at the end of a dm_xstat buffer. + * dt_compname is unused in bulkstat - so we zero it out. + * Finally, update link in dm_xstat_t to point to next struct. + */ +STATIC void +xfs_dm_handle_to_xstat( + dm_xstat_t *xbuf, + size_t xstat_sz, + dm_handle_t *handle, + size_t handle_sz) +{ + dm_stat_t *sbuf = &xbuf->dx_statinfo; + + memcpy(xbuf + 1, handle, handle_sz); + sbuf->dt_handle.vd_offset = (ssize_t) sizeof(dm_xstat_t); + sbuf->dt_handle.vd_length = (size_t) DM_HSIZE(*handle); + memset(&sbuf->dt_compname, 0, sizeof(dm_vardata_t)); + sbuf->_link = xstat_sz; +} + +STATIC int +xfs_dm_bulkall_iget_one( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_daddr_t bno, + int *value_lenp, + dm_xstat_t *xbuf, + u_int *xstat_szp, + char *attr_name, + caddr_t attr_buf) +{ + xfs_inode_t *ip; + dm_handle_t handle; + u_int xstat_sz = *xstat_szp; + int value_len = *value_lenp; + int error; + + error = xfs_iget(mp, NULL, ino, + XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); + if (error) + return error; + + xfs_ip_to_stat(mp, ino, ip, &xbuf->dx_statinfo); + dm_ip_to_handle(&ip->i_vnode, &handle); + xfs_dm_handle_to_xstat(xbuf, xstat_sz, &handle, sizeof(handle)); + + /* Drop ILOCK_SHARED for call to xfs_attr_get */ + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + memset(&xbuf->dx_attrdata, 0, sizeof(dm_vardata_t)); + error = xfs_attr_get(ip, attr_name, attr_buf, &value_len, ATTR_ROOT); + iput(&ip->i_vnode); + + DM_EA_XLATE_ERR(error); + if (error && (error != ENOATTR)) { + if (error == E2BIG) + error = ENOMEM; + return error; + } + + /* How much space was in the attr? 
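+	   If it was found, record its offset and length in the vardata
+	   descriptor and advance xstat_sz past the value, rounded up to
+	   DM_STAT_ALIGN.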
*/ + if (error != ENOATTR) { + xbuf->dx_attrdata.vd_offset = xstat_sz; + xbuf->dx_attrdata.vd_length = value_len; + xstat_sz += (value_len+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1); + } + *xstat_szp = xbuf->dx_statinfo._link = xstat_sz; + *value_lenp = value_len; + return 0; +} + + +STATIC int +xfs_dm_inline_attr( + xfs_mount_t *mp, + xfs_dinode_t *dip, + char *attr_name, + caddr_t attr_buf, + int *value_lenp) +{ + if (dip->di_aformat == XFS_DINODE_FMT_LOCAL) { + xfs_attr_shortform_t *sf; + xfs_attr_sf_entry_t *sfe; + unsigned int namelen = strlen(attr_name); + unsigned int valuelen = *value_lenp; + int i; + + sf = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); + sfe = &sf->list[0]; + for (i = 0; i < sf->hdr.count; + sfe = XFS_ATTR_SF_NEXTENTRY(sfe), i++) { + if (sfe->namelen != namelen) + continue; + if (!(sfe->flags & XFS_ATTR_ROOT)) + continue; + if (memcmp(attr_name, sfe->nameval, namelen) != 0) + continue; + if (valuelen < sfe->valuelen) + return ERANGE; + valuelen = sfe->valuelen; + memcpy(attr_buf, &sfe->nameval[namelen], valuelen); + *value_lenp = valuelen; + return 0; + } + } + *value_lenp = 0; + return ENOATTR; +} + +STATIC void +dm_dip_to_handle( + xfs_ino_t ino, + xfs_dinode_t *dip, + dm_fsid_t *fsid, + dm_handle_t *handlep) +{ + dm_fid_t fid; + int hsize; + + fid.dm_fid_len = sizeof(struct dm_fid) - sizeof(fid.dm_fid_len); + fid.dm_fid_pad = 0; + fid.dm_fid_ino = ino; + fid.dm_fid_gen = be32_to_cpu(dip->di_gen); + + memcpy(&handlep->ha_fsid, fsid, sizeof(*fsid)); + memcpy(&handlep->ha_fid, &fid, fid.dm_fid_len + sizeof(fid.dm_fid_len)); + hsize = DM_HSIZE(*handlep); + memset((char *)handlep + hsize, 0, sizeof(*handlep) - hsize); +} + +STATIC int +xfs_dm_bulkall_inline_one( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + dm_fsid_t *fsid, + int *value_lenp, + dm_xstat_t *xbuf, + u_int *xstat_szp, + char *attr_name, + caddr_t attr_buf) +{ + dm_handle_t handle; + u_int xstat_sz = *xstat_szp; + int value_len = *value_lenp; + int error; + + if (dip->di_mode == 0) + return ENOENT; + + xfs_dip_to_stat(mp, ino, dip, &xbuf->dx_statinfo); + dm_dip_to_handle(ino, dip, fsid, &handle); + xfs_dm_handle_to_xstat(xbuf, xstat_sz, &handle, sizeof(handle)); + + memset(&xbuf->dx_attrdata, 0, sizeof(dm_vardata_t)); + error = xfs_dm_inline_attr(mp, dip, attr_name, attr_buf, &value_len); + DM_EA_XLATE_ERR(error); + if (error && (error != ENOATTR)) { + if (error == E2BIG) + error = ENOMEM; + return error; + } + + /* How much space was in the attr? */ + if (error != ENOATTR) { + xbuf->dx_attrdata.vd_offset = xstat_sz; + xbuf->dx_attrdata.vd_length = value_len; + xstat_sz += (value_len+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1); + } + *xstat_szp = xbuf->dx_statinfo._link = xstat_sz; + *value_lenp = value_len; + return 0; +} + +/* + * This is used by dm_get_bulkall(). + * Given a inumber, it igets the inode and fills the given buffer + * with the dm_xstat structure for the file. 
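+ * The DMAPI attribute value, if present, is copied out right after the
+ * dm_xstat_t record itself.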
+ */ +STATIC int +xfs_dm_bulkall_one( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + void *private_data, /* my private data */ + xfs_daddr_t bno, /* starting block of inode cluster */ + int *ubused, /* amount of buffer we used */ + void *dibuff, /* on-disk inode buffer */ + int *res) /* bulkstat result code */ +{ + dm_xstat_t *xbuf; + u_int xstat_sz; + int error; + int value_len; + int kern_buf_sz; + int attr_buf_sz; + caddr_t attr_buf; + void __user *attr_user_buf; + dm_bulkstat_one_t *dmb = (dm_bulkstat_one_t*)private_data; + + /* Returns positive errors to XFS */ + + *res = BULKSTAT_RV_NOTHING; + + if (!buffer || xfs_internal_inum(mp, ino)) + return EINVAL; + + xstat_sz = DM_STAT_SIZE(*xbuf, 0); + xstat_sz = (xstat_sz + (DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1); + if (xstat_sz > ubsize) + return ENOMEM; + + kern_buf_sz = xstat_sz; + xbuf = kmem_alloc(kern_buf_sz, KM_SLEEP); + + /* Determine place to drop attr value, and available space. */ + value_len = ubsize - xstat_sz; + if (value_len > ATTR_MAX_VALUELEN) + value_len = ATTR_MAX_VALUELEN; + + attr_user_buf = buffer + xstat_sz; + attr_buf_sz = value_len; + attr_buf = kmem_alloc(attr_buf_sz, KM_SLEEP); + + if (!dibuff) + error = xfs_dm_bulkall_iget_one(mp, ino, bno, + &value_len, xbuf, &xstat_sz, + dmb->attrname.dan_chars, + attr_buf); + else + error = xfs_dm_bulkall_inline_one(mp, ino, + (xfs_dinode_t *)dibuff, + &dmb->fsid, + &value_len, xbuf, &xstat_sz, + dmb->attrname.dan_chars, + attr_buf); + if (error) + goto out_free_buffers; + + if (copy_to_user(buffer, xbuf, kern_buf_sz)) { + error = EFAULT; + goto out_free_buffers; + } + if (copy_to_user(attr_user_buf, attr_buf, value_len)) { + error = EFAULT; + goto out_free_buffers; + } + + kmem_free(attr_buf); + kmem_free(xbuf); + + *res = BULKSTAT_RV_DIDONE; + if (ubused) + *ubused = xstat_sz; + dmb->laststruct = buffer; + return 0; + + out_free_buffers: + kmem_free(attr_buf); + kmem_free(xbuf); + return error; +} + +/* + * Take the handle and put it at the end of a dm_stat buffer. + * dt_compname is unused in bulkstat - so we zero it out. + * Finally, update link in dm_stat_t to point to next struct. 
+ */ +STATIC void +xfs_dm_handle_to_stat( + dm_stat_t *sbuf, + size_t stat_sz, + dm_handle_t *handle, + size_t handle_sz) +{ + memcpy(sbuf + 1, handle, handle_sz); + sbuf->dt_handle.vd_offset = (ssize_t) sizeof(dm_stat_t); + sbuf->dt_handle.vd_length = (size_t) DM_HSIZE(*handle); + memset(&sbuf->dt_compname, 0, sizeof(dm_vardata_t)); + sbuf->_link = stat_sz; +} + +STATIC int +xfs_dm_bulkattr_iget_one( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_daddr_t bno, + dm_stat_t *sbuf, + u_int stat_sz) +{ + xfs_inode_t *ip; + dm_handle_t handle; + int error; + + error = xfs_iget(mp, NULL, ino, + XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); + if (error) + return error; + + xfs_ip_to_stat(mp, ino, ip, sbuf); + dm_ip_to_handle(&ip->i_vnode, &handle); + xfs_dm_handle_to_stat(sbuf, stat_sz, &handle, sizeof(handle)); + + xfs_iput(ip, XFS_ILOCK_SHARED); + return 0; +} + +STATIC int +xfs_dm_bulkattr_inline_one( + xfs_mount_t *mp, + xfs_ino_t ino, + xfs_dinode_t *dip, + dm_fsid_t *fsid, + dm_stat_t *sbuf, + u_int stat_sz) +{ + dm_handle_t handle; + + if (dip->di_mode == 0) + return ENOENT; + xfs_dip_to_stat(mp, ino, dip, sbuf); + dm_dip_to_handle(ino, dip, fsid, &handle); + xfs_dm_handle_to_stat(sbuf, stat_sz, &handle, sizeof(handle)); + return 0; +} + +/* + * This is used by dm_get_bulkattr(). + * Given a inumber, it igets the inode and fills the given buffer + * with the dm_stat structure for the file. + */ +STATIC int +xfs_dm_bulkattr_one( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + void *private_data, /* my private data */ + xfs_daddr_t bno, /* starting block of inode cluster */ + int *ubused, /* amount of buffer we used */ + void *dibuff, /* on-disk inode buffer */ + int *res) /* bulkstat result code */ +{ + dm_stat_t *sbuf; + u_int stat_sz; + int error; + dm_bulkstat_one_t *dmb = (dm_bulkstat_one_t*)private_data; + + /* Returns positive errors to XFS */ + + *res = BULKSTAT_RV_NOTHING; + + if (!buffer || xfs_internal_inum(mp, ino)) + return EINVAL; + + stat_sz = DM_STAT_SIZE(*sbuf, 0); + stat_sz = (stat_sz+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1); + if (stat_sz > ubsize) + return ENOMEM; + + sbuf = kmem_alloc(stat_sz, KM_SLEEP); + + if (!dibuff) + error = xfs_dm_bulkattr_iget_one(mp, ino, bno, sbuf, stat_sz); + else + error = xfs_dm_bulkattr_inline_one(mp, ino, + (xfs_dinode_t *)dibuff, + &dmb->fsid, sbuf, stat_sz); + if (error) + goto out_free_buffer; + + if (copy_to_user(buffer, sbuf, stat_sz)) { + error = EFAULT; + goto out_free_buffer; + } + + kmem_free(sbuf); + *res = BULKSTAT_RV_DIDONE; + if (ubused) + *ubused = stat_sz; + dmb->laststruct = buffer; + return 0; + + out_free_buffer: + kmem_free(sbuf); + return error; +} + +/* xfs_dm_f_get_eventlist - return the dm_eventset_t mask for inode ip. */ + +STATIC int +xfs_dm_f_get_eventlist( + xfs_inode_t *ip, + dm_right_t right, + u_int nelem, + dm_eventset_t *eventsetp, /* in kernel space! */ + u_int *nelemp) /* in kernel space! */ +{ + dm_eventset_t eventset; + + if (right < DM_RIGHT_SHARED) + return(EACCES); + + /* Note that we MUST return a regular file's managed region bits as + part of the mask because dm_get_eventlist is supposed to return the + union of all managed region flags in those bits. Since we only + support one region, we can just return the bits as they are. For + all other object types, the bits will already be zero. Handy, huh? 
+	 */
+
+	eventset = ip->i_d.di_dmevmask;
+
+	/* Now copy the event mask and event count back to the caller.  We
+	   return the lesser of nelem and DM_EVENT_MAX.
+	*/
+
+	if (nelem > DM_EVENT_MAX)
+		nelem = DM_EVENT_MAX;
+	eventset &= (1 << nelem) - 1;
+
+	*eventsetp = eventset;
+	*nelemp = nelem;
+	return(0);
+}
+
+
+/* xfs_dm_f_set_eventlist - update the dm_eventset_t mask in the inode vp.  Only the
+   bits from zero to maxevent-1 are being replaced; higher bits are preserved.
+*/
+
+STATIC int
+xfs_dm_f_set_eventlist(
+	xfs_inode_t	*ip,
+	dm_right_t	right,
+	dm_eventset_t	*eventsetp,	/* in kernel space! */
+	u_int		maxevent)
+{
+	dm_eventset_t	eventset;
+	dm_eventset_t	max_mask;
+	dm_eventset_t	valid_events;
+	xfs_trans_t	*tp;
+	xfs_mount_t	*mp;
+	int		error;
+
+	if (right < DM_RIGHT_EXCL)
+		return(EACCES);
+
+	eventset = *eventsetp;
+	if (maxevent >= sizeof(ip->i_d.di_dmevmask) * NBBY)
+		return(EINVAL);
+	max_mask = (1 << maxevent) - 1;
+
+	if (S_ISDIR(ip->i_d.di_mode)) {
+		valid_events = DM_XFS_VALID_DIRECTORY_EVENTS;
+	} else {	/* file or symlink */
+		valid_events = DM_XFS_VALID_FILE_EVENTS;
+	}
+	if ((eventset & max_mask) & ~valid_events)
+		return(EINVAL);
+
+	/* Adjust the event mask so that the managed region bits will not
+	   be altered.
+	*/
+
+	max_mask &= ~(1 << DM_EVENT_READ);
+	max_mask &= ~(1 << DM_EVENT_WRITE);
+	max_mask &= ~(1 << DM_EVENT_TRUNCATE);
+
+	mp = ip->i_mount;
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
+	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return(error);
+	}
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+	ip->i_d.di_dmevmask = (eventset & max_mask) | (ip->i_d.di_dmevmask & ~max_mask);
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	igrab(&ip->i_vnode);
+	xfs_trans_commit(tp, 0);
+
+	return(0);
+}
+
+
+/* xfs_dm_fs_get_eventlist - return the dm_eventset_t mask for filesystem vfsp. */
+
+STATIC int
+xfs_dm_fs_get_eventlist(
+	xfs_mount_t	*mp,
+	dm_right_t	right,
+	u_int		nelem,
+	dm_eventset_t	*eventsetp,	/* in kernel space! */
+	u_int		*nelemp)	/* in kernel space! */
+{
+	dm_eventset_t	eventset;
+
+	if (right < DM_RIGHT_SHARED)
+		return(EACCES);
+
+	eventset = mp->m_dmevmask;
+
+	/* Now copy the event mask and event count back to the caller.  We
+	   return the lesser of nelem and DM_EVENT_MAX.
+	*/
+
+	if (nelem > DM_EVENT_MAX)
+		nelem = DM_EVENT_MAX;
+	eventset &= (1 << nelem) - 1;
+
+	*eventsetp = eventset;
+	*nelemp = nelem;
+	return(0);
+}
+
+
+/* xfs_dm_fs_set_eventlist - update the dm_eventset_t mask in the mount structure for
+   filesystem vfsp.  Only the bits from zero to maxevent-1 are being replaced;
+   higher bits are preserved.
+*/
+
+STATIC int
+xfs_dm_fs_set_eventlist(
+	xfs_mount_t	*mp,
+	dm_right_t	right,
+	dm_eventset_t	*eventsetp,	/* in kernel space! */
+	u_int		maxevent)
+{
+	dm_eventset_t	eventset;
+	dm_eventset_t	max_mask;
+
+	if (right < DM_RIGHT_EXCL)
+		return(EACCES);
+
+	eventset = *eventsetp;
+
+	if (maxevent >= sizeof(mp->m_dmevmask) * NBBY)
+		return(EINVAL);
+	max_mask = (1 << maxevent) - 1;
+
+	if ((eventset & max_mask) & ~DM_XFS_VALID_FS_EVENTS)
+		return(EINVAL);
+
+	mp->m_dmevmask = (eventset & max_mask) | (mp->m_dmevmask & ~max_mask);
+	return(0);
+}
+
+
+/* Code in this routine must exactly match the logic in xfs_diordwr() in
+   order for this to work!
+*/
+
+STATIC int
+xfs_dm_direct_ok(
+	xfs_inode_t	*ip,
+	dm_off_t	off,
+	dm_size_t	len,
+	void __user	*bufp)
+{
+	xfs_mount_t	*mp;
+
+	mp = ip->i_mount;
+
+	/* Realtime files can ONLY do direct I/O.
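+	   A realtime inode is therefore always treated as a direct I/O
+	   candidate, regardless of dm_min_dio_xfer.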
*/ + + if (XFS_IS_REALTIME_INODE(ip)) + return(1); + + /* If direct I/O is disabled, or if the request is too small, use + buffered I/O. + */ + + if (!dm_min_dio_xfer || len < dm_min_dio_xfer) + return(0); + +#if 0 + /* If the request is not well-formed or is too large, use + buffered I/O. + */ + + if ((__psint_t)bufp & scache_linemask) /* if buffer not aligned */ + return(0); + if (off & mp->m_blockmask) /* if file offset not aligned */ + return(0); + if (len & mp->m_blockmask) /* if xfer length not aligned */ + return(0); + if (len > ctooff(v.v_maxdmasz - 1)) /* if transfer too large */ + return(0); + + /* A valid direct I/O candidate. */ + + return(1); +#else + return(0); +#endif +} + + +/* We need to be able to select various combinations of O_NONBLOCK, + O_DIRECT, and O_SYNC, yet we don't have a file descriptor and we don't have + the file's pathname. All we have is a handle. +*/ + +STATIC int +xfs_dm_rdwr( + struct inode *inode, + uint fflag, + mode_t fmode, + dm_off_t off, + dm_size_t len, + void __user *bufp, + int *rvp) +{ + const struct cred *cred = current_cred(); + xfs_inode_t *ip = XFS_I(inode); + int error; + int oflags; + ssize_t xfer; + struct file *file; + struct dentry *dentry; + + if ((off < 0) || (off > i_size_read(inode)) || !S_ISREG(inode->i_mode)) + return EINVAL; + + if (fmode & FMODE_READ) { + oflags = O_RDONLY; + } else { + oflags = O_WRONLY; + } + + /* + * Build file descriptor flags and I/O flags. O_NONBLOCK is needed so + * that we don't block on mandatory file locks. This is an invisible IO, + * don't change the atime. + */ + + oflags |= O_LARGEFILE | O_NONBLOCK | O_NOATIME; + if (xfs_dm_direct_ok(ip, off, len, bufp)) + oflags |= O_DIRECT; + + if (fflag & O_SYNC) + oflags |= O_SYNC; + + if (inode->i_fop == NULL) { + /* no iput; caller did get, and will do put */ + return EINVAL; + } + + igrab(inode); + + dentry = d_obtain_alias(inode); + if (dentry == NULL) { + iput(inode); + return ENOMEM; + } + + file = dentry_open(dentry, mntget(ip->i_mount->m_vfsmount), oflags, + cred); + if (IS_ERR(file)) { + return -PTR_ERR(file); + } + file->f_mode |= FMODE_NOCMTIME; + + if (fmode & FMODE_READ) { + xfer = file->f_op->read(file, bufp, len, (loff_t*)&off); + } else { + xfer = file->f_op->write(file, bufp, len, (loff_t*)&off); + } + + if (xfer >= 0) { + *rvp = xfer; + error = 0; + } else { + /* xfs_read/xfs_write return negative error--flip it */ + error = -(int)xfer; + } + + fput(file); + return error; +} + +/* ARGSUSED */ +STATIC int +xfs_dm_clear_inherit( + struct inode *inode, + dm_right_t right, + dm_attrname_t __user *attrnamep) +{ + return(-ENOSYS); /* Return negative error to DMAPI */ +} + + +/* ARGSUSED */ +STATIC int +xfs_dm_create_by_handle( + struct inode *inode, + dm_right_t right, + void __user *hanp, + size_t hlen, + char __user *cname) +{ + return(-ENOSYS); /* Return negative error to DMAPI */ +} + + +/* ARGSUSED */ +STATIC int +xfs_dm_downgrade_right( + struct inode *inode, + dm_right_t right, + u_int type) /* DM_FSYS_OBJ or zero */ +{ +#ifdef DEBUG_RIGHTS + char buffer[sizeof(dm_handle_t) * 2 + 1]; + + if (!xfs_vp_to_hexhandle(inode, type, buffer)) { + printf("dm_downgrade_right: old %d new %d type %d handle %s\n", + right, DM_RIGHT_SHARED, type, buffer); + } else { + printf("dm_downgrade_right: old %d new %d type %d handle " + "\n", right, DM_RIGHT_SHARED, type); + } +#endif /* DEBUG_RIGHTS */ + return(0); +} + + +/* Note: xfs_dm_get_allocinfo() makes no attempt to coalesce two adjacent + extents when both are of type DM_EXTENT_RES; this is left to the 
caller. + XFS guarantees that there will never be two adjacent DM_EXTENT_HOLE extents. + + In order to provide the caller with all extents in a file including + those beyond the file's last byte offset, we have to use the xfs_bmapi() + interface. +*/ + +STATIC int +xfs_dm_get_allocinfo_rvp( + struct inode *inode, + dm_right_t right, + dm_off_t __user *offp, + u_int nelem, + dm_extent_t __user *extentp, + u_int __user *nelemp, + int *rvp) +{ + xfs_inode_t *ip = XFS_I(inode); + xfs_mount_t *mp; /* file system mount point */ + xfs_fileoff_t fsb_offset; + xfs_filblks_t fsb_length; + dm_off_t startoff; + int elem; + xfs_bmbt_irec_t *bmp = NULL; + u_int bmpcnt = 50; + u_int bmpsz = sizeof(xfs_bmbt_irec_t) * bmpcnt; + int error = 0; + + /* Returns negative errors to DMAPI */ + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + if ((inode->i_mode & S_IFMT) != S_IFREG) + return(-EINVAL); + + if (copy_from_user( &startoff, offp, sizeof(startoff))) + return(-EFAULT); + + mp = ip->i_mount; + ASSERT(mp); + + if (startoff > XFS_MAXIOFFSET(mp)) + return(-EINVAL); + + if (nelem == 0) + return(-EINVAL); + + /* Convert the caller's starting offset into filesystem allocation + units as required by xfs_bmapi(). Round the offset down so that + it is sure to be included in the reply. + */ + + fsb_offset = XFS_B_TO_FSBT(mp, startoff); + fsb_length = XFS_B_TO_FSB(mp, XFS_MAXIOFFSET(mp)) - fsb_offset; + elem = 0; + + if (fsb_length) + bmp = kmem_alloc(bmpsz, KM_SLEEP); + + while (fsb_length && elem < nelem) { + dm_extent_t extent; + xfs_filblks_t fsb_bias; + dm_size_t bias; + int lock; + int num; + int i; + + /* Compute how many getbmap structures to use on the xfs_bmapi + call. + */ + + num = MIN((u_int)(nelem - elem), bmpcnt); + + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock = xfs_ilock_map_shared(ip); + + error = xfs_bmapi(NULL, ip, fsb_offset, fsb_length, + XFS_BMAPI_ENTIRE, NULL, 0, bmp, &num, NULL, NULL); + + xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + if (error) { + error = -error; /* Return negative error to DMAPI */ + goto finish_out; + } + + /* Fill in the caller's extents, adjusting the bias in the + first entry if necessary. + */ + + for (i = 0; i < num; i++, extentp++) { + bias = startoff - XFS_FSB_TO_B(mp, bmp[i].br_startoff); + extent.ex_offset = startoff; + extent.ex_length = + XFS_FSB_TO_B(mp, bmp[i].br_blockcount) - bias; + if (bmp[i].br_startblock == HOLESTARTBLOCK) { + extent.ex_type = DM_EXTENT_HOLE; + } else { + extent.ex_type = DM_EXTENT_RES; + } + startoff = extent.ex_offset + extent.ex_length; + + if (copy_to_user( extentp, &extent, sizeof(extent))) { + error = -EFAULT; + goto finish_out; + } + + fsb_bias = fsb_offset - bmp[i].br_startoff; + fsb_offset += bmp[i].br_blockcount - fsb_bias; + fsb_length -= bmp[i].br_blockcount - fsb_bias; + elem++; + } + } + + if (fsb_length == 0) { + startoff = 0; + } + if (copy_to_user( offp, &startoff, sizeof(startoff))) { + error = -EFAULT; + goto finish_out; + } + + if (copy_to_user( nelemp, &elem, sizeof(elem))) { + error = -EFAULT; + goto finish_out; + } + + *rvp = (fsb_length == 0 ? 
0 : 1); + +finish_out: + if (bmp) + kmem_free(bmp); + return(error); +} + + +STATIC int +xfs_dm_zero_xstatinfo_link( + dm_xstat_t __user *dxs) +{ + dm_xstat_t *ldxs; + int error = 0; + + if (!dxs) + return 0; + ldxs = kmalloc(sizeof(*ldxs), GFP_KERNEL); + if (!ldxs) + return -ENOMEM; + if (copy_from_user(ldxs, dxs, sizeof(*dxs))) { + error = -EFAULT; + } else { + ldxs->dx_statinfo._link = 0; + if (copy_to_user(dxs, ldxs, sizeof(*dxs))) + error = -EFAULT; + } + kfree(ldxs); + return error; +} + +/* ARGSUSED */ +STATIC int +xfs_dm_get_bulkall_rvp( + struct inode *inode, + dm_right_t right, + u_int mask, + dm_attrname_t __user *attrnamep, + dm_attrloc_t __user *locp, + size_t buflen, + void __user *bufp, /* address of buffer in user space */ + size_t __user *rlenp, /* user space address */ + int *rvalp) +{ + int error, done; + int nelems; + u_int statstruct_sz; + dm_attrloc_t loc; + xfs_mount_t *mp = XFS_I(inode)->i_mount; + dm_attrname_t attrname; + dm_bulkstat_one_t dmb; + + /* Returns negative errors to DMAPI */ + + if (copy_from_user(&attrname, attrnamep, sizeof(attrname)) || + copy_from_user(&loc, locp, sizeof(loc))) + return -EFAULT; + + if (attrname.an_chars[0] == '\0') + return(-EINVAL); + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + /* Because we will write directly to the user's buffer, make sure that + the buffer is properly aligned. + */ + + if (((unsigned long)bufp & (DM_STAT_ALIGN - 1)) != 0) + return(-EFAULT); + + /* Size of the handle is constant for this function. + * If there are no files with attributes, then this will be the + * maximum number of inodes we can get. + */ + + statstruct_sz = DM_STAT_SIZE(dm_xstat_t, 0); + statstruct_sz = (statstruct_sz+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1); + + nelems = buflen / statstruct_sz; + if (nelems < 1) { + if (put_user( statstruct_sz, rlenp )) + return(-EFAULT); + return(-E2BIG); + } + + /* Build the on-disk version of the attribute name. */ + strcpy(dmb.attrname.dan_chars, dmattr_prefix); + strncpy(&dmb.attrname.dan_chars[DMATTR_PREFIXLEN], + attrname.an_chars, DM_ATTR_NAME_SIZE + 1); + dmb.attrname.dan_chars[sizeof(dmb.attrname.dan_chars) - 1] = '\0'; + + /* + * fill the buffer with dm_xstat_t's + */ + + dmb.laststruct = NULL; + memcpy(&dmb.fsid, mp->m_fixedfsid, sizeof(dm_fsid_t)); + error = xfs_bulkstat(mp, (xfs_ino_t *)&loc, &nelems, + xfs_dm_bulkall_one, (void*)&dmb, statstruct_sz, + bufp, BULKSTAT_FG_INLINE, &done); + if (error) + return(-error); /* Return negative error to DMAPI */ + + *rvalp = !done ? 1 : 0; + + if (put_user( statstruct_sz * nelems, rlenp )) + return(-EFAULT); + + if (copy_to_user( locp, &loc, sizeof(loc))) + return(-EFAULT); + /* + * If we didn't do any, we must not have any more to do. 
+ */ + if (nelems < 1) + return(0); + /* + * Set _link in the last struct to zero + */ + return xfs_dm_zero_xstatinfo_link((dm_xstat_t __user *)dmb.laststruct); +} + + +STATIC int +xfs_dm_zero_statinfo_link( + dm_stat_t __user *dxs) +{ + dm_stat_t *ldxs; + int error = 0; + + if (!dxs) + return 0; + ldxs = kmalloc(sizeof(*ldxs), GFP_KERNEL); + if (!ldxs) + return -ENOMEM; + if (copy_from_user(ldxs, dxs, sizeof(*dxs))) { + error = -EFAULT; + } else { + ldxs->_link = 0; + if (copy_to_user(dxs, ldxs, sizeof(*dxs))) + error = -EFAULT; + } + kfree(ldxs); + return error; +} + +/* ARGSUSED */ +STATIC int +xfs_dm_get_bulkattr_rvp( + struct inode *inode, + dm_right_t right, + u_int mask, + dm_attrloc_t __user *locp, + size_t buflen, + void __user *bufp, + size_t __user *rlenp, + int *rvalp) +{ + int error, done; + int nelems; + u_int statstruct_sz; + dm_attrloc_t loc; + xfs_mount_t *mp = XFS_I(inode)->i_mount; + dm_bulkstat_one_t dmb; + + /* Returns negative errors to DMAPI */ + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + if (copy_from_user( &loc, locp, sizeof(loc))) + return(-EFAULT); + + /* Because we will write directly to the user's buffer, make sure that + the buffer is properly aligned. + */ + + if (((unsigned long)bufp & (DM_STAT_ALIGN - 1)) != 0) + return(-EFAULT); + + /* size of the handle is constant for this function */ + + statstruct_sz = DM_STAT_SIZE(dm_stat_t, 0); + statstruct_sz = (statstruct_sz+(DM_STAT_ALIGN-1)) & ~(DM_STAT_ALIGN-1); + + nelems = buflen / statstruct_sz; + if (nelems < 1) { + if (put_user( statstruct_sz, rlenp )) + return(-EFAULT); + return(-E2BIG); + } + + dmb.laststruct = NULL; + memcpy(&dmb.fsid, mp->m_fixedfsid, sizeof(dm_fsid_t)); + error = xfs_bulkstat(mp, (xfs_ino_t *)&loc, &nelems, + xfs_dm_bulkattr_one, (void*)&dmb, + statstruct_sz, bufp, BULKSTAT_FG_INLINE, &done); + if (error) + return(-error); /* Return negative error to DMAPI */ + + *rvalp = !done ? 1 : 0; + + if (put_user( statstruct_sz * nelems, rlenp )) + return(-EFAULT); + + if (copy_to_user( locp, &loc, sizeof(loc))) + return(-EFAULT); + + /* + * If we didn't do any, we must not have any more to do. + */ + if (nelems < 1) + return(0); + /* + * Set _link in the last struct to zero + */ + return xfs_dm_zero_statinfo_link((dm_stat_t __user *)dmb.laststruct); +} + + +/* ARGSUSED */ +STATIC int +xfs_dm_get_config( + struct inode *inode, + dm_right_t right, + dm_config_t flagname, + dm_size_t __user *retvalp) +{ + dm_size_t retval; + + /* Returns negative errors to DMAPI */ + + switch (flagname) { + case DM_CONFIG_DTIME_OVERLOAD: + case DM_CONFIG_PERS_ATTRIBUTES: + case DM_CONFIG_PERS_EVENTS: + case DM_CONFIG_PERS_MANAGED_REGIONS: + case DM_CONFIG_PUNCH_HOLE: + case DM_CONFIG_WILL_RETRY: + retval = DM_TRUE; + break; + + case DM_CONFIG_CREATE_BY_HANDLE: /* these will never be done */ + case DM_CONFIG_LOCK_UPGRADE: + case DM_CONFIG_PERS_INHERIT_ATTRIBS: + retval = DM_FALSE; + break; + + case DM_CONFIG_BULKALL: + retval = DM_TRUE; + break; + case DM_CONFIG_MAX_ATTR_ON_DESTROY: + retval = DM_MAX_ATTR_BYTES_ON_DESTROY; + break; + + case DM_CONFIG_MAX_ATTRIBUTE_SIZE: + retval = ATTR_MAX_VALUELEN; + break; + + case DM_CONFIG_MAX_HANDLE_SIZE: + retval = DM_MAX_HANDLE_SIZE; + break; + + case DM_CONFIG_MAX_MANAGED_REGIONS: + retval = 1; + break; + + case DM_CONFIG_TOTAL_ATTRIBUTE_SPACE: + retval = 0x7fffffff; /* actually it's unlimited */ + break; + + default: + return(-EINVAL); + } + + /* Copy the results back to the user. 
*/ + + if (copy_to_user( retvalp, &retval, sizeof(retval))) + return(-EFAULT); + return(0); +} + + +/* ARGSUSED */ +STATIC int +xfs_dm_get_config_events( + struct inode *inode, + dm_right_t right, + u_int nelem, + dm_eventset_t __user *eventsetp, + u_int __user *nelemp) +{ + dm_eventset_t eventset; + + /* Returns negative errors to DMAPI */ + + if (nelem == 0) + return(-EINVAL); + + eventset = DM_XFS_SUPPORTED_EVENTS; + + /* Now copy the event mask and event count back to the caller. We + return the lesser of nelem and DM_EVENT_MAX. + */ + + if (nelem > DM_EVENT_MAX) + nelem = DM_EVENT_MAX; + eventset &= (1 << nelem) - 1; + + if (copy_to_user( eventsetp, &eventset, sizeof(eventset))) + return(-EFAULT); + + if (put_user(nelem, nelemp)) + return(-EFAULT); + return(0); +} + + +/* ARGSUSED */ +STATIC int +xfs_dm_get_destroy_dmattr( + struct inode *inode, + dm_right_t right, + dm_attrname_t *attrnamep, + char **valuepp, + int *vlenp) +{ + dm_dkattrname_t dkattrname; + int alloc_size; + int value_len; + char *value; + int error; + + /* Returns negative errors to DMAPI */ + + *vlenp = -1; /* assume failure by default */ + + if (attrnamep->an_chars[0] == '\0') + return(-EINVAL); + + /* Build the on-disk version of the attribute name. */ + + strcpy(dkattrname.dan_chars, dmattr_prefix); + strncpy(&dkattrname.dan_chars[DMATTR_PREFIXLEN], + (char *)attrnamep->an_chars, DM_ATTR_NAME_SIZE + 1); + dkattrname.dan_chars[sizeof(dkattrname.dan_chars) - 1] = '\0'; + + /* xfs_attr_get will not return anything if the buffer is too small, + and we don't know how big to make the buffer, so this may take + two tries to get it right. The initial try must use a buffer of + at least XFS_BUG_KLUDGE bytes to prevent buffer overflow because + of a bug in XFS. + */ + + alloc_size = XFS_BUG_KLUDGE; + value = kmalloc(alloc_size, GFP_KERNEL); + if (value == NULL) + return(-ENOMEM); + + error = xfs_attr_get(XFS_I(inode), dkattrname.dan_chars, value, + &value_len, ATTR_ROOT); + if (error == ERANGE) { + kfree(value); + alloc_size = value_len; + value = kmalloc(alloc_size, GFP_KERNEL); + if (value == NULL) + return(-ENOMEM); + + error = xfs_attr_get(XFS_I(inode), dkattrname.dan_chars, value, + &value_len, ATTR_ROOT); + } + if (error) { + kfree(value); + DM_EA_XLATE_ERR(error); + return(-error); /* Return negative error to DMAPI */ + } + + /* The attribute exists and has a value. Note that a value_len of + zero is valid! + */ + + if (value_len == 0) { + kfree(value); + *vlenp = 0; + return(0); + } else if (value_len > DM_MAX_ATTR_BYTES_ON_DESTROY) { + char *value2; + + value2 = kmalloc(DM_MAX_ATTR_BYTES_ON_DESTROY, GFP_KERNEL); + if (value2 == NULL) { + kfree(value); + return(-ENOMEM); + } + memcpy(value2, value, DM_MAX_ATTR_BYTES_ON_DESTROY); + kfree(value); + value = value2; + value_len = DM_MAX_ATTR_BYTES_ON_DESTROY; + } + *vlenp = value_len; + *valuepp = value; + return(0); +} + +/* This code was taken from xfs_fcntl(F_DIOINFO) and modified slightly because + we don't have a flags parameter (no open file). + Taken from xfs_ioctl(XFS_IOC_DIOINFO) on Linux. 
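+   d_mem and d_miniosz are reported as the sector size, and d_maxiosz as
+   the largest sector-aligned value that fits in an int (see the
+   MIN_DIO_SIZE/MAX_DIO_SIZE macros above).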
+*/ + +STATIC int +xfs_dm_get_dioinfo( + struct inode *inode, + dm_right_t right, + dm_dioinfo_t __user *diop) +{ + dm_dioinfo_t dio; + xfs_mount_t *mp; + xfs_inode_t *ip = XFS_I(inode); + + /* Returns negative errors to DMAPI */ + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + mp = ip->i_mount; + + dio.d_miniosz = dio.d_mem = MIN_DIO_SIZE(mp); + dio.d_maxiosz = MAX_DIO_SIZE(mp); + dio.d_dio_only = DM_FALSE; + + if (copy_to_user(diop, &dio, sizeof(dio))) + return(-EFAULT); + return(0); +} + +typedef struct dm_readdir_cb { + xfs_mount_t *mp; + char __user *ubuf; + dm_stat_t __user *lastbuf; + size_t spaceleft; + size_t nwritten; + int error; + dm_stat_t kstat; +} dm_readdir_cb_t; + +STATIC int +dm_filldir(void *__buf, const char *name, int namelen, loff_t offset, + u64 ino, unsigned int d_type) +{ + dm_readdir_cb_t *cb = __buf; + dm_stat_t *statp = &cb->kstat; + size_t len; + int error; + int needed; + + /* + * Make sure we have enough space. + */ + needed = dm_stat_size(namelen + 1); + if (cb->spaceleft < needed) { + cb->spaceleft = 0; + return -ENOSPC; + } + + error = -EINVAL; + if (xfs_internal_inum(cb->mp, ino)) + goto out_err; + + memset(statp, 0, dm_stat_size(MAXNAMLEN)); + error = -xfs_dm_bulkattr_iget_one(cb->mp, ino, 0, + statp, needed); + if (error) + goto out_err; + + /* + * On return from bulkstat_one(), stap->_link points + * at the end of the handle in the stat structure. + */ + statp->dt_compname.vd_offset = statp->_link; + statp->dt_compname.vd_length = namelen + 1; + + len = statp->_link; + + /* Word-align the record */ + statp->_link = dm_stat_align(len + namelen + 1); + + error = -EFAULT; + if (copy_to_user(cb->ubuf, statp, len)) + goto out_err; + if (copy_to_user(cb->ubuf + len, name, namelen)) + goto out_err; + if (put_user(0, cb->ubuf + len + namelen)) + goto out_err; + + cb->lastbuf = (dm_stat_t __user *)cb->ubuf; + cb->spaceleft -= statp->_link; + cb->nwritten += statp->_link; + cb->ubuf += statp->_link; + + return 0; + + out_err: + cb->error = error; + return error; +} + +/* Returns negative errors to DMAPI */ +STATIC int +xfs_dm_get_dirattrs_rvp( + struct inode *inode, + dm_right_t right, + u_int mask, + dm_attrloc_t __user *locp, + size_t buflen, + void __user *bufp, + size_t __user *rlenp, + int *rvp) +{ + xfs_inode_t *dp = XFS_I(inode); + xfs_mount_t *mp = dp->i_mount; + dm_readdir_cb_t *cb; + dm_attrloc_t loc; + int error; + + if (right < DM_RIGHT_SHARED) + return -EACCES; + + /* + * Make sure that the buffer is properly aligned. + */ + if (((unsigned long)bufp & (DM_STAT_ALIGN - 1)) != 0) + return -EFAULT; + + if (mask & ~(DM_AT_HANDLE|DM_AT_EMASK|DM_AT_PMANR|DM_AT_PATTR| + DM_AT_DTIME|DM_AT_CFLAG|DM_AT_STAT)) + return -EINVAL; + + if (!S_ISDIR(inode->i_mode)) + return -EINVAL; + + /* + * bufp should be able to fit at least one dm_stat entry including + * dt_handle and full size MAXNAMLEN dt_compname. 
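+	 * dm_stat_size() accounts for the record, the handle and the
+	 * name, rounded up to DM_STAT_ALIGN.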
+ */ + if (buflen < dm_stat_size(MAXNAMLEN)) + return -ENOMEM; + + if (copy_from_user(&loc, locp, sizeof(loc))) + return -EFAULT; + + cb = kzalloc(sizeof(*cb) + dm_stat_size(MAXNAMLEN), GFP_KERNEL); + if (!cb) + return -ENOMEM; + + cb->mp = mp; + cb->spaceleft = buflen; + cb->ubuf = bufp; + + mutex_lock(&inode->i_mutex); + error = -ENOENT; + if (!IS_DEADDIR(inode)) { + error = -xfs_readdir(dp, cb, dp->i_size, + (xfs_off_t *)&loc, dm_filldir); + } + mutex_unlock(&inode->i_mutex); + + if (error) + goto out_kfree; + if (cb->error) { + error = cb->error; + goto out_kfree; + } + + error = -EFAULT; + if (cb->lastbuf && put_user(0, &cb->lastbuf->_link)) + goto out_kfree; + if (put_user(cb->nwritten, rlenp)) + goto out_kfree; + if (copy_to_user(locp, &loc, sizeof(loc))) + goto out_kfree; + + if (cb->nwritten) + *rvp = 1; + else + *rvp = 0; + error = 0; + + out_kfree: + kfree(cb); + return error; +} + +STATIC int +xfs_dm_get_dmattr( + struct inode *inode, + dm_right_t right, + dm_attrname_t __user *attrnamep, + size_t buflen, + void __user *bufp, + size_t __user *rlenp) +{ + dm_dkattrname_t name; + char *value; + int value_len; + int alloc_size; + int error; + + /* Returns negative errors to DMAPI */ + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + if ((error = xfs_copyin_attrname(attrnamep, &name)) != 0) + return(-error); /* Return negative error to DMAPI */ + + /* Allocate a buffer to receive the attribute's value. We allocate + at least one byte even if the caller specified a buflen of zero. + (A buflen of zero is considered valid.) + + Allocating a minimum of XFS_BUG_KLUDGE bytes temporarily works + around a bug within XFS in which in-inode attribute values are not + checked to see if they will fit in the buffer before they are + copied. Since no in-core attribute value can be larger than 256 + bytes (an 8-bit size field), we allocate that minimum size here to + prevent buffer overrun in both the kernel's and user's buffers. + */ + + alloc_size = buflen; + if (alloc_size < XFS_BUG_KLUDGE) + alloc_size = XFS_BUG_KLUDGE; + if (alloc_size > ATTR_MAX_VALUELEN) + alloc_size = ATTR_MAX_VALUELEN; + value = kmem_alloc(alloc_size, KM_SLEEP | KM_LARGE); + + /* Get the attribute's value. */ + + value_len = alloc_size; /* in/out parameter */ + + error = xfs_attr_get(XFS_I(inode), name.dan_chars, value, &value_len, + ATTR_ROOT); + DM_EA_XLATE_ERR(error); + + /* DMAPI requires an errno of ENOENT if an attribute does not exist, + so remap ENOATTR here. 
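+	   E2BIG is reported together with the attribute's real size so
+	   that the caller can retry with a large enough buffer.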
+ */ + + if (error == ENOATTR) + error = ENOENT; + if (!error && value_len > buflen) + error = E2BIG; + if (!error && copy_to_user(bufp, value, value_len)) + error = EFAULT; + if (!error || error == E2BIG) { + if (put_user(value_len, rlenp)) + error = EFAULT; + } + + kmem_free(value); + return(-error); /* Return negative error to DMAPI */ +} + +STATIC int +xfs_dm_get_eventlist( + struct inode *inode, + dm_right_t right, + u_int type, + u_int nelem, + dm_eventset_t *eventsetp, + u_int *nelemp) +{ + int error; + xfs_inode_t *ip = XFS_I(inode); + + /* Returns negative errors to DMAPI */ + + if (type == DM_FSYS_OBJ) { + error = xfs_dm_fs_get_eventlist(ip->i_mount, right, nelem, + eventsetp, nelemp); + } else { + error = xfs_dm_f_get_eventlist(ip, right, nelem, + eventsetp, nelemp); + } + return(-error); /* Returns negative error to DMAPI */ +} + + +/* ARGSUSED */ +STATIC int +xfs_dm_get_fileattr( + struct inode *inode, + dm_right_t right, + u_int mask, /* not used; always return everything */ + dm_stat_t __user *statp) +{ + dm_stat_t stat; + xfs_inode_t *ip = XFS_I(inode); + xfs_mount_t *mp; + + /* Returns negative errors to DMAPI */ + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + /* Find the mount point. */ + + mp = ip->i_mount; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + xfs_ip_to_stat(mp, ip->i_ino, ip, &stat); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (copy_to_user( statp, &stat, sizeof(stat))) + return(-EFAULT); + return(0); +} + + +/* We currently only support a maximum of one managed region per file, and + use the DM_EVENT_READ, DM_EVENT_WRITE, and DM_EVENT_TRUNCATE events in + the file's dm_eventset_t event mask to implement the DM_REGION_READ, + DM_REGION_WRITE, and DM_REGION_TRUNCATE flags for that single region. +*/ + +STATIC int +xfs_dm_get_region( + struct inode *inode, + dm_right_t right, + u_int nelem, + dm_region_t __user *regbufp, + u_int __user *nelemp) +{ + dm_eventset_t evmask; + dm_region_t region; + xfs_inode_t *ip = XFS_I(inode); + u_int elem; + + /* Returns negative errors to DMAPI */ + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + evmask = ip->i_d.di_dmevmask; /* read the mask "atomically" */ + + /* Get the file's current managed region flags out of the + dm_eventset_t mask and use them to build a managed region that + covers the entire file, i.e. set rg_offset and rg_size to zero. + */ + + memset((char *)®ion, 0, sizeof(region)); + + if (evmask & (1 << DM_EVENT_READ)) + region.rg_flags |= DM_REGION_READ; + if (evmask & (1 << DM_EVENT_WRITE)) + region.rg_flags |= DM_REGION_WRITE; + if (evmask & (1 << DM_EVENT_TRUNCATE)) + region.rg_flags |= DM_REGION_TRUNCATE; + + elem = (region.rg_flags ? 1 : 0); + + if (copy_to_user( nelemp, &elem, sizeof(elem))) + return(-EFAULT); + if (elem > nelem) + return(-E2BIG); + if (elem && copy_to_user(regbufp, ®ion, sizeof(region))) + return(-EFAULT); + return(0); +} + + +STATIC int +xfs_dm_getall_dmattr( + struct inode *inode, + dm_right_t right, + size_t buflen, + void __user *bufp, + size_t __user *rlenp) +{ + attrlist_cursor_kern_t cursor; + attrlist_t *attrlist; + dm_attrlist_t __user *ulist; + int *last_link; + int alignment; + int total_size; + int list_size = 8192; /* should be big enough */ + int error; + + /* Returns negative errors to DMAPI */ + + if (right < DM_RIGHT_SHARED) + return(-EACCES); + + /* Verify that the user gave us a buffer that is 4-byte aligned, lock + it down, and work directly within that buffer. As a side-effect, + values of buflen < sizeof(int) return EINVAL. 
+ */ + + alignment = sizeof(int) - 1; + if ((((__psint_t)bufp & alignment) != 0) || + !access_ok(VERIFY_WRITE, bufp, buflen)) { + return(-EFAULT); + } + buflen &= ~alignment; /* round down the alignment */ + + /* Initialize all the structures and variables for the main loop. */ + + memset(&cursor, 0, sizeof(cursor)); + attrlist = (attrlist_t *)kmem_alloc(list_size, KM_SLEEP); + total_size = 0; + ulist = (dm_attrlist_t *)bufp; + last_link = NULL; + + /* Use vop_attr_list to get the names of DMAPI attributes, and use + vop_attr_get to get their values. There is a risk here that the + DMAPI attributes could change between the vop_attr_list and + vop_attr_get calls. If we can detect it, we return EIO to notify + the user. + */ + + do { + int i; + + /* Get a buffer full of attribute names. If there aren't any + more or if we encounter an error, then finish up. + */ + + error = xfs_attr_list(XFS_I(inode), (char *)attrlist, list_size, + ATTR_ROOT, &cursor); + DM_EA_XLATE_ERR(error); + + if (error || attrlist->al_count == 0) + break; + + for (i = 0; i < attrlist->al_count; i++) { + attrlist_ent_t *entry; + char *user_name; + int size_needed; + int value_len; + + /* Skip over all non-DMAPI attributes. If the + attribute name is too long, we assume it is + non-DMAPI even if it starts with the correct + prefix. + */ + + entry = ATTR_ENTRY(attrlist, i); + if (strncmp(entry->a_name, dmattr_prefix, DMATTR_PREFIXLEN)) + continue; + user_name = &entry->a_name[DMATTR_PREFIXLEN]; + if (strlen(user_name) > DM_ATTR_NAME_SIZE) + continue; + + /* We have a valid DMAPI attribute to return. If it + won't fit in the user's buffer, we still need to + keep track of the number of bytes for the user's + next call. + */ + + + size_needed = sizeof(*ulist) + entry->a_valuelen; + size_needed = (size_needed + alignment) & ~alignment; + + total_size += size_needed; + if (total_size > buflen) + continue; + + /* Start by filling in all the fields in the + dm_attrlist_t structure. + */ + + strncpy((char *)ulist->al_name.an_chars, user_name, + DM_ATTR_NAME_SIZE); + ulist->al_data.vd_offset = sizeof(*ulist); + ulist->al_data.vd_length = entry->a_valuelen; + ulist->_link = size_needed; + last_link = &ulist->_link; + + /* Next read the attribute's value into its correct + location after the dm_attrlist structure. Any sort + of error indicates that the data is moving under us, + so we return EIO to let the user know. + */ + + value_len = entry->a_valuelen; + + error = xfs_attr_get(XFS_I(inode), entry->a_name, + (void *)(ulist + 1), &value_len, + ATTR_ROOT); + DM_EA_XLATE_ERR(error); + + if (error || value_len != entry->a_valuelen) { + error = EIO; + break; + } + + ulist = (dm_attrlist_t *)((char *)ulist + ulist->_link); + } + } while (!error && attrlist->al_more); + if (last_link) + *last_link = 0; + + if (!error && total_size > buflen) + error = E2BIG; + if (!error || error == E2BIG) { + if (put_user(total_size, rlenp)) + error = EFAULT; + } + + kmem_free(attrlist); + return(-error); /* Return negative error to DMAPI */ +} + + +/* ARGSUSED */ +STATIC int +xfs_dm_getall_inherit( + struct inode *inode, + dm_right_t right, + u_int nelem, + dm_inherit_t __user *inheritbufp, + u_int __user *nelemp) +{ + return(-ENOSYS); /* Return negative error to DMAPI */ +} + + +/* Initialize location pointer for subsequent dm_get_dirattrs, + dm_get_bulkattr, and dm_get_bulkall calls. The same initialization must + work for inode-based routines (dm_get_dirattrs) and filesystem-based + routines (dm_get_bulkattr and dm_get_bulkall). 
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_getall_inherit(
+	struct inode	*inode,
+	dm_right_t	right,
+	u_int		nelem,
+	dm_inherit_t	__user *inheritbufp,
+	u_int		__user *nelemp)
+{
+	return(-ENOSYS);	/* Return negative error to DMAPI */
+}
+
+
+/* Initialize location pointer for subsequent dm_get_dirattrs,
+   dm_get_bulkattr, and dm_get_bulkall calls.  The same initialization must
+   work for inode-based routines (dm_get_dirattrs) and filesystem-based
+   routines (dm_get_bulkattr and dm_get_bulkall).  Filesystem-based functions
+   call this routine using the filesystem's root inode.
+*/
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_init_attrloc(
+	struct inode	*inode,
+	dm_right_t	right,
+	dm_attrloc_t	__user *locp)
+{
+	dm_attrloc_t	loc = 0;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_SHARED)
+		return(-EACCES);
+
+	if (copy_to_user(locp, &loc, sizeof(loc)))
+		return(-EFAULT);
+	return(0);
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_mkdir_by_handle(
+	struct inode	*inode,
+	dm_right_t	right,
+	void		__user *hanp,
+	size_t		hlen,
+	char		__user *cname)
+{
+	return(-ENOSYS);	/* Return negative error to DMAPI */
+}
+
+
+/*
+ * Probe and Punch
+ *
+ * Hole punching alignment is based on the underlying device's base
+ * allocation size.  Because the alignment is not defined by the DMAPI
+ * spec, we are free to choose it here: round inwards (offset up and
+ * length down) to the block, extent or page size, whichever is bigger.
+ * Our DMAPI implementation rounds the hole geometry strictly inwards.
+ * If this is not possible, both xfs_dm_probe_hole and xfs_dm_punch_hole
+ * return EINVAL, which differs from the DMAPI spec.  Note that
+ * length = 0 is special - it means "punch to EOF", and in that case we
+ * treat the punch as removing everything past offset (including any
+ * preallocation past EOF).
+ */
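+
+/*
+ * Worked example (illustrative values only): with off = 1000,
+ * len = 9192, align = 4096 and filesize = 20480, the offset rounds up
+ * to roff = 4096; the 3096 bytes consumed by that rounding come off
+ * the length, and the remaining 6096 bytes round down to rlen = 4096.
+ * The resulting hole [4096, 8192) lies strictly inside the requested
+ * region, as the comment above demands.
+ */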
+
+STATIC int
+xfs_dm_round_hole(
+	dm_off_t	offset,
+	dm_size_t	length,
+	dm_size_t	align,
+	xfs_fsize_t	filesize,
+	dm_off_t	*roff,
+	dm_size_t	*rlen)
+{
+	dm_off_t	off = offset;
+	dm_size_t	len = length;
+
+	/* Try to round offset up to the nearest boundary */
+	*roff = roundup_64(off, align);
+	if ((*roff >= filesize) || (len && (len < align)))
+		return -EINVAL;
+
+	if ((len == 0) || ((off + len) == filesize)) {
+		/* punch to EOF */
+		*rlen = 0;
+	} else {
+		/* Round length down to the nearest boundary. */
+		ASSERT(len >= align);
+		ASSERT(align > (*roff - off));
+		len -= *roff - off;
+		*rlen = len - do_mod(len, align);
+		if (*rlen == 0)
+			return -EINVAL;	/* requested length is too small */
+	}
+#ifdef CONFIG_DMAPI_DEBUG
+	printk("xfs_dm_round_hole: off %llu, len %llu, align %llu, "
+	       "filesize %llu, roff %llu, rlen %llu\n",
+	       (unsigned long long)offset, (unsigned long long)length,
+	       (unsigned long long)align, (unsigned long long)filesize,
+	       (unsigned long long)*roff, (unsigned long long)*rlen);
+#endif
+	return 0;	/* hole geometry successfully rounded */
+}
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_probe_hole(
+	struct inode	*inode,
+	dm_right_t	right,
+	dm_off_t	off,
+	dm_size_t	len,
+	dm_off_t	__user *roffp,
+	dm_size_t	__user *rlenp)
+{
+	dm_off_t	roff;
+	dm_size_t	rlen;
+	xfs_inode_t	*ip = XFS_I(inode);
+	xfs_mount_t	*mp;
+	uint		lock_flags;
+	xfs_fsize_t	realsize;
+	dm_size_t	align;
+	int		error;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_SHARED)
+		return -EACCES;
+
+	if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+		return -EINVAL;
+
+	mp = ip->i_mount;
+	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
+	xfs_ilock(ip, lock_flags);
+	realsize = ip->i_size;
+	xfs_iunlock(ip, lock_flags);
+
+	if ((off + len) > realsize)
+		return -E2BIG;
+
+	align = 1 << mp->m_sb.sb_blocklog;
+
+	error = xfs_dm_round_hole(off, len, align, realsize, &roff, &rlen);
+	if (error)
+		return error;
+
+	if (copy_to_user(roffp, &roff, sizeof(roff)))
+		return -EFAULT;
+	if (copy_to_user(rlenp, &rlen, sizeof(rlen)))
+		return -EFAULT;
+	return 0;
+}
+
+
+STATIC int
+xfs_dm_punch_hole(
+	struct inode	*inode,
+	dm_right_t	right,
+	dm_off_t	off,
+	dm_size_t	len)
+{
+	xfs_flock64_t	bf;
+	int		error = 0;
+	xfs_inode_t	*ip = XFS_I(inode);
+	xfs_mount_t	*mp;
+	dm_size_t	align;
+	xfs_fsize_t	realsize;
+	dm_off_t	roff;
+	dm_size_t	rlen;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_EXCL)
+		return -EACCES;
+
+	/* Make sure there are no leases. */
+	error = break_lease(inode, FMODE_WRITE);
+	if (error)
+		return -EBUSY;
+
+	error = get_write_access(inode);
+	if (error)
+		return -EBUSY;
+
+	mp = ip->i_mount;
+
+	down_rw_sems(inode, DM_SEM_FLAG_WR);
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+	realsize = ip->i_size;
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	align = xfs_get_extsz_hint(ip);
+	if (align == 0)
+		align = 1;
+
+	align <<= mp->m_sb.sb_blocklog;
+
+	if ((off + len) > realsize) {
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+		error = -E2BIG;
+		goto up_and_out;
+	}
+
+	if ((off + len) == realsize)
+		len = 0;
+
+	error = xfs_dm_round_hole(off, len, align, realsize, &roff, &rlen);
+	if (error || (off != roff) || (len != rlen)) {
+		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+		error = -EINVAL;
+		goto up_and_out;
+	}
+
+	bf.l_type = 0;
+	bf.l_whence = 0;
+	bf.l_start = (xfs_off_t)off;
+	if (len) {
+		bf.l_len = len;
+	} else {
+		/*
+		 * When we are punching to EOF, we have to make sure we punch
+		 * the last partial block that contains EOF.  Round up
+		 * the length to make sure we punch the block and not just
+		 * zero it.
+		 */
+		bf.l_len = roundup_64((realsize - off), mp->m_sb.sb_blocksize);
+	}
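+
+	/*
+	 * Illustrative example: punching from off = 4096 to EOF on a
+	 * 10000-byte file with 4096-byte blocks gives
+	 * bf.l_len = roundup_64(10000 - 4096, 4096) = 8192, so the punch
+	 * covers [4096, 12288) and removes the partial block at EOF
+	 * rather than merely zeroing it.
+	 */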
+#ifdef CONFIG_DMAPI_DEBUG
+	printk("xfs_dm_punch_hole: off %llu, len %llu, align %llu\n",
+	       (unsigned long long)off, (unsigned long long)len,
+	       (unsigned long long)align);
+#endif
+
+	error = xfs_change_file_space(ip, XFS_IOC_UNRESVSP, &bf,
+			(xfs_off_t)off, XFS_ATTR_DMI|XFS_ATTR_NOLOCK);
+
+	/*
+	 * if punching to end of file, kill any blocks past EOF that
+	 * may have been (speculatively) preallocated.  No point in
+	 * leaving them around if we are migrating the file....
+	 */
+	if (!error && (len == 0)) {
+		error = xfs_free_eofblocks(mp, ip, XFS_FREE_EOF_NOLOCK);
+	}
+
+	/*
+	 * negate the error for return here as core XFS functions return
+	 * positive error numbers
+	 */
+	if (error)
+		error = -error;
+
+	/* Let threads in send_data_event know we punched the file. */
+	ip->i_d.di_dmstate++;
+	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+
+up_and_out:
+	up_rw_sems(inode, DM_SEM_FLAG_WR);
+	put_write_access(inode);
+
+	return error;
+}
+
+
+STATIC int
+xfs_dm_read_invis_rvp(
+	struct inode	*inode,
+	dm_right_t	right,
+	dm_off_t	off,
+	dm_size_t	len,
+	void		__user *bufp,
+	int		*rvp)
+{
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_SHARED)
+		return(-EACCES);
+
+	return(-xfs_dm_rdwr(inode, 0, FMODE_READ, off, len, bufp, rvp));
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_release_right(
+	struct inode	*inode,
+	dm_right_t	right,
+	u_int		type)		/* DM_FSYS_OBJ or zero */
+{
+#ifdef DEBUG_RIGHTS
+	char		buffer[sizeof(dm_handle_t) * 2 + 1];
+
+	if (!xfs_vp_to_hexhandle(inode, type, buffer)) {
+		printk("dm_release_right: old %d type %d handle %s\n",
+		       right, type, buffer);
+	} else {
+		printk("dm_release_right: old %d type %d handle "
+		       "<INVALID>\n", right, type);
+	}
+#endif	/* DEBUG_RIGHTS */
+	return(0);
+}
+
+
+STATIC int
+xfs_dm_remove_dmattr(
+	struct inode	*inode,
+	dm_right_t	right,
+	int		setdtime,
+	dm_attrname_t	__user *attrnamep)
+{
+	dm_dkattrname_t	name;
+	int		error;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_EXCL)
+		return(-EACCES);
+
+	if ((error = xfs_copyin_attrname(attrnamep, &name)) != 0)
+		return(-error);	/* Return negative error to DMAPI */
+
+	/* Remove the attribute from the object. */
+
+	error = xfs_attr_remove(XFS_I(inode), name.dan_chars, setdtime ?
+				ATTR_ROOT : (ATTR_ROOT|ATTR_KERNOTIME));
+	DM_EA_XLATE_ERR(error);
+
+	if (error == ENOATTR)
+		error = ENOENT;
+	return(-error);	/* Return negative error to DMAPI */
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_request_right(
+	struct inode	*inode,
+	dm_right_t	right,
+	u_int		type,		/* DM_FSYS_OBJ or zero */
+	u_int		flags,
+	dm_right_t	newright)
+{
+#ifdef DEBUG_RIGHTS
+	char		buffer[sizeof(dm_handle_t) * 2 + 1];
+
+	if (!xfs_vp_to_hexhandle(inode, type, buffer)) {
+		printk("dm_request_right: old %d new %d type %d flags 0x%x "
+		       "handle %s\n", right, newright, type, flags, buffer);
+	} else {
+		printk("dm_request_right: old %d new %d type %d flags 0x%x "
+		       "handle <INVALID>\n", right, newright, type, flags);
+	}
+#endif	/* DEBUG_RIGHTS */
+	return(0);
+}
+
+
+STATIC int
+xfs_dm_set_dmattr(
+	struct inode	*inode,
+	dm_right_t	right,
+	dm_attrname_t	__user *attrnamep,
+	int		setdtime,
+	size_t		buflen,
+	void		__user *bufp)
+{
+	dm_dkattrname_t	name;
+	char		*value;
+	int		alloc_size;
+	int		error;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_EXCL)
+		return(-EACCES);
+
+	if ((error = xfs_copyin_attrname(attrnamep, &name)) != 0)
+		return(-error);	/* Return negative error to DMAPI */
+	if (buflen > ATTR_MAX_VALUELEN)
+		return(-E2BIG);
+
+	/* Copy in the attribute's value and store the <name,value> pair in
+	   the object.  We allocate a buffer of at least one byte even if the
+	   caller specified a buflen of zero.  (A buflen of zero is considered
+	   valid.)
+	 */
+
+	alloc_size = (buflen == 0) ? 1 : buflen;
+	value = kmem_alloc(alloc_size, KM_SLEEP);
+	if (copy_from_user(value, bufp, buflen)) {
+		error = EFAULT;
+	} else {
+		error = xfs_attr_set(XFS_I(inode), name.dan_chars, value, buflen,
+				     setdtime ? ATTR_ROOT :
+				     (ATTR_ROOT|ATTR_KERNOTIME));
+		DM_EA_XLATE_ERR(error);
+	}
+	kmem_free(value);
+	return(-error);	/* Return negative error to DMAPI */
+}
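+
+/*
+ * Illustrative example (the attribute name is hypothetical): a DMAPI
+ * attribute named "rinfo" set through the call above is stored in the
+ * inode's root attribute fork as "SGI_DMI_rinfo", per dmattr_prefix;
+ * xfs_dm_getall_dmattr() strips that prefix again when listing.
+ */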
+
+STATIC int
+xfs_dm_set_eventlist(
+	struct inode	*inode,
+	dm_right_t	right,
+	u_int		type,
+	dm_eventset_t	*eventsetp,	/* in kernel space! */
+	u_int		maxevent)
+{
+	int		error;
+	xfs_inode_t	*ip = XFS_I(inode);
+
+	/* Returns negative errors to DMAPI */
+
+	if (type == DM_FSYS_OBJ) {
+		error = xfs_dm_fs_set_eventlist(ip->i_mount, right, eventsetp, maxevent);
+	} else {
+		error = xfs_dm_f_set_eventlist(ip, right, eventsetp, maxevent);
+	}
+	return(-error);	/* Return negative error to DMAPI */
+}
+
+
+/*
+ * This turned out not to be XFS-specific, but we leave it here with
+ * get_fileattr.
+ */
+
+STATIC int
+xfs_dm_set_fileattr(
+	struct inode	*inode,
+	dm_right_t	right,
+	u_int		mask,
+	dm_fileattr_t	__user *statp)
+{
+	dm_fileattr_t	stat;
+	struct iattr	iattr;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_EXCL)
+		return(-EACCES);
+
+	if (copy_from_user(&stat, statp, sizeof(stat)))
+		return(-EFAULT);
+
+	iattr.ia_valid = 0;
+
+	if (mask & DM_AT_MODE) {
+		iattr.ia_valid |= ATTR_MODE;
+		iattr.ia_mode = stat.fa_mode;
+	}
+	if (mask & DM_AT_UID) {
+		iattr.ia_valid |= ATTR_UID;
+		iattr.ia_uid = stat.fa_uid;
+	}
+	if (mask & DM_AT_GID) {
+		iattr.ia_valid |= ATTR_GID;
+		iattr.ia_gid = stat.fa_gid;
+	}
+	if (mask & DM_AT_ATIME) {
+		iattr.ia_valid |= ATTR_ATIME;
+		iattr.ia_atime.tv_sec = stat.fa_atime;
+		iattr.ia_atime.tv_nsec = 0;
+		inode->i_atime.tv_sec = stat.fa_atime;
+	}
+	if (mask & DM_AT_MTIME) {
+		iattr.ia_valid |= ATTR_MTIME;
+		iattr.ia_mtime.tv_sec = stat.fa_mtime;
+		iattr.ia_mtime.tv_nsec = 0;
+	}
+	if (mask & DM_AT_CTIME) {
+		iattr.ia_valid |= ATTR_CTIME;
+		iattr.ia_ctime.tv_sec = stat.fa_ctime;
+		iattr.ia_ctime.tv_nsec = 0;
+	}
+
+	/*
+	 * DM_AT_DTIME only takes effect if DM_AT_CTIME is not specified.  We
+	 * overload ctime to also act as dtime, i.e. DM_CONFIG_DTIME_OVERLOAD.
+	 */
+	if ((mask & DM_AT_DTIME) && !(mask & DM_AT_CTIME)) {
+		iattr.ia_valid |= ATTR_CTIME;
+		iattr.ia_ctime.tv_sec = stat.fa_dtime;
+		iattr.ia_ctime.tv_nsec = 0;
+	}
+	if (mask & DM_AT_SIZE) {
+		iattr.ia_valid |= ATTR_SIZE;
+		iattr.ia_size = stat.fa_size;
+	}
+
+	return -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_DMI);
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_set_inherit(
+	struct inode	*inode,
+	dm_right_t	right,
+	dm_attrname_t	__user *attrnamep,
+	mode_t		mode)
+{
+	return(-ENOSYS);	/* Return negative error to DMAPI */
+}
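+
+/*
+ * Illustrative example: a single managed region with
+ * rg_flags == (DM_REGION_READ | DM_REGION_WRITE) is stored below as
+ * (1 << DM_EVENT_READ) | (1 << DM_EVENT_WRITE) in di_dmevmask;
+ * xfs_dm_get_region() above reverses the mapping.
+ */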
+
+STATIC int
+xfs_dm_set_region(
+	struct inode	*inode,
+	dm_right_t	right,
+	u_int		nelem,
+	dm_region_t	__user *regbufp,
+	dm_boolean_t	__user *exactflagp)
+{
+	xfs_inode_t	*ip = XFS_I(inode);
+	xfs_trans_t	*tp;
+	xfs_mount_t	*mp;
+	dm_region_t	region;
+	dm_eventset_t	new_mask;
+	dm_eventset_t	mr_mask;
+	int		error;
+	u_int		exactflag;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_EXCL)
+		return(-EACCES);
+
+	/* If the caller gave us more than one dm_region_t structure, complain.
+	   (The caller has to use dm_get_config() to find out what our limit
+	   is.)
+	 */
+
+	if (nelem > 1)
+		return(-E2BIG);
+
+	/* If the user provided a dm_region_t structure, then copy it in,
+	   validate it, and convert its flags to the corresponding bits in a
+	   dm_set_eventlist() event mask.  A call with zero regions is
+	   equivalent to clearing all region flags.
+	 */
+
+	new_mask = 0;
+	if (nelem == 1) {
+		if (copy_from_user(&region, regbufp, sizeof(region)))
+			return(-EFAULT);
+
+		if (region.rg_flags & ~(DM_REGION_READ|DM_REGION_WRITE|DM_REGION_TRUNCATE))
+			return(-EINVAL);
+		if (region.rg_flags & DM_REGION_READ)
+			new_mask |= 1 << DM_EVENT_READ;
+		if (region.rg_flags & DM_REGION_WRITE)
+			new_mask |= 1 << DM_EVENT_WRITE;
+		if (region.rg_flags & DM_REGION_TRUNCATE)
+			new_mask |= 1 << DM_EVENT_TRUNCATE;
+	}
+	mr_mask = (1 << DM_EVENT_READ) | (1 << DM_EVENT_WRITE) | (1 << DM_EVENT_TRUNCATE);
+
+	/* Get the file's existing event mask, clear the old managed region
+	   bits, add in the new ones, and update the file's mask.
+	 */
+
+	if (new_mask & prohibited_mr_events(inode->i_mapping)) {
+		/* If the change is simply to remove the READ
+		 * bit, then that's always okay.  Otherwise, it's busy.
+		 */
+		dm_eventset_t m1;
+
+		m1 = ip->i_d.di_dmevmask & ((1 << DM_EVENT_WRITE) | (1 << DM_EVENT_TRUNCATE));
+		if (m1 != new_mask) {
+			return -EBUSY;
+		}
+	}
+
+	mp = ip->i_mount;
+	tp = xfs_trans_alloc(mp, XFS_TRANS_SET_DMATTRS);
+	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+	if (error) {
+		xfs_trans_cancel(tp, 0);
+		return(-error);	/* Return negative error to DMAPI */
+	}
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+
+	ip->i_d.di_dmevmask = (ip->i_d.di_dmevmask & ~mr_mask) | new_mask;
+
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+	igrab(inode);
+	xfs_trans_commit(tp, 0);
+
+	/* Return the proper value for *exactflagp depending upon whether or not
+	   we "changed" the user's managed region.  In other words, if the user
+	   specified a non-zero value for either rg_offset or rg_size, we
+	   round each of those values back to zero.
+	 */
+
+	if (nelem && (region.rg_offset || region.rg_size)) {
+		exactflag = DM_FALSE;	/* user region was changed */
+	} else {
+		exactflag = DM_TRUE;	/* user region was unchanged */
+	}
+	if (copy_to_user(exactflagp, &exactflag, sizeof(exactflag)))
+		return(-EFAULT);
+	return(0);
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_symlink_by_handle(
+	struct inode	*inode,
+	dm_right_t	right,
+	void		__user *hanp,
+	size_t		hlen,
+	char		__user *cname,
+	char		__user *path)
+{
+	return(-ENOSYS);	/* Return negative errors to DMAPI */
+}
+
+
+/*
+ * xfs_dm_sync_by_handle needs to do the same thing as sys_fsync()
+ */
+STATIC int
+xfs_dm_sync_by_handle(
+	struct inode	*inode,
+	dm_right_t	right)
+{
+	int		err, ret;
+	xfs_inode_t	*ip = XFS_I(inode);
+
+	/* Returns negative errors to DMAPI */
+	if (right < DM_RIGHT_EXCL)
+		return(-EACCES);
+
+	/* We need to protect against concurrent writers. */
+	ret = filemap_fdatawrite(inode->i_mapping);
+	down_rw_sems(inode, DM_FLAGS_IMUX);
+	err = -xfs_fsync(ip);
+	if (!ret)
+		ret = err;
+	up_rw_sems(inode, DM_FLAGS_IMUX);
+	err = filemap_fdatawait(inode->i_mapping);
+	if (!ret)
+		ret = err;
+	xfs_iflags_clear(ip, XFS_ITRUNCATED);
+	return ret;
+}
+
+
+/* ARGSUSED */
+STATIC int
+xfs_dm_upgrade_right(
+	struct inode	*inode,
+	dm_right_t	right,
+	u_int		type)		/* DM_FSYS_OBJ or zero */
+{
+#ifdef DEBUG_RIGHTS
+	char		buffer[sizeof(dm_handle_t) * 2 + 1];
+
+	if (!xfs_vp_to_hexhandle(inode, type, buffer)) {
+		printk("dm_upgrade_right: old %d new %d type %d handle %s\n",
+		       right, DM_RIGHT_EXCL, type, buffer);
+	} else {
+		printk("dm_upgrade_right: old %d new %d type %d handle "
+		       "<INVALID>\n", right, DM_RIGHT_EXCL, type);
+	}
+#endif	/* DEBUG_RIGHTS */
+	return(0);
+}
+
+
+STATIC int
+xfs_dm_write_invis_rvp(
+	struct inode	*inode,
+	dm_right_t	right,
+	int		flags,
+	dm_off_t	off,
+	dm_size_t	len,
+	void		__user *bufp,
+	int		*rvp)
+{
+	int		fflag = 0;
+
+	/* Returns negative errors to DMAPI */
+
+	if (right < DM_RIGHT_EXCL)
+		return(-EACCES);
+
+	if (flags & DM_WRITE_SYNC)
+		fflag |= O_SYNC;
+	return(-xfs_dm_rdwr(inode, fflag, FMODE_WRITE, off, len, bufp, rvp));
+}
+
+
+STATIC void
+xfs_dm_obj_ref_hold(
+	struct inode	*inode)
+{
+	igrab(inode);
+}
+
+
+static fsys_function_vector_t	xfs_fsys_vector[DM_FSYS_MAX];
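+
+/*
+ * Illustrative sketch (an assumption about the dmapi core module, not
+ * code in this patch): the DMAPI core dispatches through the vector
+ * built below by matching the function number and calling through the
+ * union, roughly:
+ *
+ *	fsys_function_vector_t *vp = &xfs_fsys_vector[n];
+ *	if (vp->func_no == DM_FSYS_GET_REGION)
+ *		error = vp->u_fc.get_region(ip, right, nelem,
+ *					    regbufp, nelemp);
+ */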
+
+
+STATIC int
+xfs_dm_get_dmapiops(
+	struct super_block	*sb,
+	void			*addr)
+{
+	static int		initialized = 0;
+	dm_fcntl_vector_t	*vecrq;
+	fsys_function_vector_t	*vecp;
+	int			i = 0;
+
+	vecrq = (dm_fcntl_vector_t *)addr;
+	vecrq->count =
+		sizeof(xfs_fsys_vector) / sizeof(xfs_fsys_vector[0]);
+	vecrq->vecp = xfs_fsys_vector;
+	vecrq->code_level = DM_CLVL_XOPEN;
+	if (initialized)
+		return(0);
+	vecp = xfs_fsys_vector;
+
+	vecp[i].func_no = DM_FSYS_CLEAR_INHERIT;
+	vecp[i++].u_fc.clear_inherit = xfs_dm_clear_inherit;
+	vecp[i].func_no = DM_FSYS_CREATE_BY_HANDLE;
+	vecp[i++].u_fc.create_by_handle = xfs_dm_create_by_handle;
+	vecp[i].func_no = DM_FSYS_DOWNGRADE_RIGHT;
+	vecp[i++].u_fc.downgrade_right = xfs_dm_downgrade_right;
+	vecp[i].func_no = DM_FSYS_GET_ALLOCINFO_RVP;
+	vecp[i++].u_fc.get_allocinfo_rvp = xfs_dm_get_allocinfo_rvp;
+	vecp[i].func_no = DM_FSYS_GET_BULKALL_RVP;
+	vecp[i++].u_fc.get_bulkall_rvp = xfs_dm_get_bulkall_rvp;
+	vecp[i].func_no = DM_FSYS_GET_BULKATTR_RVP;
+	vecp[i++].u_fc.get_bulkattr_rvp = xfs_dm_get_bulkattr_rvp;
+	vecp[i].func_no = DM_FSYS_GET_CONFIG;
+	vecp[i++].u_fc.get_config = xfs_dm_get_config;
+	vecp[i].func_no = DM_FSYS_GET_CONFIG_EVENTS;
+	vecp[i++].u_fc.get_config_events = xfs_dm_get_config_events;
+	vecp[i].func_no = DM_FSYS_GET_DESTROY_DMATTR;
+	vecp[i++].u_fc.get_destroy_dmattr = xfs_dm_get_destroy_dmattr;
+	vecp[i].func_no = DM_FSYS_GET_DIOINFO;
+	vecp[i++].u_fc.get_dioinfo = xfs_dm_get_dioinfo;
+	vecp[i].func_no = DM_FSYS_GET_DIRATTRS_RVP;
+	vecp[i++].u_fc.get_dirattrs_rvp = xfs_dm_get_dirattrs_rvp;
+	vecp[i].func_no = DM_FSYS_GET_DMATTR;
+	vecp[i++].u_fc.get_dmattr = xfs_dm_get_dmattr;
+	vecp[i].func_no = DM_FSYS_GET_EVENTLIST;
+	vecp[i++].u_fc.get_eventlist = xfs_dm_get_eventlist;
+	vecp[i].func_no = DM_FSYS_GET_FILEATTR;
+	vecp[i++].u_fc.get_fileattr = xfs_dm_get_fileattr;
+	vecp[i].func_no = DM_FSYS_GET_REGION;
+	vecp[i++].u_fc.get_region = xfs_dm_get_region;
+	vecp[i].func_no = DM_FSYS_GETALL_DMATTR;
+	vecp[i++].u_fc.getall_dmattr = xfs_dm_getall_dmattr;
+	vecp[i].func_no = DM_FSYS_GETALL_INHERIT;
+	vecp[i++].u_fc.getall_inherit = xfs_dm_getall_inherit;
+	vecp[i].func_no = DM_FSYS_INIT_ATTRLOC;
+	vecp[i++].u_fc.init_attrloc = xfs_dm_init_attrloc;
+	vecp[i].func_no = DM_FSYS_MKDIR_BY_HANDLE;
+	vecp[i++].u_fc.mkdir_by_handle = xfs_dm_mkdir_by_handle;
+	vecp[i].func_no = DM_FSYS_PROBE_HOLE;
+	vecp[i++].u_fc.probe_hole = xfs_dm_probe_hole;
+	vecp[i].func_no = DM_FSYS_PUNCH_HOLE;
+	vecp[i++].u_fc.punch_hole = xfs_dm_punch_hole;
+	vecp[i].func_no = DM_FSYS_READ_INVIS_RVP;
+	vecp[i++].u_fc.read_invis_rvp = xfs_dm_read_invis_rvp;
+	vecp[i].func_no = DM_FSYS_RELEASE_RIGHT;
+	vecp[i++].u_fc.release_right = xfs_dm_release_right;
+	vecp[i].func_no = DM_FSYS_REMOVE_DMATTR;
+	vecp[i++].u_fc.remove_dmattr = xfs_dm_remove_dmattr;
+	vecp[i].func_no = DM_FSYS_REQUEST_RIGHT;
+	vecp[i++].u_fc.request_right = xfs_dm_request_right;
+	vecp[i].func_no = DM_FSYS_SET_DMATTR;
+	vecp[i++].u_fc.set_dmattr = xfs_dm_set_dmattr;
+	vecp[i].func_no = DM_FSYS_SET_EVENTLIST;
+	vecp[i++].u_fc.set_eventlist = xfs_dm_set_eventlist;
+	vecp[i].func_no = DM_FSYS_SET_FILEATTR;
+	vecp[i++].u_fc.set_fileattr = xfs_dm_set_fileattr;
+	vecp[i].func_no = DM_FSYS_SET_INHERIT;
+	vecp[i++].u_fc.set_inherit = xfs_dm_set_inherit;
+	vecp[i].func_no = DM_FSYS_SET_REGION;
+	vecp[i++].u_fc.set_region = xfs_dm_set_region;
+	vecp[i].func_no = DM_FSYS_SYMLINK_BY_HANDLE;
+	vecp[i++].u_fc.symlink_by_handle = xfs_dm_symlink_by_handle;
+	vecp[i].func_no = DM_FSYS_SYNC_BY_HANDLE;
+	vecp[i++].u_fc.sync_by_handle = xfs_dm_sync_by_handle;
+	vecp[i].func_no = DM_FSYS_UPGRADE_RIGHT;
+	vecp[i++].u_fc.upgrade_right = xfs_dm_upgrade_right;
+	vecp[i].func_no = DM_FSYS_WRITE_INVIS_RVP;
+	vecp[i++].u_fc.write_invis_rvp = xfs_dm_write_invis_rvp;
+	vecp[i].func_no = DM_FSYS_OBJ_REF_HOLD;
+	vecp[i++].u_fc.obj_ref_hold = xfs_dm_obj_ref_hold;
+
+	initialized = 1;	/* only build the vector table once */
+	return(0);
+}
+
+
+/* xfs_dm_send_mmap_event - send events needed for memory mapping a file.
+ *
+ * This is a workaround called for files that are about to be
+ * mapped.  DMAPI events are not being generated at a low enough level
+ * in the kernel for page reads/writes to generate the correct events.
+ * So for memory-mapped files we generate read or write events for the
+ * whole byte range being mapped.  If the mmap call can never cause a
+ * write to the file, then only a read event is sent.
+ *
+ * Code elsewhere prevents adding managed regions to a file while it
+ * is still mapped.
+ */
+
+STATIC int
+xfs_dm_send_mmap_event(
+	struct vm_area_struct *vma,
+	unsigned int	wantflag)
+{
+	xfs_inode_t	*ip;
+	int		error = 0;
+	dm_eventtype_t	max_event = DM_EVENT_READ;
+	xfs_fsize_t	filesize;
+	xfs_off_t	length, end_of_area, evsize, offset;
+	int		iolock;
+
+	if (!vma->vm_file)
+		return 0;
+
+	ip = XFS_I(vma->vm_file->f_dentry->d_inode);
+
+	if (!S_ISREG(vma->vm_file->f_dentry->d_inode->i_mode) ||
+	    !(ip->i_mount->m_flags & XFS_MOUNT_DMAPI))
+		return 0;
+
+	/* If they specifically asked for 'read', then give it to them.
+	 * Otherwise, see if it's possible to give them 'write'.
+	 */
+	if (wantflag & VM_READ) {
+		max_event = DM_EVENT_READ;
+	} else if (!(vma->vm_flags & VM_DENYWRITE)) {
+		if ((wantflag & VM_WRITE) || (vma->vm_flags & VM_WRITE))
+			max_event = DM_EVENT_WRITE;
+	}
+
+	if ((wantflag & VM_WRITE) && (max_event != DM_EVENT_WRITE))
+		return -EACCES;
+
+	/* Figure out how much of the file is being requested by the user. */
+	offset = 0;			/* beginning of file, for now */
+	length = 0;			/* whole file, for now */
+
+	filesize = ip->i_new_size;
+	if (filesize < ip->i_size) {
+		filesize = ip->i_size;
+	}
+
+	/* Set first byte number beyond the map area. */
+	if (length) {
+		end_of_area = offset + length;
+		if (end_of_area > filesize)
+			end_of_area = filesize;
+	} else {
+		end_of_area = filesize;
+	}
+
+	/* Set the real amount being mapped. */
+	evsize = end_of_area - offset;
+	if (evsize < 0)
+		evsize = 0;
+
+	if (max_event == DM_EVENT_READ)
+		iolock = XFS_IOLOCK_SHARED;
+	else
+		iolock = XFS_IOLOCK_EXCL;
+
+	xfs_ilock(ip, iolock);
+	/* If write possible, try a DMAPI write event */
+	if (max_event == DM_EVENT_WRITE && DM_EVENT_ENABLED(ip, max_event)) {
+		error = xfs_dm_send_data_event(max_event, ip, offset,
+					       evsize, 0, &iolock);
+		goto out_unlock;
+	}
+
+	/* Try a read event if max_event was != DM_EVENT_WRITE or if it
+	 * was DM_EVENT_WRITE but the WRITE event was not enabled.
+	 */
+	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ)) {
+		error = xfs_dm_send_data_event(DM_EVENT_READ, ip, offset,
+					       evsize, 0, &iolock);
+	}
+out_unlock:
+	xfs_iunlock(ip, iolock);
+	return -error;
+}
+
+
+STATIC int
+xfs_dm_send_destroy_event(
+	xfs_inode_t	*ip,
+	dm_right_t	vp_right)	/* always DM_RIGHT_NULL */
+{
+	/* Returns positive errors to XFS */
+	return -dm_send_destroy_event(&ip->i_vnode, vp_right);
+}
+
+
+STATIC int
+xfs_dm_send_namesp_event(
+	dm_eventtype_t	event,
+	struct xfs_mount *mp,
+	xfs_inode_t	*ip1,
+	dm_right_t	vp1_right,
+	xfs_inode_t	*ip2,
+	dm_right_t	vp2_right,
+	const char	*name1,
+	const char	*name2,
+	mode_t		mode,
+	int		retcode,
+	int		flags)
+{
+	/* Returns positive errors to XFS */
+	return -dm_send_namesp_event(event, mp ? mp->m_super : NULL,
+				     &ip1->i_vnode, vp1_right,
+				     ip2 ? &ip2->i_vnode : NULL, vp2_right,
+				     name1, name2,
+				     mode, retcode, flags);
+}
+
+STATIC int
+xfs_dm_send_mount_event(
+	struct xfs_mount	*mp,
+	dm_right_t		root_right,
+	char			*mtpt,
+	char			*fsname)
+{
+	return dm_send_mount_event(mp->m_super, root_right,
+				   NULL, DM_RIGHT_NULL,
+				   mp->m_rootip ? VFS_I(mp->m_rootip) : NULL,
+				   DM_RIGHT_NULL, mtpt, fsname);
+}
+
+STATIC void
+xfs_dm_send_unmount_event(
+	struct xfs_mount *mp,
+	xfs_inode_t	*ip,		/* NULL if unmount successful */
+	dm_right_t	vfsp_right,
+	mode_t		mode,
+	int		retcode,	/* errno, if unmount failed */
+	int		flags)
+{
+	dm_send_unmount_event(mp->m_super, ip ? &ip->i_vnode : NULL,
+			      vfsp_right, mode, retcode, flags);
+}
+
+
+/*
+ * Data migration operations accessed by the rest of XFS.
+ * When DMAPI support is configured in, this vector is used.
+ */
+
+xfs_dmops_t	xfs_dmcore_xfs = {
+	.xfs_send_data		= xfs_dm_send_data_event,
+	.xfs_send_mmap		= xfs_dm_send_mmap_event,
+	.xfs_send_destroy	= xfs_dm_send_destroy_event,
+	.xfs_send_namesp	= xfs_dm_send_namesp_event,
+	.xfs_send_mount		= xfs_dm_send_mount_event,
+	.xfs_send_unmount	= xfs_dm_send_unmount_event,
+};
+EXPORT_SYMBOL(xfs_dmcore_xfs);
+
+STATIC int
+xfs_dm_fh_to_inode(
+	struct super_block	*sb,
+	struct inode		**inode,
+	dm_fid_t		*dmfid)
+{
+	xfs_mount_t		*mp = XFS_M(sb);
+	xfs_inode_t		*ip;
+	xfs_ino_t		ino;
+	unsigned int		igen;
+	int			error;
+
+	*inode = NULL;
+
+	if (!dmfid->dm_fid_len) {
+		/* filesystem handle */
+		*inode = igrab(&mp->m_rootip->i_vnode);
+		if (!*inode)
+			return -ENOENT;
+		return 0;
+	}
+
+	if (dmfid->dm_fid_len != sizeof(*dmfid) - sizeof(dmfid->dm_fid_len))
+		return -EINVAL;
+
+	ino  = dmfid->dm_fid_ino;
+	igen = dmfid->dm_fid_gen;
+
+	/* fail requests for ino 0 gracefully. */
+	if (ino == 0)
+		return -ESTALE;
+
+	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
+	if (error)
+		return -error;
+	if (!ip)
+		return -EIO;
+
+	if (!ip->i_d.di_mode || ip->i_d.di_gen != igen) {
+		xfs_iput_new(ip, XFS_ILOCK_SHARED);
+		return -ENOENT;
+	}
+
+	*inode = &ip->i_vnode;
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	return 0;
+}
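+
+/*
+ * Illustrative example (values are hypothetical): a file handle for
+ * inode 128 with generation 2 carries dm_fid_ino == 128 and
+ * dm_fid_gen == 2.  If inode 128 has since been freed and reused,
+ * di_gen no longer matches and the lookup above fails with -ENOENT,
+ * so a stale handle never resolves to the wrong file.
+ */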
+
+STATIC int
+xfs_dm_inode_to_fh(
+	struct inode		*inode,
+	dm_fid_t		*dmfid,
+	dm_fsid_t		*dmfsid)
+{
+	xfs_inode_t		*ip = XFS_I(inode);
+
+	/* Returns negative errors to DMAPI */
+
+	if (ip->i_mount->m_fixedfsid == NULL)
+		return -EINVAL;
+
+	dmfid->dm_fid_len = sizeof(dm_fid_t) - sizeof(dmfid->dm_fid_len);
+	dmfid->dm_fid_pad = 0;
+	/*
+	 * use memcpy because the inode is a long long and there's no
+	 * assurance that dmfid->dm_fid_ino is properly aligned.
+	 */
+	memcpy(&dmfid->dm_fid_ino, &ip->i_ino, sizeof(dmfid->dm_fid_ino));
+	dmfid->dm_fid_gen = ip->i_d.di_gen;
+
+	memcpy(dmfsid, ip->i_mount->m_fixedfsid, sizeof(*dmfsid));
+	return 0;
+}
+
+STATIC void
+xfs_dm_get_fsid(
+	struct super_block	*sb,
+	dm_fsid_t		*fsid)
+{
+	memcpy(fsid, XFS_M(sb)->m_fixedfsid, sizeof(*fsid));
+}
+
+/*
+ * Filesystem operations accessed by the DMAPI core.
+ */
+static struct filesystem_dmapi_operations xfs_dmapiops = {
+	.get_fsys_vector	= xfs_dm_get_dmapiops,
+	.fh_to_inode		= xfs_dm_fh_to_inode,
+	.inode_to_fh		= xfs_dm_inode_to_fh,
+	.get_fsid		= xfs_dm_get_fsid,
+};
+
+static int __init
+xfs_dm_init(void)
+{
+	printk(KERN_INFO "SGI XFS Data Management API subsystem\n");
+
+	dmapi_register(&xfs_fs_type, &xfs_dmapiops);
+	return 0;
+}
+
+static void __exit
+xfs_dm_exit(void)
+{
+	dmapi_unregister(&xfs_fs_type);
+}
+
+MODULE_AUTHOR("Silicon Graphics, Inc.");
+MODULE_DESCRIPTION("SGI XFS dmapi subsystem");
+MODULE_LICENSE("GPL");
+
+module_init(xfs_dm_init);
+module_exit(xfs_dm_exit);
--- /dev/null
+++ b/fs/xfs/dmapi/xfs_dm.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2006 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef __XFS_DM_H__
+#define __XFS_DM_H__
+
+extern struct file_system_type xfs_fs_type;
+
+#endif	/* __XFS_DM_H__ */
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -47,6 +47,9 @@
 #include <linux/dcache.h>
 
 static const struct vm_operations_struct xfs_file_vm_ops;
+#ifdef HAVE_DMAPI
+static struct vm_operations_struct xfs_dmapi_file_vm_ops;
+#endif
 
 /*
  * xfs_iozero
@@ -938,6 +941,23 @@ xfs_file_release(
 	return -xfs_release(XFS_I(inode));
 }
 
+#ifdef HAVE_DMAPI
+STATIC int
+xfs_vm_fault(
+	struct vm_area_struct	*vma,
+	struct vm_fault		*vmf)
+{
+	struct inode	*inode = vma->vm_file->f_path.dentry->d_inode;
+	struct xfs_mount *mp = XFS_M(inode->i_sb);
+
+	ASSERT_ALWAYS(mp->m_flags & XFS_MOUNT_DMAPI);
+
+	if (XFS_SEND_MMAP(mp, vma, 0))
+		return VM_FAULT_SIGBUS;
+	return filemap_fault(vma, vmf);
+}
+#endif /* HAVE_DMAPI */
+
 STATIC int
 xfs_file_readdir(
 	struct file	*filp,
@@ -978,10 +998,56 @@ xfs_file_mmap(
 	vma->vm_ops = &xfs_file_vm_ops;
 	vma->vm_flags |= VM_CAN_NONLINEAR;
 
+#ifdef HAVE_DMAPI
+	if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
+		vma->vm_ops = &xfs_dmapi_file_vm_ops;
+#endif /* HAVE_DMAPI */
+
 	file_accessed(filp);
 	return 0;
 }
 
+#ifdef HAVE_DMAPI
+#ifdef HAVE_VMOP_MPROTECT
+STATIC int
+xfs_vm_mprotect(
+	struct vm_area_struct	*vma,
+	unsigned int		newflags)
+{
+	struct inode	*inode = vma->vm_file->f_path.dentry->d_inode;
+	struct xfs_mount *mp = XFS_M(inode->i_sb);
+	int		error = 0;
+
+	if (mp->m_flags & XFS_MOUNT_DMAPI) {
+		if ((vma->vm_flags & VM_MAYSHARE) &&
+		    (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
+			error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
+	}
+	return error;
+}
+#endif /* HAVE_VMOP_MPROTECT */
+#endif /* HAVE_DMAPI */
+
+#ifdef HAVE_FOP_OPEN_EXEC
+/* If the user is attempting to execute a file that is offline then
+ * we have to trigger a DMAPI READ event before the file is marked as busy
+ * otherwise the invisible I/O will not be able to write to the file to bring
+ * it back online.
+ */
+STATIC int
+xfs_file_open_exec(
+	struct inode	*inode)
+{
+	struct xfs_mount *mp = XFS_M(inode->i_sb);
+	struct xfs_inode *ip = XFS_I(inode);
+
+	if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
+	    DM_EVENT_ENABLED(ip, DM_EVENT_READ))
+		return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
+	return 0;
+}
+#endif /* HAVE_FOP_OPEN_EXEC */
+
 /*
  * mmap()d file has taken write protection fault and is being made
  * writable. We can set the page state up correctly for a writable
@@ -1033,3 +1099,13 @@ static const struct vm_operations_struct
 	.fault		= filemap_fault,
 	.page_mkwrite	= xfs_vm_page_mkwrite,
 };
+
+#ifdef HAVE_DMAPI
+static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
+	.fault		= xfs_vm_fault,
+	.page_mkwrite	= xfs_vm_page_mkwrite,
+#ifdef HAVE_VMOP_MPROTECT
+	.mprotect	= xfs_vm_mprotect,
+#endif
+};
+#endif /* HAVE_DMAPI */
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_ksyms.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2004-2008 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_bit.h"
+#include "xfs_buf.h"
+#include "xfs_log.h"
+#include "xfs_inum.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_alloc.h"
+#include "xfs_dmapi.h"
+#include "xfs_quota.h"
+#include "xfs_mount.h"
+#include "xfs_da_btree.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_dir2_sf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_dinode.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap.h"
+#include "xfs_rtalloc.h"
+#include "xfs_error.h"
+#include "xfs_itable.h"
+#include "xfs_rw.h"
+#include "xfs_dir2_data.h"
+#include "xfs_dir2_leaf.h"
+#include "xfs_dir2_block.h"
+#include "xfs_dir2_node.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
+#include "xfs_extfree_item.h"
+#include "xfs_log_priv.h"
+#include "xfs_trans_priv.h"
+#include "xfs_trans_space.h"
+#include "xfs_utils.h"
+#include "xfs_iomap.h"
+#include "xfs_filestream.h"
+#include "xfs_vnodeops.h"
+
+EXPORT_SYMBOL(xfs_iunlock);
+EXPORT_SYMBOL(xfs_attr_remove);
+EXPORT_SYMBOL(xfs_iunlock_map_shared);
+EXPORT_SYMBOL(xfs_iget);
+EXPORT_SYMBOL(xfs_bmapi);
+EXPORT_SYMBOL(xfs_internal_inum);
+EXPORT_SYMBOL(xfs_attr_set);
+EXPORT_SYMBOL(xfs_trans_reserve);
+EXPORT_SYMBOL(xfs_trans_ijoin);
+EXPORT_SYMBOL(xfs_free_eofblocks);
+EXPORT_SYMBOL(kmem_free);
+EXPORT_SYMBOL(_xfs_trans_commit);
+EXPORT_SYMBOL(xfs_ilock);
+EXPORT_SYMBOL(xfs_attr_get);
+EXPORT_SYMBOL(xfs_readdir);
+EXPORT_SYMBOL(xfs_setattr);
+EXPORT_SYMBOL(xfs_trans_alloc);
+EXPORT_SYMBOL(xfs_trans_cancel);
+EXPORT_SYMBOL(xfs_fsync);
+EXPORT_SYMBOL(xfs_iput_new);
+EXPORT_SYMBOL(xfs_bulkstat);
+EXPORT_SYMBOL(xfs_ilock_map_shared);
+EXPORT_SYMBOL(xfs_iput);
+EXPORT_SYMBOL(xfs_trans_log_inode);
+EXPORT_SYMBOL(xfs_attr_list);
+EXPORT_SYMBOL(kmem_alloc);
+EXPORT_SYMBOL(xfs_change_file_space);
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -160,6 +160,10 @@
 #define xfs_itruncate_data(ip, off)	\
 	(-vmtruncate(VFS_I(ip), (off)))
 
+#undef  HAVE_DMAPI
+#if defined(CONFIG_XFS_DMAPI) || defined(CONFIG_XFS_DMAPI_MODULE)
+#define HAVE_DMAPI
+#endif
 
 /* Move the kernel do_div definition off to one side */
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1670,8 +1670,16 @@ xfs_fs_get_sb(
 	void			*data,
 	struct vfsmount		*mnt)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
+	int			error;
+
+	error = get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
 			   mnt);
+	if (!error) {
+		xfs_mount_t *mp = XFS_M(mnt->mnt_sb);
+		mp->m_vfsmount = mnt;
+	}
+
+	return error;
 }
 
 static const struct super_operations xfs_super_operations = {
@@ -1689,13 +1697,14 @@ static const struct super_operations xfs
 	.show_options		= xfs_fs_show_options,
 };
 
-static struct file_system_type xfs_fs_type = {
+struct file_system_type xfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "xfs",
 	.get_sb			= xfs_fs_get_sb,
 	.kill_sb		= kill_block_super,
 	.fs_flags		= FS_REQUIRES_DEV,
 };
+EXPORT_SYMBOL(xfs_fs_type);
 
 STATIC int __init
 xfs_init_zones(void)
--- a/fs/xfs/xfs_dmops.c
+++ b/fs/xfs/xfs_dmops.c
@@ -40,9 +40,21 @@ int
 xfs_dmops_get(struct xfs_mount *mp)
 {
 	if (mp->m_flags & XFS_MOUNT_DMAPI) {
-		cmn_err(CE_WARN,
-			"XFS: dmapi support not available in this kernel.");
-		return EINVAL;
+		struct xfs_dmops *ops;
+
+		ops = symbol_get(xfs_dmcore_xfs);
+		if (!ops) {
+			request_module("xfs_dmapi");
+			ops = symbol_get(xfs_dmcore_xfs);
+		}
+
+		if (!ops) {
+			cmn_err(CE_WARN, "XFS: no dmapi support available.");
+			return EINVAL;
+		}
+		mp->m_dm_ops = ops;
+	} else {
+		mp->m_dm_ops = &xfs_dmcore_stub;
 	}
 
-	mp->m_dm_ops = &xfs_dmcore_stub;
 	return 0;
 }
@@ -52,4 +64,6 @@ xfs_dmops_get(struct xfs_mount *mp)
 void
 xfs_dmops_put(struct xfs_mount *mp)
 {
+	if (mp->m_dm_ops != &xfs_dmcore_stub)
+		symbol_put(xfs_dmcore_xfs);
 }
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -39,7 +39,7 @@
 #include "xfs_error.h"
 #include "xfs_btree.h"
 
-STATIC int
+int
 xfs_internal_inum(
 	xfs_mount_t	*mp,
 	xfs_ino_t	ino)
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -99,6 +99,11 @@ xfs_bulkstat_one(
 	void		*dibuff,
 	int		*stat);
 
+int
+xfs_internal_inum(
+	xfs_mount_t	*mp,
+	xfs_ino_t	ino);
+
 typedef int (*inumbers_fmt_pf)(
 	void			__user *ubuffer, /* buffer to write to */
 	const xfs_inogrp_t	*buffer,	/* buffer to read from */
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
 	struct list_head	m_mplist;	/* inode shrinker mount list */
+	struct vfsmount		*m_vfsmount;
 } xfs_mount_t;
 
 /*
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -202,3 +202,4 @@ xfs_get_extsz_hint(
 
 	return extsz;
 }
+EXPORT_SYMBOL(xfs_get_extsz_hint);
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -45,5 +45,10 @@ extern int xfs_read_buf(struct xfs_mount
 extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
 				xfs_buf_t *bp, xfs_daddr_t blkno);
 extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
+/*
+ * Prototypes for functions in xfs_vnodeops.c.
+ */
+extern int xfs_free_eofblocks(struct xfs_mount *mp, struct xfs_inode *ip,
+			int flags);
 
 #endif /* __XFS_RW_H__ */
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -593,7 +593,7 @@ xfs_readlink(
  * when the link count isn't zero and by xfs_dm_punch_hole() when
  * punching a hole to EOF.
  */
-STATIC int
+int
 xfs_free_eofblocks(
 	xfs_mount_t	*mp,
 	xfs_inode_t	*ip,