2374 lines
53 KiB
Diff
2374 lines
53 KiB
Diff
Subject: pv-ops blktap2
|
|
From: https://git.kernel.org/?p=linux/kernel/git/jeremy/xen.git (commit 892d2f052e979cf1916647c752b94cf62ec1c6dc)
|
|
Patch-mainline: n/a
|
|
Acked-by: jbeulich@novell.com
|
|
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2-new/Makefile 2011-02-24 13:49:49.000000000 +0100
|
|
@@ -0,0 +1,3 @@
|
|
+obj-$(CONFIG_XEN_BLKDEV_TAP) := blktap.o
|
|
+
|
|
+blktap-objs := control.o ring.o device.o request.o sysfs.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2-new/blktap.h 2011-02-24 13:49:49.000000000 +0100
|
|
@@ -0,0 +1,209 @@
|
|
#ifndef _BLKTAP_H_
#define _BLKTAP_H_

#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/init.h>
#include <linux/scatterlist.h>
#include <xen/blkif.h>

/* Module-wide state; blktap_device_major is defined in device.c, the
 * others elsewhere in the module (only the externs are visible here). */
extern int blktap_debug_level;
extern int blktap_ring_major;
extern int blktap_device_major;

/* Log at 'tag' severity when blktap_debug_level exceeds 'level';
 * non-forced messages are additionally printk-ratelimited. */
#define BTPRINTK(level, tag, force, _f, _a...)				\
	do {								\
		if (blktap_debug_level > level &&			\
		    (force || printk_ratelimit()))			\
			printk(tag "%s: " _f, __func__, ##_a);		\
	} while (0)

#define BTDBG(_f, _a...)             BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a)
#define BTINFO(_f, _a...)            BTPRINTK(0, KERN_INFO, 0, _f, ##_a)
#define BTWARN(_f, _a...)            BTPRINTK(0, KERN_WARNING, 0, _f, ##_a)
#define BTERR(_f, _a...)             BTPRINTK(0, KERN_ERR, 0, _f, ##_a)

/* Hard upper bound on tap minors; the blktaps[] table never grows past it. */
#define MAX_BLKTAP_DEVICE            1024

/* Bit numbers within struct blktap.dev_inuse. */
#define BLKTAP_DEVICE                4
#define BLKTAP_DEVICE_CLOSED         5
#define BLKTAP_SHUTDOWN_REQUESTED    8

/* blktap IOCTLs: */
#define BLKTAP2_IOCTL_KICK_FE        1
#define BLKTAP2_IOCTL_ALLOC_TAP      200
#define BLKTAP2_IOCTL_FREE_TAP       201
#define BLKTAP2_IOCTL_CREATE_DEVICE  202
#define BLKTAP2_IOCTL_REMOVE_DEVICE  207

#define BLKTAP2_MAX_MESSAGE_LEN      256

#define BLKTAP2_RING_MESSAGE_CLOSE   3

#define BLKTAP_REQUEST_FREE          0
#define BLKTAP_REQUEST_PENDING       1

/*
 * The maximum number of requests that can be outstanding at any time
 * is determined by
 *
 * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
 *
 * where mmap_alloc < MAX_DYNAMIC_MEM.
 *
 * TODO:
 * mmap_alloc is initialised to 2 and should be adjustable on the fly via
 * sysfs.
 */
#define BLK_RING_SIZE		__RING_SIZE((struct blkif_sring *)0, PAGE_SIZE)
#define MAX_DYNAMIC_MEM		BLK_RING_SIZE
#define MAX_PENDING_REQS	BLK_RING_SIZE
#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
/* Virtual address of segment _seg of request slot _req in an mmap
 * area starting at _start. */
#define MMAP_VADDR(_start, _req, _seg)					\
	(_start +							\
	 ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +	\
	 ((_seg) * PAGE_SIZE))

/* Kernel- and user-side grant handles for one mapped segment. */
struct grant_handle_pair {
	grant_handle_t kernel;
	grant_handle_t user;
};
#define INVALID_GRANT_HANDLE 0xFFFF

/* Identity of a tap as reported to userspace: both char majors plus
 * the shared minor. */
struct blktap_handle {
	unsigned int ring;
	unsigned int device;
	unsigned int minor;
};

/* Userspace-supplied parameters for device creation. */
struct blktap_params {
	char name[BLKTAP2_MAX_MESSAGE_LEN];
	unsigned long long capacity;	/* passed to set_capacity() */
	unsigned long sector_size;	/* bytes; validated as 2^9..2^12 */
};

/* The block-device half of a tap. */
struct blktap_device {
	spinlock_t lock;		/* request-queue lock */
	struct gendisk *gd;
};

/* The ring (userspace communication) half of a tap. */
struct blktap_ring {
	struct task_struct *task;	/* tapdisk process — TODO confirm */

	struct vm_area_struct *vma;
	struct blkif_front_ring ring;
	unsigned long ring_vstart;
	unsigned long user_vstart;

	int n_pending;
	struct blktap_request *pending[MAX_PENDING_REQS];

	wait_queue_head_t poll_wait;

	dev_t devno;
	struct device *dev;
};

/* Per-tap I/O counters (exposed for debugging). */
struct blktap_statistics {
	unsigned long st_print;
	int st_rd_req;
	int st_wr_req;
	int st_oo_req;		/* out-of-resource stalls */
	int st_rd_sect;
	int st_wr_sect;
	s64 st_rd_cnt;
	s64 st_rd_sum_usecs;
	s64 st_rd_max_usecs;
	s64 st_wr_cnt;
	s64 st_wr_sum_usecs;
	s64 st_wr_max_usecs;
};

/* One in-flight block request handed to userspace. */
struct blktap_request {
	struct blktap *tap;
	struct request *rq;
	int usr_idx;		/* ring slot index */

	int operation;		/* BLKIF_OP_READ or BLKIF_OP_WRITE */
	struct timeval time;

	struct scatterlist sg_table[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int nr_pages;
};

/* Iterate over the first nr_pages entries of a request's sg table. */
#define blktap_for_each_sg(_sg, _req, _i)	\
	for (_sg = (_req)->sg_table, _i = 0;	\
	     _i < (_req)->nr_pages;		\
	     (_sg)++, (_i)++)

/* One tap instance: ring/device pair plus its page pool. */
struct blktap {
	int minor;
	unsigned long dev_inuse;	/* BLKTAP_* state bits */

	struct blktap_ring ring;
	struct blktap_device device;
	struct blktap_page_pool *pool;

	wait_queue_head_t remove_wait;
	struct work_struct remove_work;
	char name[BLKTAP2_MAX_MESSAGE_LEN];

	struct blktap_statistics stats;
};

/* A (possibly shared) pool of pages backing request segments. */
struct blktap_page_pool {
	struct mempool_s *bufs;
	spinlock_t lock;
	struct kobject kobj;
	wait_queue_head_t wait;
};

extern struct mutex blktap_lock;
extern struct blktap **blktaps;
extern int blktap_max_minor;

int blktap_control_destroy_tap(struct blktap *);
size_t blktap_control_debug(struct blktap *, char *, size_t);

int blktap_ring_init(void);
void blktap_ring_exit(void);
size_t blktap_ring_debug(struct blktap *, char *, size_t);
int blktap_ring_create(struct blktap *);
int blktap_ring_destroy(struct blktap *);
struct blktap_request *blktap_ring_make_request(struct blktap *);
void blktap_ring_free_request(struct blktap *,struct blktap_request *);
void blktap_ring_submit_request(struct blktap *, struct blktap_request *);
int blktap_ring_map_request_segment(struct blktap *, struct blktap_request *, int);
int blktap_ring_map_request(struct blktap *, struct blktap_request *);
void blktap_ring_unmap_request(struct blktap *, struct blktap_request *);
void blktap_ring_set_message(struct blktap *, int);
void blktap_ring_kick_user(struct blktap *);

int blktap_sysfs_init(void);
void blktap_sysfs_exit(void);
int blktap_sysfs_create(struct blktap *);
void blktap_sysfs_destroy(struct blktap *);

int blktap_device_init(void);
void blktap_device_exit(void);
size_t blktap_device_debug(struct blktap *, char *, size_t);
int blktap_device_create(struct blktap *, struct blktap_params *);
int blktap_device_destroy(struct blktap *);
void blktap_device_destroy_sync(struct blktap *);
void blktap_device_run_queue(struct blktap *);
void blktap_device_end_request(struct blktap *, struct blktap_request *, int);

int blktap_page_pool_init(struct kobject *);
void blktap_page_pool_exit(void);
struct blktap_page_pool *blktap_page_pool_get(const char *);

size_t blktap_request_debug(struct blktap *, char *, size_t);
struct blktap_request *blktap_request_alloc(struct blktap *);
int blktap_request_get_pages(struct blktap *, struct blktap_request *, int);
void blktap_request_free(struct blktap *, struct blktap_request *);
void blktap_request_bounce(struct blktap *, struct blktap_request *, int, int);


#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2-new/control.c 2011-02-24 13:49:49.000000000 +0100
|
|
@@ -0,0 +1,315 @@
|
|
+#include <linux/module.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/miscdevice.h>
|
|
+#include <linux/device.h>
|
|
+#include <asm/uaccess.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
/* Serializes minor allocation/teardown and growth of blktaps[]. */
DEFINE_MUTEX(blktap_lock);

/* Table of taps indexed by minor; grows on demand up to MAX_BLKTAP_DEVICE. */
struct blktap **blktaps;
int blktap_max_minor;
/* Page pool newly created taps attach to; switchable via sysfs. */
static struct blktap_page_pool *default_pool;
|
|
+
|
|
/*
 * Allocate a new tap and assign it the lowest free minor, growing the
 * blktaps[] table (doubling, capped at MAX_BLKTAP_DEVICE) on demand.
 * Returns the tap with a module reference held, or NULL on exhaustion
 * or allocation failure.
 */
static struct blktap *
blktap_control_get_minor(void)
{
	int minor;
	struct blktap *tap;

	tap = kzalloc(sizeof(*tap), GFP_KERNEL);
	if (unlikely(!tap))
		return NULL;

	mutex_lock(&blktap_lock);

	/* First unused slot in the currently allocated table. */
	for (minor = 0; minor < blktap_max_minor; minor++)
		if (!blktaps[minor])
			break;

	/* Table already at the hard cap and completely full. */
	if (minor == MAX_BLKTAP_DEVICE)
		goto fail;

	/* Table full but below the cap: double its size. */
	if (minor == blktap_max_minor) {
		void *p;
		int n;

		n = min(2 * blktap_max_minor, MAX_BLKTAP_DEVICE);
		p = krealloc(blktaps, n * sizeof(blktaps[0]), GFP_KERNEL);
		if (!p)
			goto fail;

		blktaps = p;
		minor = blktap_max_minor;
		blktap_max_minor = n;

		/* krealloc does not zero the newly added tail. */
		memset(&blktaps[minor], 0, (n - minor) * sizeof(blktaps[0]));
	}

	tap->minor = minor;
	blktaps[minor] = tap;

	/* Pin the module for the lifetime of the tap. */
	__module_get(THIS_MODULE);
out:
	mutex_unlock(&blktap_lock);
	return tap;

fail:
	mutex_unlock(&blktap_lock);
	kfree(tap);
	tap = NULL;
	goto out;
}
|
|
+
|
|
+static void
|
|
+blktap_control_put_minor(struct blktap* tap)
|
|
+{
|
|
+ blktaps[tap->minor] = NULL;
|
|
+ kfree(tap);
|
|
+
|
|
+ module_put(THIS_MODULE);
|
|
+}
|
|
+
|
|
+static struct blktap*
|
|
+blktap_control_create_tap(void)
|
|
+{
|
|
+ struct blktap *tap;
|
|
+ int err;
|
|
+
|
|
+ tap = blktap_control_get_minor();
|
|
+ if (!tap)
|
|
+ return NULL;
|
|
+
|
|
+ kobject_get(&default_pool->kobj);
|
|
+ tap->pool = default_pool;
|
|
+
|
|
+ err = blktap_ring_create(tap);
|
|
+ if (err)
|
|
+ goto fail_tap;
|
|
+
|
|
+ err = blktap_sysfs_create(tap);
|
|
+ if (err)
|
|
+ goto fail_ring;
|
|
+
|
|
+ return tap;
|
|
+
|
|
+fail_ring:
|
|
+ blktap_ring_destroy(tap);
|
|
+fail_tap:
|
|
+ blktap_control_put_minor(tap);
|
|
+
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
/*
 * Tear a tap down. The ring goes first and may refuse (returns the
 * ring's error, e.g. while tapdisk still holds it), in which case the
 * tap stays fully intact. Otherwise the page-pool reference, sysfs
 * node and minor are released.
 */
int
blktap_control_destroy_tap(struct blktap *tap)
{
	int err;

	err = blktap_ring_destroy(tap);
	if (err)
		return err;

	/* Drop the pool reference taken at creation time. */
	kobject_put(&tap->pool->kobj);

	blktap_sysfs_destroy(tap);

	blktap_control_put_minor(tap);

	return 0;
}
|
|
+
|
|
+static int
|
|
+blktap_control_ioctl(struct inode *inode, struct file *filp,
|
|
+ unsigned int cmd, unsigned long arg)
|
|
+{
|
|
+ struct blktap *tap;
|
|
+
|
|
+ switch (cmd) {
|
|
+ case BLKTAP2_IOCTL_ALLOC_TAP: {
|
|
+ struct blktap_handle h;
|
|
+ void __user *ptr = (void __user*)arg;
|
|
+
|
|
+ tap = blktap_control_create_tap();
|
|
+ if (!tap)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ h.ring = blktap_ring_major;
|
|
+ h.device = blktap_device_major;
|
|
+ h.minor = tap->minor;
|
|
+
|
|
+ if (copy_to_user(ptr, &h, sizeof(h))) {
|
|
+ blktap_control_destroy_tap(tap);
|
|
+ return -EFAULT;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ case BLKTAP2_IOCTL_FREE_TAP: {
|
|
+ int minor = arg;
|
|
+
|
|
+ if (minor > MAX_BLKTAP_DEVICE)
|
|
+ return -EINVAL;
|
|
+
|
|
+ tap = blktaps[minor];
|
|
+ if (!tap)
|
|
+ return -ENODEV;
|
|
+
|
|
+ return blktap_control_destroy_tap(tap);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return -ENOIOCTLCMD;
|
|
+}
|
|
+
|
|
/* Char-device entry points for the control node. */
static struct file_operations blktap_control_file_operations = {
	.owner = THIS_MODULE,
	.ioctl = blktap_control_ioctl,
};

/* Dynamic-minor misc device: /dev/blktap-control. */
static struct miscdevice blktap_control = {
	.minor = MISC_DYNAMIC_MINOR,
	.name  = "blktap-control",
	.fops  = &blktap_control_file_operations,
};

/* Sysfs anchor for control attributes and the page-pool kset. */
static struct device *control_device;
|
|
+
|
|
+static ssize_t
|
|
+blktap_control_show_default_pool(struct device *device,
|
|
+ struct device_attribute *attr,
|
|
+ char *buf)
|
|
+{
|
|
+ return sprintf(buf, "%s", kobject_name(&default_pool->kobj));
|
|
+}
|
|
+
|
|
/*
 * Sysfs 'default_pool' write: switch the pool newly created taps will
 * use (looked up/created by name). Existing taps keep their pool; the
 * previous default's reference is dropped after the new one is
 * installed.
 * NOTE(review): default_pool is swapped without any locking against
 * concurrent readers/tap creation — looks racy; confirm serialization.
 */
static ssize_t
blktap_control_store_default_pool(struct device *device,
				  struct device_attribute *attr,
				  const char *buf, size_t size)
{
	struct blktap_page_pool *pool, *tmp = default_pool;

	pool = blktap_page_pool_get(buf);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	default_pool = pool;
	kobject_put(&tmp->kobj);

	return size;
}

/* 'default_pool' attribute: world-readable, owner-writable. */
static DEVICE_ATTR(default_pool, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
		   blktap_control_show_default_pool,
		   blktap_control_store_default_pool);
|
|
+
|
|
+size_t
|
|
+blktap_control_debug(struct blktap *tap, char *buf, size_t size)
|
|
+{
|
|
+ char *s = buf, *end = buf + size;
|
|
+
|
|
+ s += snprintf(s, end - s,
|
|
+ "tap %u:%u name:'%s' flags:%#08lx\n",
|
|
+ MAJOR(tap->ring.devno), MINOR(tap->ring.devno),
|
|
+ tap->name, tap->dev_inuse);
|
|
+
|
|
+ return s - buf;
|
|
+}
|
|
+
|
|
/*
 * Register the control misc device, the initial minor table, the
 * page-pool infrastructure and the default pool. On error, state
 * created so far is left in place; the caller (blktap_init) unwinds
 * it via blktap_exit -> blktap_control_exit.
 */
static int __init
blktap_control_init(void)
{
	int err;

	err = misc_register(&blktap_control);
	if (err)
		return err;

	control_device = blktap_control.this_device;

	/* Start with 64 minors; grows on demand up to the hard cap. */
	blktap_max_minor = min(64, MAX_BLKTAP_DEVICE);
	blktaps = kzalloc(blktap_max_minor * sizeof(blktaps[0]), GFP_KERNEL);
	if (!blktaps) {
		BTERR("failed to allocate blktap minor map");
		return -ENOMEM;
	}

	err = blktap_page_pool_init(&control_device->kobj);
	if (err)
		return err;

	/* NOTE(review): the store path above treats
	 * blktap_page_pool_get() errors as ERR_PTR; this NULL check
	 * may therefore mishandle failures — confirm the contract. */
	default_pool = blktap_page_pool_get("default");
	if (!default_pool)
		return -ENOMEM;

	err = device_create_file(control_device, &dev_attr_default_pool);
	if (err)
		return err;

	return 0;
}
|
|
+
|
|
+static void
|
|
+blktap_control_exit(void)
|
|
+{
|
|
+ if (default_pool) {
|
|
+ kobject_put(&default_pool->kobj);
|
|
+ default_pool = NULL;
|
|
+ }
|
|
+
|
|
+ blktap_page_pool_exit();
|
|
+
|
|
+ if (blktaps) {
|
|
+ kfree(blktaps);
|
|
+ blktaps = NULL;
|
|
+ }
|
|
+
|
|
+ if (control_device) {
|
|
+ misc_deregister(&blktap_control);
|
|
+ control_device = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
/*
 * Module teardown; also used by blktap_init to unwind partial
 * initialization (the *_exit routines are presumably tolerant of
 * their subsystem never having come up — control/device visibly are).
 */
static void
blktap_exit(void)
{
	blktap_control_exit();
	blktap_ring_exit();
	blktap_sysfs_exit();
	blktap_device_exit();
}
|
|
+
|
|
/*
 * Module entry: bring up the device, ring, sysfs and control
 * subsystems in order; on any failure, unwind whatever succeeded.
 */
static int __init
blktap_init(void)
{
	int err;

	err = blktap_device_init();
	if (err)
		goto fail;

	err = blktap_ring_init();
	if (err)
		goto fail;

	err = blktap_sysfs_init();
	if (err)
		goto fail;

	err = blktap_control_init();
	if (err)
		goto fail;

	return 0;

fail:
	blktap_exit();
	return err;
}

module_init(blktap_init);
module_exit(blktap_exit);
MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2-new/device.c 2011-02-24 13:49:49.000000000 +0100
|
|
@@ -0,0 +1,564 @@
|
|
+#include <linux/fs.h>
|
|
+#include <linux/blkdev.h>
|
|
+#include <linux/cdrom.h>
|
|
+#include <linux/hdreg.h>
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_ioctl.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
/* Block-device major, allocated dynamically in blktap_device_init(). */
int blktap_device_major;

/* Map a struct blktap_device back to its containing struct blktap. */
#define dev_to_blktap(_dev) container_of(_dev, struct blktap, device)
|
|
+
|
|
+static int
|
|
+blktap_device_open(struct block_device *bdev, fmode_t mode)
|
|
+{
|
|
+ struct gendisk *disk = bdev->bd_disk;
|
|
+ struct blktap_device *tapdev = disk->private_data;
|
|
+
|
|
+ if (!tapdev)
|
|
+ return -ENXIO;
|
|
+
|
|
+ /* NB. we might have bounced a bd trylock by tapdisk. when
|
|
+ * failing for reasons not !tapdev, make sure to kick tapdisk
|
|
+ * out of destroy wait state again. */
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_release(struct gendisk *disk, fmode_t mode)
|
|
+{
|
|
+ struct blktap_device *tapdev = disk->private_data;
|
|
+ struct block_device *bdev = bdget_disk(disk, 0);
|
|
+ struct blktap *tap = dev_to_blktap(tapdev);
|
|
+
|
|
+ bdput(bdev);
|
|
+
|
|
+ if (!bdev->bd_openers) {
|
|
+ set_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse);
|
|
+ blktap_ring_kick_user(tap);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg)
|
|
+{
|
|
+ /* We don't have real geometry info, but let's at least return
|
|
+ values consistent with the size of the device */
|
|
+ sector_t nsect = get_capacity(bd->bd_disk);
|
|
+ sector_t cylinders = nsect;
|
|
+
|
|
+ hg->heads = 0xff;
|
|
+ hg->sectors = 0x3f;
|
|
+ sector_div(cylinders, hg->heads * hg->sectors);
|
|
+ hg->cylinders = cylinders;
|
|
+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
|
|
+ hg->cylinders = 0xffff;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
/*
 * Block-device ioctls. Only stubs are provided: CDROM multisession
 * and SCSI id/lun queries are answered with zeroes so legacy tools
 * keep working; everything else is -EINVAL, like native drivers.
 */
static int
blktap_device_ioctl(struct block_device *bd, fmode_t mode,
		    unsigned command, unsigned long argument)
{
	int i;

	switch (command) {
	case CDROMMULTISESSION:
		BTDBG("FIXME: support multisession CDs later\n");
		/* Zero-fill the user's struct, byte by byte. */
		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
			if (put_user(0, (char __user *)(argument + i)))
				return -EFAULT;
		return 0;

	case SCSI_IOCTL_GET_IDLUN:
		if (!access_ok(VERIFY_WRITE, argument,
			       sizeof(struct scsi_idlun)))
			return -EFAULT;

		/* return 0 for now. */
		__put_user(0, &((struct scsi_idlun __user *)argument)->dev_id);
		__put_user(0,
			   &((struct scsi_idlun __user *)argument)->host_unique_id);
		return 0;

	default:
		/*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
		  command);*/
		return -EINVAL; /* same return as native Linux */
	}

	return 0;
}
|
|
+
|
|
/* gendisk entry points for the tdXX block devices. */
static struct block_device_operations blktap_device_file_operations = {
	.owner     = THIS_MODULE,
	.open      = blktap_device_open,
	.release   = blktap_device_release,
	.ioctl     = blktap_device_ioctl,
	.getgeo    = blktap_device_getgeo
};
|
|
+
|
|
/* NB. __blktap holding the queue lock; blktap where unlocked */

/* Peek at the head of the request queue without dequeueing. */
static inline struct request*
__blktap_next_queued_rq(struct request_queue *q)
{
	return blk_peek_request(q);
}

/* Dequeue a previously peeked request. */
static inline void
__blktap_dequeue_rq(struct request *rq)
{
	blk_start_request(rq);
}
|
|
+
|
|
/* NB. err == 0 indicates success, failures < 0 */

/* Dequeue a still-queued request and complete it in full. */
static inline void
__blktap_end_queued_rq(struct request *rq, int err)
{
	blk_start_request(rq);
	__blk_end_request(rq, err, blk_rq_bytes(rq));
}

/* Complete an already-dequeued request in full (queue lock held). */
static inline void
__blktap_end_rq(struct request *rq, int err)
{
	__blk_end_request(rq, err, blk_rq_bytes(rq));
}

/* As __blktap_end_rq, but acquires the queue lock itself. */
static inline void
blktap_end_rq(struct request *rq, int err)
{
	spin_lock_irq(rq->q->queue_lock);
	__blktap_end_rq(rq, err);
	spin_unlock_irq(rq->q->queue_lock);
}
|
|
+
|
|
/*
 * Completion path, run when userspace has responded: unmap the
 * request's segments, release its ring slot, then finish the
 * originating block request (error == 0 on success, < 0 otherwise).
 */
void
blktap_device_end_request(struct blktap *tap,
			  struct blktap_request *request,
			  int error)
{
	struct blktap_device *tapdev = &tap->device;
	struct request *rq = request->rq;

	blktap_ring_unmap_request(tap, request);

	blktap_ring_free_request(tap, request);

	dev_dbg(disk_to_dev(tapdev->gd),
		"end_request: op=%d error=%d bytes=%d\n",
		rq_data_dir(rq), error, blk_rq_bytes(rq));

	blktap_end_rq(rq, error);
}
|
|
+
|
|
/*
 * Hand one block request to userspace: take a ring slot, map the
 * request's pages into the tapdisk VMA and post it on the ring.
 * Returns 0 on success; -EBUSY when the ring or page pool is
 * exhausted (caller stops the queue and retries later); other
 * negative values on hard failure.
 */
int
blktap_device_make_request(struct blktap *tap, struct request *rq)
{
	struct blktap_device *tapdev = &tap->device;
	struct blktap_request *request;
	int write, nsegs;
	int err;

	request = blktap_ring_make_request(tap);
	if (IS_ERR(request)) {
		err = PTR_ERR(request);
		request = NULL;

		/* Ring full / out of memory: temporary, back off. */
		if (err == -ENOSPC || err == -ENOMEM)
			goto stop;

		goto fail;
	}

	write = rq_data_dir(rq) == WRITE;
	nsegs = blk_rq_map_sg(rq->q, rq, request->sg_table);

	dev_dbg(disk_to_dev(tapdev->gd),
		"make_request: op=%c bytes=%d nsegs=%d\n",
		write ? 'w' : 'r', blk_rq_bytes(rq), nsegs);

	request->rq = rq;
	request->operation = write ? BLKIF_OP_WRITE : BLKIF_OP_READ;

	err = blktap_request_get_pages(tap, request, nsegs);
	if (err)
		goto stop;

	err = blktap_ring_map_request(tap, request);
	if (err)
		goto fail;

	blktap_ring_submit_request(tap, request);

	return 0;

stop:
	/* Out-of-resource: count the stall and report -EBUSY. */
	tap->stats.st_oo_req++;
	err = -EBUSY;

_out:
	/* Both error paths release the ring slot, if one was taken. */
	if (request)
		blktap_ring_free_request(tap, request);

	return err;
fail:
	if (printk_ratelimit())
		dev_warn(disk_to_dev(tapdev->gd),
			 "make request: %d, failing\n", err);
	goto _out;
}
|
|
+
|
|
/*
 * called from tapdisk context
 *
 * Drain the request queue, pushing each filesystem request to
 * userspace. The queue lock is dropped around make_request and
 * retaken afterwards; on -EBUSY the queue is stopped until the next
 * kick.
 */
void
blktap_device_run_queue(struct blktap *tap)
{
	struct blktap_device *tapdev = &tap->device;
	struct request_queue *q;
	struct request *rq;
	int err;

	if (!tapdev->gd)
		return;

	q = tapdev->gd->queue;

	spin_lock_irq(&tapdev->lock);
	queue_flag_clear(QUEUE_FLAG_STOPPED, q);

	do {
		rq = __blktap_next_queued_rq(q);
		if (!rq)
			break;

		/* Only filesystem requests are forwarded. */
		if (!blk_fs_request(rq)) {
			__blktap_end_queued_rq(rq, -EOPNOTSUPP);
			continue;
		}

		/* make_request runs without the queue lock held. */
		spin_unlock_irq(&tapdev->lock);

		err = blktap_device_make_request(tap, rq);

		spin_lock_irq(&tapdev->lock);

		/* Resources exhausted: stop and retry on next kick.
		 * The request stays queued (not yet dequeued). */
		if (err == -EBUSY) {
			blk_stop_queue(q);
			break;
		}

		__blktap_dequeue_rq(rq);

		if (unlikely(err))
			__blktap_end_rq(rq, err);
	} while (1);

	spin_unlock_irq(&tapdev->lock);
}
|
|
+
|
|
+static void
|
|
+blktap_device_do_request(struct request_queue *rq)
|
|
+{
|
|
+ struct blktap_device *tapdev = rq->queuedata;
|
|
+ struct blktap *tap = dev_to_blktap(tapdev);
|
|
+
|
|
+ blktap_ring_kick_user(tap);
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_device_configure(struct blktap *tap,
|
|
+ struct blktap_params *params)
|
|
+{
|
|
+ struct request_queue *rq;
|
|
+ struct blktap_device *dev = &tap->device;
|
|
+
|
|
+ dev = &tap->device;
|
|
+ rq = dev->gd->queue;
|
|
+
|
|
+ spin_lock_irq(&dev->lock);
|
|
+
|
|
+ set_capacity(dev->gd, params->capacity);
|
|
+
|
|
+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
|
|
+ blk_queue_logical_block_size(rq, params->sector_size);
|
|
+ blk_queue_max_sectors(rq, 512);
|
|
+
|
|
+ /* Each segment in a request is up to an aligned page in size. */
|
|
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
|
|
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
|
|
+
|
|
+ /* Ensure a merged request will fit in a single I/O ring slot. */
|
|
+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+
|
|
+ /* Make sure buffer addresses are sector-aligned. */
|
|
+ blk_queue_dma_alignment(rq, 511);
|
|
+
|
|
+ /* We are reordering, but cacheless. */
|
|
+ blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL);
|
|
+
|
|
+ spin_unlock_irq(&dev->lock);
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_validate_params(struct blktap *tap,
|
|
+ struct blktap_params *params)
|
|
+{
|
|
+ struct device *dev = tap->ring.dev;
|
|
+ int sector_order, name_sz;
|
|
+
|
|
+ sector_order = ffs(params->sector_size) - 1;
|
|
+
|
|
+ if (sector_order < 9 ||
|
|
+ sector_order > 12 ||
|
|
+ params->sector_size != 1U<<sector_order)
|
|
+ goto fail;
|
|
+
|
|
+ if (!params->capacity ||
|
|
+ (params->capacity > ULLONG_MAX >> sector_order))
|
|
+ goto fail;
|
|
+
|
|
+ name_sz = min(sizeof(params->name), sizeof(tap->name));
|
|
+ if (strnlen(params->name, name_sz) >= name_sz)
|
|
+ goto fail;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ params->name[name_sz-1] = 0;
|
|
+ dev_err(dev, "capacity: %llu, sector-size: %lu, name: %s\n",
|
|
+ params->capacity, params->sector_size, params->name);
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_device_destroy(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_device *tapdev = &tap->device;
|
|
+ struct block_device *bdev;
|
|
+ struct gendisk *gd;
|
|
+ int err;
|
|
+
|
|
+ gd = tapdev->gd;
|
|
+ if (!gd)
|
|
+ return 0;
|
|
+
|
|
+ bdev = bdget_disk(gd, 0);
|
|
+
|
|
+ err = !mutex_trylock(&bdev->bd_mutex);
|
|
+ if (err) {
|
|
+ /* NB. avoid a deadlock. the last opener syncs the
|
|
+ * bdev holding bd_mutex. */
|
|
+ err = -EBUSY;
|
|
+ goto out_nolock;
|
|
+ }
|
|
+
|
|
+ if (bdev->bd_openers) {
|
|
+ err = -EBUSY;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ del_gendisk(gd);
|
|
+ gd->private_data = NULL;
|
|
+
|
|
+ blk_cleanup_queue(gd->queue);
|
|
+
|
|
+ put_disk(gd);
|
|
+ tapdev->gd = NULL;
|
|
+
|
|
+ clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
|
|
+ err = 0;
|
|
+out:
|
|
+ mutex_unlock(&bdev->bd_mutex);
|
|
+out_nolock:
|
|
+ bdput(bdev);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
/*
 * Fail every queued request with -EIO. Used when the device cannot be
 * destroyed cleanly, so pending I/O does not hang forever.
 */
static void
blktap_device_fail_queue(struct blktap *tap)
{
	struct blktap_device *tapdev = &tap->device;
	struct request_queue *q = tapdev->gd->queue;

	spin_lock_irq(&tapdev->lock);
	queue_flag_clear(QUEUE_FLAG_STOPPED, q);

	do {
		struct request *rq = __blktap_next_queued_rq(q);
		if (!rq)
			break;

		__blktap_end_queued_rq(rq, -EIO);
	} while (1);

	spin_unlock_irq(&tapdev->lock);
}
|
|
+
|
|
/* Attempt device destruction; if it is still busy, fail all pending
 * I/O instead so openers can drain. Returns the destroy result. */
static int
blktap_device_try_destroy(struct blktap *tap)
{
	int rv = blktap_device_destroy(tap);

	if (rv)
		blktap_device_fail_queue(tap);

	return rv;
}
|
|
+
|
|
/*
 * Block until the device is fully destroyed. Each failed attempt
 * fails pending I/O; poll_wait is woken as openers go away.
 */
void
blktap_device_destroy_sync(struct blktap *tap)
{
	wait_event(tap->ring.poll_wait,
		   !blktap_device_try_destroy(tap));
}
|
|
+
|
|
+int
|
|
+blktap_device_create(struct blktap *tap, struct blktap_params *params)
|
|
+{
|
|
+ int minor, err;
|
|
+ struct gendisk *gd;
|
|
+ struct request_queue *rq;
|
|
+ struct blktap_device *tapdev;
|
|
+
|
|
+ gd = NULL;
|
|
+ rq = NULL;
|
|
+ tapdev = &tap->device;
|
|
+ minor = tap->minor;
|
|
+
|
|
+ if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
|
|
+ return -EEXIST;
|
|
+
|
|
+ if (blktap_device_validate_params(tap, params))
|
|
+ return -EINVAL;
|
|
+
|
|
+ gd = alloc_disk(1);
|
|
+ if (!gd) {
|
|
+ err = -ENOMEM;
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ if (minor < 26) {
|
|
+ sprintf(gd->disk_name, "td%c", 'a' + minor % 26);
|
|
+ } else if (minor < (26 + 1) * 26) {
|
|
+ sprintf(gd->disk_name, "td%c%c",
|
|
+ 'a' + minor / 26 - 1,'a' + minor % 26);
|
|
+ } else {
|
|
+ const unsigned int m1 = (minor / 26 - 1) / 26 - 1;
|
|
+ const unsigned int m2 = (minor / 26 - 1) % 26;
|
|
+ const unsigned int m3 = minor % 26;
|
|
+ sprintf(gd->disk_name, "td%c%c%c",
|
|
+ 'a' + m1, 'a' + m2, 'a' + m3);
|
|
+ }
|
|
+
|
|
+ gd->major = blktap_device_major;
|
|
+ gd->first_minor = minor;
|
|
+ gd->fops = &blktap_device_file_operations;
|
|
+ gd->private_data = tapdev;
|
|
+
|
|
+ spin_lock_init(&tapdev->lock);
|
|
+ rq = blk_init_queue(blktap_device_do_request, &tapdev->lock);
|
|
+ if (!rq) {
|
|
+ err = -ENOMEM;
|
|
+ goto fail;
|
|
+ }
|
|
+ elevator_init(rq, "noop");
|
|
+
|
|
+ gd->queue = rq;
|
|
+ rq->queuedata = tapdev;
|
|
+ tapdev->gd = gd;
|
|
+
|
|
+ blktap_device_configure(tap, params);
|
|
+ add_disk(gd);
|
|
+
|
|
+ if (params->name[0])
|
|
+ strncpy(tap->name, params->name, sizeof(tap->name)-1);
|
|
+
|
|
+ set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
|
|
+
|
|
+ dev_info(disk_to_dev(gd), "sector-size: %u capacity: %llu\n",
|
|
+ queue_logical_block_size(rq),
|
|
+ (unsigned long long)get_capacity(gd));
|
|
+
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ if (gd)
|
|
+ del_gendisk(gd);
|
|
+ if (rq)
|
|
+ blk_cleanup_queue(rq);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
/*
 * Append a human-readable dump of the disk, queue and bdev state to
 * buf; returns the number of characters (snprintf semantics, so the
 * result can exceed what actually fit).
 */
size_t
blktap_device_debug(struct blktap *tap, char *buf, size_t size)
{
	struct gendisk *disk = tap->device.gd;
	struct request_queue *q;
	struct block_device *bdev;
	char *s = buf, *end = buf + size;

	if (!disk)
		return 0;

	q = disk->queue;

	s += snprintf(s, end - s,
		      "disk capacity:%llu sector size:%u\n",
		      (unsigned long long)get_capacity(disk),
		      queue_logical_block_size(q));

	s += snprintf(s, end - s,
		      "queue flags:%#lx plugged:%d stopped:%d empty:%d\n",
		      q->queue_flags,
		      blk_queue_plugged(q), blk_queue_stopped(q),
		      elv_queue_empty(q));

	bdev = bdget_disk(disk, 0);
	if (bdev) {
		s += snprintf(s, end - s,
			      "bdev openers:%d closed:%d\n",
			      bdev->bd_openers,
			      test_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse));
		bdput(bdev);
	}

	return s - buf;
}
|
|
+
|
|
+int __init
|
|
+blktap_device_init()
|
|
+{
|
|
+ int major;
|
|
+
|
|
+ /* Dynamically allocate a major for this device */
|
|
+ major = register_blkdev(0, "tapdev");
|
|
+ if (major < 0) {
|
|
+ BTERR("Couldn't register blktap device\n");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ blktap_device_major = major;
|
|
+ BTINFO("blktap device major %d\n", major);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_device_exit(void)
|
|
+{
|
|
+ if (blktap_device_major)
|
|
+ unregister_blkdev(blktap_device_major, "tapdev");
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2-new/request.c 2011-02-24 13:49:49.000000000 +0100
|
|
@@ -0,0 +1,418 @@
|
|
+#include <linux/mempool.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/device.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+/* max pages per shared pool. just to prevent accidental dos. */
|
|
+#define POOL_MAX_PAGES (256*BLKIF_MAX_SEGMENTS_PER_REQUEST)
|
|
+
|
|
+/* default page pool size. when considering to shrink a shared pool,
|
|
+ * note that paused tapdisks may grab a whole lot of pages for a long
|
|
+ * time. */
|
|
+#define POOL_DEFAULT_PAGES (2 * MMAP_PAGES)
|
|
+
|
|
+/* max number of pages allocatable per request. */
|
|
+#define POOL_MAX_REQUEST_PAGES BLKIF_MAX_SEGMENTS_PER_REQUEST
|
|
+
|
|
+/* min request structs per pool. These grow dynamically. */
|
|
+#define POOL_MIN_REQS BLK_RING_SIZE
|
|
+
|
|
+static struct kset *pool_set;
|
|
+
|
|
+#define kobj_to_pool(_kobj) \
|
|
+ container_of(_kobj, struct blktap_page_pool, kobj)
|
|
+
|
|
+static struct kmem_cache *request_cache;
|
|
+static mempool_t *request_pool;
|
|
+
|
|
+static void
|
|
+__page_pool_wake(struct blktap_page_pool *pool)
|
|
+{
|
|
+ mempool_t *mem = pool->bufs;
|
|
+
|
|
+ /*
|
|
+ NB. slightly wasteful to always wait for a full segment
|
|
+ set. but this ensures the next disk makes
|
|
+ progress. presently, the repeated request struct
|
|
+ alloc/release cycles would otherwise keep everyone spinning.
|
|
+ */
|
|
+
|
|
+ if (mem->curr_nr >= POOL_MAX_REQUEST_PAGES)
|
|
+ wake_up(&pool->wait);
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_request_get_pages(struct blktap *tap,
|
|
+ struct blktap_request *request, int nr_pages)
|
|
+{
|
|
+ struct blktap_page_pool *pool = tap->pool;
|
|
+ mempool_t *mem = pool->bufs;
|
|
+ struct page *page;
|
|
+
|
|
+ BUG_ON(request->nr_pages != 0);
|
|
+ BUG_ON(nr_pages > POOL_MAX_REQUEST_PAGES);
|
|
+
|
|
+ if (mem->curr_nr < nr_pages)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ /* NB. avoid thundering herds of tapdisks colliding. */
|
|
+ spin_lock(&pool->lock);
|
|
+
|
|
+ if (mem->curr_nr < nr_pages) {
|
|
+ spin_unlock(&pool->lock);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ while (request->nr_pages < nr_pages) {
|
|
+ page = mempool_alloc(mem, GFP_NOWAIT);
|
|
+ BUG_ON(!page);
|
|
+ request->pages[request->nr_pages++] = page;
|
|
+ }
|
|
+
|
|
+ spin_unlock(&pool->lock);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_request_put_pages(struct blktap *tap,
|
|
+ struct blktap_request *request)
|
|
+{
|
|
+ struct blktap_page_pool *pool = tap->pool;
|
|
+ struct page *page;
|
|
+
|
|
+ while (request->nr_pages) {
|
|
+ page = request->pages[--request->nr_pages];
|
|
+ mempool_free(page, pool->bufs);
|
|
+ }
|
|
+}
|
|
+
|
|
+size_t
+blktap_request_debug(struct blktap *tap, char *buf, size_t size)
+{
+	struct blktap_page_pool *pool = tap->pool;
+	mempool_t *mem = pool->bufs;
+	char *s = buf, *end = buf + size;
+
+	/* write through the cursor s, not buf: identical today since
+	 * s == buf on entry, but writing through buf would clobber any
+	 * output prepended in the future */
+	s += snprintf(s, end - s,
+		      "pool:%s pages:%d free:%d\n",
+		      kobject_name(&pool->kobj),
+		      mem->min_nr, mem->curr_nr);
+
+	return s - buf;
+}
|
|
+
|
|
+struct blktap_request*
|
|
+blktap_request_alloc(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_request *request;
|
|
+
|
|
+ request = mempool_alloc(request_pool, GFP_NOWAIT);
|
|
+ if (request)
|
|
+ request->tap = tap;
|
|
+
|
|
+ return request;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_request_free(struct blktap *tap,
|
|
+ struct blktap_request *request)
|
|
+{
|
|
+ blktap_request_put_pages(tap, request);
|
|
+
|
|
+ mempool_free(request, request_pool);
|
|
+
|
|
+ __page_pool_wake(tap->pool);
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_request_bounce(struct blktap *tap,
|
|
+ struct blktap_request *request,
|
|
+ int seg, int write)
|
|
+{
|
|
+ struct scatterlist *sg = &request->sg_table[seg];
|
|
+ void *s, *p;
|
|
+
|
|
+ BUG_ON(seg >= request->nr_pages);
|
|
+
|
|
+ s = sg_virt(sg);
|
|
+ p = page_address(request->pages[seg]) + sg->offset;
|
|
+
|
|
+ if (write)
|
|
+ memcpy(p, s, sg->length);
|
|
+ else
|
|
+ memcpy(s, p, sg->length);
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_request_ctor(void *obj)
|
|
+{
|
|
+ struct blktap_request *request = obj;
|
|
+
|
|
+ memset(request, 0, sizeof(*request));
|
|
+ sg_init_table(request->sg_table, ARRAY_SIZE(request->sg_table));
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_page_pool_resize(struct blktap_page_pool *pool, int target)
|
|
+{
|
|
+ mempool_t *bufs = pool->bufs;
|
|
+ int err;
|
|
+
|
|
+ /* NB. mempool asserts min_nr >= 1 */
|
|
+ target = max(1, target);
|
|
+
|
|
+ err = mempool_resize(bufs, target, GFP_KERNEL);
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ __page_pool_wake(pool);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+struct pool_attribute {
|
|
+ struct attribute attr;
|
|
+
|
|
+ ssize_t (*show)(struct blktap_page_pool *pool,
|
|
+ char *buf);
|
|
+
|
|
+ ssize_t (*store)(struct blktap_page_pool *pool,
|
|
+ const char *buf, size_t count);
|
|
+};
|
|
+
|
|
+#define kattr_to_pool_attr(_kattr) \
|
|
+ container_of(_kattr, struct pool_attribute, attr)
|
|
+
|
|
+static ssize_t
|
|
+blktap_page_pool_show_size(struct blktap_page_pool *pool,
|
|
+ char *buf)
|
|
+{
|
|
+ mempool_t *mem = pool->bufs;
|
|
+ return sprintf(buf, "%d", mem->min_nr);
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_page_pool_store_size(struct blktap_page_pool *pool,
|
|
+ const char *buf, size_t size)
|
|
+{
|
|
+ int target;
|
|
+
|
|
+ /*
|
|
+ * NB. target fixup to avoid undesired results. less than a
|
|
+ * full segment set can wedge the disk. much more than a
|
|
+ * couple times the physical queue depth is rarely useful.
|
|
+ */
|
|
+
|
|
+ target = simple_strtoul(buf, NULL, 0);
|
|
+ target = max(POOL_MAX_REQUEST_PAGES, target);
|
|
+ target = min(target, POOL_MAX_PAGES);
|
|
+
|
|
+ return blktap_page_pool_resize(pool, target) ? : size;
|
|
+}
|
|
+
|
|
+static struct pool_attribute blktap_page_pool_attr_size =
|
|
+ __ATTR(size, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH,
|
|
+ blktap_page_pool_show_size,
|
|
+ blktap_page_pool_store_size);
|
|
+
|
|
+static ssize_t
|
|
+blktap_page_pool_show_free(struct blktap_page_pool *pool,
|
|
+ char *buf)
|
|
+{
|
|
+ mempool_t *mem = pool->bufs;
|
|
+ return sprintf(buf, "%d", mem->curr_nr);
|
|
+}
|
|
+
|
|
+static struct pool_attribute blktap_page_pool_attr_free =
|
|
+ __ATTR(free, S_IRUSR|S_IRGRP|S_IROTH,
|
|
+ blktap_page_pool_show_free,
|
|
+ NULL);
|
|
+
|
|
+static struct attribute *blktap_page_pool_attrs[] = {
|
|
+ &blktap_page_pool_attr_size.attr,
|
|
+ &blktap_page_pool_attr_free.attr,
|
|
+ NULL,
|
|
+};
|
|
+
|
|
+static inline struct kobject*
|
|
+__blktap_kset_find_obj(struct kset *kset, const char *name)
|
|
+{
|
|
+ struct kobject *k;
|
|
+ struct kobject *ret = NULL;
|
|
+
|
|
+ spin_lock(&kset->list_lock);
|
|
+ list_for_each_entry(k, &kset->list, entry) {
|
|
+ if (kobject_name(k) && !strcmp(kobject_name(k), name)) {
|
|
+ ret = kobject_get(k);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ spin_unlock(&kset->list_lock);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_page_pool_show_attr(struct kobject *kobj, struct attribute *kattr,
|
|
+ char *buf)
|
|
+{
|
|
+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
|
|
+ struct pool_attribute *attr = kattr_to_pool_attr(kattr);
|
|
+
|
|
+ if (attr->show)
|
|
+ return attr->show(pool, buf);
|
|
+
|
|
+ return -EIO;
|
|
+}
|
|
+
|
|
+static ssize_t
+blktap_page_pool_store_attr(struct kobject *kobj, struct attribute *kattr,
+			    const char *buf, size_t size)
+{
+	struct blktap_page_pool *pool = kobj_to_pool(kobj);
+	struct pool_attribute *attr = kattr_to_pool_attr(kattr);
+
+	/* NB. test the store handler, not show: writing to an attribute
+	 * without a .store (e.g. the read-only "free" attr) must return
+	 * -EIO, not call through a NULL function pointer */
+	if (attr->store)
+		return attr->store(pool, buf, size);
+
+	return -EIO;
+}
|
|
+
|
|
+static struct sysfs_ops blktap_page_pool_sysfs_ops = {
|
|
+ .show = blktap_page_pool_show_attr,
|
|
+ .store = blktap_page_pool_store_attr,
|
|
+};
|
|
+
|
|
+static void
|
|
+blktap_page_pool_release(struct kobject *kobj)
|
|
+{
|
|
+ struct blktap_page_pool *pool = kobj_to_pool(kobj);
|
|
+ mempool_destroy(pool->bufs);
|
|
+ kfree(pool);
|
|
+}
|
|
+
|
|
+struct kobj_type blktap_page_pool_ktype = {
|
|
+ .release = blktap_page_pool_release,
|
|
+ .sysfs_ops = &blktap_page_pool_sysfs_ops,
|
|
+ .default_attrs = blktap_page_pool_attrs,
|
|
+};
|
|
+
|
|
+static void*
|
|
+__mempool_page_alloc(gfp_t gfp_mask, void *pool_data)
|
|
+{
|
|
+ struct page *page;
|
|
+
|
|
+ if (!(gfp_mask & __GFP_WAIT))
|
|
+ return NULL;
|
|
+
|
|
+ page = alloc_page(gfp_mask);
|
|
+ if (page)
|
|
+ SetPageReserved(page);
|
|
+
|
|
+ return page;
|
|
+}
|
|
+
|
|
+static void
|
|
+__mempool_page_free(void *element, void *pool_data)
|
|
+{
|
|
+ struct page *page = element;
|
|
+
|
|
+ ClearPageReserved(page);
|
|
+ put_page(page);
|
|
+}
|
|
+
|
|
+static struct kobject*
+blktap_page_pool_create(const char *name, int nr_pages)
+{
+	struct blktap_page_pool *pool;
+	int err;
+
+	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+	if (!pool)
+		goto fail;
+
+	spin_lock_init(&pool->lock);
+	init_waitqueue_head(&pool->wait);
+
+	pool->bufs = mempool_create(nr_pages,
+				    __mempool_page_alloc, __mempool_page_free,
+				    pool);
+	if (!pool->bufs)
+		goto fail_pool;
+
+	kobject_init(&pool->kobj, &blktap_page_pool_ktype);
+	pool->kobj.kset = pool_set;
+	err = kobject_add(&pool->kobj, &pool_set->kobj, "%s", name);
+	if (err)
+		goto fail_bufs;
+
+	return &pool->kobj;
+
+	/* NB. kobject_add() is the last failure point; the stray
+	 * unreachable kobject_del() that used to sit here is gone.
+	 * The kobject was never added, so plain teardown suffices. */
+fail_bufs:
+	mempool_destroy(pool->bufs);
+fail_pool:
+	kfree(pool);
+fail:
+	return NULL;
+}
|
|
+
|
|
+struct blktap_page_pool*
|
|
+blktap_page_pool_get(const char *name)
|
|
+{
|
|
+ struct kobject *kobj;
|
|
+
|
|
+ kobj = __blktap_kset_find_obj(pool_set, name);
|
|
+ if (!kobj)
|
|
+ kobj = blktap_page_pool_create(name,
|
|
+ POOL_DEFAULT_PAGES);
|
|
+ if (!kobj)
|
|
+ return ERR_PTR(-ENOMEM);
|
|
+
|
|
+ return kobj_to_pool(kobj);
|
|
+}
|
|
+
|
|
+int __init
|
|
+blktap_page_pool_init(struct kobject *parent)
|
|
+{
|
|
+ request_cache =
|
|
+ kmem_cache_create("blktap-request",
|
|
+ sizeof(struct blktap_request), 0,
|
|
+ 0, blktap_request_ctor);
|
|
+ if (!request_cache)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ request_pool =
|
|
+ mempool_create_slab_pool(POOL_MIN_REQS, request_cache);
|
|
+ if (!request_pool)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ pool_set = kset_create_and_add("pools", NULL, parent);
|
|
+ if (!pool_set)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_page_pool_exit(void)
|
|
+{
|
|
+ if (pool_set) {
|
|
+ BUG_ON(!list_empty(&pool_set->list));
|
|
+ kset_unregister(pool_set);
|
|
+ pool_set = NULL;
|
|
+ }
|
|
+
|
|
+ if (request_pool) {
|
|
+ mempool_destroy(request_pool);
|
|
+ request_pool = NULL;
|
|
+ }
|
|
+
|
|
+ if (request_cache) {
|
|
+ kmem_cache_destroy(request_cache);
|
|
+ request_cache = NULL;
|
|
+ }
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2-new/ring.c 2011-02-24 13:49:49.000000000 +0100
|
|
@@ -0,0 +1,550 @@
|
|
+
|
|
+#include <linux/device.h>
|
|
+#include <linux/signal.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/poll.h>
|
|
+#include <linux/blkdev.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+int blktap_ring_major;
|
|
+static struct cdev blktap_ring_cdev;
|
|
+
|
|
+ /*
|
|
+ * BLKTAP - immediately before the mmap area,
|
|
+ * we have a bunch of pages reserved for shared memory rings.
|
|
+ */
|
|
+#define RING_PAGES 1
|
|
+
|
|
+static void
+blktap_ring_read_response(struct blktap *tap,
+			  const struct blkif_response *rsp)
+{
+	struct blktap_ring *ring = &tap->ring;
+	struct blktap_request *request;
+	int usr_idx, err;
+
+	request = NULL;
+
+	usr_idx = rsp->id;
+	if (usr_idx < 0 || usr_idx >= MAX_PENDING_REQS) {
+		err = -ERANGE;
+		goto invalid;
+	}
+
+	request = ring->pending[usr_idx];
+
+	if (!request) {
+		err = -ESRCH;
+		goto invalid;
+	}
+
+	if (rsp->operation != request->operation) {
+		err = -EINVAL;
+		goto invalid;
+	}
+
+	dev_dbg(ring->dev,
+		"request %d [%p] response: %d\n",
+		request->usr_idx, request, rsp->status);
+
+	err = rsp->status == BLKIF_RSP_OKAY ? 0 : -EIO;
+end_request:
+	blktap_device_end_request(tap, request, err);
+	return;
+
+invalid:
+	/* NB. request is NULL on the -ERANGE/-ESRCH paths; never
+	 * dereference it while reporting the bogus response */
+	dev_warn(ring->dev,
+		 "invalid response, idx:%d status:%d op:%d/%d: err %d\n",
+		 usr_idx, rsp->status, rsp->operation,
+		 request ? request->operation : -1,
+		 err);
+	if (request)
+		goto end_request;
+}
|
|
+
|
|
+static void
|
|
+blktap_read_ring(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct blkif_response rsp;
|
|
+ RING_IDX rc, rp;
|
|
+
|
|
+ down_read(¤t->mm->mmap_sem);
|
|
+ if (!ring->vma) {
|
|
+ up_read(¤t->mm->mmap_sem);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* for each outstanding message on the ring */
|
|
+ rp = ring->ring.sring->rsp_prod;
|
|
+ rmb();
|
|
+
|
|
+ for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
|
|
+ memcpy(&rsp, RING_GET_RESPONSE(&ring->ring, rc), sizeof(rsp));
|
|
+ blktap_ring_read_response(tap, &rsp);
|
|
+ }
|
|
+
|
|
+ ring->ring.rsp_cons = rc;
|
|
+
|
|
+ up_read(¤t->mm->mmap_sem);
|
|
+}
|
|
+
|
|
+static int blktap_ring_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|
+{
|
|
+ return VM_FAULT_SIGBUS;
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_ring_fail_pending(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct blktap_request *request;
|
|
+ int usr_idx;
|
|
+
|
|
+ for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
|
|
+ request = ring->pending[usr_idx];
|
|
+ if (!request)
|
|
+ continue;
|
|
+
|
|
+ blktap_device_end_request(tap, request, -EIO);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_ring_vm_close(struct vm_area_struct *vma)
|
|
+{
|
|
+ struct blktap *tap = vma->vm_private_data;
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct page *page = virt_to_page(ring->ring.sring);
|
|
+
|
|
+ blktap_ring_fail_pending(tap);
|
|
+
|
|
+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
|
|
+ ClearPageReserved(page);
|
|
+ __free_page(page);
|
|
+
|
|
+ ring->vma = NULL;
|
|
+
|
|
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
|
|
+ blktap_control_destroy_tap(tap);
|
|
+}
|
|
+
|
|
+static struct vm_operations_struct blktap_ring_vm_operations = {
|
|
+ .close = blktap_ring_vm_close,
|
|
+ .fault = blktap_ring_fault,
|
|
+};
|
|
+
|
|
+int
|
|
+blktap_ring_map_segment(struct blktap *tap,
|
|
+ struct blktap_request *request,
|
|
+ int seg)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ unsigned long uaddr;
|
|
+
|
|
+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
|
|
+ return vm_insert_page(ring->vma, uaddr, request->pages[seg]);
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_map_request(struct blktap *tap,
|
|
+ struct blktap_request *request)
|
|
+{
|
|
+ int seg, err = 0;
|
|
+ int write;
|
|
+
|
|
+ write = request->operation == BLKIF_OP_WRITE;
|
|
+
|
|
+ for (seg = 0; seg < request->nr_pages; seg++) {
|
|
+ if (write)
|
|
+ blktap_request_bounce(tap, request, seg, write);
|
|
+
|
|
+ err = blktap_ring_map_segment(tap, request, seg);
|
|
+ if (err)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (err)
|
|
+ blktap_ring_unmap_request(tap, request);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_ring_unmap_request(struct blktap *tap,
|
|
+ struct blktap_request *request)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ unsigned long uaddr;
|
|
+ unsigned size;
|
|
+ int seg, read;
|
|
+
|
|
+ uaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, 0);
|
|
+ size = request->nr_pages << PAGE_SHIFT;
|
|
+ read = request->operation == BLKIF_OP_READ;
|
|
+
|
|
+ if (read)
|
|
+ for (seg = 0; seg < request->nr_pages; seg++)
|
|
+ blktap_request_bounce(tap, request, seg, !read);
|
|
+
|
|
+ zap_page_range(ring->vma, uaddr, size, NULL);
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_ring_free_request(struct blktap *tap,
|
|
+ struct blktap_request *request)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+
|
|
+ ring->pending[request->usr_idx] = NULL;
|
|
+ ring->n_pending--;
|
|
+
|
|
+ blktap_request_free(tap, request);
|
|
+}
|
|
+
|
|
+struct blktap_request*
|
|
+blktap_ring_make_request(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct blktap_request *request;
|
|
+ int usr_idx;
|
|
+
|
|
+ if (RING_FULL(&ring->ring))
|
|
+ return ERR_PTR(-ENOSPC);
|
|
+
|
|
+ request = blktap_request_alloc(tap);
|
|
+ if (!request)
|
|
+ return ERR_PTR(-ENOMEM);
|
|
+
|
|
+ for (usr_idx = 0; usr_idx < BLK_RING_SIZE; usr_idx++)
|
|
+ if (!ring->pending[usr_idx])
|
|
+ break;
|
|
+
|
|
+ BUG_ON(usr_idx >= BLK_RING_SIZE);
|
|
+
|
|
+ request->tap = tap;
|
|
+ request->usr_idx = usr_idx;
|
|
+
|
|
+ ring->pending[usr_idx] = request;
|
|
+ ring->n_pending++;
|
|
+
|
|
+ return request;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_ring_submit_request(struct blktap *tap,
|
|
+ struct blktap_request *request)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct blkif_request *breq;
|
|
+ struct scatterlist *sg;
|
|
+ int i, nsecs = 0;
|
|
+
|
|
+ dev_dbg(ring->dev,
|
|
+ "request %d [%p] submit\n", request->usr_idx, request);
|
|
+
|
|
+ breq = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
|
|
+
|
|
+ breq->id = request->usr_idx;
|
|
+ breq->sector_number = blk_rq_pos(request->rq);
|
|
+ breq->handle = 0;
|
|
+ breq->operation = request->operation;
|
|
+ breq->nr_segments = request->nr_pages;
|
|
+
|
|
+ blktap_for_each_sg(sg, request, i) {
|
|
+ struct blkif_request_segment *seg = &breq->seg[i];
|
|
+ int first, count;
|
|
+
|
|
+ count = sg->length >> 9;
|
|
+ first = sg->offset >> 9;
|
|
+
|
|
+ seg->first_sect = first;
|
|
+ seg->last_sect = first + count - 1;
|
|
+
|
|
+ nsecs += count;
|
|
+ }
|
|
+
|
|
+ ring->ring.req_prod_pvt++;
|
|
+
|
|
+ do_gettimeofday(&request->time);
|
|
+
|
|
+
|
|
+ if (request->operation == BLKIF_OP_WRITE) {
|
|
+ tap->stats.st_wr_sect += nsecs;
|
|
+ tap->stats.st_wr_req++;
|
|
+ }
|
|
+
|
|
+ if (request->operation == BLKIF_OP_READ) {
|
|
+ tap->stats.st_rd_sect += nsecs;
|
|
+ tap->stats.st_rd_req++;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_ring_open(struct inode *inode, struct file *filp)
|
|
+{
|
|
+ struct blktap *tap = NULL;
|
|
+ int minor;
|
|
+
|
|
+ minor = iminor(inode);
|
|
+
|
|
+ if (minor < blktap_max_minor)
|
|
+ tap = blktaps[minor];
|
|
+
|
|
+ if (!tap)
|
|
+ return -ENXIO;
|
|
+
|
|
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
|
|
+ return -ENXIO;
|
|
+
|
|
+ if (tap->ring.task)
|
|
+ return -EBUSY;
|
|
+
|
|
+ filp->private_data = tap;
|
|
+ tap->ring.task = current;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_ring_release(struct inode *inode, struct file *filp)
|
|
+{
|
|
+ struct blktap *tap = filp->private_data;
|
|
+
|
|
+ blktap_device_destroy_sync(tap);
|
|
+
|
|
+ tap->ring.task = NULL;
|
|
+
|
|
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
|
|
+ blktap_control_destroy_tap(tap);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
|
|
+{
|
|
+ struct blktap *tap = filp->private_data;
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct blkif_sring *sring;
|
|
+ struct page *page = NULL;
|
|
+ int err;
|
|
+
|
|
+ if (ring->vma)
|
|
+ return -EBUSY;
|
|
+
|
|
+ page = alloc_page(GFP_KERNEL|__GFP_ZERO);
|
|
+ if (!page)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ SetPageReserved(page);
|
|
+
|
|
+ err = vm_insert_page(vma, vma->vm_start, page);
|
|
+ if (err)
|
|
+ goto fail;
|
|
+
|
|
+ sring = page_address(page);
|
|
+ SHARED_RING_INIT(sring);
|
|
+ FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);
|
|
+
|
|
+ ring->ring_vstart = vma->vm_start;
|
|
+ ring->user_vstart = ring->ring_vstart + PAGE_SIZE;
|
|
+
|
|
+ vma->vm_private_data = tap;
|
|
+
|
|
+ vma->vm_flags |= VM_DONTCOPY;
|
|
+ vma->vm_flags |= VM_RESERVED;
|
|
+
|
|
+ vma->vm_ops = &blktap_ring_vm_operations;
|
|
+
|
|
+ ring->vma = vma;
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ if (page) {
|
|
+ zap_page_range(vma, vma->vm_start, PAGE_SIZE, NULL);
|
|
+ ClearPageReserved(page);
|
|
+ __free_page(page);
|
|
+ }
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int
+blktap_ring_ioctl(struct inode *inode, struct file *filp,
+		  unsigned int cmd, unsigned long arg)
+{
+	struct blktap *tap = filp->private_data;
+	struct blktap_ring *ring = &tap->ring;
+
+	BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);
+
+	/* only the task that mapped the ring may drive it */
+	if (!ring->vma || ring->vma->vm_mm != current->mm)
+		return -EACCES;
+
+	switch(cmd) {
+	case BLKTAP2_IOCTL_KICK_FE:
+
+		blktap_read_ring(tap);
+		return 0;
+
+	case BLKTAP2_IOCTL_CREATE_DEVICE: {
+		struct blktap_params params;
+		/* keep the __user annotation so sparse can verify the
+		 * copy_from_user() address space */
+		void __user *ptr = (void __user *)arg;
+
+		if (!arg)
+			return -EINVAL;
+
+		if (copy_from_user(&params, ptr, sizeof(params)))
+			return -EFAULT;
+
+		return blktap_device_create(tap, &params);
+	}
+
+	case BLKTAP2_IOCTL_REMOVE_DEVICE:
+
+		return blktap_device_destroy(tap);
+	}
+
+	return -ENOIOCTLCMD;
+}
|
|
+
|
|
+static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
|
|
+{
|
|
+ struct blktap *tap = filp->private_data;
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ int work;
|
|
+
|
|
+ poll_wait(filp, &tap->pool->wait, wait);
|
|
+ poll_wait(filp, &ring->poll_wait, wait);
|
|
+
|
|
+ down_read(¤t->mm->mmap_sem);
|
|
+ if (ring->vma && tap->device.gd)
|
|
+ blktap_device_run_queue(tap);
|
|
+ up_read(¤t->mm->mmap_sem);
|
|
+
|
|
+ work = ring->ring.req_prod_pvt - ring->ring.sring->req_prod;
|
|
+ RING_PUSH_REQUESTS(&ring->ring);
|
|
+
|
|
+ if (work ||
|
|
+ ring->ring.sring->private.tapif_user.msg ||
|
|
+ test_and_clear_bit(BLKTAP_DEVICE_CLOSED, &tap->dev_inuse))
|
|
+ return POLLIN | POLLRDNORM;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct file_operations blktap_ring_file_operations = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .open = blktap_ring_open,
|
|
+ .release = blktap_ring_release,
|
|
+ .ioctl = blktap_ring_ioctl,
|
|
+ .mmap = blktap_ring_mmap,
|
|
+ .poll = blktap_ring_poll,
|
|
+};
|
|
+
|
|
+void
|
|
+blktap_ring_kick_user(struct blktap *tap)
|
|
+{
|
|
+ wake_up(&tap->ring.poll_wait);
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_destroy(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+
|
|
+ if (ring->task || ring->vma)
|
|
+ return -EBUSY;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_create(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+
|
|
+ init_waitqueue_head(&ring->poll_wait);
|
|
+ ring->devno = MKDEV(blktap_ring_major, tap->minor);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+size_t
|
|
+blktap_ring_debug(struct blktap *tap, char *buf, size_t size)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ char *s = buf, *end = buf + size;
|
|
+ int usr_idx;
|
|
+
|
|
+ s += snprintf(s, end - s,
|
|
+ "begin pending:%d\n", ring->n_pending);
|
|
+
|
|
+ for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
|
|
+ struct blktap_request *request;
|
|
+ struct timeval *time;
|
|
+ int write;
|
|
+
|
|
+ request = ring->pending[usr_idx];
|
|
+ if (!request)
|
|
+ continue;
|
|
+
|
|
+ write = request->operation == BLKIF_OP_WRITE;
|
|
+ time = &request->time;
|
|
+
|
|
+ s += snprintf(s, end - s,
|
|
+ "%02d: usr_idx:%02d "
|
|
+ "op:%c nr_pages:%02d time:%lu.%09lu\n",
|
|
+ usr_idx, request->usr_idx,
|
|
+ write ? 'W' : 'R', request->nr_pages,
|
|
+ time->tv_sec, time->tv_usec);
|
|
+ }
|
|
+
|
|
+ s += snprintf(s, end - s, "end pending\n");
|
|
+
|
|
+ return s - buf;
|
|
+}
|
|
+
|
|
+
|
|
+int __init
|
|
+blktap_ring_init(void)
|
|
+{
|
|
+ dev_t dev = 0;
|
|
+ int err;
|
|
+
|
|
+ cdev_init(&blktap_ring_cdev, &blktap_ring_file_operations);
|
|
+ blktap_ring_cdev.owner = THIS_MODULE;
|
|
+
|
|
+ err = alloc_chrdev_region(&dev, 0, MAX_BLKTAP_DEVICE, "blktap2");
|
|
+ if (err < 0) {
|
|
+ BTERR("error registering ring devices: %d\n", err);
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ err = cdev_add(&blktap_ring_cdev, dev, MAX_BLKTAP_DEVICE);
|
|
+ if (err) {
|
|
+ BTERR("error adding ring device: %d\n", err);
|
|
+ unregister_chrdev_region(dev, MAX_BLKTAP_DEVICE);
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ blktap_ring_major = MAJOR(dev);
|
|
+ BTINFO("blktap ring major: %d\n", blktap_ring_major);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_ring_exit(void)
|
|
+{
|
|
+ if (!blktap_ring_major)
|
|
+ return;
|
|
+
|
|
+ cdev_del(&blktap_ring_cdev);
|
|
+ unregister_chrdev_region(MKDEV(blktap_ring_major, 0),
|
|
+ MAX_BLKTAP_DEVICE);
|
|
+
|
|
+ blktap_ring_major = 0;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2-new/sysfs.c 2011-02-24 13:49:49.000000000 +0100
|
|
@@ -0,0 +1,288 @@
|
|
+#include <linux/types.h>
|
|
+#include <linux/device.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/genhd.h>
|
|
+#include <linux/blkdev.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+int blktap_debug_level = 1;
|
|
+
|
|
+static struct class *class;
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_set_name(struct device *dev, struct device_attribute *attr, const char *buf, size_t size)
|
|
+{
|
|
+ struct blktap *tap;
|
|
+
|
|
+ tap = dev_get_drvdata(dev);
|
|
+ if (!tap)
|
|
+ return 0;
|
|
+
|
|
+ if (size >= BLKTAP2_MAX_MESSAGE_LEN)
|
|
+ return -ENAMETOOLONG;
|
|
+
|
|
+ if (strnlen(buf, size) != size)
|
|
+ return -EINVAL;
|
|
+
|
|
+ strcpy(tap->name, buf);
|
|
+
|
|
+ return size;
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_get_name(struct device *dev, struct device_attribute *attr, char *buf)
|
|
+{
|
|
+ struct blktap *tap;
|
|
+ ssize_t size;
|
|
+
|
|
+ tap = dev_get_drvdata(dev);
|
|
+ if (!tap)
|
|
+ return 0;
|
|
+
|
|
+ if (tap->name[0])
|
|
+ size = sprintf(buf, "%s\n", tap->name);
|
|
+ else
|
|
+ size = sprintf(buf, "%d\n", tap->minor);
|
|
+
|
|
+ return size;
|
|
+}
|
|
+static DEVICE_ATTR(name, S_IRUGO|S_IWUSR,
|
|
+ blktap_sysfs_get_name, blktap_sysfs_set_name);
|
|
+
|
|
+static void
|
|
+blktap_sysfs_remove_work(struct work_struct *work)
|
|
+{
|
|
+ struct blktap *tap
|
|
+ = container_of(work, struct blktap, remove_work);
|
|
+ blktap_control_destroy_tap(tap);
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_remove_device(struct device *dev,
|
|
+ struct device_attribute *attr,
|
|
+ const char *buf, size_t size)
|
|
+{
|
|
+ struct blktap *tap;
|
|
+ int err;
|
|
+
|
|
+ tap = dev_get_drvdata(dev);
|
|
+ if (!tap)
|
|
+ return size;
|
|
+
|
|
+ if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
|
|
+ goto wait;
|
|
+
|
|
+ if (tap->ring.vma) {
|
|
+ struct blkif_sring *sring = tap->ring.ring.sring;
|
|
+ sring->private.tapif_user.msg = BLKTAP2_RING_MESSAGE_CLOSE;
|
|
+ blktap_ring_kick_user(tap);
|
|
+ } else {
|
|
+ INIT_WORK(&tap->remove_work, blktap_sysfs_remove_work);
|
|
+ schedule_work(&tap->remove_work);
|
|
+ }
|
|
+wait:
|
|
+ err = wait_event_interruptible(tap->remove_wait,
|
|
+ !dev_get_drvdata(dev));
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ return size;
|
|
+}
|
|
+static DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_debug_device(struct device *dev, struct device_attribute *attr, char *buf)
|
|
+{
|
|
+ struct blktap *tap;
|
|
+ char *s = buf, *end = buf + PAGE_SIZE;
|
|
+
|
|
+ tap = dev_get_drvdata(dev);
|
|
+ if (!tap)
|
|
+ return 0;
|
|
+
|
|
+ s += blktap_control_debug(tap, s, end - s);
|
|
+
|
|
+ s += blktap_request_debug(tap, s, end - s);
|
|
+
|
|
+ s += blktap_device_debug(tap, s, end - s);
|
|
+
|
|
+ s += blktap_ring_debug(tap, s, end - s);
|
|
+
|
|
+ return s - buf;
|
|
+}
|
|
+static DEVICE_ATTR(debug, S_IRUGO, blktap_sysfs_debug_device, NULL);
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_show_task(struct device *dev, struct device_attribute *attr, char *buf)
|
|
+{
|
|
+ struct blktap *tap;
|
|
+ ssize_t rv = 0;
|
|
+
|
|
+ tap = dev_get_drvdata(dev);
|
|
+ if (!tap)
|
|
+ return 0;
|
|
+
|
|
+ if (tap->ring.task)
|
|
+ rv = sprintf(buf, "%d\n", tap->ring.task->pid);
|
|
+
|
|
+ return rv;
|
|
+}
|
|
+static DEVICE_ATTR(task, S_IRUGO, blktap_sysfs_show_task, NULL);
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_show_pool(struct device *dev,
|
|
+ struct device_attribute *attr,
|
|
+ char *buf)
|
|
+{
|
|
+ struct blktap *tap = dev_get_drvdata(dev);
|
|
+ return sprintf(buf, "%s", kobject_name(&tap->pool->kobj));
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_store_pool(struct device *dev,
|
|
+ struct device_attribute *attr,
|
|
+ const char *buf, size_t size)
|
|
+{
|
|
+ struct blktap *tap = dev_get_drvdata(dev);
|
|
+ struct blktap_page_pool *pool, *tmp = tap->pool;
|
|
+
|
|
+ if (tap->device.gd)
|
|
+ return -EBUSY;
|
|
+
|
|
+ pool = blktap_page_pool_get(buf);
|
|
+ if (IS_ERR(pool))
|
|
+ return PTR_ERR(pool);
|
|
+
|
|
+ tap->pool = pool;
|
|
+ kobject_put(&tmp->kobj);
|
|
+
|
|
+ return size;
|
|
+}
|
|
+DEVICE_ATTR(pool, S_IRUSR|S_IWUSR,
|
|
+ blktap_sysfs_show_pool, blktap_sysfs_store_pool);
|
|
+
|
|
+int
|
|
+blktap_sysfs_create(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct device *dev;
|
|
+ int err = 0;
|
|
+
|
|
+ init_waitqueue_head(&tap->remove_wait);
|
|
+
|
|
+ dev = device_create(class, NULL, ring->devno,
|
|
+ tap, "blktap%d", tap->minor);
|
|
+ if (IS_ERR(dev))
|
|
+ err = PTR_ERR(dev);
|
|
+ if (!err)
|
|
+ err = device_create_file(dev, &dev_attr_name);
|
|
+ if (!err)
|
|
+ err = device_create_file(dev, &dev_attr_remove);
|
|
+ if (!err)
|
|
+ err = device_create_file(dev, &dev_attr_debug);
|
|
+ if (!err)
|
|
+ err = device_create_file(dev, &dev_attr_task);
|
|
+ if (!err)
|
|
+ err = device_create_file(dev, &dev_attr_pool);
|
|
+ if (!err)
|
|
+ ring->dev = dev;
|
|
+ else
|
|
+ device_unregister(dev);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_sysfs_destroy(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ struct device *dev;
|
|
+
|
|
+ dev = ring->dev;
|
|
+
|
|
+ if (!dev)
|
|
+ return;
|
|
+
|
|
+ dev_set_drvdata(dev, NULL);
|
|
+ wake_up(&tap->remove_wait);
|
|
+
|
|
+ device_unregister(dev);
|
|
+ ring->dev = NULL;
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_show_verbosity(struct class *class, char *buf)
|
|
+{
|
|
+ return sprintf(buf, "%d\n", blktap_debug_level);
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size)
|
|
+{
|
|
+ int level;
|
|
+
|
|
+ if (sscanf(buf, "%d", &level) == 1) {
|
|
+ blktap_debug_level = level;
|
|
+ return size;
|
|
+ }
|
|
+
|
|
+ return -EINVAL;
|
|
+}
|
|
+static CLASS_ATTR(verbosity, S_IRUGO|S_IWUSR,
|
|
+ blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity);
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_show_devices(struct class *class, char *buf)
|
|
+{
|
|
+ int i, ret;
|
|
+ struct blktap *tap;
|
|
+
|
|
+ mutex_lock(&blktap_lock);
|
|
+
|
|
+ ret = 0;
|
|
+ for (i = 0; i < blktap_max_minor; i++) {
|
|
+ tap = blktaps[i];
|
|
+ if (!tap)
|
|
+ continue;
|
|
+
|
|
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
|
|
+ continue;
|
|
+
|
|
+ ret += sprintf(buf + ret, "%d %s\n", tap->minor, tap->name);
|
|
+ }
|
|
+
|
|
+ mutex_unlock(&blktap_lock);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+static CLASS_ATTR(devices, S_IRUGO, blktap_sysfs_show_devices, NULL);
|
|
+
|
|
+void
|
|
+blktap_sysfs_exit(void)
|
|
+{
|
|
+ if (class)
|
|
+ class_destroy(class);
|
|
+}
|
|
+
|
|
+int __init
|
|
+blktap_sysfs_init(void)
|
|
+{
|
|
+ struct class *cls;
|
|
+ int err = 0;
|
|
+
|
|
+ cls = class_create(THIS_MODULE, "blktap2");
|
|
+ if (IS_ERR(cls))
|
|
+ err = PTR_ERR(cls);
|
|
+ if (!err)
|
|
+ err = class_create_file(cls, &class_attr_verbosity);
|
|
+ if (!err)
|
|
+ err = class_create_file(cls, &class_attr_devices);
|
|
+ if (!err)
|
|
+ class = cls;
|
|
+ else
|
|
+ class_destroy(cls);
|
|
+
|
|
+ return err;
|
|
+}
|