59007 lines
1.6 MiB
59007 lines
1.6 MiB
Subject: xen3 xen-drivers
|
|
From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1073:8fe973d8fb98)
|
|
Patch-mainline: n/a
|
|
Acked-by: jbeulich@novell.com
|
|
|
|
List of files that are no longer needed, e.g. because a suitable
|
|
upstream variant exists (and that were hence removed from this patch), kept
|
|
for reference and in case upstream wants to take the forward porting patches:
|
|
2.6.26/drivers/xen/core/features.c
|
|
2.6.26/drivers/xen/core/xencomm.c
|
|
2.6.31/drivers/xen/evtchn/Makefile
|
|
2.6.31/drivers/xen/evtchn/evtchn.c
|
|
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/balloon/Makefile 2007-06-12 13:13:44.000000000 +0200
|
|
@@ -0,0 +1,2 @@
|
|
+
|
|
+obj-y := balloon.o sysfs.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/balloon/balloon.c 2010-03-31 09:56:02.000000000 +0200
|
|
@@ -0,0 +1,757 @@
|
|
+/******************************************************************************
|
|
+ * balloon.c
|
|
+ *
|
|
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
|
|
+ *
|
|
+ * Copyright (c) 2003, B Dragovic
|
|
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
|
|
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/mman.h>
|
|
+#include <linux/smp_lock.h>
|
|
+#include <linux/pagemap.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/highmem.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <xen/xen_proc.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/balloon.h>
|
|
+#include <xen/interface/memory.h>
|
|
+#include <asm/maddr.h>
|
|
+#include <asm/page.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <asm/tlb.h>
|
|
+#include <linux/highmem.h>
|
|
+#include <linux/list.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include "common.h"
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_PROC_FS
|
|
+static struct proc_dir_entry *balloon_pde;
|
|
+#endif
|
|
+
|
|
+static DEFINE_MUTEX(balloon_mutex);
|
|
+
|
|
+/*
|
|
+ * Protects atomic reservation decrease/increase against concurrent increases.
|
|
+ * Also protects non-atomic updates of current_pages and driver_pages, and
|
|
+ * balloon lists.
|
|
+ */
|
|
+DEFINE_SPINLOCK(balloon_lock);
|
|
+
|
|
+struct balloon_stats balloon_stats;
|
|
+
|
|
+/* We increase/decrease in batches which fit in a page */
|
|
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
|
|
+
|
|
+/* VM /proc information for memory */
|
|
+extern unsigned long totalram_pages;
|
|
+
|
|
+#ifndef MODULE
|
|
+extern unsigned long totalhigh_pages;
|
|
+#define inc_totalhigh_pages() (totalhigh_pages++)
|
|
+#define dec_totalhigh_pages() (totalhigh_pages--)
|
|
+#else
|
|
+#define inc_totalhigh_pages() ((void)0)
|
|
+#define dec_totalhigh_pages() ((void)0)
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * In HVM guests accounting here uses the Xen visible values, but the kernel
|
|
+ * determined totalram_pages value shouldn't get altered. Since totalram_pages
|
|
+ * includes neither the kernel static image nor any memory allocated prior to
|
|
+ * or from the bootmem allocator, we have to synchronize the two values.
|
|
+ */
|
|
+static unsigned long __read_mostly totalram_bias;
|
|
+#else
|
|
+#define totalram_bias 0
|
|
+#endif
|
|
+
|
|
+/* List of ballooned pages, threaded through the mem_map array. */
|
|
+static LIST_HEAD(ballooned_pages);
|
|
+
|
|
+/* Main work function, always executed in process context. */
|
|
+static void balloon_process(void *unused);
|
|
+static DECLARE_WORK(balloon_worker, balloon_process, NULL);
|
|
+static struct timer_list balloon_timer;
|
|
+
|
|
+/* When ballooning out (allocating memory to return to Xen) we don't really
|
|
+ want the kernel to try too hard since that can trigger the oom killer. */
|
|
+#define GFP_BALLOON \
|
|
+ (GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD)
|
|
+
|
|
+/* A page is chained into a list through its 'lru' member. */
+#define PAGE_TO_LIST(p) (&(p)->lru)
+/* Inverse of PAGE_TO_LIST(): recover the struct page from its list node. */
+#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
+/* Unlink a page from its list and clear both of its link pointers. */
+#define UNLIST_PAGE(p)				\
+	do {					\
+		list_del(PAGE_TO_LIST(p));	\
+		PAGE_TO_LIST(p)->next = NULL;	\
+		PAGE_TO_LIST(p)->prev = NULL;	\
+	} while(0)
|
|
+
|
|
+#define IPRINTK(fmt, args...) \
|
|
+ printk(KERN_INFO "xen_mem: " fmt, ##args)
|
|
+#define WPRINTK(fmt, args...) \
|
|
+ printk(KERN_WARNING "xen_mem: " fmt, ##args)
|
|
+
|
|
+/*
+ * balloon_append: link @page into the ballooned_pages list and bump the
+ * matching low/high balloon counter.
+ */
+static void balloon_append(struct page *page)
+{
+	struct list_head *node = PAGE_TO_LIST(page);
+
+	if (!PageHighMem(page)) {
+		/* Lowmem goes to the head so that it is re-populated first. */
+		list_add(node, &ballooned_pages);
+		bs.balloon_low++;
+		return;
+	}
+
+	/* Highmem pages queue up at the tail, behind all lowmem pages. */
+	list_add_tail(node, &ballooned_pages);
+	bs.balloon_high++;
+	dec_totalhigh_pages();
+}
|
|
+
|
|
+/*
+ * balloon_retrieve: unlink and return the page at the head of the balloon
+ * list, adjusting the low/high counters.  Returns NULL if the list is empty.
+ */
+static struct page *balloon_retrieve(void)
+{
+	struct page *head;
+
+	if (list_empty(&ballooned_pages))
+		return NULL;
+
+	head = LIST_TO_PAGE(ballooned_pages.next);
+	UNLIST_PAGE(head);
+
+	if (!PageHighMem(head)) {
+		bs.balloon_low--;
+		return head;
+	}
+
+	bs.balloon_high--;
+	inc_totalhigh_pages();
+	return head;
+}
|
|
+
|
|
+/* Peek at the first ballooned page without unlinking it; NULL if there is none. */
+static struct page *balloon_first_page(void)
+{
+	return list_empty(&ballooned_pages)
+		? NULL : LIST_TO_PAGE(ballooned_pages.next);
+}
|
|
+
|
|
+/* Return the ballooned page that follows @page, or NULL at the end of the list. */
+static struct page *balloon_next_page(struct page *page)
+{
+	struct list_head *succ = PAGE_TO_LIST(page)->next;
+
+	return (succ != &ballooned_pages) ? LIST_TO_PAGE(succ) : NULL;
+}
|
|
+
|
|
+/*
+ * Give @page back to the page allocator.  Built-in code drops the reference
+ * and uses free_cold_page(); module builds fall back to __free_page().
+ */
+static inline void balloon_free_page(struct page *page)
+{
+#ifdef MODULE
+	/* free_cold_page() is not being exported. */
+	__free_page(page);
+#else
+	if (put_page_testzero(page))
+		free_cold_page(page);
+#endif
+}
|
|
+
|
|
+/*
+ * Timer callback (installed as balloon_timer.function in balloon_init()):
+ * queue the balloon worker.  The timer argument is unused.
+ */
+static void balloon_alarm(unsigned long unused)
+{
+	schedule_work(&balloon_worker);
+}
|
|
+
|
|
+/*
+ * Effective target in pages: bs.target_pages, capped at the pages currently
+ * held plus everything sitting in the low and high balloons.
+ */
+static unsigned long current_target(void)
+{
+	unsigned long wanted = bs.target_pages;
+	unsigned long ceiling;
+
+	ceiling = bs.current_pages + bs.balloon_low + bs.balloon_high;
+	if (wanted <= ceiling)
+		return wanted;
+
+	return bs.current_pages + bs.balloon_low + bs.balloon_high;
+}
|
|
+
|
|
+/*
+ * minimum_target: lower bound (in pages) applied to requested targets.
+ *
+ * The bound is a piecewise linear function of max_pfn (num_physpages when
+ * CONFIG_XEN is not defined), and is capped at current_target() so that it
+ * never forces growth.
+ */
+static unsigned long minimum_target(void)
+{
+#ifndef CONFIG_XEN
+#define max_pfn num_physpages
+#endif
+	unsigned long floor_pages;
+	unsigned long cur_target = current_target();
+
+#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+	/* Simple continuous piecewise linear function:
+	 *  max MiB -> min MiB	gradient
+	 *       0	   0
+	 *      16	  16
+	 *      32	  24
+	 *     128	  72	(1/2)
+	 *     512	 168	(1/4)
+	 *    2048	 360	(1/8)
+	 *    8192	 552	(1/32)
+	 *   32768	1320
+	 *  131072	4392
+	 */
+	if (max_pfn >= MB2PAGES(2048))
+		floor_pages = MB2PAGES(296) + (max_pfn >> 5);
+	else if (max_pfn >= MB2PAGES(512))
+		floor_pages = MB2PAGES(104) + (max_pfn >> 3);
+	else if (max_pfn >= MB2PAGES(128))
+		floor_pages = MB2PAGES(40) + (max_pfn >> 2);
+	else
+		floor_pages = MB2PAGES(8) + (max_pfn >> 1);
+#undef MB2PAGES
+
+	/* Don't enforce growth */
+	return min(floor_pages, cur_target);
+#ifndef CONFIG_XEN
+#undef max_pfn
+#endif
+}
|
|
+
|
|
+/*
+ * increase_reservation: ask Xen to populate up to @nr_pages ballooned pages
+ * again and release them to the page allocator.
+ *
+ * The request is clamped to one frame_list batch.  Returns a negative value
+ * if the hypercall failed, 0 if the whole batch was populated, and 1 if Xen
+ * populated only part of it.
+ */
+static int increase_reservation(unsigned long nr_pages)
+{
+	unsigned long pfn, i, flags;
+	struct page *page;
+	long rc;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+
+	if (nr_pages > ARRAY_SIZE(frame_list))
+		nr_pages = ARRAY_SIZE(frame_list);
+
+	balloon_lock(flags);
+
+	/* Record the PFNs of the first nr_pages ballooned pages (still linked). */
+	page = balloon_first_page();
+	for (i = 0; i < nr_pages; i++) {
+		BUG_ON(page == NULL);
+		frame_list[i] = page_to_pfn(page);
+		page = balloon_next_page(page);
+	}
+
+	set_xen_guest_handle(reservation.extent_start, frame_list);
+	reservation.nr_extents = nr_pages;
+	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
+	if (rc < 0)
+		goto out;
+
+	/* rc is non-negative here, so the cast below is value-preserving. */
+	for (i = 0; i < (unsigned long)rc; i++) {
+		page = balloon_retrieve();
+		BUG_ON(page == NULL);
+
+		pfn = page_to_pfn(page);
+		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
+		       phys_to_machine_mapping_valid(pfn));
+
+		set_phys_to_machine(pfn, frame_list[i]);
+
+#ifdef CONFIG_XEN
+		/* Link back into the page tables if not highmem. */
+		if (pfn < max_low_pfn) {
+			int ret;
+			ret = HYPERVISOR_update_va_mapping(
+				(unsigned long)__va(pfn << PAGE_SHIFT),
+				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
+				0);
+			BUG_ON(ret);
+		}
+#endif
+
+		/* Relinquish the page back to the allocator. */
+		ClearPageReserved(page);
+		init_page_count(page);
+		balloon_free_page(page);
+	}
+
+	bs.current_pages += rc;
+	totalram_pages = bs.current_pages - totalram_bias;
+
+ out:
+	balloon_unlock(flags);
+
+	return rc < 0 ? rc : rc != nr_pages;
+}
|
|
+
|
|
+/*
+ * decrease_reservation: allocate up to @nr_pages pages from the kernel, strip
+ * their mappings, put them in the balloon and return their frames to Xen.
+ *
+ * The request is clamped to one frame_list batch.  Returns 1 if a page
+ * allocation failed (the batch was cut short), otherwise 0.
+ */
+static int decrease_reservation(unsigned long nr_pages)
+{
+	unsigned long pfn, i, flags;
+	struct page *page;
+	void *v;
+	int need_sleep = 0;
+	int ret;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+
+	if (nr_pages > ARRAY_SIZE(frame_list))
+		nr_pages = ARRAY_SIZE(frame_list);
+
+	for (i = 0; i < nr_pages; i++) {
+		page = alloc_page(GFP_BALLOON);
+		if (page == NULL) {
+			/* Work with what we got and tell the caller to back off. */
+			nr_pages = i;
+			need_sleep = 1;
+			break;
+		}
+
+		pfn = page_to_pfn(page);
+		frame_list[i] = pfn_to_mfn(pfn);
+
+		if (!PageHighMem(page)) {
+			v = phys_to_virt(pfn << PAGE_SHIFT);
+			scrub_pages(v, 1);
+#ifdef CONFIG_XEN
+			/* Remove the lowmem kernel mapping of the page. */
+			ret = HYPERVISOR_update_va_mapping(
+				(unsigned long)v, __pte_ma(0), 0);
+			BUG_ON(ret);
+#endif
+		}
+#ifdef CONFIG_XEN_SCRUB_PAGES
+		else {
+			v = kmap(page);
+			scrub_pages(v, 1);
+			kunmap(page);
+		}
+#endif
+	}
+
+#ifdef CONFIG_XEN
+	/* Ensure that ballooned highmem pages don't have kmaps. */
+	kmap_flush_unused();
+	flush_tlb_all();
+#endif
+
+	balloon_lock(flags);
+
+	/* No more mappings: invalidate P2M and add to balloon. */
+	for (i = 0; i < nr_pages; i++) {
+		pfn = mfn_to_pfn(frame_list[i]);
+		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		balloon_append(pfn_to_page(pfn));
+	}
+
+	set_xen_guest_handle(reservation.extent_start, frame_list);
+	reservation.nr_extents = nr_pages;
+	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	BUG_ON(ret != nr_pages);
+
+	bs.current_pages -= nr_pages;
+	totalram_pages = bs.current_pages - totalram_bias;
+
+	balloon_unlock(flags);
+
+	return need_sleep;
+}
|
|
+
|
|
+/*
+ * balloon_process: work handler that moves the reservation towards the
+ * current target, one batch at a time.
+ *
+ * We avoid multiple worker processes conflicting via the balloon mutex.
+ * We may of course race updates of the target counts (which are protected
+ * by the balloon lock), or with changes to the Xen hard limit, but we will
+ * recover from these in time.
+ */
+static void balloon_process(void *unused)
+{
+	int need_sleep = 0;
+	long credit;
+
+	mutex_lock(&balloon_mutex);
+
+	for (;;) {
+		credit = current_target() - bs.current_pages;
+		if (credit > 0)
+			need_sleep = (increase_reservation(credit) != 0);
+		else if (credit < 0)
+			need_sleep = (decrease_reservation(-credit) != 0);
+
+#ifndef CONFIG_PREEMPT
+		if (need_resched())
+			schedule();
+#endif
+
+		/* Stop once on target, or when a step asked us to back off. */
+		if (credit == 0 || need_sleep)
+			break;
+	}
+
+	/* Schedule more work if there is some still to be done. */
+	if (current_target() != bs.current_pages)
+		mod_timer(&balloon_timer, jiffies + HZ);
+
+	mutex_unlock(&balloon_mutex);
+}
|
|
+
|
|
+/*
+ * Store a new target (in pages), raised to minimum_target() if it is lower,
+ * and queue the balloon worker to act on it.
+ */
+void balloon_set_new_target(unsigned long target)
+{
+	unsigned long floor_pages = minimum_target();
+
+	/* No need for lock. Not read-modify-write updates. */
+	bs.target_pages = (target > floor_pages) ? target : floor_pages;
+	schedule_work(&balloon_worker);
+}
|
|
+
|
|
+static struct xenbus_watch target_watch =
|
|
+{
|
|
+ .node = "memory/target"
|
|
+};
|
|
+
|
|
+/*
+ * Xenbus watch callback (installed as target_watch.callback): re-read
+ * memory/target and apply it as the new balloon target.
+ */
+static void watch_target(struct xenbus_watch *watch,
+			 const char **vec, unsigned int len)
+{
+	unsigned long long new_target;
+
+	/* A missing/unparsable key is ok (for domain0 at least) - just return. */
+	if (xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target) != 1)
+		return;
+
+	/*
+	 * The memory/target value is in KiB and must be converted to pages.
+	 * A page is 2^PAGE_SHIFT bytes, i.e. 2^(PAGE_SHIFT - 10) KiB, hence
+	 * the shift by PAGE_SHIFT - 10.
+	 */
+	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+}
|
|
+
|
|
+/*
+ * Xenstore notifier callback: register the memory/target watch.  A failure
+ * is only logged; the function always returns NOTIFY_DONE.
+ */
+static int balloon_init_watcher(struct notifier_block *notifier,
+				unsigned long event,
+				void *data)
+{
+	if (register_xenbus_watch(&target_watch) != 0)
+		printk(KERN_ERR "Failed to set balloon watcher\n");
+
+	return NOTIFY_DONE;
+}
|
|
+
|
|
+#ifdef CONFIG_PROC_FS
|
|
+/*
+ * /proc write handler: parse a memory size (memparse() syntax) written by a
+ * CAP_SYS_ADMIN caller and make it the new balloon target.
+ *
+ * Returns @count on success, -EPERM without CAP_SYS_ADMIN, -EBADMSG for
+ * writes of at most one byte, -EFBIG if the input does not fit the local
+ * buffer together with its terminator, and -EFAULT if the user copy fails.
+ */
+static int balloon_write(struct file *file, const char __user *buffer,
+			 unsigned long count, void *data)
+{
+	char memstring[64], *endchar;
+	unsigned long long target_bytes;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count <= 1)
+		return -EBADMSG; /* runt */
+	/* Leave room for the terminating NUL written below. */
+	if (count >= sizeof(memstring))
+		return -EFBIG;   /* too long */
+
+	if (copy_from_user(memstring, buffer, count))
+		return -EFAULT;
+	/*
+	 * Terminate right after the copied bytes so that memparse() never
+	 * reads uninitialised stack contents beyond the user's input.
+	 */
+	memstring[count] = '\0';
+
+	target_bytes = memparse(memstring, &endchar);
+	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+
+	return count;
+}
|
|
+
|
|
+/*
+ * /proc read handler: format the balloon statistics (all in kB) into @page,
+ * flag end-of-file and return the number of bytes written.
+ */
+static int balloon_read(char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	*eof = 1;
+
+	return sprintf(page,
+		       "Current allocation: %8lu kB\n"
+		       "Requested target: %8lu kB\n"
+		       "Low-mem balloon: %8lu kB\n"
+		       "High-mem balloon: %8lu kB\n"
+		       "Driver pages: %8lu kB\n",
+		       PAGES2KB(bs.current_pages),
+		       PAGES2KB(bs.target_pages),
+		       PAGES2KB(bs.balloon_low),
+		       PAGES2KB(bs.balloon_high),
+		       PAGES2KB(bs.driver_pages));
+}
|
|
+#endif
|
|
+
|
|
+static struct notifier_block xenstore_notifier;
|
|
+
|
|
+/*
+ * balloon_init: initcall that sets up the balloon statistics, the retry
+ * timer, the /proc and sysfs interfaces and the xenstore notifier.
+ *
+ * Returns 0 on success, -ENODEV when not running on Xen, and -ENOMEM if the
+ * /proc/xen/balloon entry cannot be created.
+ */
+static int __init balloon_init(void)
+{
+#if !defined(CONFIG_XEN)
+# ifndef XENMEM_get_pod_target
+# define XENMEM_get_pod_target 17
+	typedef struct xen_pod_target {
+		uint64_t target_pages;
+		uint64_t tot_pages;
+		uint64_t pod_cache_pages;
+		uint64_t pod_entries;
+		domid_t domid;
+	} xen_pod_target_t;
+# endif
+	xen_pod_target_t pod_target = { .domid = DOMID_SELF };
+	int rc;
+#elif defined(CONFIG_X86)
+	unsigned long pfn;
+	struct page *page;
+#endif
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	IPRINTK("Initialising balloon driver.\n");
+
+#ifdef CONFIG_XEN
+	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
+	totalram_pages   = bs.current_pages;
+#else
+	rc = HYPERVISOR_memory_op(XENMEM_get_pod_target, &pod_target);
+	/*
+	 * Xen prior to 3.4.0 masks the memory_op command to 4 bits, thus
+	 * converting XENMEM_get_pod_target to XENMEM_decrease_reservation.
+	 * Fortunately this results in a request with all input fields zero,
+	 * but (due to the way bit 4 and upwards get interpreted) a starting
+	 * extent of 1. When start_extent > nr_extents (>= in newer Xen), we
+	 * simply get start_extent returned.
+	 */
+	totalram_bias = HYPERVISOR_memory_op(rc != -ENOSYS && rc != 1
+		? XENMEM_maximum_reservation : XENMEM_current_reservation,
+		&pod_target.domid);
+	if ((long)totalram_bias != -ENOSYS) {
+		BUG_ON(totalram_bias < totalram_pages);
+		bs.current_pages = totalram_bias;
+		totalram_bias -= totalram_pages;
+	} else {
+		totalram_bias = 0;
+		bs.current_pages = totalram_pages;
+	}
+#endif
+	bs.target_pages  = bs.current_pages;
+	bs.balloon_low   = 0;
+	bs.balloon_high  = 0;
+	bs.driver_pages  = 0UL;
+
+	init_timer(&balloon_timer);
+	balloon_timer.data = 0;
+	balloon_timer.function = balloon_alarm;
+
+#ifdef CONFIG_PROC_FS
+	if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
+		WPRINTK("Unable to create /proc/xen/balloon.\n");
+		/* Report a real errno instead of a bare -1 (which is -EPERM). */
+		return -ENOMEM;
+	}
+
+	balloon_pde->read_proc  = balloon_read;
+	balloon_pde->write_proc = balloon_write;
+#endif
+	/* The sysfs interface is best effort: log a failure, but carry on. */
+	if (balloon_sysfs_init() != 0)
+		WPRINTK("Unable to register sysfs interface.\n");
+
+#if defined(CONFIG_X86) && defined(CONFIG_XEN)
+	/* Initialise the balloon with excess memory space. */
+	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+		page = pfn_to_page(pfn);
+		if (!PageReserved(page))
+			balloon_append(page);
+	}
+#endif
+
+	target_watch.callback = watch_target;
+	xenstore_notifier.notifier_call = balloon_init_watcher;
+
+	register_xenstore_notifier(&xenstore_notifier);
+
+	return 0;
+}
|
|
+
|
|
+subsys_initcall(balloon_init);
|
|
+
|
|
+/*
+ * Module exit: only the sysfs interface is torn down here; nothing else set
+ * up by balloon_init() is undone in this function.
+ */
+static void __exit balloon_exit(void)
+{
+	balloon_sysfs_exit();
+	/* XXX - release balloon here */
+}
|
|
+
|
|
+module_exit(balloon_exit);
|
|
+
|
|
+/*
+ * Adjust bs.driver_pages by @delta (which may be negative) under the
+ * balloon lock.
+ */
+void balloon_update_driver_allowance(long delta)
+{
+	unsigned long flags;
+
+	balloon_lock(flags);
+	bs.driver_pages += delta;
+	balloon_unlock(flags);
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+/*
+ * apply_to_page_range() callback used by alloc_empty_pages_and_pagevec():
+ * clear the PTE at @addr, invalidate the P2M entry for that address and
+ * return the machine frame the PTE pointed at to Xen.  Always returns 0;
+ * a hypercall result other than 1 triggers BUG().
+ */
+static int dealloc_pte_fn(
+	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
+{
+	/* Read the frame number before the PTE is cleared below. */
+	unsigned long mfn = pte_mfn(*pte);
+	int ret;
+	struct xen_memory_reservation reservation = {
+		.nr_extents   = 1,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	set_xen_guest_handle(reservation.extent_start, &mfn);
+	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
+	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	BUG_ON(ret != 1);
+	return 0;
+}
|
|
+#endif
|
|
+
|
|
+/*
+ * alloc_empty_pages_and_pagevec: build a kmalloc()ed vector of @nr_pages
+ * lowmem pages that have no machine memory behind them.
+ *
+ * Each slot is filled from the head of the balloon when a lowmem page is
+ * waiting there; otherwise a fresh page is allocated, scrubbed and its
+ * backing frame is handed back to Xen.  On any failure the pages gathered so
+ * far are put (back) into the balloon, the vector is freed and NULL is
+ * returned.  The balloon worker is queued on both the success and the
+ * failure path.
+ */
+struct page **alloc_empty_pages_and_pagevec(int nr_pages)
+{
+	unsigned long flags;
+	void *v;
+	struct page *page, **pagevec;
+	int i, ret;
+
+	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
+	if (pagevec == NULL)
+		return NULL;
+
+	for (i = 0; i < nr_pages; i++) {
+		/* Prefer a lowmem page that is already ballooned out. */
+		balloon_lock(flags);
+		page = balloon_first_page();
+		if (page && !PageHighMem(page)) {
+			UNLIST_PAGE(page);
+			bs.balloon_low--;
+			balloon_unlock(flags);
+			pagevec[i] = page;
+			continue;
+		}
+		balloon_unlock(flags);
+
+		page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
+		if (page == NULL)
+			goto err;
+
+		v = page_address(page);
+		scrub_pages(v, 1);
+
+		balloon_lock(flags);
+
+		if (xen_feature(XENFEAT_auto_translated_physmap)) {
+			unsigned long gmfn = page_to_pfn(page);
+			struct xen_memory_reservation reservation = {
+				.nr_extents   = 1,
+				.extent_order = 0,
+				.domid        = DOMID_SELF
+			};
+			set_xen_guest_handle(reservation.extent_start, &gmfn);
+			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+						   &reservation);
+			/*
+			 * The hypercall reports how many extents it released.
+			 * Only exactly 1 is success; in particular 0 means the
+			 * frame was NOT given up and must be treated as failure.
+			 */
+			ret = (ret == 1) ? 0 : -ENOMEM;
+		} else {
+#ifdef CONFIG_XEN
+			ret = apply_to_page_range(&init_mm, (unsigned long)v,
+						  PAGE_SIZE, dealloc_pte_fn,
+						  NULL);
+#else
+			/* Cannot handle non-auto translate mode. */
+			ret = 1;
+#endif
+		}
+
+		if (ret != 0) {
+			balloon_unlock(flags);
+			balloon_free_page(page);
+			goto err;
+		}
+
+		totalram_pages = --bs.current_pages - totalram_bias;
+
+		balloon_unlock(flags);
+	}
+
+ out:
+	schedule_work(&balloon_worker);
+#ifdef CONFIG_XEN
+	flush_tlb_all();
+#endif
+	return pagevec;
+
+ err:
+	/* Slots [0, i) hold empty pages: park them in the balloon. */
+	balloon_lock(flags);
+	while (--i >= 0)
+		balloon_append(pagevec[i]);
+	balloon_unlock(flags);
+	kfree(pagevec);
+	pagevec = NULL;
+	goto out;
+}
|
|
+
|
|
+/*
+ * free_empty_pages_and_pagevec: put the @nr_pages pages of @pagevec into the
+ * balloon, free the vector itself and queue the balloon worker.  A NULL
+ * @pagevec is ignored.  Every page must have a reference count of exactly 1.
+ */
+void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
+{
+	unsigned long flags;
+	int idx;
+
+	if (pagevec == NULL)
+		return;
+
+	balloon_lock(flags);
+	for (idx = 0; idx < nr_pages; idx++) {
+		struct page *pg = pagevec[idx];
+
+		BUG_ON(page_count(pg) != 1);
+		balloon_append(pg);
+	}
+	balloon_unlock(flags);
+
+	kfree(pagevec);
+
+	schedule_work(&balloon_worker);
+}
|
|
+
|
|
+/*
+ * Move a page that was accounted in bs.driver_pages into the balloon and
+ * queue the balloon worker.
+ */
+void balloon_release_driver_page(struct page *page)
+{
+	unsigned long flags;
+
+	balloon_lock(flags);
+	/* Both updates happen under the same lock; their order is immaterial. */
+	bs.driver_pages--;
+	balloon_append(page);
+	balloon_unlock(flags);
+
+	schedule_work(&balloon_worker);
+}
|
|
+
|
|
+EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
|
|
+EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
|
|
+EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
|
|
+EXPORT_SYMBOL_GPL(balloon_release_driver_page);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/balloon/common.h 2009-06-09 15:01:37.000000000 +0200
|
|
@@ -0,0 +1,56 @@
|
|
+/******************************************************************************
|
|
+ * balloon/common.h
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __XEN_BALLOON_COMMON_H__
|
|
+#define __XEN_BALLOON_COMMON_H__
|
|
+
|
|
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
|
|
+
|
|
+/* Balloon accounting; every counter is in units of pages. */
+struct balloon_stats {
+	/* We aim for 'current allocation' == 'target allocation'. */
+	unsigned long current_pages;
+	unsigned long target_pages;
+	/*
+	 * Drivers may alter the memory reservation independently, but they
+	 * must inform the balloon driver so we avoid hitting the hard limit.
+	 */
+	unsigned long driver_pages;
+	/* Number of pages in high- and low-memory balloons. */
+	unsigned long balloon_low;
+	unsigned long balloon_high;
+};
|
|
+
|
|
+extern struct balloon_stats balloon_stats;
|
|
+#define bs balloon_stats
|
|
+
|
|
+int balloon_sysfs_init(void);
|
|
+void balloon_sysfs_exit(void);
|
|
+
|
|
+void balloon_set_new_target(unsigned long target);
|
|
+
|
|
+#endif /* __XEN_BALLOON_COMMON_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/balloon/sysfs.c 2009-06-09 15:01:37.000000000 +0200
|
|
@@ -0,0 +1,167 @@
|
|
+/******************************************************************************
|
|
+ * balloon/sysfs.c
|
|
+ *
|
|
+ * Xen balloon driver - sysfs interfaces.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/capability.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/stat.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/sysdev.h>
|
|
+#include "common.h"
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+#define BALLOON_CLASS_NAME "xen_memory"
|
|
+
|
|
+/*
+ * Define a read-only (S_IRUGO) sysdev attribute 'name' together with its
+ * show_<name>() handler, which sprintf()s 'args' into the buffer using
+ * 'format'.
+ */
+#define BALLOON_SHOW(name, format, args...)			\
+	static ssize_t show_##name(struct sys_device *dev,	\
+				   char *buf)			\
+	{							\
+		return sprintf(buf, format, ##args);		\
+	}							\
+	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
|
|
+
|
|
+BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(bs.current_pages));
|
|
+BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(bs.balloon_low));
|
|
+BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
|
|
+BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
|
|
+
|
|
+/* sysfs show handler for target_kb: print bs.target_pages converted to kB. */
+static ssize_t show_target_kb(struct sys_device *dev, char *buf)
+{
+	return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
+}
|
|
+
|
|
+/*
+ * sysfs store handler for target_kb: parse @buf with memparse() and make the
+ * result the new balloon target.
+ *
+ * Returns @count on success, -EPERM without CAP_SYS_ADMIN, -EBADMSG for
+ * writes of at most one byte, and -EFBIG if the input does not fit the local
+ * buffer together with its terminator.
+ *
+ * NOTE(review): the parsed value is shifted by PAGE_SHIFT, i.e. a plain
+ * number is taken as bytes, while the attribute is named target_kb and is
+ * shown in kB - confirm the intended unit.
+ */
+static ssize_t store_target_kb(struct sys_device *dev,
+			       const char *buf,
+			       size_t count)
+{
+	char memstring[64], *endchar;
+	unsigned long long target_bytes;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (count <= 1)
+		return -EBADMSG; /* runt */
+	/*
+	 * '>=' rather than '>': @count bytes plus the terminating NUL must
+	 * fit, otherwise the copy below would write one byte past the buffer.
+	 */
+	if (count >= sizeof(memstring))
+		return -EFBIG;   /* too long */
+	/* Bounded copy; does not depend on @buf being NUL-terminated. */
+	memcpy(memstring, buf, count);
+	memstring[count] = '\0';
+
+	target_bytes = memparse(memstring, &endchar);
+	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
+
+	return count;
+}
|
|
+
|
|
+static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
|
|
+ show_target_kb, store_target_kb);
|
|
+
|
|
+static struct sysdev_attribute *balloon_attrs[] = {
|
|
+ &attr_target_kb,
|
|
+};
|
|
+
|
|
+static struct attribute *balloon_info_attrs[] = {
|
|
+ &attr_current_kb.attr,
|
|
+ &attr_low_kb.attr,
|
|
+ &attr_high_kb.attr,
|
|
+ &attr_driver_kb.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static struct attribute_group balloon_info_group = {
|
|
+ .name = "info",
|
|
+ .attrs = balloon_info_attrs,
|
|
+};
|
|
+
|
|
+static struct sysdev_class balloon_sysdev_class = {
|
|
+ set_kset_name(BALLOON_CLASS_NAME),
|
|
+};
|
|
+
|
|
+static struct sys_device balloon_sysdev;
|
|
+
|
|
+/*
+ * register_balloon: register the balloon sysdev class and device, then
+ * create the attribute files listed in balloon_attrs[] and the "info"
+ * attribute group.  Returns 0 on success; on failure everything registered
+ * so far is undone and the error code of the failing call is returned.
+ */
+static int __init register_balloon(struct sys_device *sysdev)
+{
+	int i, error;
+
+	error = sysdev_class_register(&balloon_sysdev_class);
+	if (error)
+		return error;
+
+	sysdev->id = 0;
+	sysdev->cls = &balloon_sysdev_class;
+
+	error = sysdev_register(sysdev);
+	if (error) {
+		sysdev_class_unregister(&balloon_sysdev_class);
+		return error;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
+		error = sysdev_create_file(sysdev, balloon_attrs[i]);
+		if (error)
+			goto fail;
+	}
+
+	error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
+	if (error)
+		goto fail;
+
+	return 0;
+
+ fail:
+	/* i counts the files created so far; remove exactly those. */
+	while (--i >= 0)
+		sysdev_remove_file(sysdev, balloon_attrs[i]);
+	sysdev_unregister(sysdev);
+	sysdev_class_unregister(&balloon_sysdev_class);
+	return error;
+}
|
|
+
|
|
+/*
+ * unregister_balloon: remove the "info" group and the balloon_attrs[] files,
+ * then unregister the sysdev device and its class.
+ */
+static __exit void unregister_balloon(struct sys_device *sysdev)
+{
+	int i;
+
+	sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
+	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
+		sysdev_remove_file(sysdev, balloon_attrs[i]);
+	sysdev_unregister(sysdev);
+	sysdev_class_unregister(&balloon_sysdev_class);
+}
|
|
+
|
|
+/* Register the balloon sysfs interface; returns register_balloon()'s result. */
+int __init balloon_sysfs_init(void)
+{
+	return register_balloon(&balloon_sysdev);
+}
|
|
+
|
|
+/* Tear down the balloon sysfs interface set up by balloon_sysfs_init(). */
+void __exit balloon_sysfs_exit(void)
+{
+	unregister_balloon(&balloon_sysdev);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/Makefile 2009-06-09 15:01:37.000000000 +0200
|
|
@@ -0,0 +1,4 @@
|
|
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o
|
|
+obj-$(CONFIG_XEN_BLKBACK_PAGEMAP) += blkback-pagemap.o
|
|
+
|
|
+blkbk-y := blkback.o xenbus.o interface.o vbd.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/blkback.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,672 @@
|
|
+/******************************************************************************
|
|
+ * arch/xen/drivers/blkif/backend/main.c
|
|
+ *
|
|
+ * Back-end of the driver for virtual block devices. This portion of the
|
|
+ * driver exports a 'unified' block-device interface that can be accessed
|
|
+ * by any operating system that implements a compatible front end. A
|
|
+ * reference front-end implementation can be found in:
|
|
+ * arch/xen/drivers/blkif/frontend
|
|
+ *
|
|
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
|
|
+ * Copyright (c) 2005, Christopher Clark
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/delay.h>
|
|
+#include <xen/balloon.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include "common.h"
|
|
+
|
|
+/*
|
|
+ * These are rather arbitrary. They are fairly large because adjacent requests
|
|
+ * pulled from a communication ring are quite likely to end up being part of
|
|
+ * the same scatter/gather request at the disc.
|
|
+ *
|
|
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
|
|
+ *
|
|
+ * This will increase the chances of being able to write whole tracks.
|
|
+ * 64 should be enough to keep us competitive with Linux.
|
|
+ */
|
|
+static int blkif_reqs = 64; /* number of pending_req slots allocated by blkif_init() */
|
|
+module_param_named(reqs, blkif_reqs, int, 0); /* perm 0: no sysfs entry */
|
|
+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
|
|
+
|
|
+/* Run-time switchable: /sys/module/blkbk/parameters/ (module is built as blkbk.o) */
|
|
+static unsigned int log_stats = 0;
|
|
+static unsigned int debug_lvl = 0;
|
|
+module_param(log_stats, uint, 0644); /* param type must match the unsigned variable */
|
|
+module_param(debug_lvl, uint, 0644);
|
|
+
|
|
+/*
|
|
+ * Each outstanding request that we've passed to the lower device layers has a
|
|
+ * 'pending_req' allocated to it. Each bio that completes decrements
|
|
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
|
|
+ * response queued for it, with the saved 'id' passed back.
|
|
+ */
|
|
+typedef struct {
|
|
+ blkif_t *blkif; /* interface the request arrived on */
|
|
+ u64 id; /* id from the request, passed back in the response */
|
|
+ int nr_pages; /* number of segments (one page each) in the request */
|
|
+ atomic_t pendcnt; /* outstanding completions; response is sent at zero */
|
|
+ unsigned short operation; /* BLKIF_OP_* copied from the request */
|
|
+ int status; /* BLKIF_RSP_* value to report */
|
|
+ struct list_head free_list; /* link on pending_free while unused */
|
|
+} pending_req_t;
|
|
+
|
|
+static pending_req_t *pending_reqs;
|
|
+static struct list_head pending_free;
|
|
+static DEFINE_SPINLOCK(pending_free_lock);
|
|
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
|
|
+
|
|
+#define BLKBACK_INVALID_HANDLE (~0)
|
|
+
|
|
+static struct page **pending_pages;
|
|
+static grant_handle_t *pending_grant_handles;
|
|
+
|
|
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
|
|
+{ /* Index of segment 'seg' of 'req' in the flat pending_pages / pending_grant_handles arrays. */
|
|
+ return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
|
|
+}
|
|
+
|
|
+#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
|
|
+
|
|
+static inline unsigned long vaddr(pending_req_t *req, int seg)
|
|
+{ /* Kernel virtual address of the page backing segment 'seg' of 'req'. */
|
|
+ unsigned long pfn = page_to_pfn(pending_page(req, seg));
|
|
+ return (unsigned long)pfn_to_kaddr(pfn);
|
|
+}
|
|
+
|
|
+#define pending_handle(_req, _seg) \
|
|
+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
|
|
+
|
|
+
|
|
+static int do_block_io_op(blkif_t *blkif);
|
|
+static void dispatch_rw_block_io(blkif_t *blkif,
|
|
+ blkif_request_t *req,
|
|
+ pending_req_t *pending_req);
|
|
+static void make_response(blkif_t *blkif, u64 id,
|
|
+ unsigned short op, int st);
|
|
+
|
|
+/******************************************************************
|
|
+ * misc small helpers
|
|
+ */
|
|
+static pending_req_t* alloc_req(void)
|
|
+{ /* Take one entry off pending_free; returns NULL when the list is empty. */
|
|
+ pending_req_t *req = NULL;
|
|
+ unsigned long flags;
|
|
+ /* pending_free is protected by pending_free_lock. */
|
|
+ spin_lock_irqsave(&pending_free_lock, flags);
|
|
+ if (!list_empty(&pending_free)) {
|
|
+ req = list_entry(pending_free.next, pending_req_t, free_list);
|
|
+ list_del(&req->free_list);
|
|
+ }
|
|
+ spin_unlock_irqrestore(&pending_free_lock, flags);
|
|
+ return req;
|
|
+}
|
|
+
|
|
+static void free_req(pending_req_t *req)
|
|
+{ /* Return 'req' to pending_free and wake waiters if the list had been empty. */
|
|
+ unsigned long flags;
|
|
+ int was_empty;
|
|
+ /* Sample emptiness under the lock; the wake-up itself happens after unlocking. */
|
|
+ spin_lock_irqsave(&pending_free_lock, flags);
|
|
+ was_empty = list_empty(&pending_free);
|
|
+ list_add(&req->free_list, &pending_free);
|
|
+ spin_unlock_irqrestore(&pending_free_lock, flags);
|
|
+ if (was_empty)
|
|
+ wake_up(&pending_free_wq);
|
|
+}
|
|
+
|
|
+static void unplug_queue(blkif_t *blkif)
|
|
+{ /* Run the unplug hook of the remembered queue (if any) and drop our reference to it. */
|
|
+ if (blkif->plug == NULL)
|
|
+ return;
|
|
+ if (blkif->plug->unplug_fn)
|
|
+ blkif->plug->unplug_fn(blkif->plug);
|
|
+ blk_put_queue(blkif->plug); /* pairs with blk_get_queue() in plug_queue() */
|
|
+ blkif->plug = NULL;
|
|
+}
|
|
+
|
|
+static void plug_queue(blkif_t *blkif, struct block_device *bdev)
|
|
+{ /* Remember bdev's request queue in blkif->plug, holding a reference on it. */
|
|
+ request_queue_t *q = bdev_get_queue(bdev);
|
|
+ /* Same queue as before: keep the existing reference. */
|
|
+ if (q == blkif->plug)
|
|
+ return;
|
|
+ unplug_queue(blkif); /* release any previously remembered queue first */
|
|
+ blk_get_queue(q);
|
|
+ blkif->plug = q;
|
|
+}
|
|
+
|
|
+static void fast_flush_area(pending_req_t *req)
|
|
+{ /* Unmap every grant still mapped for 'req' and mark its handles invalid. */
|
|
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
|
+ unsigned int i, invcount = 0;
|
|
+ grant_handle_t handle;
|
|
+ int ret;
|
|
+ /* Build one unmap op per segment that holds a valid handle. */
|
|
+ for (i = 0; i < req->nr_pages; i++) {
|
|
+ handle = pending_handle(req, i);
|
|
+ if (handle == BLKBACK_INVALID_HANDLE)
|
|
+ continue;
|
|
+ blkback_pagemap_clear(pending_page(req, i));
|
|
+ gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
|
|
+ GNTMAP_host_map, handle);
|
|
+ pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
|
|
+ invcount++;
|
|
+ }
|
|
+ /* Issue all collected unmap ops in a single hypercall. */
|
|
+ ret = HYPERVISOR_grant_table_op(
|
|
+ GNTTABOP_unmap_grant_ref, unmap, invcount);
|
|
+ BUG_ON(ret);
|
|
+}
|
|
+
|
|
+/******************************************************************
|
|
+ * SCHEDULER FUNCTIONS
|
|
+ */
|
|
+
|
|
+static void print_stats(blkif_t *blkif)
|
|
+{ /* Log the request counters, schedule the next report, and reset the counters. */
|
|
+ printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n",
|
|
+ current->comm, blkif->st_oo_req,
|
|
+ blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req);
|
|
+ blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); /* next report in 10 s */
|
|
+ blkif->st_rd_req = 0;
|
|
+ blkif->st_wr_req = 0;
|
|
+ blkif->st_oo_req = 0; /* NOTE(review): st_br_req is printed above but not reset here -- confirm intended */
|
|
+}
|
|
+
|
|
+int blkif_schedule(void *arg)
|
|
+{ /* Thread function for one blkif: waits for work and processes ring requests until told to stop. */
|
|
+ blkif_t *blkif = arg;
|
|
+ struct vbd *vbd = &blkif->vbd;
|
|
+ /* Hold a reference on blkif for as long as this function runs. */
|
|
+ blkif_get(blkif);
|
|
+
|
|
+ if (debug_lvl)
|
|
+ printk(KERN_DEBUG "%s: started\n", current->comm);
|
|
+
|
|
+ while (!kthread_should_stop()) {
|
|
+ if (try_to_freeze())
|
|
+ continue;
|
|
+ if (unlikely(vbd->size != vbd_size(vbd))) /* cached size differs from current size */
|
|
+ vbd_resize(blkif);
|
|
+ /* Sleep until this interface has work, then until a pending_req is free (or we must stop). */
|
|
+ wait_event_interruptible(
|
|
+ blkif->wq,
|
|
+ blkif->waiting_reqs || kthread_should_stop());
|
|
+ wait_event_interruptible(
|
|
+ pending_free_wq,
|
|
+ !list_empty(&pending_free) || kthread_should_stop());
|
|
+
|
|
+ blkif->waiting_reqs = 0;
|
|
+ smp_mb(); /* clear flag *before* checking for work */
|
|
+ /* Non-zero means requests remain: set the flag again so the next wait returns at once. */
|
|
+ if (do_block_io_op(blkif))
|
|
+ blkif->waiting_reqs = 1;
|
|
+ unplug_queue(blkif);
|
|
+
|
|
+ if (log_stats && time_after(jiffies, blkif->st_print))
|
|
+ print_stats(blkif);
|
|
+ }
|
|
+
|
|
+ if (log_stats)
|
|
+ print_stats(blkif);
|
|
+ if (debug_lvl)
|
|
+ printk(KERN_DEBUG "%s: exiting\n", current->comm);
|
|
+ /* Clear the thread pointer before dropping the reference taken at entry. */
|
|
+ blkif->xenblkd = NULL;
|
|
+ blkif_put(blkif);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/******************************************************************
|
|
+ * COMPLETION CALLBACK -- Called as bio->bi_end_io()
|
|
+ */
|
|
+
|
|
+static void __end_block_io_op(pending_req_t *pending_req, int error)
|
|
+{ /* Account one completion for pending_req; the last one unmaps, responds and frees it. */
|
|
+ /* An error fails the entire request. */
|
|
+ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) &&
|
|
+ (error == -EOPNOTSUPP)) {
|
|
+ DPRINTK("blkback: write barrier op failed, not supported\n");
|
|
+ blkback_barrier(XBT_NIL, pending_req->blkif->be, 0);
|
|
+ pending_req->status = BLKIF_RSP_EOPNOTSUPP;
|
|
+ } else if (error) {
|
|
+ DPRINTK("Buffer not up-to-date at end of operation, "
|
|
+ "error=%d\n", error);
|
|
+ pending_req->status = BLKIF_RSP_ERROR;
|
|
+ }
|
|
+ /* Last outstanding completion: unmap grants, respond, drop the blkif ref, recycle the slot. */
|
|
+ if (atomic_dec_and_test(&pending_req->pendcnt)) {
|
|
+ fast_flush_area(pending_req);
|
|
+ make_response(pending_req->blkif, pending_req->id,
|
|
+ pending_req->operation, pending_req->status);
|
|
+ blkif_put(pending_req->blkif);
|
|
+ free_req(pending_req);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int end_block_io_op(struct bio *bio, unsigned int done, int error)
|
|
+{ /* bi_end_io callback installed by dispatch_rw_block_io(); bi_private is the pending_req. */
|
|
+ if (bio->bi_size != 0) /* presumably a partial completion: wait for the final call -- TODO confirm */
|
|
+ return 1;
|
|
+ __end_block_io_op(bio->bi_private, error);
|
|
+ bio_put(bio);
|
|
+ return error;
|
|
+}
|
|
+
|
|
+
|
|
+/******************************************************************************
|
|
+ * NOTIFICATION FROM GUEST OS.
|
|
+ */
|
|
+
|
|
+static void blkif_notify_work(blkif_t *blkif)
|
|
+{ /* Flag that requests may be waiting and wake whoever sleeps on blkif->wq. */
|
|
+ blkif->waiting_reqs = 1;
|
|
+ wake_up(&blkif->wq);
|
|
+}
|
|
+
|
|
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+{ /* Interrupt handler; dev_id is passed to blkif_notify_work() as the blkif. */
|
|
+ blkif_notify_work(dev_id);
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/******************************************************************
|
|
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
|
|
+ */
|
|
+
|
|
+static int do_block_io_op(blkif_t *blkif)
|
|
+{ /* Consume requests from the ring and dispatch them; returns non-zero if work was left behind. */
|
|
+ blkif_back_rings_t *blk_rings = &blkif->blk_rings;
|
|
+ blkif_request_t req;
|
|
+ pending_req_t *pending_req;
|
|
+ RING_IDX rc, rp;
|
|
+ int more_to_do = 0;
|
|
+ /* Snapshot our consumer index and the producer index published in the ring. */
|
|
+ rc = blk_rings->common.req_cons;
|
|
+ rp = blk_rings->common.sring->req_prod;
|
|
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
|
|
+
|
|
+ while ((rc != rp)) {
|
|
+
|
|
+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
|
|
+ break;
|
|
+ /* Asked to stop: leave the remaining requests and report them as pending. */
|
|
+ if (kthread_should_stop()) {
|
|
+ more_to_do = 1;
|
|
+ break;
|
|
+ }
|
|
+ /* No free pending_req: count the event and report work as pending. */
|
|
+ pending_req = alloc_req();
|
|
+ if (NULL == pending_req) {
|
|
+ blkif->st_oo_req++;
|
|
+ more_to_do = 1;
|
|
+ break;
|
|
+ }
|
|
+ /* Copy the request out of the ring using the layout selected by blk_protocol. */
|
|
+ switch (blkif->blk_protocol) {
|
|
+ case BLKIF_PROTOCOL_NATIVE:
|
|
+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req));
|
|
+ break;
|
|
+ case BLKIF_PROTOCOL_X86_32:
|
|
+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
|
|
+ break;
|
|
+ case BLKIF_PROTOCOL_X86_64:
|
|
+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+ blk_rings->common.req_cons = ++rc; /* before make_response() */
|
|
+
|
|
+ /* Apply all sanity checks to /private copy/ of request. */
|
|
+ barrier();
|
|
+ /* Count the request by type and hand it to the dispatcher. */
|
|
+ switch (req.operation) {
|
|
+ case BLKIF_OP_READ:
|
|
+ blkif->st_rd_req++;
|
|
+ dispatch_rw_block_io(blkif, &req, pending_req);
|
|
+ break;
|
|
+ case BLKIF_OP_WRITE_BARRIER:
|
|
+ blkif->st_br_req++;
|
|
+ /* fall through */
|
|
+ case BLKIF_OP_WRITE:
|
|
+ blkif->st_wr_req++;
|
|
+ dispatch_rw_block_io(blkif, &req, pending_req);
|
|
+ break;
|
|
+ default:
|
|
+ /* A good sign something is wrong: sleep for a while to
|
|
+ * avoid excessive CPU consumption by a bad guest. */
|
|
+ msleep(1);
|
|
+ DPRINTK("error: unknown block io operation [%d]\n",
|
|
+ req.operation);
|
|
+ make_response(blkif, req.id, req.operation,
|
|
+ BLKIF_RSP_ERROR);
|
|
+ free_req(pending_req); /* slot was never handed to the dispatcher */
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* Yield point for this unbounded loop. */
|
|
+ cond_resched();
|
|
+ }
|
|
+
|
|
+ return more_to_do;
|
|
+}
|
|
+
|
|
+static void dispatch_rw_block_io(blkif_t *blkif,
|
|
+ blkif_request_t *req,
|
|
+ pending_req_t *pending_req)
|
|
+{ /* Validate one read/write/barrier request, map its grants and submit the I/O as bios. */
|
|
+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); /* NOTE(review): not referenced in this function */
|
|
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
|
+ struct phys_req preq;
|
|
+ struct {
|
|
+ unsigned long buf; unsigned int nsec;
|
|
+ } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
|
+ unsigned int nseg;
|
|
+ struct bio *bio = NULL;
|
|
+ int ret, i;
|
|
+ int operation;
|
|
+ /* Translate the ring opcode into the READ/WRITE/WRITE_BARRIER value passed to submit_bio(). */
|
|
+ switch (req->operation) {
|
|
+ case BLKIF_OP_READ:
|
|
+ operation = READ;
|
|
+ break;
|
|
+ case BLKIF_OP_WRITE:
|
|
+ operation = WRITE;
|
|
+ break;
|
|
+ case BLKIF_OP_WRITE_BARRIER:
|
|
+ operation = WRITE_BARRIER;
|
|
+ break;
|
|
+ default:
|
|
+ operation = 0; /* make gcc happy */
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ /* Check that number of segments is sane. */
|
|
+ nseg = req->nr_segments;
|
|
+ if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
|
|
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
|
|
+ DPRINTK("Bad number of segments in request (%d)\n", nseg);
|
|
+ goto fail_response;
|
|
+ }
|
|
+
|
|
+ preq.dev = req->handle;
|
|
+ preq.sector_number = req->sector_number;
|
|
+ preq.nr_sects = 0;
|
|
+ /* Record what __end_block_io_op() needs to build the response later. */
|
|
+ pending_req->blkif = blkif;
|
|
+ pending_req->id = req->id;
|
|
+ pending_req->operation = req->operation;
|
|
+ pending_req->status = BLKIF_RSP_OKAY;
|
|
+ pending_req->nr_pages = nseg;
|
|
+ /* Validate each segment and prepare its grant-map operation. */
|
|
+ for (i = 0; i < nseg; i++) {
|
|
+ uint32_t flags;
|
|
+
|
|
+ seg[i].nsec = req->seg[i].last_sect -
|
|
+ req->seg[i].first_sect + 1;
|
|
+
|
|
+ if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
|
|
+ (req->seg[i].last_sect < req->seg[i].first_sect))
|
|
+ goto fail_response;
|
|
+ preq.nr_sects += seg[i].nsec;
|
|
+ /* Every operation other than READ gets a read-only mapping of the granted page. */
|
|
+ flags = GNTMAP_host_map;
|
|
+ if (operation != READ)
|
|
+ flags |= GNTMAP_readonly;
|
|
+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
|
|
+ req->seg[i].gref, blkif->domid);
|
|
+ }
|
|
+ /* Map all segments with a single hypercall. */
|
|
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
|
|
+ BUG_ON(ret);
|
|
+ /* Store every handle so fast_flush_area() can unmap; ret stays set after any failure. */
|
|
+ for (i = 0; i < nseg; i++) {
|
|
+ if (unlikely(map[i].status == GNTST_eagain))
|
|
+ gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map[i])
|
|
+ if (unlikely(map[i].status != GNTST_okay)) {
|
|
+ DPRINTK("invalid buffer -- could not remap it\n");
|
|
+ map[i].handle = BLKBACK_INVALID_HANDLE;
|
|
+ ret = 1;
|
|
+ } else {
|
|
+ blkback_pagemap_set(vaddr_pagenr(pending_req, i),
|
|
+ pending_page(pending_req, i),
|
|
+ blkif->domid, req->handle,
|
|
+ req->seg[i].gref);
|
|
+ }
|
|
+
|
|
+ pending_handle(pending_req, i) = map[i].handle;
|
|
+
|
|
+ if (ret)
|
|
+ continue;
|
|
+
|
|
+ set_phys_to_machine(
|
|
+ page_to_pfn(pending_page(pending_req, i)),
|
|
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
|
|
+ seg[i].buf = map[i].dev_bus_addr |
|
|
+ (req->seg[i].first_sect << 9);
|
|
+ }
|
|
+
|
|
+ if (ret)
|
|
+ goto fail_flush;
|
|
+
|
|
+ if (vbd_translate(&preq, blkif, operation) != 0) {
|
|
+ DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n",
|
|
+ operation == READ ? "read" : "write",
|
|
+ preq.sector_number,
|
|
+ preq.sector_number + preq.nr_sects, preq.dev);
|
|
+ goto fail_flush;
|
|
+ }
|
|
+ /* pendcnt starts at 1 for the final bio submitted below; the blkif ref is dropped on completion. */
|
|
+ plug_queue(blkif, preq.bdev);
|
|
+ atomic_set(&pending_req->pendcnt, 1);
|
|
+ blkif_get(blkif);
|
|
+
|
|
+ for (i = 0; i < nseg; i++) {
|
|
+ if (((int)preq.sector_number|(int)seg[i].nsec) &
|
|
+ ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
|
|
+ DPRINTK("Misaligned I/O request from domain %d",
|
|
+ blkif->domid);
|
|
+ goto fail_put_bio;
|
|
+ }
|
|
+ /* Start a new bio when there is none yet or the current one will not take this page. */
|
|
+ while ((bio == NULL) ||
|
|
+ (bio_add_page(bio,
|
|
+ pending_page(pending_req, i),
|
|
+ seg[i].nsec << 9,
|
|
+ seg[i].buf & ~PAGE_MASK) == 0)) {
|
|
+ if (bio) {
|
|
+ atomic_inc(&pending_req->pendcnt);
|
|
+ submit_bio(operation, bio);
|
|
+ }
|
|
+
|
|
+ bio = bio_alloc(GFP_KERNEL, nseg-i);
|
|
+ if (unlikely(bio == NULL))
|
|
+ goto fail_put_bio;
|
|
+
|
|
+ bio->bi_bdev = preq.bdev;
|
|
+ bio->bi_private = pending_req;
|
|
+ bio->bi_end_io = end_block_io_op;
|
|
+ bio->bi_sector = preq.sector_number;
|
|
+ }
|
|
+
|
|
+ preq.sector_number += seg[i].nsec;
|
|
+ }
|
|
+ /* No segments at all: only a barrier may get here, and it still needs a bio to carry it. */
|
|
+ if (!bio) {
|
|
+ BUG_ON(operation != WRITE_BARRIER);
|
|
+ bio = bio_alloc(GFP_KERNEL, 0);
|
|
+ if (unlikely(bio == NULL))
|
|
+ goto fail_put_bio;
|
|
+
|
|
+ bio->bi_bdev = preq.bdev;
|
|
+ bio->bi_private = pending_req;
|
|
+ bio->bi_end_io = end_block_io_op;
|
|
+ bio->bi_sector = -1;
|
|
+ }
|
|
+
|
|
+ submit_bio(operation, bio);
|
|
+ /* Account the transferred sectors in the per-interface statistics. */
|
|
+ if (operation == READ)
|
|
+ blkif->st_rd_sect += preq.nr_sects;
|
|
+ else if (operation == WRITE || operation == WRITE_BARRIER)
|
|
+ blkif->st_wr_sect += preq.nr_sects;
|
|
+
|
|
+ return;
|
|
+
|
|
+ fail_flush: /* grants may be mapped: unmap them before responding */
|
|
+ fast_flush_area(pending_req);
|
|
+ fail_response: /* nothing submitted: answer with an error and recycle the slot */
|
|
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
|
|
+ free_req(pending_req);
|
|
+ msleep(1); /* back off a bit */
|
|
+ return;
|
|
+ /* Drops the initial pendcnt; the error response goes out once no earlier bio is in flight. */
|
|
+ fail_put_bio:
|
|
+ __end_block_io_op(pending_req, -EINVAL);
|
|
+ if (bio)
|
|
+ bio_put(bio);
|
|
+ unplug_queue(blkif);
|
|
+ msleep(1); /* back off a bit */
|
|
+ return;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/******************************************************************
|
|
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
|
|
+ */
|
|
+
|
|
+
|
|
+static void make_response(blkif_t *blkif, u64 id,
|
|
+ unsigned short op, int st)
|
|
+{ /* Put a response (id, op, st) on the ring and notify the other end if required. */
|
|
+ blkif_response_t resp;
|
|
+ unsigned long flags;
|
|
+ blkif_back_rings_t *blk_rings = &blkif->blk_rings;
|
|
+ int more_to_do = 0;
|
|
+ int notify;
|
|
+
|
|
+ resp.id = id;
|
|
+ resp.operation = op;
|
|
+ resp.status = st;
|
|
+ /* Ring updates below are made with blk_ring_lock held. */
|
|
+ spin_lock_irqsave(&blkif->blk_ring_lock, flags);
|
|
+ /* Place on the response ring for the relevant domain. */
|
|
+ switch (blkif->blk_protocol) {
|
|
+ case BLKIF_PROTOCOL_NATIVE:
|
|
+ memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt),
|
|
+ &resp, sizeof(resp));
|
|
+ break;
|
|
+ case BLKIF_PROTOCOL_X86_32:
|
|
+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt),
|
|
+ &resp, sizeof(resp));
|
|
+ break;
|
|
+ case BLKIF_PROTOCOL_X86_64:
|
|
+ memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt),
|
|
+ &resp, sizeof(resp));
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+ blk_rings->common.rsp_prod_pvt++;
|
|
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
|
|
+ if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
|
|
+ /*
|
|
+ * Tail check for pending requests. Allows frontend to avoid
|
|
+ * notifications if requests are already in flight (lower
|
|
+ * overheads and promotes batching).
|
|
+ */
|
|
+ RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
|
|
+
|
|
+ } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
|
|
+ more_to_do = 1;
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
|
|
+ /* Wake-up and remote notification are done after the ring lock is released. */
|
|
+ if (more_to_do)
|
|
+ blkif_notify_work(blkif);
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(blkif->irq);
|
|
+}
|
|
+
|
|
+static int __init blkif_init(void)
|
|
+{ /* Module init: allocate the request pool, page vector and grant-handle table, then start the backend. */
|
|
+ int i, mmap_pages;
|
|
+
|
|
+ if (!is_running_on_xen())
|
|
+ return -ENODEV;
|
|
+ /* One page and one grant handle per possible segment of every request slot. */
|
|
+ mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
|
+
|
|
+ pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
|
|
+ blkif_reqs, GFP_KERNEL);
|
|
+ pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
|
|
+ mmap_pages, GFP_KERNEL);
|
|
+ pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
|
|
+ /* Check these first so the pagemap is never allocated only to be leaked. */
|
|
+ if (!pending_reqs || !pending_grant_handles || !pending_pages)
|
|
+ goto out_of_memory;
|
|
+
|
|
+ if (blkback_pagemap_init(mmap_pages))
|
|
+ goto out_of_memory;
|
|
+
|
|
+ for (i = 0; i < mmap_pages; i++)
|
|
+ pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
|
|
+
|
|
+ blkif_interface_init();
|
|
+ /* Zero the whole array: sizeof(pending_reqs) would only be the size of the pointer. */
|
|
+ memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
|
|
+ INIT_LIST_HEAD(&pending_free);
|
|
+ /* Every request slot starts out on the free list. */
|
|
+ for (i = 0; i < blkif_reqs; i++)
|
|
+ list_add_tail(&pending_reqs[i].free_list, &pending_free);
|
|
+
|
|
+ blkif_xenbus_init();
|
|
+
|
|
+ return 0;
|
|
+ /* Any of the three pointers may be NULL on this path. */
|
|
+ out_of_memory:
|
|
+ kfree(pending_reqs);
|
|
+ kfree(pending_grant_handles);
|
|
+ free_empty_pages_and_pagevec(pending_pages, mmap_pages);
|
|
+ printk("%s: out of memory\n", __FUNCTION__);
|
|
+ return -ENOMEM;
|
|
+}
|
|
+
|
|
+module_init(blkif_init);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/blkback-pagemap.c 2009-06-09 15:01:37.000000000 +0200
|
|
@@ -0,0 +1,96 @@
|
|
+#include <linux/module.h>
|
|
+#include "blkback-pagemap.h"
|
|
+
|
|
+static int blkback_pagemap_size;
|
|
+static struct blkback_pagemap *blkback_pagemap;
|
|
+
|
|
+static inline int
|
|
+blkback_pagemap_entry_clear(struct blkback_pagemap *map)
|
|
+{ /* Returns non-zero when every byte of *map is zero, i.e. the slot is unused. */
|
|
+ static struct blkback_pagemap zero; /* static storage: all-zero reference entry */
|
|
+ return !memcmp(map, &zero, sizeof(zero));
|
|
+}
|
|
+
|
|
+int
|
|
+blkback_pagemap_init(int pages)
|
|
+{ /* Allocate a zeroed table of 'pages' entries; returns 0 or -ENOMEM. */
|
|
+ blkback_pagemap = kcalloc(pages, sizeof(struct blkback_pagemap),
|
|
+ GFP_KERNEL); /* kcalloc rejects a count * size overflow */
|
|
+ if (!blkback_pagemap)
|
|
+ return -ENOMEM;
|
|
+ /* Remember the table size for the bounds checks in set/clear/read. */
|
|
+ blkback_pagemap_size = pages;
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(blkback_pagemap_init);
|
|
+
|
|
+void
|
|
+blkback_pagemap_set(int idx, struct page *page,
|
|
+ domid_t domid, busid_t busid, grant_ref_t gref)
|
|
+{ /* Record (domid, busid, gref) in slot idx and tag 'page' with that index. */
|
|
+ struct blkback_pagemap *entry;
|
|
+
|
|
+ BUG_ON(!blkback_pagemap);
|
|
+ BUG_ON(idx >= blkback_pagemap_size);
|
|
+ /* Flag the page and store idx in page_private so clear/read can find the slot again. */
|
|
+ SetPageBlkback(page);
|
|
+ set_page_private(page, idx);
|
|
+ /* The slot must currently be empty; overwriting a live entry is fatal. */
|
|
+ entry = blkback_pagemap + idx;
|
|
+ if (!blkback_pagemap_entry_clear(entry)) {
|
|
+ printk("overwriting pagemap %d: d %u b %u g %u\n",
|
|
+ idx, entry->domid, entry->busid, entry->gref);
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ entry->domid = domid;
|
|
+ entry->busid = busid;
|
|
+ entry->gref = gref;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(blkback_pagemap_set);
|
|
+
|
|
+void
|
|
+blkback_pagemap_clear(struct page *page)
|
|
+{ /* Zero the table slot that 'page' refers to. */
|
|
+ int idx;
|
|
+ struct blkback_pagemap *entry;
|
|
+ /* The slot index was stored in page_private by blkback_pagemap_set(). */
|
|
+ idx = (int)page_private(page);
|
|
+
|
|
+ BUG_ON(!blkback_pagemap);
|
|
+ BUG_ON(!PageBlkback(page));
|
|
+ BUG_ON(idx >= blkback_pagemap_size);
|
|
+ /* Clearing a slot that is already empty is fatal. */
|
|
+ entry = blkback_pagemap + idx;
|
|
+ if (blkback_pagemap_entry_clear(entry)) {
|
|
+ printk("clearing empty pagemap %d\n", idx);
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ memset(entry, 0, sizeof(*entry));
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(blkback_pagemap_clear);
|
|
+
|
|
+struct blkback_pagemap
|
|
+blkback_pagemap_read(struct page *page)
|
|
+{ /* Return, by value, the table entry that 'page' refers to. */
|
|
+ int idx;
|
|
+ struct blkback_pagemap *entry;
|
|
+ /* The slot index was stored in page_private by blkback_pagemap_set(). */
|
|
+ idx = (int)page_private(page);
|
|
+
|
|
+ BUG_ON(!blkback_pagemap);
|
|
+ BUG_ON(!PageBlkback(page));
|
|
+ BUG_ON(idx >= blkback_pagemap_size);
|
|
+ /* Reading a slot that was never set (or already cleared) is fatal. */
|
|
+ entry = blkback_pagemap + idx;
|
|
+ if (blkback_pagemap_entry_clear(entry)) {
|
|
+ printk("reading empty pagemap %d\n", idx);
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ return *entry;
|
|
+}
|
|
+EXPORT_SYMBOL(blkback_pagemap_read);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/blkback-pagemap.h 2009-06-09 15:01:37.000000000 +0200
|
|
@@ -0,0 +1,37 @@
|
|
+#ifndef _BLKBACK_PAGEMAP_H_
|
|
+#define _BLKBACK_PAGEMAP_H_
|
|
+
|
|
+#include <linux/mm.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+
|
|
+typedef unsigned int busid_t;
|
|
+
|
|
+struct blkback_pagemap {
|
|
+ domid_t domid; /* blkback stores blkif->domid here */
|
|
+ busid_t busid; /* blkback stores the request's 'handle' here */
|
|
+ grant_ref_t gref; /* grant reference of the mapped segment */
|
|
+};
|
|
+
|
|
+#if defined(CONFIG_XEN_BLKBACK_PAGEMAP) || defined(CONFIG_XEN_BLKBACK_PAGEMAP_MODULE)
|
|
+
|
|
+int blkback_pagemap_init(int);
|
|
+void blkback_pagemap_set(int, struct page *, domid_t, busid_t, grant_ref_t);
|
|
+void blkback_pagemap_clear(struct page *);
|
|
+struct blkback_pagemap blkback_pagemap_read(struct page *);
|
|
+
|
|
+#else /* CONFIG_XEN_BLKBACK_PAGEMAP */
|
|
+
|
|
+static inline int blkback_pagemap_init(int pages) { return 0; } /* stub: always succeeds */
|
|
+static inline void blkback_pagemap_set(int idx, struct page *page, domid_t dom,
|
|
+ busid_t bus, grant_ref_t gnt) {} /* stub: nothing recorded */
|
|
+static inline void blkback_pagemap_clear(struct page *page) {} /* stub: nothing to clear */
|
|
+static inline struct blkback_pagemap blkback_pagemap_read(struct page *page)
|
|
+{ /* No table exists in this configuration, so any read is a bug. */
|
|
+ BUG();
|
|
+ return (struct blkback_pagemap){-1, -1, -1}; /* follows BUG(); gives the function a return value */
|
|
+}
|
|
+
|
|
+#endif /* CONFIG_XEN_BLKBACK_PAGEMAP */
|
|
+
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/common.h 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,153 @@
|
|
+/*
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __BLKIF__BACKEND__COMMON_H__
|
|
+#define __BLKIF__BACKEND__COMMON_H__
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/blkdev.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/wait.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/setup.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/blkif.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include "blkback-pagemap.h"
|
|
+
|
|
+
|
|
+#define DPRINTK(_f, _a...) \
|
|
+ pr_debug("(file=%s, line=%d) " _f, \
|
|
+ __FILE__ , __LINE__ , ## _a )
|
|
+
|
|
+struct vbd {
|
|
+ blkif_vdev_t handle; /* what the domain refers to this vbd as */
|
|
+ unsigned char readonly; /* Non-zero -> read-only */
|
|
+ unsigned char type; /* VDISK_xxx */
|
|
+ u32 pdevice; /* phys device that this vbd maps to */
|
|
+ struct block_device *bdev;
|
|
+ sector_t size; /* Cached size parameter */
|
|
+};
|
|
+
|
|
+struct backend_info;
|
|
+
|
|
+typedef struct blkif_st {
|
|
+ /* Unique identifier for this interface. */
|
|
+ domid_t domid;
|
|
+ unsigned int handle;
|
|
+ /* Physical parameters of the comms window. */
|
|
+ unsigned int irq;
|
|
+ /* Comms information. */
|
|
+ enum blkif_protocol blk_protocol; /* selects native / x86_32 / x86_64 ring layout */
|
|
+ blkif_back_rings_t blk_rings;
|
|
+ struct vm_struct *blk_ring_area;
|
|
+ /* The VBD attached to this interface. */
|
|
+ struct vbd vbd;
|
|
+ /* Back pointer to the backend_info. */
|
|
+ struct backend_info *be;
|
|
+ /* Private fields. */
|
|
+ spinlock_t blk_ring_lock; /* held by make_response() while updating the ring */
|
|
+ atomic_t refcnt; /* changed via blkif_get() / blkif_put() */
|
|
+
|
|
+ wait_queue_head_t wq; /* blkif_schedule() sleeps here waiting for work */
|
|
+ struct task_struct *xenblkd; /* set to NULL by blkif_schedule() when it exits */
|
|
+ unsigned int waiting_reqs; /* non-zero: the ring may hold unprocessed requests */
|
|
+ request_queue_t *plug; /* queue referenced by plug_queue(), or NULL */
|
|
+
|
|
+ /* statistics */
|
|
+ unsigned long st_print; /* jiffies value after which stats are logged again */
|
|
+ int st_rd_req; /* read requests seen */
|
|
+ int st_wr_req; /* write requests seen (barriers are counted here too) */
|
|
+ int st_oo_req; /* times no free pending_req was available */
|
|
+ int st_br_req; /* write-barrier requests seen */
|
|
+ int st_rd_sect; /* sectors submitted for reads */
|
|
+ int st_wr_sect; /* sectors submitted for writes and barriers */
|
|
+
|
|
+ wait_queue_head_t waiting_to_free; /* woken by blkif_put() when refcnt reaches zero */
|
|
+
|
|
+ grant_handle_t shmem_handle;
|
|
+ grant_ref_t shmem_ref;
|
|
+} blkif_t;
|
|
+
|
|
+struct backend_info
|
|
+{
|
|
+ struct xenbus_device *dev;
|
|
+ blkif_t *blkif;
|
|
+ struct xenbus_watch backend_watch;
|
|
+ unsigned major;
|
|
+ unsigned minor;
|
|
+ char *mode;
|
|
+};
|
|
+
|
|
+blkif_t *blkif_alloc(domid_t domid);
|
|
+void blkif_disconnect(blkif_t *blkif);
|
|
+void blkif_free(blkif_t *blkif);
|
|
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
|
|
+void vbd_resize(blkif_t *blkif);
|
|
+
|
|
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
|
|
+#define blkif_put(_b) \
|
|
+ do { \
|
|
+ if (atomic_dec_and_test(&(_b)->refcnt)) \
|
|
+ wake_up(&(_b)->waiting_to_free);\
|
|
+ } while (0)
|
|
+
|
|
+/* Create a vbd. */
|
|
+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
|
|
+ unsigned minor, int readonly, int cdrom);
|
|
+void vbd_free(struct vbd *vbd);
|
|
+
|
|
+unsigned long long vbd_size(struct vbd *vbd);
|
|
+unsigned int vbd_info(struct vbd *vbd);
|
|
+unsigned long vbd_secsize(struct vbd *vbd);
|
|
+
|
|
+struct phys_req {
|
|
+ unsigned short dev; /* blkback fills this from the request's 'handle' */
|
|
+ unsigned short nr_sects; /* total sectors summed over all segments */
|
|
+ struct block_device *bdev; /* presumably filled in by vbd_translate() -- TODO confirm */
|
|
+ blkif_sector_t sector_number; /* start sector taken from the request */
|
|
+};
|
|
+
|
|
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation);
|
|
+
|
|
+void blkif_interface_init(void);
|
|
+
|
|
+void blkif_xenbus_init(void);
|
|
+
|
|
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+int blkif_schedule(void *arg);
|
|
+
|
|
+int blkback_barrier(struct xenbus_transaction xbt,
|
|
+ struct backend_info *be, int state);
|
|
+
|
|
+#endif /* __BLKIF__BACKEND__COMMON_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/interface.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,183 @@
|
|
+/******************************************************************************
|
|
+ * arch/xen/drivers/blkif/backend/interface.c
|
|
+ *
|
|
+ * Block-device interface management.
|
|
+ *
|
|
+ * Copyright (c) 2004, Keir Fraser
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+#include <xen/evtchn.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/delay.h>
|
|
+
|
|
+static kmem_cache_t *blkif_cachep;
|
|
+
|
|
+/*
+ * Allocate and initialise a block interface for frontend domain @domid.
+ *
+ * The object is taken from blkif_cachep, zeroed, and handed back with its
+ * reference count set to 1.  Returns the new blkif, or ERR_PTR(-ENOMEM)
+ * when the cache allocation fails.
+ */
+blkif_t *blkif_alloc(domid_t domid)
+{
+	blkif_t *nif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+
+	if (nif == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	memset(nif, 0, sizeof(*nif));
+
+	nif->domid = domid;
+	nif->st_print = jiffies;
+	atomic_set(&nif->refcnt, 1);
+	spin_lock_init(&nif->blk_ring_lock);
+	init_waitqueue_head(&nif->wq);
+	init_waitqueue_head(&nif->waiting_to_free);
+
+	return nif;
+}
|
|
+
|
|
+/*
+ * Map the frontend's granted ring page @shared_page into the VM area
+ * already allocated in blkif->blk_ring_area.
+ *
+ * On success the grant reference and the returned handle are recorded in
+ * @blkif and 0 is returned; any other grant status yields -EINVAL.
+ */
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
+{
+	struct gnttab_map_grant_ref op;
+
+	gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+			  GNTMAP_host_map, shared_page, blkif->domid);
+
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &op);
+
+	if (op.status != GNTST_okay) {
+		DPRINTK(" Grant table operation failure %d!\n", (int)op.status);
+		return -EINVAL;
+	}
+
+	blkif->shmem_ref = shared_page;
+	blkif->shmem_handle = op.handle;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Undo map_frontend_page(): unmap the ring page using the handle saved in
+ * blkif->shmem_handle.  A failing hypercall is treated as fatal (BUG).
+ */
+static void unmap_frontend_page(blkif_t *blkif)
+{
+	struct gnttab_unmap_grant_ref unmap;
+	int rc;
+
+	gnttab_set_unmap_op(&unmap, (unsigned long)blkif->blk_ring_area->addr,
+			    GNTMAP_host_map, blkif->shmem_handle);
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
+	if (rc)
+		BUG();
+}
|
|
+
|
|
+/*
+ * Connect @blkif to its frontend: allocate a one-page VM area, map the
+ * granted ring page @shared_page into it, initialise the back ring that
+ * matches blkif->blk_protocol and bind @evtchn to blkif_be_int().
+ *
+ * Returns 0 on success (or immediately when an irq is already bound),
+ * -ENOMEM when no VM area is available, otherwise the error reported by
+ * the mapping or the event-channel binding.  An unknown protocol value
+ * is a BUG.
+ */
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
+{
+	void *ring;
+	int rc;
+
+	/* Already connected through? */
+	if (blkif->irq)
+		return 0;
+
+	blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
+	if (blkif->blk_ring_area == NULL)
+		return -ENOMEM;
+
+	rc = map_frontend_page(blkif, shared_page);
+	if (rc) {
+		free_vm_area(blkif->blk_ring_area);
+		return rc;
+	}
+
+	/* The same mapped page is viewed through the protocol's ring layout. */
+	ring = blkif->blk_ring_area->addr;
+	switch (blkif->blk_protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		BACK_RING_INIT(&blkif->blk_rings.native,
+			       (blkif_sring_t *)ring, PAGE_SIZE);
+		break;
+	case BLKIF_PROTOCOL_X86_32:
+		BACK_RING_INIT(&blkif->blk_rings.x86_32,
+			       (blkif_x86_32_sring_t *)ring, PAGE_SIZE);
+		break;
+	case BLKIF_PROTOCOL_X86_64:
+		BACK_RING_INIT(&blkif->blk_rings.x86_64,
+			       (blkif_x86_64_sring_t *)ring, PAGE_SIZE);
+		break;
+	default:
+		BUG();
+	}
+
+	rc = bind_interdomain_evtchn_to_irqhandler(
+		blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif);
+	if (rc < 0) {
+		/* Roll back the mapping so blkif_disconnect() sees no ring. */
+		unmap_frontend_page(blkif);
+		free_vm_area(blkif->blk_ring_area);
+		blkif->blk_rings.common.sring = NULL;
+		return rc;
+	}
+
+	/* A non-negative result is the irq that was bound. */
+	blkif->irq = rc;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Tear down the connection to the frontend: stop the worker thread, wait
+ * for outstanding references to drain, then release the irq and the
+ * mapped ring.  Each step is guarded by the state it undoes, so calling
+ * this on an already disconnected blkif does nothing harmful.
+ */
+void blkif_disconnect(blkif_t *blkif)
+{
+	/* Stop the request-processing thread first, if one was started. */
+	if (blkif->xenblkd != NULL) {
+		kthread_stop(blkif->xenblkd);
+		blkif->xenblkd = NULL;
+	}
+
+	/*
+	 * Give up the reference set by blkif_alloc(), sleep until every
+	 * blkif_get() has been balanced by a blkif_put(), then take the
+	 * reference back so blkif_free() finds the count at 1.
+	 */
+	atomic_dec(&blkif->refcnt);
+	wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+	atomic_inc(&blkif->refcnt);
+
+	if (blkif->irq != 0) {
+		unbind_from_irqhandler(blkif->irq, blkif);
+		blkif->irq = 0;
+	}
+
+	if (blkif->blk_rings.common.sring != NULL) {
+		unmap_frontend_page(blkif);
+		free_vm_area(blkif->blk_ring_area);
+		blkif->blk_rings.common.sring = NULL;
+	}
+}
|
|
+
|
|
+/*
+ * Release a blkif back to blkif_cachep.  The caller must hold the only
+ * remaining reference; anything else is a BUG.
+ */
+void blkif_free(blkif_t *blkif)
+{
+	int was_last = atomic_dec_and_test(&blkif->refcnt);
+
+	if (!was_last)
+		BUG();
+
+	kmem_cache_free(blkif_cachep, blkif);
+}
|
|
+
|
|
+/*
+ * Create the slab cache from which blkif_alloc() takes blkif_t objects.
+ * NOTE(review): the result of kmem_cache_create() is not checked here.
+ */
+void __init blkif_interface_init(void)
+{
+	blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t),
+					 0, 0, NULL, NULL);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/vbd.c 2010-03-22 12:00:53.000000000 +0100
|
|
@@ -0,0 +1,161 @@
|
|
+/******************************************************************************
|
|
+ * blkback/vbd.c
|
|
+ *
|
|
+ * Routines for managing virtual block devices (VBDs).
|
|
+ *
|
|
+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+
|
|
+#define vbd_sz(_v) /* partition nr_sects when bd_part is set, else get_capacity() of the whole disk */ ((_v)->bdev->bd_part ? \
|
|
+ (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk))
|
|
+
|
|
+/* Size of @vbd's backing device as computed by vbd_sz(). */
+unsigned long long vbd_size(struct vbd *vbd)
+{
+	unsigned long long sectors = vbd_sz(vbd);
+
+	return sectors;
+}
|
|
+
|
|
+/* Type flags of @vbd, with VDISK_READONLY added for a read-only vbd. */
+unsigned int vbd_info(struct vbd *vbd)
+{
+	unsigned int info = vbd->type;
+
+	if (vbd->readonly)
+		info |= VDISK_READONLY;
+
+	return info;
+}
|
|
+
|
|
+/* Hardware sector size of the block device behind @vbd. */
+unsigned long vbd_secsize(struct vbd *vbd)
+{
+	struct block_device *backing = vbd->bdev;
+
+	return bdev_hardsect_size(backing);
+}
|
|
+
|
|
+/*
+ * Set up the vbd embedded in @blkif for physical device @major:@minor.
+ *
+ * @handle   virtual device handle stored in the vbd
+ * @readonly non-zero opens the device with FMODE_READ instead of
+ *           FMODE_WRITE and marks the vbd read-only
+ * @cdrom    non-zero forces the VDISK_CDROM type flag
+ *
+ * Returns 0 on success, -ENOENT when the device cannot be opened or has
+ * no gendisk attached.
+ */
+int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
+	       unsigned minor, int readonly, int cdrom)
+{
+	struct vbd *vbd = &blkif->vbd;
+	struct block_device *bdev;
+
+	vbd->handle   = handle;
+	vbd->readonly = readonly;
+	vbd->type     = 0;
+
+	vbd->pdevice  = MKDEV(major, minor);
+
+	bdev = open_by_devnum(vbd->pdevice,
+			      vbd->readonly ? FMODE_READ : FMODE_WRITE);
+
+	if (IS_ERR(bdev)) {
+		DPRINTK("vbd_creat: device %08x could not be opened.\n",
+			vbd->pdevice);
+		return -ENOENT;
+	}
+
+	vbd->bdev = bdev;
+
+	/*
+	 * Check for a missing gendisk before asking for the size: vbd_sz()
+	 * falls back to get_capacity(bd_disk) when there is no partition,
+	 * which would dereference the NULL pointer.
+	 */
+	if (vbd->bdev->bd_disk == NULL) {
+		DPRINTK("vbd_creat: device %08x doesn't exist.\n",
+			vbd->pdevice);
+		vbd_free(vbd);
+		return -ENOENT;
+	}
+
+	vbd->size = vbd_size(vbd);
+
+	if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
+		vbd->type |= VDISK_CDROM;
+	if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+		vbd->type |= VDISK_REMOVABLE;
+
+	DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+		handle, blkif->domid);
+	return 0;
+}
|
|
+
|
|
+/* Drop the block device held by @vbd, if any, and clear the pointer. */
+void vbd_free(struct vbd *vbd)
+{
+	struct block_device *held = vbd->bdev;
+
+	if (held != NULL)
+		blkdev_put(held);
+
+	vbd->bdev = NULL;
+}
|
|
+
|
|
+/*
+ * Validate request extent @req against @blkif's vbd and fill in the
+ * physical device fields.
+ *
+ * @operation anything other than READ is refused on a read-only vbd.
+ *
+ * Returns 0 and sets req->dev / req->bdev on success, -EACCES when the
+ * access is not permitted or the extent does not fit inside the device.
+ */
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
+{
+	struct vbd *vbd = &blkif->vbd;
+	blkif_sector_t end;
+
+	if ((operation != READ) && vbd->readonly)
+		return -EACCES;
+
+	/*
+	 * sector_number is supplied by the frontend, so the sum may wrap
+	 * around and appear to lie inside the device.  Reject a wrapped end
+	 * before comparing against the size.  (Relies on blkif_sector_t
+	 * being an unsigned type, as in Xen's public blkif.h -- confirm.)
+	 */
+	end = req->sector_number + req->nr_sects;
+	if (unlikely(end < req->sector_number))
+		return -EACCES;
+
+	if (unlikely(end > vbd_sz(vbd)))
+		return -EACCES;
+
+	req->dev  = vbd->pdevice;
+	req->bdev = vbd->bdev;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Publish a changed backing-device size to the frontend.
+ *
+ * The new size is cached in vbd->size and written to the "sectors" node.
+ * The current state is rewritten in the same transaction so the
+ * frontend's watch fires and it picks the new size up online.  The
+ * transaction is retried for as long as xenstore answers -EAGAIN.
+ */
+void vbd_resize(blkif_t *blkif)
+{
+	struct vbd *vbd = &blkif->vbd;
+	struct xenbus_transaction xbt;
+	int err;
+	struct xenbus_device *dev = blkif->be->dev;
+	unsigned long long new_size = vbd_size(vbd);
+
+	printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size);
+	vbd->size = new_size;
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		printk(KERN_WARNING "Error starting transaction\n");
+		return;
+	}
+	err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu",
+			    vbd_size(vbd));
+	if (err) {
+		printk(KERN_WARNING "Error writing new size\n");
+		goto abort;
+	}
+	/*
+	 * Write the current state; we will use this to synchronize
+	 * the front-end. If the current state is "connected" the
+	 * front-end will get the new size information online.
+	 */
+	err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state);
+	if (err) {
+		printk(KERN_WARNING "Error writing the state\n");
+		goto abort;
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+	if (err)
+		printk(KERN_WARNING "Error ending transaction\n");
+
+	/*
+	 * The transaction has been ended above, successfully or not; it
+	 * must not be ended a second time via the abort path.
+	 */
+	return;
+abort:
+	xenbus_transaction_end(xbt, 1);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkback/xenbus.c 2010-11-25 09:36:37.000000000 +0100
|
|
@@ -0,0 +1,557 @@
|
|
+/* Xenbus code for blkif backend
|
|
+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
|
|
+ Copyright (C) 2005 XenSource Ltd
|
|
+
|
|
+ This program is free software; you can redistribute it and/or modify
|
|
+ it under the terms of the GNU General Public License as published by
|
|
+ the Free Software Foundation; either version 2 of the License, or
|
|
+ (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ GNU General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
+*/
|
|
+
|
|
+#include <stdarg.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kthread.h>
|
|
+#include "common.h"
|
|
+
|
|
+#undef DPRINTK /* drop any earlier DPRINTK (presumably from common.h) in favour of the one below */
|
|
+#define DPRINTK(fmt, args...) /* pr_debug with a function:line prefix; appends a period and newline itself */ \
|
|
+ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \
|
|
+ __FUNCTION__, __LINE__, ##args)
|
|
+
|
|
+static DEFINE_RWLOCK(sysfs_read_lock);
|
|
+
|
|
+static void connect(struct backend_info *);
|
|
+static int connect_ring(struct backend_info *);
|
|
+static void backend_changed(struct xenbus_watch *, const char **,
|
|
+ unsigned int);
|
|
+
|
|
+/*
+ * Build the worker-thread name "blkback.<domid>.<device>" into @buf,
+ * which must provide at least TASK_COMM_LEN bytes.
+ *
+ * <device> is the backend's "dev" node with everything up to and
+ * including "/dev/" stripped when that prefix is present.  Returns 0, or
+ * the error from reading the node.
+ */
+static int blkback_name(blkif_t *blkif, char *buf)
+{
+	struct xenbus_device *dev = blkif->be->dev;
+	char *path, *leaf;
+
+	path = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	leaf = strstr(path, "/dev/");
+	if (leaf != NULL)
+		leaf += strlen("/dev/");
+	else
+		leaf = path;
+
+	snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, leaf);
+
+	/* leaf points into path, so release it only after formatting. */
+	kfree(path);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Move the backend to Connected once both halves are in place (ring/irq
+ * mapped and block device opened), then flush and invalidate the
+ * device's page cache and start the request-processing thread.
+ */
+static void update_blkif_status(blkif_t *blkif)
+{
+	struct xenbus_device *xdev;
+	char thread_name[TASK_COMM_LEN];
+	int rc;
+
+	/* Not ready to connect? */
+	if (!blkif->irq || !blkif->vbd.bdev)
+		return;
+
+	xdev = blkif->be->dev;
+
+	/* Already connected? */
+	if (xdev->state == XenbusStateConnected)
+		return;
+
+	/* Attempt to connect: exit if we fail to. */
+	connect(blkif->be);
+	if (xdev->state != XenbusStateConnected)
+		return;
+
+	rc = blkback_name(blkif, thread_name);
+	if (rc) {
+		xenbus_dev_error(xdev, rc, "get blkback dev name");
+		return;
+	}
+
+	rc = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
+	if (rc) {
+		xenbus_dev_error(xdev, rc, "block flush");
+		return;
+	}
+	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
+
+	blkif->xenblkd = kthread_run(blkif_schedule, blkif, thread_name);
+	if (!IS_ERR(blkif->xenblkd))
+		return;
+
+	/* Thread creation failed: report it and leave no stale pointer. */
+	rc = PTR_ERR(blkif->xenblkd);
+	blkif->xenblkd = NULL;
+	xenbus_dev_error(xdev, rc, "start xenblkd");
+}
|
|
+
|
|
+
|
|
+/****************************************************************
|
|
+ * sysfs interface for VBD I/O requests
|
|
+ */
|
|
+
|
|
+#define VBD_SHOW(name, format, args...) /* defines show_<name>() plus a read-only dev_attr_<name> */ \
|
|
+ static ssize_t show_##name(struct device *_dev, \
|
|
+ struct device_attribute *attr, \
|
|
+ char *buf) \
|
|
+ { \
|
|
+ ssize_t ret = -ENODEV; /* returned when the device or its backend_info is gone */ \
|
|
+ struct xenbus_device *dev; \
|
|
+ struct backend_info *be; /* the args expressions refer to this local */ \
|
|
+ \
|
|
+ if (!get_device(_dev)) \
|
|
+ return ret; \
|
|
+ dev = to_xenbus_device(_dev); \
|
|
+ read_lock(&sysfs_read_lock); /* blkback_remove() takes this lock for writing while freeing be */ \
|
|
+ if ((be = dev->dev.driver_data) != NULL) \
|
|
+ ret = sprintf(buf, format, ##args); \
|
|
+ read_unlock(&sysfs_read_lock); \
|
|
+ put_device(_dev); \
|
|
+ return ret; \
|
|
+ } \
|
|
+ static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
|
|
+
|
|
+VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); /* each line below exposes one blkif st_* counter */
|
|
+VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req);
|
|
+VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req);
|
|
+VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req);
|
|
+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
|
|
+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
|
|
+
|
|
+static struct attribute *vbdstat_attrs[] = { /* NULL-terminated list of the counter attributes above */
|
|
+ &dev_attr_oo_req.attr,
|
|
+ &dev_attr_rd_req.attr,
|
|
+ &dev_attr_wr_req.attr,
|
|
+ &dev_attr_br_req.attr,
|
|
+ &dev_attr_rd_sect.attr,
|
|
+ &dev_attr_wr_sect.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static struct attribute_group vbdstat_group = { /* registered by xenvbd_sysfs_addif() */
|
|
+ .name = "statistics", /* named group, so the counters sit in a "statistics" subdirectory */
|
|
+ .attrs = vbdstat_attrs,
|
|
+};
|
|
+
|
|
+VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); /* hex major:minor, same format backend_changed() parses */
|
|
+VBD_SHOW(mode, "%s\n", be->mode); /* the string read from the "mode" node */
|
|
+
|
|
+/*
+ * Create the sysfs entries of a backend device: the "physical_device"
+ * and "mode" files and the "statistics" attribute group.
+ *
+ * Returns 0 on success.  On failure only the entries that were actually
+ * created are removed again and the error is returned; an entry whose
+ * creation failed is not passed to the remove functions.
+ */
+int xenvbd_sysfs_addif(struct xenbus_device *dev)
+{
+	int error;
+
+	error = device_create_file(&dev->dev, &dev_attr_physical_device);
+	if (error)
+		return error;
+
+	error = device_create_file(&dev->dev, &dev_attr_mode);
+	if (error)
+		goto fail_mode;
+
+	error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group);
+	if (error)
+		goto fail_group;
+
+	return 0;
+
+fail_group:
+	/* The group was not created; undo the "mode" file only. */
+	device_remove_file(&dev->dev, &dev_attr_mode);
+fail_mode:
+	device_remove_file(&dev->dev, &dev_attr_physical_device);
+	return error;
+}
|
|
+
|
|
+/*
+ * Remove everything xenvbd_sysfs_addif() created, in reverse order of
+ * creation.
+ */
+void xenvbd_sysfs_delif(struct xenbus_device *dev)
+{
+	struct device *d = &dev->dev;
+
+	sysfs_remove_group(&d->kobj, &vbdstat_group);
+	device_remove_file(d, &dev_attr_mode);
+	device_remove_file(d, &dev_attr_physical_device);
+}
|
|
+
|
|
+/*
+ * Release everything attached to a backend device: sysfs entries, the
+ * "physical-device" watch, the block interface with its vbd, the mode
+ * string and finally the backend_info itself.  Also used by
+ * blkback_probe() to unwind a partially set up device.  Always returns 0.
+ *
+ * sysfs_read_lock is held for writing so the sysfs show handlers never
+ * see a backend_info that is being freed.
+ * NOTE(review): several calls made under this lock look like they can
+ * sleep (watch unregistration, blkif_disconnect()) -- confirm whether
+ * that is safe for this lock type.
+ */
+static int blkback_remove(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev->dev.driver_data;
+
+	DPRINTK("");
+
+	write_lock(&sysfs_read_lock);
+	if (be->major || be->minor)
+		xenvbd_sysfs_delif(dev);
+
+	if (be->backend_watch.node) {
+		unregister_xenbus_watch(&be->backend_watch);
+		kfree(be->backend_watch.node);
+		be->backend_watch.node = NULL;
+	}
+
+	if (be->blkif) {
+		blkif_disconnect(be->blkif);
+		vbd_free(&be->blkif->vbd);
+		blkif_free(be->blkif);
+		be->blkif = NULL;
+	}
+
+	/*
+	 * be->mode is an allocated string from xenbus_read() (or NULL);
+	 * release it here or it is lost together with be.
+	 */
+	kfree(be->mode);
+	be->mode = NULL;
+
+	kfree(be);
+	dev->dev.driver_data = NULL;
+	write_unlock(&sysfs_read_lock);
+	return 0;
+}
|
|
+
|
|
+/*
+ * Write @state to the backend's "feature-barrier" node inside
+ * transaction @xbt.  A write failure is reported as fatal on the device
+ * and returned to the caller; 0 means success.
+ */
+int blkback_barrier(struct xenbus_transaction xbt,
+		    struct backend_info *be, int state)
+{
+	struct xenbus_device *xdev = be->dev;
+	int rc = xenbus_printf(xbt, xdev->nodename, "feature-barrier",
+			       "%d", state);
+
+	if (rc)
+		xenbus_dev_fatal(xdev, rc, "writing feature-barrier");
+
+	return rc;
+}
|
|
+
|
|
+/**
+ * Entry point when a new backend device appears.  Allocates the
+ * backend_info and its block interface, registers a watch on the
+ * "physical-device" node (which the hotplug scripts fill in with the
+ * device's major and minor numbers) and switches to InitWait.
+ *
+ * Returns 0 on success.  On any failure after the backend_info has been
+ * attached, everything is unwound through blkback_remove() and the
+ * error is returned.
+ */
+static int blkback_probe(struct xenbus_device *dev,
+			 const struct xenbus_device_id *id)
+{
+	struct backend_info *be;
+	int err;
+
+	be = kzalloc(sizeof(struct backend_info), GFP_KERNEL);
+	if (be == NULL) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "allocating backend structure");
+		return -ENOMEM;
+	}
+
+	/* Link the device and its backend state in both directions. */
+	be->dev = dev;
+	dev->dev.driver_data = be;
+
+	be->blkif = blkif_alloc(dev->otherend_id);
+	if (IS_ERR(be->blkif)) {
+		err = PTR_ERR(be->blkif);
+		/* blkback_remove() must not see the error pointer. */
+		be->blkif = NULL;
+		xenbus_dev_fatal(dev, err, "creating block interface");
+		goto fail;
+	}
+
+	/* setup back pointer */
+	be->blkif->be = be;
+
+	err = xenbus_watch_path2(dev, dev->nodename, "physical-device",
+				 &be->backend_watch, backend_changed);
+	if (err == 0)
+		err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err == 0)
+		return 0;
+
+fail:
+	DPRINTK("failed");
+	blkback_remove(dev);
+	return err;
+}
|
|
+
|
|
+
|
|
+/**
+ * Callback received when the hotplug scripts have placed the
+ * physical-device node.  Read it and the mode node, and create a vbd.
+ * If the frontend is ready, connect.
+ *
+ * Once a device has been configured, later firings only check that the
+ * physical device is unchanged; nothing is re-read, so be->mode is
+ * allocated at most once per configuration.  The mode string is released
+ * again whenever vbd or sysfs creation fails.
+ */
+static void backend_changed(struct xenbus_watch *watch,
+			    const char **vec, unsigned int len)
+{
+	int err;
+	unsigned major;
+	unsigned minor;
+	struct backend_info *be
+		= container_of(watch, struct backend_info, backend_watch);
+	struct xenbus_device *dev = be->dev;
+	int cdrom = 0;
+	char *device_type;
+	char *p;
+	unsigned long handle;
+
+	DPRINTK("");
+
+	err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
+			   &major, &minor);
+	if (XENBUS_EXIST_ERR(err)) {
+		/* Since this watch will fire once immediately after it is
+		   registered, we expect this.  Ignore it, and wait for the
+		   hotplug scripts. */
+		return;
+	}
+	if (err != 2) {
+		xenbus_dev_fatal(dev, err, "reading physical-device");
+		return;
+	}
+
+	if (be->major || be->minor) {
+		/*
+		 * Already configured.  Return before re-reading "mode":
+		 * overwriting be->mode here would leak the string stored by
+		 * the first firing.
+		 */
+		if ((be->major != major) || (be->minor != minor))
+			printk(KERN_WARNING
+			       "blkback: changing physical device (from %x:%x to "
+			       "%x:%x) not supported.\n", be->major, be->minor,
+			       major, minor);
+		return;
+	}
+
+	be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
+	if (IS_ERR(be->mode)) {
+		err = PTR_ERR(be->mode);
+		be->mode = NULL;
+		xenbus_dev_fatal(dev, err, "reading mode");
+		return;
+	}
+
+	device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
+	if (!IS_ERR(device_type)) {
+		cdrom = strcmp(device_type, "cdrom") == 0;
+		kfree(device_type);
+	}
+
+	/*
+	 * Front end dir is a number, which is used as the handle.  Fall
+	 * back to the whole path if it unexpectedly contains no '/'.
+	 */
+	p = strrchr(dev->otherend, '/');
+	p = p ? p + 1 : dev->otherend;
+	handle = simple_strtoul(p, NULL, 0);
+
+	be->major = major;
+	be->minor = minor;
+
+	err = vbd_create(be->blkif, handle, major, minor,
+			 (NULL == strchr(be->mode, 'w')), cdrom);
+	if (err) {
+		be->major = be->minor = 0;
+		kfree(be->mode);
+		be->mode = NULL;
+		xenbus_dev_fatal(dev, err, "creating vbd structure");
+		return;
+	}
+
+	err = xenvbd_sysfs_addif(dev);
+	if (err) {
+		vbd_free(&be->blkif->vbd);
+		be->major = be->minor = 0;
+		kfree(be->mode);
+		be->mode = NULL;
+		xenbus_dev_fatal(dev, err, "creating sysfs entries");
+		return;
+	}
+
+	/* We're potentially connected now */
+	update_blkif_status(be->blkif);
+}
|
|
+
|
|
+
|
|
+/**
+ * Callback received when the frontend's state changes; drives the
+ * backend's own state to follow it.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+			     enum xenbus_state frontend_state)
+{
+	struct backend_info *be = dev->dev.driver_data;
+
+	DPRINTK("%s", xenbus_strstate(frontend_state));
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:
+		/* Only a backend that was Closed needs resetting here. */
+		if (dev->state != XenbusStateClosed)
+			break;
+		printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+		       __FUNCTION__, dev->nodename);
+		xenbus_switch_state(dev, XenbusStateInitWait);
+		break;
+
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		/* Ensure we connect even when two watches fire in
+		   close succession and we miss the intermediate value
+		   of frontend_state. */
+		if (dev->state == XenbusStateConnected)
+			break;
+
+		/* Start from a disconnected interface so a repeated
+		 * connect cannot leak an earlier mapping;
+		 * blkif_disconnect() is idempotent.
+		 */
+		blkif_disconnect(be->blkif);
+
+		if (connect_ring(be) == 0)
+			update_blkif_status(be->blkif);
+		break;
+
+	case XenbusStateClosing:
+		blkif_disconnect(be->blkif);
+		xenbus_switch_state(dev, XenbusStateClosing);
+		break;
+
+	case XenbusStateClosed:
+		xenbus_switch_state(dev, XenbusStateClosed);
+		if (xenbus_dev_is_online(dev))
+			break;
+		/* fall through if not online */
+	case XenbusStateUnknown:
+		/* implies blkif_disconnect() via blkback_remove() */
+		device_unregister(&dev->dev);
+		break;
+
+	default:
+		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+				 frontend_state);
+		break;
+	}
+}
|
|
+
|
|
+
|
|
+/* ** Connection ** */
|
|
+
|
|
+
|
|
+/**
+ * Write the physical details regarding the block device to the store, and
+ * switch to Connected state.
+ *
+ * The nodes (feature-barrier, sectors, info, sector-size) are written in
+ * one transaction that is retried on -EAGAIN.  The state is switched to
+ * Connected only after that transaction has committed; every failure is
+ * reported through xenbus_dev_fatal() and leaves the state unchanged by
+ * this function.
+ */
+static void connect(struct backend_info *be)
+{
+	struct xenbus_transaction xbt;
+	int err;
+	struct xenbus_device *dev = be->dev;
+
+	DPRINTK("%s", dev->otherend);
+
+	/* Supply the information about the device the frontend needs */
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "starting transaction");
+		return;
+	}
+
+	err = blkback_barrier(xbt, be, 1);
+	if (err)
+		goto abort;
+
+	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
+			    vbd_size(&be->blkif->vbd));
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/sectors",
+				 dev->nodename);
+		goto abort;
+	}
+
+	/* FIXME: use a typename instead */
+	err = xenbus_printf(xbt, dev->nodename, "info", "%u",
+			    vbd_info(&be->blkif->vbd));
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/info",
+				 dev->nodename);
+		goto abort;
+	}
+	err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
+			    vbd_secsize(&be->blkif->vbd));
+	if (err) {
+		xenbus_dev_fatal(dev, err, "writing %s/sector-size",
+				 dev->nodename);
+		goto abort;
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+	if (err) {
+		/*
+		 * The device details were not committed; do not advertise
+		 * Connected on top of a fatal error.
+		 */
+		xenbus_dev_fatal(dev, err, "ending transaction");
+		return;
+	}
+
+	err = xenbus_switch_state(dev, XenbusStateConnected);
+	if (err)
+		xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
+				 dev->nodename);
+
+	return;
+
+abort:
+	xenbus_transaction_end(xbt, 1);
+}
|
|
+
|
|
+
|
|
+/*
+ * Read the frontend's ring reference, event channel and (optional) ABI
+ * protocol from the store and map the ring via blkif_map().
+ *
+ * A missing "protocol" node selects the native ABI.  Returns 0 on
+ * success, -EINVAL for an unrecognised protocol string, otherwise the
+ * error from reading the store or mapping the ring.  Every failure is
+ * also reported with xenbus_dev_fatal().
+ */
+static int connect_ring(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	unsigned long ring_ref;
+	unsigned int evtchn;
+	char protocol[64] = "";
+	int err;
+
+	DPRINTK("%s", dev->otherend);
+
+	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref,
+			    "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 dev->otherend);
+		return err;
+	}
+
+	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+			    "%63s", protocol, NULL);
+	if (err)
+		strcpy(protocol, "unspecified, assuming native");
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+	else {
+		/*
+		 * err is 0 on this path (the read succeeded), so report and
+		 * return a real errno rather than 0 / a bare -1.
+		 */
+		err = -EINVAL;
+		xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
+		return err;
+	}
+	/* ring_ref and evtchn are unsigned: print them as such. */
+	printk(KERN_INFO
+	       "blkback: ring-ref %lu, event-channel %u, protocol %d (%s)\n",
+	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+
+	/* Map the shared frame, irq etc. */
+	err = blkif_map(be->blkif, ring_ref, evtchn);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
+				 ring_ref, evtchn);
+		return err;
+	}
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* ** Driver Registration ** */
|
|
+
|
|
+
|
|
+static const struct xenbus_device_id blkback_ids[] = { /* device types this driver binds to */
|
|
+ { "vbd" },
|
|
+ { "" } /* empty entry ends the table */
|
|
+};
|
|
+
|
|
+
|
|
+static struct xenbus_driver blkback = { /* registered by blkif_xenbus_init() */
|
|
+ .name = "vbd",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = blkback_ids,
|
|
+ .probe = blkback_probe,
|
|
+ .remove = blkback_remove,
|
|
+ .otherend_changed = frontend_changed /* called on frontend state changes */
|
|
+};
|
|
+
|
|
+
|
|
+/*
+ * Register the blkback xenbus driver as a backend.
+ * NOTE(review): the registration result is discarded because the
+ * function returns void.
+ */
+void blkif_xenbus_init(void)
+{
+	struct xenbus_driver *drv = &blkback;
+
+	xenbus_register_backend(drv);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkfront/Makefile 2007-06-12 13:13:44.000000000 +0200
|
|
@@ -0,0 +1,5 @@
|
|
+
|
|
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) := xenblk.o
|
|
+
|
|
+xenblk-objs := blkfront.o vbd.o
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkfront/blkfront.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,967 @@
|
|
+/******************************************************************************
|
|
+ * blkfront.c
|
|
+ *
|
|
+ * XenLinux virtual block-device driver.
|
|
+ *
|
|
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
|
|
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
|
|
+ * Copyright (c) 2004, Christian Limpach
|
|
+ * Copyright (c) 2004, Andrew Warfield
|
|
+ * Copyright (c) 2005, Christopher Clark
|
|
+ * Copyright (c) 2005, XenSource Ltd
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include "block.h"
|
|
+#include <linux/cdrom.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/scatterlist.h>
|
|
+#include <scsi/scsi.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+#include <xen/interface/io/protocols.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <asm/maddr.h>
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+#define BLKIF_STATE_DISCONNECTED 0
|
|
+#define BLKIF_STATE_CONNECTED 1
|
|
+#define BLKIF_STATE_SUSPENDED 2
|
|
+
|
|
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
|
|
+ (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
|
|
+#define GRANT_INVALID_REF 0
|
|
+
|
|
+static void connect(struct blkfront_info *);
|
|
+static void blkfront_closing(struct blkfront_info *);
|
|
+static int blkfront_remove(struct xenbus_device *);
|
|
+static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
|
|
+static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
|
|
+
|
|
+static void kick_pending_request_queues(struct blkfront_info *);
|
|
+
|
|
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+static void blkif_restart_queue(void *arg);
|
|
+static void blkif_recover(struct blkfront_info *);
|
|
+static void blkif_completion(struct blk_shadow *);
|
|
+static void blkif_free(struct blkfront_info *, int);
|
|
+
|
|
+
|
|
+/**
|
|
+ * Entry point to this code when a new device is created. Allocate the basic
|
|
+ * structures and the ring buffer for communication with the backend, and
|
|
+ * inform the backend of the appropriate details for those. Switch to
|
|
+ * Initialised state.
|
|
+ */
|
|
+static int blkfront_probe(struct xenbus_device *dev,
|
|
+ const struct xenbus_device_id *id)
|
|
+{
|
|
+ int err, vdevice, i;
|
|
+ struct blkfront_info *info;
|
|
+
|
|
+ /* FIXME: Use dynamic device id if this is not set. */
|
|
+ err = xenbus_scanf(XBT_NIL, dev->nodename,
|
|
+ "virtual-device", "%i", &vdevice);
|
|
+ if (err != 1) {
|
|
+ /* go looking in the extended area instead */
|
|
+ err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext",
|
|
+ "%i", &vdevice);
|
|
+ if (err != 1) {
|
|
+ xenbus_dev_fatal(dev, err, "reading virtual-device");
|
|
+ return err;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
|
|
+ if (!info) {
|
|
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ info->xbdev = dev;
|
|
+ info->vdevice = vdevice;
|
|
+ info->connected = BLKIF_STATE_DISCONNECTED;
|
|
+ INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
|
|
+
|
|
+ for (i = 0; i < BLK_RING_SIZE; i++)
|
|
+ info->shadow[i].req.id = i+1;
|
|
+ info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
|
|
+
|
|
+ /* Front end dir is a number, which is used as the id. */
|
|
+ info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
|
|
+ dev->dev.driver_data = info;
|
|
+
|
|
+ err = talk_to_backend(dev, info);
|
|
+ if (err) {
|
|
+ kfree(info);
|
|
+ dev->dev.driver_data = NULL;
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
|
|
+ * driver restart. We tear down our blkif structure and recreate it, but
|
|
+ * leave the device-layer structures intact so that this is transparent to the
|
|
+ * rest of the kernel.
|
|
+ */
|
|
+static int blkfront_resume(struct xenbus_device *dev)
|
|
+{
|
|
+ struct blkfront_info *info = dev->dev.driver_data;
|
|
+ int err;
|
|
+
|
|
+ DPRINTK("blkfront_resume: %s\n", dev->nodename);
|
|
+
|
|
+ blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
|
|
+
|
|
+ err = talk_to_backend(dev, info);
|
|
+ if (info->connected == BLKIF_STATE_SUSPENDED && !err)
|
|
+ blkif_recover(info);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/* Common code used when first setting up, and when resuming. */
|
|
+static int talk_to_backend(struct xenbus_device *dev,
|
|
+ struct blkfront_info *info)
|
|
+{
|
|
+ const char *message = NULL;
|
|
+ struct xenbus_transaction xbt;
|
|
+ int err;
|
|
+
|
|
+ /* Create shared ring, alloc event channel. */
|
|
+ err = setup_blkring(dev, info);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+again:
|
|
+ err = xenbus_transaction_start(&xbt);
|
|
+ if (err) {
|
|
+ xenbus_dev_fatal(dev, err, "starting transaction");
|
|
+ goto destroy_blkring;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename,
|
|
+ "ring-ref","%u", info->ring_ref);
|
|
+ if (err) {
|
|
+ message = "writing ring-ref";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
|
|
+ irq_to_evtchn_port(info->irq));
|
|
+ if (err) {
|
|
+ message = "writing event-channel";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+ err = xenbus_printf(xbt, dev->nodename, "protocol", "%s",
|
|
+ XEN_IO_PROTO_ABI_NATIVE);
|
|
+ if (err) {
|
|
+ message = "writing protocol";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_transaction_end(xbt, 0);
|
|
+ if (err) {
|
|
+ if (err == -EAGAIN)
|
|
+ goto again;
|
|
+ xenbus_dev_fatal(dev, err, "completing transaction");
|
|
+ goto destroy_blkring;
|
|
+ }
|
|
+
|
|
+ xenbus_switch_state(dev, XenbusStateInitialised);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+ abort_transaction:
|
|
+ xenbus_transaction_end(xbt, 1);
|
|
+ if (message)
|
|
+ xenbus_dev_fatal(dev, err, "%s", message);
|
|
+ destroy_blkring:
|
|
+ blkif_free(info, 0);
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+static int setup_blkring(struct xenbus_device *dev,
|
|
+ struct blkfront_info *info)
|
|
+{
|
|
+ blkif_sring_t *sring;
|
|
+ int err;
|
|
+
|
|
+ info->ring_ref = GRANT_INVALID_REF;
|
|
+
|
|
+ sring = (blkif_sring_t *)__get_free_page(GFP_NOIO | __GFP_HIGH);
|
|
+ if (!sring) {
|
|
+ xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+ SHARED_RING_INIT(sring);
|
|
+ FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
|
|
+
|
|
+ memset(info->sg, 0, sizeof(info->sg));
|
|
+
|
|
+ err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
|
|
+ if (err < 0) {
|
|
+ free_page((unsigned long)sring);
|
|
+ info->ring.sring = NULL;
|
|
+ goto fail;
|
|
+ }
|
|
+ info->ring_ref = err;
|
|
+
|
|
+ err = bind_listening_port_to_irqhandler(
|
|
+ dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
|
|
+ if (err <= 0) {
|
|
+ xenbus_dev_fatal(dev, err,
|
|
+ "bind_listening_port_to_irqhandler");
|
|
+ goto fail;
|
|
+ }
|
|
+ info->irq = err;
|
|
+
|
|
+ return 0;
|
|
+fail:
|
|
+ blkif_free(info, 0);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * Callback received when the backend's state changes.
|
|
+ */
|
|
+static void backend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state backend_state)
|
|
+{
|
|
+ struct blkfront_info *info = dev->dev.driver_data;
|
|
+ struct block_device *bd;
|
|
+
|
|
+ DPRINTK("blkfront:backend_changed.\n");
|
|
+
|
|
+ switch (backend_state) {
|
|
+ case XenbusStateInitialising:
|
|
+ case XenbusStateInitWait:
|
|
+ case XenbusStateInitialised:
|
|
+ case XenbusStateReconfiguring:
|
|
+ case XenbusStateReconfigured:
|
|
+ case XenbusStateUnknown:
|
|
+ case XenbusStateClosed:
|
|
+ break;
|
|
+
|
|
+ case XenbusStateConnected:
|
|
+ connect(info);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ bd = bdget(info->dev);
|
|
+ if (bd == NULL) {
|
|
+ xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
|
|
+ break;
|
|
+ }
|
|
+
|
|
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
|
|
+ down(&bd->bd_sem);
|
|
+#else
|
|
+ mutex_lock(&bd->bd_mutex);
|
|
+#endif
|
|
+ if (info->users > 0)
|
|
+ xenbus_dev_error(dev, -EBUSY,
|
|
+ "Device in use; refusing to close");
|
|
+ else
|
|
+ blkfront_closing(info);
|
|
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
|
|
+ up(&bd->bd_sem);
|
|
+#else
|
|
+ mutex_unlock(&bd->bd_mutex);
|
|
+#endif
|
|
+ bdput(bd);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* ** Connection ** */
|
|
+
|
|
+
|
|
+/*
|
|
+ * Invoked from backend_changed() once the backend reports Connected and has
|
|
+ * published the device details (sectors, info flags, sector-size).
|
|
+ */
|
|
+static void connect(struct blkfront_info *info)
|
|
+{
|
|
+	unsigned long long sectors;
|
|
+	unsigned long sector_size;
|
|
+	unsigned int binfo;
|
|
+	int err;
|
|
+
|
|
+	switch (info->connected) {
|
|
+	case BLKIF_STATE_CONNECTED:
|
|
+		/*
|
|
+		 * Already connected: the back-end may be signalling a
|
|
+		 * capacity change, so re-read "sectors" and apply it.
|
|
+		 */
|
|
+		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
|
|
+				   "sectors", "%Lu", &sectors);
|
|
+		if (err != 1)	/* any failure leaves 'sectors' unset */
|
|
+			return;
|
|
+		printk(KERN_INFO "Setting capacity to %Lu\n",
|
|
+		       sectors);
|
|
+		set_capacity(info->gd, sectors);
|
|
+
|
|
+		/* fall through */
|
|
+	case BLKIF_STATE_SUSPENDED:
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
|
|
+
|
|
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
|
+			    "sectors", "%Lu", &sectors,
|
|
+			    "info", "%u", &binfo,
|
|
+			    "sector-size", "%lu", &sector_size,
|
|
+			    NULL);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(info->xbdev, err,
|
|
+				 "reading backend fields at %s",
|
|
+				 info->xbdev->otherend);
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
|
|
+			    "feature-barrier", "%d", &info->feature_barrier,
|
|
+			    NULL);	/* the field is an int, hence "%d" */
|
|
+	if (err)	/* key could not be read: record "no barrier" */
|
|
+		info->feature_barrier = 0;
|
|
+
|
|
+	err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
|
|
+				 info->xbdev->otherend);
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	err = xlvbd_sysfs_addif(info);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(info->xbdev, err, "xlvbd_sysfs_addif at %s",
|
|
+				 info->xbdev->otherend);
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
|
|
+
|
|
+	/* Mark the device connected and kick pending requests. */
|
|
+	spin_lock_irq(&blkif_io_lock);
|
|
+	info->connected = BLKIF_STATE_CONNECTED;
|
|
+	kick_pending_request_queues(info);
|
|
+	spin_unlock_irq(&blkif_io_lock);
|
|
+
|
|
+	add_disk(info->gd);
|
|
+
|
|
+	info->is_ready = 1;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * Handle the change of state of the backend to Closing. We must delete our
|
|
+ * device-layer structures now, to ensure that writes are flushed through to
|
|
+ * the backend. Once this is done, we can switch to Closed in
|
|
+ * acknowledgement.
|
|
+ */
|
|
+static void blkfront_closing(struct blkfront_info *info)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ DPRINTK("blkfront_closing: %d removed\n", info->vdevice);
|
|
+
|
|
+ if (info->rq == NULL)
|
|
+ goto out;
|
|
+
|
|
+ spin_lock_irqsave(&blkif_io_lock, flags);
|
|
+ /* No more blkif_request(). */
|
|
+ blk_stop_queue(info->rq);
|
|
+ /* No more gnttab callback work. */
|
|
+ gnttab_cancel_free_callback(&info->callback);
|
|
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
|
|
+
|
|
+ /* Flush gnttab callback work. Must be done with no locks held. */
|
|
+ flush_scheduled_work();
|
|
+
|
|
+ xlvbd_sysfs_delif(info);
|
|
+
|
|
+ xlvbd_del(info);
|
|
+
|
|
+ out:
|
|
+ if (info->xbdev)
|
|
+ xenbus_frontend_closed(info->xbdev);
|
|
+}
|
|
+
|
|
+
|
|
+static int blkfront_remove(struct xenbus_device *dev)
|
|
+{
|
|
+ struct blkfront_info *info = dev->dev.driver_data;
|
|
+
|
|
+ DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
|
|
+
|
|
+ blkif_free(info, 0);
|
|
+
|
|
+ if(info->users == 0)
|
|
+ kfree(info);
|
|
+ else
|
|
+ info->xbdev = NULL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static inline int GET_ID_FROM_FREELIST(
|
|
+ struct blkfront_info *info)
|
|
+{
|
|
+ unsigned long free = info->shadow_free;
|
|
+ BUG_ON(free >= BLK_RING_SIZE);
|
|
+ info->shadow_free = info->shadow[free].req.id;
|
|
+ info->shadow[free].req.id = 0x0fffffee; /* debug */
|
|
+ return free;
|
|
+}
|
|
+
|
|
+static inline void ADD_ID_TO_FREELIST(
|
|
+ struct blkfront_info *info, unsigned long id)
|
|
+{
|
|
+ info->shadow[id].req.id = info->shadow_free;
|
|
+ info->shadow[id].request = 0;
|
|
+ info->shadow_free = id;
|
|
+}
|
|
+
|
|
+static inline void flush_requests(struct blkfront_info *info)
|
|
+{
|
|
+ int notify;
|
|
+
|
|
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
|
|
+
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(info->irq);
|
|
+}
|
|
+
|
|
+static void kick_pending_request_queues(struct blkfront_info *info)
|
|
+{
|
|
+ if (!RING_FULL(&info->ring)) {
|
|
+ /* Re-enable calldowns. */
|
|
+ blk_start_queue(info->rq);
|
|
+ /* Kick things off immediately. */
|
|
+ do_blkif_request(info->rq);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void blkif_restart_queue(void *arg)
|
|
+{
|
|
+ struct blkfront_info *info = (struct blkfront_info *)arg;
|
|
+ spin_lock_irq(&blkif_io_lock);
|
|
+ if (info->connected == BLKIF_STATE_CONNECTED)
|
|
+ kick_pending_request_queues(info);
|
|
+ spin_unlock_irq(&blkif_io_lock);
|
|
+}
|
|
+
|
|
+static void blkif_restart_queue_callback(void *arg)
|
|
+{
|
|
+ struct blkfront_info *info = (struct blkfront_info *)arg;
|
|
+ schedule_work(&info->work);
|
|
+}
|
|
+
|
|
+int blkif_open(struct inode *inode, struct file *filep)
|
|
+{
|
|
+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
|
|
+
|
|
+ if (!info->xbdev)
|
|
+ return -ENODEV;
|
|
+ info->users++;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int blkif_release(struct inode *inode, struct file *filep)
|
|
+{
|
|
+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
|
|
+ info->users--;
|
|
+ if (info->users == 0) {
|
|
+ /* Check whether we have been instructed to close. We will
|
|
+ have ignored this request initially, as the device was
|
|
+ still mounted. */
|
|
+ struct xenbus_device * dev = info->xbdev;
|
|
+
|
|
+ if (!dev) {
|
|
+ blkfront_closing(info);
|
|
+ kfree(info);
|
|
+ } else if (xenbus_read_driver_state(dev->otherend)
|
|
+ == XenbusStateClosing && info->is_ready)
|
|
+ blkfront_closing(info);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int blkif_ioctl(struct inode *inode, struct file *filep,
|
|
+ unsigned command, unsigned long argument)
|
|
+{
|
|
+ struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
|
|
+ int i;
|
|
+
|
|
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
|
|
+ command, (long)argument, inode->i_rdev);
|
|
+
|
|
+ switch (command) {
|
|
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
|
|
+ case HDIO_GETGEO: {
|
|
+ struct block_device *bd = inode->i_bdev;
|
|
+ struct hd_geometry geo;
|
|
+ int ret;
|
|
+
|
|
+ if (!argument)
|
|
+ return -EINVAL;
|
|
+
|
|
+ geo.start = get_start_sect(bd);
|
|
+ ret = blkif_getgeo(bd, &geo);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if (copy_to_user((struct hd_geometry __user *)argument, &geo,
|
|
+ sizeof(geo)))
|
|
+ return -EFAULT;
|
|
+
|
|
+ return 0;
|
|
+ }
|
|
+#endif
|
|
+ case CDROMMULTISESSION:
|
|
+ DPRINTK("FIXME: support multisession CDs later\n");
|
|
+ for (i = 0; i < sizeof(struct cdrom_multisession); i++)
|
|
+ if (put_user(0, (char __user *)(argument + i)))
|
|
+ return -EFAULT;
|
|
+ return 0;
|
|
+
|
|
+ case CDROM_GET_CAPABILITY: {
|
|
+ struct gendisk *gd = info->gd;
|
|
+ if (gd->flags & GENHD_FL_CD)
|
|
+ return 0;
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ default:
|
|
+ if (info->mi && info->gd) {
|
|
+ switch (info->mi->major) {
|
|
+ case SCSI_DISK0_MAJOR:
|
|
+ case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
|
|
+ case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
|
|
+ case SCSI_CDROM_MAJOR:
|
|
+ return scsi_cmd_ioctl(filep, info->gd, command,
|
|
+ (void __user *)argument);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
|
|
+ command);*/
|
|
+ return -EINVAL; /* same return as native Linux */
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
|
|
+{
|
|
+ /* We don't have real geometry info, but let's at least return
|
|
+ values consistent with the size of the device */
|
|
+ sector_t nsect = get_capacity(bd->bd_disk);
|
|
+ sector_t cylinders = nsect;
|
|
+
|
|
+ hg->heads = 0xff;
|
|
+ hg->sectors = 0x3f;
|
|
+ sector_div(cylinders, hg->heads * hg->sectors);
|
|
+ hg->cylinders = cylinders;
|
|
+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
|
|
+ hg->cylinders = 0xffff;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * blkif_queue_request
|
|
+ *
|
|
+ * request block io
|
|
+ *
|
|
+ * id: for guest use only.
|
|
+ * operation: BLKIF_OP_{READ,WRITE,WRITE_BARRIER}
|
|
+ * buffer: buffer to read/write into. this should be a
|
|
+ * virtual address in the guest os.
|
|
+ */
|
|
+static int blkif_queue_request(struct request *req)
|
|
+{
|
|
+ struct blkfront_info *info = req->rq_disk->private_data;
|
|
+ unsigned long buffer_mfn;
|
|
+ blkif_request_t *ring_req;
|
|
+ unsigned long id;
|
|
+ unsigned int fsect, lsect;
|
|
+ int i, ref;
|
|
+ grant_ref_t gref_head;
|
|
+ struct scatterlist *sg;
|
|
+
|
|
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
|
|
+ return 1;
|
|
+
|
|
+ if (gnttab_alloc_grant_references(
|
|
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
|
|
+ gnttab_request_free_callback(
|
|
+ &info->callback,
|
|
+ blkif_restart_queue_callback,
|
|
+ info,
|
|
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ /* Fill out a communications ring structure. */
|
|
+ ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
|
|
+ id = GET_ID_FROM_FREELIST(info);
|
|
+ info->shadow[id].request = (unsigned long)req;
|
|
+
|
|
+ ring_req->id = id;
|
|
+ ring_req->sector_number = (blkif_sector_t)req->sector;
|
|
+ ring_req->handle = info->handle;
|
|
+
|
|
+ ring_req->operation = rq_data_dir(req) ?
|
|
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
|
|
+ if (blk_barrier_rq(req))
|
|
+ ring_req->operation = BLKIF_OP_WRITE_BARRIER;
|
|
+
|
|
+ ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
|
|
+ BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+ for (i = 0; i < ring_req->nr_segments; ++i) {
|
|
+ sg = info->sg + i;
|
|
+ buffer_mfn = page_to_phys(sg->page) >> PAGE_SHIFT;
|
|
+ fsect = sg->offset >> 9;
|
|
+ lsect = fsect + (sg->length >> 9) - 1;
|
|
+ /* install a grant reference. */
|
|
+ ref = gnttab_claim_grant_reference(&gref_head);
|
|
+ BUG_ON(ref == -ENOSPC);
|
|
+
|
|
+ gnttab_grant_foreign_access_ref(
|
|
+ ref,
|
|
+ info->xbdev->otherend_id,
|
|
+ buffer_mfn,
|
|
+ rq_data_dir(req) ? GTF_readonly : 0 );
|
|
+
|
|
+ info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
|
|
+ ring_req->seg[i] =
|
|
+ (struct blkif_request_segment) {
|
|
+ .gref = ref,
|
|
+ .first_sect = fsect,
|
|
+ .last_sect = lsect };
|
|
+ }
|
|
+
|
|
+ info->ring.req_prod_pvt++;
|
|
+
|
|
+ /* Keep a private copy so we can reissue requests when recovering. */
|
|
+ info->shadow[id].req = *ring_req;
|
|
+
|
|
+ gnttab_free_grant_references(gref_head);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * do_blkif_request
|
|
+ * read a block; request is in a request queue
|
|
+ */
|
|
+void do_blkif_request(request_queue_t *rq)
|
|
+{
|
|
+ struct blkfront_info *info = NULL;
|
|
+ struct request *req;
|
|
+ int queued;
|
|
+
|
|
+ DPRINTK("Entered do_blkif_request\n");
|
|
+
|
|
+ queued = 0;
|
|
+
|
|
+ while ((req = elv_next_request(rq)) != NULL) {
|
|
+ info = req->rq_disk->private_data;
|
|
+ if (!blk_fs_request(req)) {
|
|
+ end_request(req, 0);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (RING_FULL(&info->ring))
|
|
+ goto wait;
|
|
+
|
|
+ DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
|
|
+ "(%u/%li) buffer:%p [%s]\n",
|
|
+ req, req->cmd, (long long)req->sector,
|
|
+ req->current_nr_sectors,
|
|
+ req->nr_sectors, req->buffer,
|
|
+ rq_data_dir(req) ? "write" : "read");
|
|
+
|
|
+
|
|
+ blkdev_dequeue_request(req);
|
|
+ if (blkif_queue_request(req)) {
|
|
+ blk_requeue_request(rq, req);
|
|
+ wait:
|
|
+ /* Avoid pointless unplugs. */
|
|
+ blk_stop_queue(rq);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ queued++;
|
|
+ }
|
|
+
|
|
+ if (queued != 0)
|
|
+ flush_requests(info);
|
|
+}
|
|
+
|
|
+
|
|
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+{
|
|
+ struct request *req;
|
|
+ blkif_response_t *bret;
|
|
+ RING_IDX i, rp;
|
|
+ unsigned long flags;
|
|
+ struct blkfront_info *info = (struct blkfront_info *)dev_id;
|
|
+ int uptodate;
|
|
+
|
|
+ spin_lock_irqsave(&blkif_io_lock, flags);
|
|
+
|
|
+ if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
|
|
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
|
|
+ return IRQ_HANDLED;
|
|
+ }
|
|
+
|
|
+ again:
|
|
+ rp = info->ring.sring->rsp_prod;
|
|
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
|
|
+
|
|
+ for (i = info->ring.rsp_cons; i != rp; i++) {
|
|
+ unsigned long id;
|
|
+ int ret;
|
|
+
|
|
+ bret = RING_GET_RESPONSE(&info->ring, i);
|
|
+ id = bret->id;
|
|
+ req = (struct request *)info->shadow[id].request;
|
|
+
|
|
+ blkif_completion(&info->shadow[id]);
|
|
+
|
|
+ ADD_ID_TO_FREELIST(info, id);
|
|
+
|
|
+ uptodate = (bret->status == BLKIF_RSP_OKAY);
|
|
+ switch (bret->operation) {
|
|
+ case BLKIF_OP_WRITE_BARRIER:
|
|
+ if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
|
|
+ printk("blkfront: %s: write barrier op failed\n",
|
|
+ info->gd->disk_name);
|
|
+ uptodate = -EOPNOTSUPP;
|
|
+ info->feature_barrier = 0;
|
|
+ xlvbd_barrier(info);
|
|
+ }
|
|
+ /* fall through */
|
|
+ case BLKIF_OP_READ:
|
|
+ case BLKIF_OP_WRITE:
|
|
+ if (unlikely(bret->status != BLKIF_RSP_OKAY))
|
|
+ DPRINTK("Bad return from blkdev data "
|
|
+ "request: %x\n", bret->status);
|
|
+
|
|
+ ret = end_that_request_first(req, uptodate,
|
|
+ req->hard_nr_sectors);
|
|
+ BUG_ON(ret);
|
|
+ end_that_request_last(req, uptodate);
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+ }
|
|
+
|
|
+ info->ring.rsp_cons = i;
|
|
+
|
|
+ if (i != info->ring.req_prod_pvt) {
|
|
+ int more_to_do;
|
|
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
|
|
+ if (more_to_do)
|
|
+ goto again;
|
|
+ } else
|
|
+ info->ring.sring->rsp_event = i + 1;
|
|
+
|
|
+ kick_pending_request_queues(info);
|
|
+
|
|
+ spin_unlock_irqrestore(&blkif_io_lock, flags);
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+static void blkif_free(struct blkfront_info *info, int suspend)
|
|
+{
|
|
+ /* Prevent new requests being issued until we fix things up. */
|
|
+ spin_lock_irq(&blkif_io_lock);
|
|
+ info->connected = suspend ?
|
|
+ BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
|
|
+ /* No more blkif_request(). */
|
|
+ if (info->rq)
|
|
+ blk_stop_queue(info->rq);
|
|
+ /* No more gnttab callback work. */
|
|
+ gnttab_cancel_free_callback(&info->callback);
|
|
+ spin_unlock_irq(&blkif_io_lock);
|
|
+
|
|
+ /* Flush gnttab callback work. Must be done with no locks held. */
|
|
+ flush_scheduled_work();
|
|
+
|
|
+ /* Free resources associated with old device channel. */
|
|
+ if (info->ring_ref != GRANT_INVALID_REF) {
|
|
+ gnttab_end_foreign_access(info->ring_ref,
|
|
+ (unsigned long)info->ring.sring);
|
|
+ info->ring_ref = GRANT_INVALID_REF;
|
|
+ info->ring.sring = NULL;
|
|
+ }
|
|
+ if (info->irq)
|
|
+ unbind_from_irqhandler(info->irq, info);
|
|
+ info->irq = 0;
|
|
+}
|
|
+
|
|
+static void blkif_completion(struct blk_shadow *s)
|
|
+{
|
|
+ int i;
|
|
+ for (i = 0; i < s->req.nr_segments; i++)
|
|
+ gnttab_end_foreign_access(s->req.seg[i].gref, 0UL);
|
|
+}
|
|
+
|
|
+/* Resume path: rebuild the shadow free list and re-issue in-flight requests. */
|
|
+static void blkif_recover(struct blkfront_info *info)
|
|
+{
|
|
+	int i, j;
|
|
+	blkif_request_t *req;
|
|
+	struct blk_shadow *copy;
|
|
+
|
|
+	/* Stage 1: Make a safe copy of the shadow state. */
|
|
+	copy = kmalloc(sizeof(info->shadow), GFP_NOIO | __GFP_NOFAIL | __GFP_HIGH);
|
|
+	memcpy(copy, info->shadow, sizeof(info->shadow));
|
|
+
|
|
+	/* Stage 2: Set up free list. */
|
|
+	memset(&info->shadow, 0, sizeof(info->shadow));
|
|
+	for (i = 0; i < BLK_RING_SIZE; i++)
|
|
+		info->shadow[i].req.id = i+1;
|
|
+	info->shadow_free = info->ring.req_prod_pvt;
|
|
+	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
|
|
+
|
|
+	/* Stage 3: Find pending requests and requeue them. */
|
|
+	for (i = 0; i < BLK_RING_SIZE; i++) {
|
|
+		/* Not in use? */
|
|
+		if (copy[i].request == 0)
|
|
+			continue;
|
|
+
|
|
+		/* Grab a request slot and copy shadow state into it. */
|
|
+		req = RING_GET_REQUEST(
|
|
+			&info->ring, info->ring.req_prod_pvt);
|
|
+		*req = copy[i].req;
|
|
+
|
|
+		/* We get a new request id, and must reset the shadow state. */
|
|
+		req->id = GET_ID_FROM_FREELIST(info);
|
|
+		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
|
|
+
|
|
+		/* Rewrite any grant references invalidated by susp/resume. */
|
|
+		for (j = 0; j < req->nr_segments; j++)
|
|
+			gnttab_grant_foreign_access_ref(
|
|
+				req->seg[j].gref,
|
|
+				info->xbdev->otherend_id,
|
|
+				pfn_to_mfn(info->shadow[req->id].frame[j]),
|
|
+				rq_data_dir((struct request *)
|
|
+					    info->shadow[req->id].request) ?
|
|
+				GTF_readonly : 0);
|
|
+		info->shadow[req->id].req = *req; /* shadow keeps the new id */
|
|
+
|
|
+		info->ring.req_prod_pvt++;
|
|
+	}
|
|
+
|
|
+	kfree(copy);
|
|
+
|
|
+	(void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
|
|
+
|
|
+	spin_lock_irq(&blkif_io_lock);
|
|
+
|
|
+	/* Now safe for us to use the shared ring */
|
|
+	info->connected = BLKIF_STATE_CONNECTED;
|
|
+
|
|
+	/* Send off requeued requests */
|
|
+	flush_requests(info);
|
|
+
|
|
+	/* Kick any other new requests queued since we resumed */
|
|
+	kick_pending_request_queues(info);
|
|
+
|
|
+	spin_unlock_irq(&blkif_io_lock);
|
|
+}
|
|
+
|
|
+int blkfront_is_ready(struct xenbus_device *dev)
|
|
+{
|
|
+ struct blkfront_info *info = dev->dev.driver_data;
|
|
+
|
|
+ return info->is_ready && info->xbdev;
|
|
+}
|
|
+
|
|
+
|
|
+/* ** Driver Registration ** */
|
|
+
|
|
+
|
|
+static const struct xenbus_device_id blkfront_ids[] = {
|
|
+ { "vbd" },
|
|
+ { "" }
|
|
+};
|
|
+MODULE_ALIAS("xen:vbd");
|
|
+
|
|
+static struct xenbus_driver blkfront = {
|
|
+ .name = "vbd",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = blkfront_ids,
|
|
+ .probe = blkfront_probe,
|
|
+ .remove = blkfront_remove,
|
|
+ .resume = blkfront_resume,
|
|
+ .otherend_changed = backend_changed,
|
|
+ .is_ready = blkfront_is_ready,
|
|
+};
|
|
+
|
|
+
|
|
+static int __init xlblk_init(void)
|
|
+{
|
|
+ if (!is_running_on_xen())
|
|
+ return -ENODEV;
|
|
+
|
|
+ return xenbus_register_frontend(&blkfront);
|
|
+}
|
|
+module_init(xlblk_init);
|
|
+
|
|
+
|
|
+static void __exit xlblk_exit(void)
|
|
+{
|
|
+	xenbus_unregister_driver(&blkfront); /* undo xlblk_init()'s registration */
|
|
+}
|
|
+module_exit(xlblk_exit);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkfront/block.h 2010-02-24 13:13:46.000000000 +0100
|
|
@@ -0,0 +1,160 @@
|
|
+/******************************************************************************
|
|
+ * block.h
|
|
+ *
|
|
+ * Shared definitions between all levels of XenLinux Virtual block devices.
|
|
+ *
|
|
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
|
|
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
|
|
+ * Copyright (c) 2004-2005, Christian Limpach
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __XEN_DRIVERS_BLOCK_H__
|
|
+#define __XEN_DRIVERS_BLOCK_H__
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/hdreg.h>
|
|
+#include <linux/blkdev.h>
|
|
+#include <linux/major.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/io/blkif.h>
|
|
+#include <xen/interface/io/ring.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/uaccess.h>
|
|
+
|
|
+#define DPRINTK(_f, _a...) pr_debug(_f, ## _a)
|
|
+
|
|
+#if 0
|
|
+#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
|
|
+#else
|
|
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
|
|
+#endif
|
|
+
|
|
+struct xlbd_type_info
|
|
+{
|
|
+ int partn_shift;
|
|
+ int disks_per_major;
|
|
+ char *devname;
|
|
+ char *diskname;
|
|
+};
|
|
+
|
|
+struct xlbd_major_info
|
|
+{
|
|
+ int major;
|
|
+ int index;
|
|
+ int usage;
|
|
+ struct xlbd_type_info *type;
|
|
+ struct xlbd_minor_state *minors;
|
|
+};
|
|
+
|
|
+struct blk_shadow {
|
|
+ blkif_request_t req;
|
|
+ unsigned long request;
|
|
+ unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
|
+};
|
|
+
|
|
+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
|
|
+
|
|
+/*
|
|
+ * We have one of these per vbd, whether ide, scsi or 'other'. They
|
|
+ * hang in private_data off the gendisk structure. We may end up
|
|
+ * putting all kinds of interesting stuff here :-)
|
|
+ */
|
|
+struct blkfront_info
|
|
+{
|
|
+ struct xenbus_device *xbdev;
|
|
+ dev_t dev;
|
|
+ struct gendisk *gd;
|
|
+ int vdevice;
|
|
+ blkif_vdev_t handle;
|
|
+ int connected;
|
|
+ int ring_ref;
|
|
+ blkif_front_ring_t ring;
|
|
+ struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
|
+ unsigned int irq;
|
|
+ struct xlbd_major_info *mi;
|
|
+ request_queue_t *rq;
|
|
+ struct work_struct work;
|
|
+ struct gnttab_free_callback callback;
|
|
+ struct blk_shadow shadow[BLK_RING_SIZE];
|
|
+ unsigned long shadow_free;
|
|
+ int feature_barrier;
|
|
+ int is_ready;
|
|
+
|
|
+ /**
|
|
+ * The number of people holding this device open. We won't allow a
|
|
+ * hot-unplug unless this is 0.
|
|
+ */
|
|
+ int users;
|
|
+};
|
|
+
|
|
+extern spinlock_t blkif_io_lock;
|
|
+
|
|
+extern int blkif_open(struct inode *inode, struct file *filep);
|
|
+extern int blkif_release(struct inode *inode, struct file *filep);
|
|
+extern int blkif_ioctl(struct inode *inode, struct file *filep,
|
|
+ unsigned command, unsigned long argument);
|
|
+extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
|
|
+extern int blkif_check(dev_t dev);
|
|
+extern int blkif_revalidate(dev_t dev);
|
|
+extern void do_blkif_request (request_queue_t *rq);
|
|
+
|
|
+/* Virtual block-device subsystem. */
|
|
+/* Note that xlvbd_add doesn't call add_disk for you: you're expected
|
|
+ to call add_disk on info->gd once the disk is properly connected
|
|
+ up. */
|
|
+int xlvbd_add(blkif_sector_t capacity, int device,
|
|
+ u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
|
|
+void xlvbd_del(struct blkfront_info *info);
|
|
+int xlvbd_barrier(struct blkfront_info *info);
|
|
+
|
|
+#ifdef CONFIG_SYSFS
|
|
+int xlvbd_sysfs_addif(struct blkfront_info *info);
|
|
+void xlvbd_sysfs_delif(struct blkfront_info *info);
|
|
+#else
|
|
+static inline int xlvbd_sysfs_addif(struct blkfront_info *info)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline void xlvbd_sysfs_delif(struct blkfront_info *info)
|
|
+{
|
|
+ ;
|
|
+}
|
|
+#endif
|
|
+
|
|
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blkfront/vbd.c 2010-01-18 15:23:12.000000000 +0100
|
|
@@ -0,0 +1,553 @@
|
|
+/******************************************************************************
|
|
+ * vbd.c
|
|
+ *
|
|
+ * XenLinux virtual block-device driver (xvd).
|
|
+ *
|
|
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
|
|
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
|
|
+ * Copyright (c) 2004-2005, Christian Limpach
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "block.h"
|
|
+#include <linux/blkdev.h>
|
|
+#include <linux/list.h>
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+#define BLKIF_MAJOR(dev) ((dev)>>8)
|
|
+#define BLKIF_MINOR(dev) ((dev) & 0xff)
|
|
+
|
|
+#define EXT_SHIFT 28
|
|
+#define EXTENDED (1<<EXT_SHIFT)
|
|
+#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED))
|
|
+#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED))
|
|
+
|
|
+struct xlbd_minor_state {
|
|
+ unsigned int nr;
|
|
+ unsigned long *bitmap;
|
|
+ spinlock_t lock;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.,
|
|
+ * potentially combinations of the two) in the naming scheme and in a few other
|
|
+ * places.
|
|
+ */
|
|
+
|
|
+#define NUM_IDE_MAJORS 10
|
|
+#define NUM_SCSI_MAJORS 17
|
|
+#define NUM_VBD_MAJORS 2
|
|
+
|
|
+static struct xlbd_type_info xlbd_ide_type = {
|
|
+ .partn_shift = 6,
|
|
+ .disks_per_major = 2,
|
|
+ .devname = "ide",
|
|
+ .diskname = "hd",
|
|
+};
|
|
+
|
|
+static struct xlbd_type_info xlbd_scsi_type = {
|
|
+ .partn_shift = 4,
|
|
+ .disks_per_major = 16,
|
|
+ .devname = "sd",
|
|
+ .diskname = "sd",
|
|
+};
|
|
+
|
|
+static struct xlbd_type_info xlbd_vbd_type = {
|
|
+ .partn_shift = 4,
|
|
+ .disks_per_major = 16,
|
|
+ .devname = "xvd",
|
|
+ .diskname = "xvd",
|
|
+};
|
|
+
|
|
+static struct xlbd_type_info xlbd_vbd_type_ext = {
|
|
+ .partn_shift = 8,
|
|
+ .disks_per_major = 256,
|
|
+ .devname = "xvd",
|
|
+ .diskname = "xvd",
|
|
+};
|
|
+
|
|
+static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
|
|
+ NUM_VBD_MAJORS];
|
|
+
|
|
+#define XLBD_MAJOR_IDE_START 0
|
|
+#define XLBD_MAJOR_SCSI_START (NUM_IDE_MAJORS)
|
|
+#define XLBD_MAJOR_VBD_START (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
|
|
+
|
|
+#define XLBD_MAJOR_IDE_RANGE XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
|
|
+#define XLBD_MAJOR_SCSI_RANGE XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
|
|
+#define XLBD_MAJOR_VBD_RANGE XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
|
|
+
|
|
+#define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ (XLBD_MAJOR_VBD_START + 1))
|
|
+
|
|
+static struct block_device_operations xlvbd_block_fops =
|
|
+{
|
|
+ .owner = THIS_MODULE,
|
|
+ .open = blkif_open,
|
|
+ .release = blkif_release,
|
|
+ .ioctl = blkif_ioctl,
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
|
|
+ .getgeo = blkif_getgeo
|
|
+#endif
|
|
+};
|
|
+
|
|
+DEFINE_SPINLOCK(blkif_io_lock);
|
|
+
|
|
+/*
|
|
+ * Allocate, initialise and publish the major_info[] entry for slot 'index'.
|
|
+ *
|
|
+ * major: block major number recorded in the entry (and registered, unless
|
|
+ *        the sibling VBD slot already exists)
|
|
+ * minor: not used by this function
|
|
+ * index: slot in major_info[]; selects the IDE, SCSI or VBD type info
|
|
+ *
|
|
+ * Returns the new entry, or NULL if an allocation or register_blkdev() fails.
|
|
+ */
|
|
+static struct xlbd_major_info *
|
|
+xlbd_alloc_major_info(int major, int minor, int index)
|
|
+{
|
|
+ struct xlbd_major_info *ptr;
|
|
+ struct xlbd_minor_state *minors;
|
|
+ int do_register;
|
|
+
|
|
+ ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
|
|
+ if (ptr == NULL)
|
|
+ return NULL;
|
|
+
|
|
+ ptr->major = major;
|
|
+ minors = kmalloc(sizeof(*minors), GFP_KERNEL);
|
|
+ if (minors == NULL) {
|
|
+ kfree(ptr);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ /* Start with a zeroed 256-bit minor bitmap. */
|
|
+ minors->bitmap = kzalloc(BITS_TO_LONGS(256) * sizeof(*minors->bitmap),
|
|
+ GFP_KERNEL);
|
|
+ if (minors->bitmap == NULL) {
|
|
+ kfree(minors);
|
|
+ kfree(ptr);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ spin_lock_init(&minors->lock);
|
|
+ minors->nr = 256;
|
|
+ do_register = 1;
|
|
+
|
|
+ /* Pick the type descriptor and the per-type index from the slot. */
|
|
+ switch (index) {
|
|
+ case XLBD_MAJOR_IDE_RANGE:
|
|
+ ptr->type = &xlbd_ide_type;
|
|
+ ptr->index = index - XLBD_MAJOR_IDE_START;
|
|
+ break;
|
|
+ case XLBD_MAJOR_SCSI_RANGE:
|
|
+ ptr->type = &xlbd_scsi_type;
|
|
+ ptr->index = index - XLBD_MAJOR_SCSI_START;
|
|
+ break;
|
|
+ case XLBD_MAJOR_VBD_RANGE:
|
|
+ ptr->index = 0;
|
|
+ if ((index - XLBD_MAJOR_VBD_START) == 0)
|
|
+ ptr->type = &xlbd_vbd_type;
|
|
+ else
|
|
+ ptr->type = &xlbd_vbd_type_ext;
|
|
+
|
|
+ /*
|
|
+ * If the other VBD slot already exists, block major 202 has
|
|
+ * been registered: don't register it again, and share that
|
|
+ * slot's minor state instead of the one allocated above.
|
|
+ */
|
|
+ if (major_info[XLBD_MAJOR_VBD_ALT(index)] != NULL) {
|
|
+ kfree(minors->bitmap);
|
|
+ kfree(minors);
|
|
+ minors = major_info[XLBD_MAJOR_VBD_ALT(index)]->minors;
|
|
+ do_register = 0;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (do_register) {
|
|
+ /* Only reached with a privately allocated 'minors'. */
|
|
+ if (register_blkdev(ptr->major, ptr->type->devname)) {
|
|
+ kfree(minors->bitmap);
|
|
+ kfree(minors);
|
|
+ kfree(ptr);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ printk("xen-vbd: registered block device major %i\n", ptr->major);
|
|
+ }
|
|
+
|
|
+ ptr->minors = minors;
|
|
+ major_info[index] = ptr;
|
|
+ return ptr;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Look up (allocating on first use) the major_info[] entry for a device and
|
|
+ * take a usage reference on it.
|
|
+ *
|
|
+ * major:   block major number of the device
|
|
+ * minor:   passed through to xlbd_alloc_major_info()
|
|
+ * vdevice: virtual device number; only consulted for majors that are not
|
|
+ *          listed below, to choose between the two VBD slots
|
|
+ *
|
|
+ * Returns the entry with usage incremented, or NULL if allocation failed.
|
|
+ */
|
|
+static struct xlbd_major_info *
|
|
+xlbd_get_major_info(int major, int minor, int vdevice)
|
|
+{
|
|
+ struct xlbd_major_info *mi;
|
|
+ int index;
|
|
+
|
|
+ /* Slots 0-9: IDE majors; 10-26: SCSI majors; 27/28: VBD / extended VBD. */
|
|
+ switch (major) {
|
|
+ case IDE0_MAJOR: index = 0; break;
|
|
+ case IDE1_MAJOR: index = 1; break;
|
|
+ case IDE2_MAJOR: index = 2; break;
|
|
+ case IDE3_MAJOR: index = 3; break;
|
|
+ case IDE4_MAJOR: index = 4; break;
|
|
+ case IDE5_MAJOR: index = 5; break;
|
|
+ case IDE6_MAJOR: index = 6; break;
|
|
+ case IDE7_MAJOR: index = 7; break;
|
|
+ case IDE8_MAJOR: index = 8; break;
|
|
+ case IDE9_MAJOR: index = 9; break;
|
|
+ case SCSI_DISK0_MAJOR: index = 10; break;
|
|
+ case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
|
|
+ index = 11 + major - SCSI_DISK1_MAJOR;
|
|
+ break;
|
|
+ case SCSI_DISK8_MAJOR ... SCSI_DISK15_MAJOR:
|
|
+ index = 18 + major - SCSI_DISK8_MAJOR;
|
|
+ break;
|
|
+ case SCSI_CDROM_MAJOR: index = 26; break;
|
|
+ default:
|
|
+ /* Any other major is treated as a Xen VBD. */
|
|
+ if (!VDEV_IS_EXTENDED(vdevice))
|
|
+ index = 27;
|
|
+ else
|
|
+ index = 28;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ mi = ((major_info[index] != NULL) ? major_info[index] :
|
|
+ xlbd_alloc_major_info(major, minor, index));
|
|
+ if (mi)
|
|
+ mi->usage++;
|
|
+ return mi;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Drop one usage reference on a major-info entry.  The entry (and the
|
|
+ * registered block major) stays around even when the count reaches zero.
|
|
+ */
|
|
+static void
|
|
+xlbd_put_major_info(struct xlbd_major_info *mi)
|
|
+{
|
|
+	/* XXX: release major if 0 */
|
|
+	mi->usage -= 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Reserve the minors [minor, minor + nr_minors) in mi's minor bitmap,
|
|
+ * growing the bitmap first if the range extends past its current size.
|
|
+ *
|
|
+ * Returns 0 on success, -ENOMEM if the bitmap could not be grown, or
|
|
+ * -EBUSY if any minor in the range is already reserved.
|
|
+ */
|
|
+static int
|
|
+xlbd_reserve_minors(struct xlbd_major_info *mi, unsigned int minor,
|
|
+ unsigned int nr_minors)
|
|
+{
|
|
+ struct xlbd_minor_state *ms = mi->minors;
|
|
+ unsigned int end = minor + nr_minors;
|
|
+ int rc;
|
|
+
|
|
+ if (end > ms->nr) {
|
|
+ unsigned long *bitmap, *old;
|
|
+
|
|
+ /* Allocate the larger bitmap before taking the spinlock. */
|
|
+ bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
|
|
+ GFP_KERNEL);
|
|
+ if (bitmap == NULL)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ spin_lock(&ms->lock);
|
|
+ /* Re-check under the lock: ms->nr was first read unlocked. */
|
|
+ if (end > ms->nr) {
|
|
+ old = ms->bitmap;
|
|
+ memcpy(bitmap, ms->bitmap,
|
|
+ BITS_TO_LONGS(ms->nr) * sizeof(*bitmap));
|
|
+ ms->bitmap = bitmap;
|
|
+ ms->nr = BITS_TO_LONGS(end) * BITS_PER_LONG;
|
|
+ } else
|
|
+ old = bitmap;
|
|
+ spin_unlock(&ms->lock);
|
|
+ /* 'old' is whichever buffer is no longer in use. */
|
|
+ kfree(old);
|
|
+ }
|
|
+
|
|
+ spin_lock(&ms->lock);
|
|
+ /* Claim the range only if no bit in [minor, end) is set yet. */
|
|
+ if (find_next_bit(ms->bitmap, end, minor) >= end) {
|
|
+ for (; minor < end; ++minor)
|
|
+ __set_bit(minor, ms->bitmap);
|
|
+ rc = 0;
|
|
+ } else
|
|
+ rc = -EBUSY;
|
|
+ spin_unlock(&ms->lock);
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Give back the minors [minor, minor + nr_minors) by clearing their bits
|
|
+ * in mi's minor bitmap.  The range must lie inside the current bitmap.
|
|
+ */
|
|
+static void
|
|
+xlbd_release_minors(struct xlbd_major_info *mi, unsigned int minor,
|
|
+		    unsigned int nr_minors)
|
|
+{
|
|
+	struct xlbd_minor_state *state = mi->minors;
|
|
+	unsigned int limit = minor + nr_minors;
|
|
+	unsigned int bit;
|
|
+
|
|
+	BUG_ON(limit > state->nr);
|
|
+
|
|
+	spin_lock(&state->lock);
|
|
+	for (bit = minor; bit < limit; bit++)
|
|
+		__clear_bit(bit, state->bitmap);
|
|
+	spin_unlock(&state->lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Create the request queue for 'gd', served by do_blkif_request() under
|
|
+ * blkif_io_lock, apply the blkif limits to it and store it in gd->queue.
|
|
+ *
|
|
+ * Returns 0 on success, -1 if blk_init_queue() fails.
|
|
+ */
|
|
+static int
|
|
+xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
|
|
+{
|
|
+ request_queue_t *rq;
|
|
+
|
|
+ rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
|
|
+ if (rq == NULL)
|
|
+ return -1;
|
|
+
|
|
+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
|
|
+ blk_queue_hardsect_size(rq, sector_size);
|
|
+ blk_queue_max_sectors(rq, 512);
|
|
+
|
|
+ /* Each segment in a request is up to an aligned page in size. */
|
|
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
|
|
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
|
|
+
|
|
+ /* Ensure a merged request will fit in a single I/O ring slot. */
|
|
+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+
|
|
+ /* Make sure buffer addresses are sector-aligned. */
|
|
+ blk_queue_dma_alignment(rq, 511);
|
|
+
|
|
+ /* Make sure we don't use bounce buffers. */
|
|
+ blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY);
|
|
+
|
|
+ gd->queue = rq;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Allocate and fill in the gendisk and request queue for one virtual block
|
|
+ * device.  The disk is not registered here; the caller is expected to call
|
|
+ * add_disk() on info->gd once the device is connected.
|
|
+ *
|
|
+ * major, minor: device number the disk will use
|
|
+ * capacity:     value handed to set_capacity()
|
|
+ * vdevice:      virtual device number (selects the VBD major-info slot)
|
|
+ * vdisk_info:   VDISK_* flags (read-only, removable, cdrom)
|
|
+ * sector_size:  hard sector size for the request queue
|
|
+ * info:         per-device state; ->mi, ->gd and ->rq must be NULL on entry
|
|
+ *               and are set on success
|
|
+ *
|
|
+ * Returns 0 on success or a negative errno; on failure the reserved minors
|
|
+ * and the major-info reference are released and info->mi is reset to NULL.
|
|
+ */
|
|
+static int
|
|
+xlvbd_alloc_gendisk(int major, int minor, blkif_sector_t capacity, int vdevice,
|
|
+		    u16 vdisk_info, u16 sector_size,
|
|
+		    struct blkfront_info *info)
|
|
+{
|
|
+	struct gendisk *gd;
|
|
+	struct xlbd_major_info *mi;
|
|
+	int nr_minors = 1;
|
|
+	int err = -ENODEV;
|
|
+	unsigned int offset;
|
|
+
|
|
+	BUG_ON(info->gd != NULL);
|
|
+	BUG_ON(info->mi != NULL);
|
|
+	BUG_ON(info->rq != NULL);
|
|
+
|
|
+	mi = xlbd_get_major_info(major, minor, vdevice);
|
|
+	if (mi == NULL)
|
|
+		goto out;
|
|
+	info->mi = mi;
|
|
+
|
|
+	/* A minor with no partition bits set is a whole disk. */
|
|
+	if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
|
|
+		nr_minors = 1 << mi->type->partn_shift;
|
|
+
|
|
+	err = xlbd_reserve_minors(mi, minor, nr_minors);
|
|
+	if (err)
|
|
+		goto out;
|
|
+	err = -ENODEV;
|
|
+
|
|
+	gd = alloc_disk(nr_minors);
|
|
+	if (gd == NULL)
|
|
+		goto release;
|
|
+
|
|
+	/* Disk number across all majors of this type: names the disk. */
|
|
+	offset = mi->index * mi->type->disks_per_major +
|
|
+		 (minor >> mi->type->partn_shift);
|
|
+	if (nr_minors > 1) {
|
|
+		/* Whole disk: "<name>a" .. "<name>z", then two letters. */
|
|
+		if (offset < 26) {
|
|
+			sprintf(gd->disk_name, "%s%c",
|
|
+				mi->type->diskname, 'a' + offset);
|
|
+		} else {
|
|
+			sprintf(gd->disk_name, "%s%c%c",
|
|
+				mi->type->diskname,
|
|
+				'a' + ((offset/26)-1), 'a' + (offset%26));
|
|
+		}
|
|
+	} else {
|
|
+		/* Single partition: disk name followed by partition number. */
|
|
+		if (offset < 26) {
|
|
+			sprintf(gd->disk_name, "%s%c%d",
|
|
+				mi->type->diskname,
|
|
+				'a' + offset,
|
|
+				minor & ((1 << mi->type->partn_shift) - 1));
|
|
+		} else {
|
|
+			sprintf(gd->disk_name, "%s%c%c%d",
|
|
+				mi->type->diskname,
|
|
+				'a' + ((offset/26)-1), 'a' + (offset%26),
|
|
+				minor & ((1 << mi->type->partn_shift) - 1));
|
|
+		}
|
|
+	}
|
|
+
|
|
+	gd->major = mi->major;
|
|
+	gd->first_minor = minor;
|
|
+	gd->fops = &xlvbd_block_fops;
|
|
+	gd->private_data = info;
|
|
+	gd->driverfs_dev = &(info->xbdev->dev);
|
|
+	set_capacity(gd, capacity);
|
|
+
|
|
+	if (xlvbd_init_blk_queue(gd, sector_size)) {
|
|
+		/*
|
|
+		 * The disk was only allocated, never added, so drop the
|
|
+		 * alloc_disk() reference with put_disk(); del_gendisk() is
|
|
+		 * for disks that went through add_disk() and would not
|
|
+		 * free the structure.
|
|
+		 */
|
|
+		put_disk(gd);
|
|
+		goto release;
|
|
+	}
|
|
+
|
|
+	info->rq = gd->queue;
|
|
+	info->gd = gd;
|
|
+
|
|
+	if (info->feature_barrier)
|
|
+		xlvbd_barrier(info);
|
|
+
|
|
+	if (vdisk_info & VDISK_READONLY)
|
|
+		set_disk_ro(gd, 1);
|
|
+
|
|
+	if (vdisk_info & VDISK_REMOVABLE)
|
|
+		gd->flags |= GENHD_FL_REMOVABLE;
|
|
+
|
|
+	if (vdisk_info & VDISK_CDROM)
|
|
+		gd->flags |= GENHD_FL_CD;
|
|
+
|
|
+	return 0;
|
|
+
|
|
+ release:
|
|
+	xlbd_release_minors(mi, minor, nr_minors);
|
|
+ out:
|
|
+	if (mi)
|
|
+		xlbd_put_major_info(mi);
|
|
+	info->mi = NULL;
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Create (but do not register) the gendisk for virtual device 'vdevice'.
|
|
+ *
|
|
+ * capacity:    value handed to set_capacity()
|
|
+ * vdevice:     virtual device number; bit EXT_SHIFT marks the extended
|
|
+ *              encoding, where the remaining bits are the minor on major 202
|
|
+ * vdisk_info:  VDISK_* flags (read-only, removable, cdrom)
|
|
+ * sector_size: hard sector size for the request queue
|
|
+ * info:        per-device state; info->dev is set here, the rest by
|
|
+ *              xlvbd_alloc_gendisk()
|
|
+ *
|
|
+ * Returns 0 on success or a negative errno.
|
|
+ */
|
|
+int
|
|
+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
|
|
+	  u16 sector_size, struct blkfront_info *info)
|
|
+{
|
|
+	struct block_device *bd;
|
|
+	int err = 0;
|
|
+	int major, minor;
|
|
+
|
|
+	/*
|
|
+	 * Range-check the unsigned value: with a signed shift a vdevice that
|
|
+	 * has bit 31 set becomes negative and would slip past the "> 1" test.
|
|
+	 */
|
|
+	if (((unsigned int)vdevice >> EXT_SHIFT) > 1) {
|
|
+		/* this is above the extended range; something is wrong */
|
|
+		printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice);
|
|
+		return -ENODEV;
|
|
+	}
|
|
+
|
|
+	if (!VDEV_IS_EXTENDED(vdevice)) {
|
|
+		/* Classic encoding: major in bits 8 and up, minor in bits 0-7. */
|
|
+		major = BLKIF_MAJOR(vdevice);
|
|
+		minor = BLKIF_MINOR(vdevice);
|
|
+	} else {
|
|
+		/* Extended encoding always lives on block major 202. */
|
|
+		major = 202;
|
|
+		minor = BLKIF_MINOR_EXT(vdevice);
|
|
+	}
|
|
+
|
|
+	info->dev = MKDEV(major, minor);
|
|
+	bd = bdget(info->dev);
|
|
+	if (bd == NULL)
|
|
+		return -ENODEV;
|
|
+
|
|
+	err = xlvbd_alloc_gendisk(major, minor, capacity, vdevice, vdisk_info,
|
|
+				  sector_size, info);
|
|
+
|
|
+	bdput(bd);
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Tear down what xlvbd_add() set up for 'info': unregister and release the
|
|
+ * gendisk, give back its minors and the major-info reference, then clean up
|
|
+ * the request queue.  Does nothing if info->mi is NULL.
|
|
+ */
|
|
+void
|
|
+xlvbd_del(struct blkfront_info *info)
|
|
+{
|
|
+ unsigned int minor, nr_minors;
|
|
+
|
|
+ if (info->mi == NULL)
|
|
+ return;
|
|
+
|
|
+ BUG_ON(info->gd == NULL);
|
|
+ /* Read the minor range first; the gendisk is released just below. */
|
|
+ minor = info->gd->first_minor;
|
|
+ nr_minors = info->gd->minors;
|
|
+ del_gendisk(info->gd);
|
|
+ put_disk(info->gd);
|
|
+ info->gd = NULL;
|
|
+
|
|
+ xlbd_release_minors(info->mi, minor, nr_minors);
|
|
+ xlbd_put_major_info(info->mi);
|
|
+ info->mi = NULL;
|
|
+
|
|
+ BUG_ON(info->rq == NULL);
|
|
+ blk_cleanup_queue(info->rq);
|
|
+ info->rq = NULL;
|
|
+}
|
|
+
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
|
|
+int
|
|
+xlvbd_barrier(struct blkfront_info *info)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ err = blk_queue_ordered(info->rq,
|
|
+ info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
|
|
+ if (err)
|
|
+ return err;
|
|
+ printk(KERN_INFO "blkfront: %s: barriers %s\n",
|
|
+ info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled");
|
|
+ return 0;
|
|
+}
|
|
+#else
|
|
+int
|
|
+xlvbd_barrier(struct blkfront_info *info)
|
|
+{
|
|
+ printk(KERN_INFO "blkfront: %s: barriers disabled\n", info->gd->disk_name);
|
|
+ return -ENOSYS;
|
|
+}
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_SYSFS
|
|
+/*
|
|
+ * sysfs "media" attribute: prints "cdrom" when the disk has GENHD_FL_CD set
|
|
+ * and "disk" otherwise.  Returns the number of characters written to buf.
|
|
+ */
|
|
+static ssize_t show_media(struct device *dev,
|
|
+			  struct device_attribute *attr, char *buf)
|
|
+{
|
|
+	struct blkfront_info *info = to_xenbus_device(dev)->dev.driver_data;
|
|
+
|
|
+	if (!(info->gd->flags & GENHD_FL_CD))
|
|
+		return sprintf(buf, "disk\n");
|
|
+
|
|
+	return sprintf(buf, "cdrom\n");
|
|
+}
|
|
+
|
|
+static struct device_attribute xlvbd_attrs[] = {
|
|
+ __ATTR(media, S_IRUGO, show_media, NULL),
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Create every attribute file in xlvbd_attrs[] on the disk's driverfs
|
|
+ * device.  If one creation fails, the files created so far are removed.
|
|
+ *
|
|
+ * Returns 0 on success or the error from device_create_file().
|
|
+ */
|
|
+int xlvbd_sysfs_addif(struct blkfront_info *info)
|
|
+{
|
|
+ int i;
|
|
+ int error = 0;
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(xlvbd_attrs); i++) {
|
|
+ error = device_create_file(info->gd->driverfs_dev,
|
|
+ &xlvbd_attrs[i]);
|
|
+ if (error)
|
|
+ goto fail;
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ /* Entry i failed; undo entries i-1 .. 0. */
|
|
+ while (--i >= 0)
|
|
+ device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[i]);
|
|
+ return error;
|
|
+}
|
|
+
|
|
+/* Remove every attribute file in xlvbd_attrs[] from the disk's driverfs device. */
|
|
+void xlvbd_sysfs_delif(struct blkfront_info *info)
|
|
+{
|
|
+	int idx = 0;
|
|
+
|
|
+	while (idx < ARRAY_SIZE(xlvbd_attrs)) {
|
|
+		device_remove_file(info->gd->driverfs_dev, &xlvbd_attrs[idx]);
|
|
+		idx++;
|
|
+	}
|
|
+}
|
|
+
|
|
+#endif /* CONFIG_SYSFS */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap/Makefile 2007-06-12 13:13:44.000000000 +0200
|
|
@@ -0,0 +1,5 @@
|
|
+LINUXINCLUDE += -I../xen/include/public/io
|
|
+
|
|
+obj-$(CONFIG_XEN_BLKDEV_TAP) := xenblktap.o
|
|
+
|
|
+xenblktap-y := xenbus.o interface.o blktap.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap/blktap.c 2011-02-17 09:58:10.000000000 +0100
|
|
@@ -0,0 +1,1761 @@
|
|
+/******************************************************************************
|
|
+ * drivers/xen/blktap/blktap.c
|
|
+ *
|
|
+ * Back-end driver for user level virtual block devices. This portion of the
|
|
+ * driver exports a 'unified' block-device interface that can be accessed
|
|
+ * by any operating system that implements a compatible front end. Requests
|
|
+ * are remapped to a user-space memory region.
|
|
+ *
|
|
+ * Based on the blkback driver code.
|
|
+ *
|
|
+ * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
|
|
+ *
|
|
+ * Clean ups and fix ups:
|
|
+ * Copyright (c) 2006, Steven Rostedt - Red Hat, Inc.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/list.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include "common.h"
|
|
+#include <xen/balloon.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/major.h>
|
|
+#include <linux/gfp.h>
|
|
+#include <linux/poll.h>
|
|
+#include <linux/delay.h>
|
|
+#include <asm/tlbflush.h>
|
|
+
|
|
+#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
|
|
+#define MAX_DEV_NAME 100 /*the max tapdisk ring device name e.g. blktap0 */
|
|
+
|
|
+/*
|
|
+ * The maximum number of requests that can be outstanding at any time
|
|
+ * is determined by
|
|
+ *
|
|
+ * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
|
|
+ *
|
|
+ * where mmap_alloc < MAX_DYNAMIC_MEM.
|
|
+ *
|
|
+ * TODO:
|
|
+ * mmap_alloc is initialised to 2 and should be adjustable on the fly via
|
|
+ * sysfs.
|
|
+ */
|
|
+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
|
|
+#define MAX_DYNAMIC_MEM BLK_RING_SIZE
|
|
+#define MAX_PENDING_REQS BLK_RING_SIZE
|
|
+#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
|
|
+#define MMAP_VADDR(_start, _req,_seg) \
|
|
+ (_start + \
|
|
+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
|
|
+ ((_seg) * PAGE_SIZE))
|
|
+static int mmap_pages = MMAP_PAGES;
|
|
+
|
|
+#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
|
|
+ * have a bunch of pages reserved for shared
|
|
+ * memory rings.
|
|
+ */
|
|
+
|
|
+/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
|
|
+typedef struct domid_translate {
|
|
+ unsigned short domid;
|
|
+ unsigned short busid;
|
|
+} domid_translate_t ;
|
|
+
|
|
+typedef struct domid_translate_ext {
|
|
+ unsigned short domid;
|
|
+ u32 busid;
|
|
+} domid_translate_ext_t ;
|
|
+
|
|
+/*Data struct associated with each of the tapdisk devices*/
|
|
+typedef struct tap_blkif {
|
|
+ struct mm_struct *mm; /*User address space */
|
|
+ unsigned long rings_vstart; /*Kernel memory mapping */
|
|
+ unsigned long user_vstart; /*User memory mapping */
|
|
+ unsigned long dev_inuse; /*One process opens device at a time. */
|
|
+ unsigned long dev_pending; /*In process of being opened */
|
|
+ unsigned long ring_ok; /*make this ring->state */
|
|
+ blkif_front_ring_t ufe_ring; /*Rings up to user space. */
|
|
+ wait_queue_head_t wait; /*for poll */
|
|
+ unsigned long mode; /*current switching mode */
|
|
+ int minor; /*Minor number for tapdisk device */
|
|
+ pid_t pid; /*tapdisk process id */
|
|
+ enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace
|
|
+ shutdown */
|
|
+ struct idx_map {
|
|
+ u16 mem, req;
|
|
+ } *idx_map; /*Record the user ring id to kern
|
|
+ [req id, idx] tuple */
|
|
+ blkif_t *blkif; /*Associate blkif with tapdev */
|
|
+ struct domid_translate_ext trans; /*Translation from domid to bus. */
|
|
+ struct vm_foreign_map foreign_map; /*Mapping page */
|
|
+} tap_blkif_t;
|
|
+
|
|
+static struct tap_blkif *tapfds[MAX_TAP_DEV];
|
|
+static int blktap_next_minor;
|
|
+
|
|
+/*
|
|
+ * Run-time switchable: /sys/module/blktap/parameters/
|
|
+ * Both variables are unsigned, so they are exported with the matching
|
|
+ * "uint" parameter type rather than "int".
|
|
+ */
|
|
+static unsigned int log_stats = 0;
|
|
+static unsigned int debug_lvl = 0;
|
|
+module_param(log_stats, uint, 0644);
|
|
+module_param(debug_lvl, uint, 0644);
|
|
+
|
|
+/*
|
|
+ * Each outstanding request that we've passed to the lower device layers has a
|
|
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
|
|
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
|
|
+ * response queued for it, with the saved 'id' passed back.
|
|
+ */
|
|
+typedef struct {
|
|
+ blkif_t *blkif;
|
|
+ u64 id;
|
|
+ unsigned short mem_idx;
|
|
+ int nr_pages;
|
|
+ atomic_t pendcnt;
|
|
+ unsigned short operation;
|
|
+ int status;
|
|
+ struct list_head free_list;
|
|
+ int inuse;
|
|
+} pending_req_t;
|
|
+
|
|
+static pending_req_t *pending_reqs[MAX_PENDING_REQS];
|
|
+static struct list_head pending_free;
|
|
+static DEFINE_SPINLOCK(pending_free_lock);
|
|
+static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
|
|
+static int alloc_pending_reqs;
|
|
+
|
|
+static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
|
|
+ return (req - pending_reqs[idx]);
|
|
+}
|
|
+
|
|
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
|
|
+
|
|
+#define BLKBACK_INVALID_HANDLE (~0)
|
|
+
|
|
+static struct page **foreign_pages[MAX_DYNAMIC_MEM];
|
|
+static inline struct page *idx_to_page(
|
|
+ unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
|
|
+{
|
|
+ unsigned int arr_idx = req_idx*BLKIF_MAX_SEGMENTS_PER_REQUEST + sg_idx;
|
|
+ return foreign_pages[mmap_idx][arr_idx];
|
|
+}
|
|
+static inline unsigned long idx_to_kaddr(
|
|
+ unsigned int mmap_idx, unsigned int req_idx, unsigned int sg_idx)
|
|
+{
|
|
+ unsigned long pfn = page_to_pfn(idx_to_page(mmap_idx,req_idx,sg_idx));
|
|
+ return (unsigned long)pfn_to_kaddr(pfn);
|
|
+}
|
|
+
|
|
+static unsigned short mmap_alloc = 0;
|
|
+static unsigned short mmap_lock = 0;
|
|
+static unsigned short mmap_inuse = 0;
|
|
+
|
|
+/******************************************************************
|
|
+ * GRANT HANDLES
|
|
+ */
|
|
+
|
|
+/* When using grant tables to map a frame for device access then the
|
|
+ * handle returned must be used to unmap the frame. This is needed to
|
|
+ * drop the ref count on the frame.
|
|
+ */
|
|
+struct grant_handle_pair
|
|
+{
|
|
+ grant_handle_t kernel;
|
|
+ grant_handle_t user;
|
|
+};
|
|
+#define INVALID_GRANT_HANDLE 0xFFFF
|
|
+
|
|
+static struct grant_handle_pair
|
|
+ pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
|
|
+#define pending_handle(_id, _idx, _i) \
|
|
+ (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
|
|
+ + (_i)])
|
|
+
|
|
+
|
|
+static int blktap_read_ufe_ring(tap_blkif_t *info); /*local prototypes*/
|
|
+
|
|
+#define BLKTAP_MINOR 0 /*/dev/xen/blktap has a dynamic major */
|
|
+#define BLKTAP_DEV_DIR "/dev/xen"
|
|
+
|
|
+static int blktap_major;
|
|
+
|
|
+/* blktap IOCTLs: */
|
|
+#define BLKTAP_IOCTL_KICK_FE 1
|
|
+#define BLKTAP_IOCTL_KICK_BE 2 /* currently unused */
|
|
+#define BLKTAP_IOCTL_SETMODE 3
|
|
+#define BLKTAP_IOCTL_SENDPID 4
|
|
+#define BLKTAP_IOCTL_NEWINTF 5
|
|
+#define BLKTAP_IOCTL_MINOR 6
|
|
+#define BLKTAP_IOCTL_MAJOR 7
|
|
+#define BLKTAP_QUERY_ALLOC_REQS 8
|
|
+#define BLKTAP_IOCTL_FREEINTF 9
|
|
+#define BLKTAP_IOCTL_NEWINTF_EXT 50
|
|
+#define BLKTAP_IOCTL_PRINT_IDXS 100
|
|
+
|
|
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */
|
|
+#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */
|
|
+#define BLKTAP_MODE_INTERCEPT_FE 0x00000001
|
|
+#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 /* unimp. */
|
|
+
|
|
+#define BLKTAP_MODE_INTERPOSE \
|
|
+ (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
|
|
+
|
|
+
|
|
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
|
|
+{
|
|
+ return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
|
|
+ (arg == BLKTAP_MODE_INTERCEPT_FE) ||
|
|
+ (arg == BLKTAP_MODE_INTERPOSE ));
|
|
+}
|
|
+
|
|
+/* Requests passing through the tap to userspace are re-assigned an ID.
|
|
+ * We must record a mapping between the BE [IDX,ID] tuple and the userspace
|
|
+ * ring ID.
|
|
+ */
|
|
+
|
|
+#define INVALID_MIDX 0xdead
|
|
+
|
|
+/*TODO: Convert to a free list*/
|
|
+static inline unsigned int GET_NEXT_REQ(const struct idx_map *idx_map)
|
|
+{
|
|
+ unsigned int i;
|
|
+
|
|
+ for (i = 0; i < MAX_PENDING_REQS; i++)
|
|
+ if (idx_map[i].mem == INVALID_MIDX)
|
|
+ break;
|
|
+
|
|
+ return i;
|
|
+}
|
|
+
|
|
+static inline unsigned int OFFSET_TO_USR_IDX(unsigned long offset)
|
|
+{
|
|
+ return offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
|
+}
|
|
+
|
|
+static inline unsigned int OFFSET_TO_SEG(unsigned long offset)
|
|
+{
|
|
+ return offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * True when both the kernel and the user grant handle of the pair *_g are
|
|
+ * INVALID_GRANT_HANDLE.  The argument is parenthesised so that any pointer
|
|
+ * expression can be passed.
|
|
+ */
|
|
+#define BLKTAP_INVALID_HANDLE(_g) \
|
|
+	((((_g)->kernel) == INVALID_GRANT_HANDLE) &&  \
|
|
+	 (((_g)->user) == INVALID_GRANT_HANDLE))
|
|
+
|
|
+#define BLKTAP_INVALIDATE_HANDLE(_g) do { \
|
|
+ (_g)->kernel = INVALID_GRANT_HANDLE; (_g)->user = INVALID_GRANT_HANDLE; \
|
|
+ } while(0)
|
|
+
|
|
+
|
|
+/******************************************************************
|
|
+ * BLKTAP VM OPS
|
|
+ */
|
|
+
|
|
+static struct page *blktap_nopage(struct vm_area_struct *vma,
|
|
+ unsigned long address,
|
|
+ int *type)
|
|
+{
|
|
+ /*
|
|
+ * if the page has not been mapped in by the driver then return
|
|
+ * NOPAGE_SIGBUS to the domain.
|
|
+ */
|
|
+
|
|
+ return NOPAGE_SIGBUS;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * PTE-clearing hook for blktap mappings (see blktap_vm_ops.zap_pte).
|
|
+ *
|
|
+ * For an address inside the data area of the mapping, this looks up the
|
|
+ * pending request/segment backing the page, queues grant-unmap operations
|
|
+ * for whichever of the kernel and user grant handles are valid, and issues
|
|
+ * them in one hypercall.  For any other address it just clears the PTE.
|
|
+ *
|
|
+ * Returns the PTE value: either the result of ptep_get_and_clear_full() or,
|
|
+ * on the user-grant path, a copy of *ptep taken before the unmap hypercall.
|
|
+ */
|
|
+static pte_t blktap_clear_pte(struct vm_area_struct *vma,
|
|
+ unsigned long uvaddr,
|
|
+ pte_t *ptep, int is_fullmm)
|
|
+{
|
|
+ pte_t copy;
|
|
+ tap_blkif_t *info = NULL;
|
|
+ unsigned int seg, usr_idx, pending_idx, mmap_idx, count = 0;
|
|
+ unsigned long offset, uvstart = 0;
|
|
+ struct page *pg;
|
|
+ struct grant_handle_pair *khandle;
|
|
+ struct gnttab_unmap_grant_ref unmap[2];
|
|
+
|
|
+ /*
|
|
+ * If the address is before the start of the grant mapped region, or
|
|
+ * if vm_file is NULL (meaning mmap failed), there is no grant state
|
|
+ * to tear down: just clear the PTE and return it.
|
|
+ *
|
|
+ * NOTE(review): uvstart is derived from rings_vstart rather than
|
|
+ * user_vstart -- confirm the two agree for this mapping.
|
|
+ */
|
|
+ if (vma->vm_file != NULL) {
|
|
+ info = vma->vm_file->private_data;
|
|
+ uvstart = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
|
|
+ }
|
|
+ if (vma->vm_file == NULL || uvaddr < uvstart)
|
|
+ return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
|
|
+ ptep, is_fullmm);
|
|
+
|
|
+ /* TODO Should these be changed to if statements? */
|
|
+ BUG_ON(!info);
|
|
+ BUG_ON(!info->idx_map);
|
|
+
|
|
+ /* Page offset into the data area -> (user request slot, segment). */
|
|
+ offset = (uvaddr - uvstart) >> PAGE_SHIFT;
|
|
+ usr_idx = OFFSET_TO_USR_IDX(offset);
|
|
+ seg = OFFSET_TO_SEG(offset);
|
|
+
|
|
+ /* Translate the user slot to the kernel-side request and mmap set. */
|
|
+ pending_idx = info->idx_map[usr_idx].req;
|
|
+ mmap_idx = info->idx_map[usr_idx].mem;
|
|
+
|
|
+ pg = idx_to_page(mmap_idx, pending_idx, seg);
|
|
+ ClearPageReserved(pg);
|
|
+ info->foreign_map.map[offset + RING_PAGES] = NULL;
|
|
+
|
|
+ khandle = &pending_handle(mmap_idx, pending_idx, seg);
|
|
+
|
|
+ /* Kernel-side mapping: unmap it and invalidate the p2m entry. */
|
|
+ if (khandle->kernel != INVALID_GRANT_HANDLE) {
|
|
+ unsigned long pfn = page_to_pfn(pg);
|
|
+
|
|
+ gnttab_set_unmap_op(&unmap[count],
|
|
+ (unsigned long)pfn_to_kaddr(pfn),
|
|
+ GNTMAP_host_map, khandle->kernel);
|
|
+ count++;
|
|
+
|
|
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
|
+ }
|
|
+
|
|
+ if (khandle->user != INVALID_GRANT_HANDLE) {
|
|
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
|
+
|
|
+ /*
|
|
+ * User-side grant mapping: the PTE is not cleared here; the
|
|
+ * unmap op is given the PTE's machine address instead
|
|
+ * (presumably the hypervisor clears it -- confirm).
|
|
+ */
|
|
+ copy = *ptep;
|
|
+ gnttab_set_unmap_op(&unmap[count], ptep_to_machine(ptep),
|
|
+ GNTMAP_host_map
|
|
+ | GNTMAP_application_map
|
|
+ | GNTMAP_contains_pte,
|
|
+ khandle->user);
|
|
+ count++;
|
|
+ } else {
|
|
+ BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap));
|
|
+
|
|
+ /* USING SHADOW PAGE TABLES. */
|
|
+ copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
|
|
+ is_fullmm);
|
|
+ }
|
|
+
|
|
+ if (count) {
|
|
+ /* Mark both handles invalid, then issue the queued unmaps. */
|
|
+ BLKTAP_INVALIDATE_HANDLE(khandle);
|
|
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
|
|
+ unmap, count))
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ return copy;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * VMA open hook: point vm_private_data at the foreign_map entry that
|
|
+ * corresponds to the first page of this vma, measured from rings_vstart.
|
|
+ * A vma without a backing file is left untouched.
|
|
+ */
|
|
+static void blktap_vma_open(struct vm_area_struct *vma)
|
|
+{
|
|
+	if (vma->vm_file != NULL) {
|
|
+		tap_blkif_t *tap = vma->vm_file->private_data;
|
|
+		unsigned long first_page =
|
|
+			(vma->vm_start - tap->rings_vstart) >> PAGE_SHIFT;
|
|
+
|
|
+		vma->vm_private_data = &tap->foreign_map.map[first_page];
|
|
+	}
|
|
+}
|
|
+
|
|
+/* tricky part
|
|
+ * When partially munmapping, ->open() is called only on the split-off vma,
|
|
+ * which will be released soon.  See split_vma() and do_munmap() in mm/mmap.c.
|
|
+ * So there is no chance to fix up vm_private_data of the end vma.
|
|
+ */
|
|
+/*
|
|
+ * VMA close hook: if the next vma is an adjacent blktap vma backed by the
|
|
+ * same file, recompute its vm_private_data from its own start address.
|
|
+ */
|
|
+static void blktap_vma_close(struct vm_area_struct *vma)
|
|
+{
|
|
+ tap_blkif_t *info;
|
|
+ struct vm_area_struct *next = vma->vm_next;
|
|
+
|
|
+ /* Nothing to fix up unless 'next' directly continues this mapping. */
|
|
+ if (next == NULL ||
|
|
+ vma->vm_ops != next->vm_ops ||
|
|
+ vma->vm_end != next->vm_start ||
|
|
+ vma->vm_file == NULL ||
|
|
+ vma->vm_file != next->vm_file)
|
|
+ return;
|
|
+
|
|
+ info = vma->vm_file->private_data;
|
|
+ next->vm_private_data =
|
|
+ &info->foreign_map.map[(next->vm_start - info->rings_vstart) >> PAGE_SHIFT];
|
|
+}
|
|
+
|
|
+/* VM operations installed on blktap mappings. */
|
|
+static struct vm_operations_struct blktap_vm_ops = {
|
|
+	.nopage  = blktap_nopage,
|
|
+	.zap_pte = blktap_clear_pte,
|
|
+	.open    = blktap_vma_open,
|
|
+	.close   = blktap_vma_close,
|
|
+};
|
|
+
|
|
+/******************************************************************
|
|
+ * BLKTAP FILE OPS
|
|
+ */
|
|
+
|
|
+/*Function Declarations*/
|
|
+static tap_blkif_t *get_next_free_dev(void);
|
|
+static int blktap_open(struct inode *inode, struct file *filp);
|
|
+static int blktap_release(struct inode *inode, struct file *filp);
|
|
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
|
|
+static int blktap_ioctl(struct inode *inode, struct file *filp,
|
|
+ unsigned int cmd, unsigned long arg);
|
|
+static unsigned int blktap_poll(struct file *file, poll_table *wait);
|
|
+
|
|
+static const struct file_operations blktap_fops = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .poll = blktap_poll,
|
|
+ .ioctl = blktap_ioctl,
|
|
+ .open = blktap_open,
|
|
+ .release = blktap_release,
|
|
+ .mmap = blktap_mmap,
|
|
+};
|
|
+
|
|
+
|
|
+/*
|
|
+ * Find or create a tap device that is neither in use nor pending.
|
|
+ *
|
|
+ * Existing minors are scanned first: an idle device is marked pending and
|
|
+ * returned; an empty slot (left by an earlier failed allocation) is refilled.
|
|
+ * If nothing is found and fewer than MAX_TAP_DEV minors have been handed
|
|
+ * out, a new minor is taken.  A newly allocated device is published in
|
|
+ * tapfds[] and gets a "blktap<minor>" class device.
|
|
+ *
|
|
+ * Returns the device, or NULL if none is available or allocation failed.
|
|
+ */
|
|
+static tap_blkif_t *get_next_free_dev(void)
|
|
+{
|
|
+	struct class *class;
|
|
+	tap_blkif_t *info;
|
|
+	int minor;
|
|
+
|
|
+	/*
|
|
+	 * This is called only from the ioctl, which
|
|
+	 * means we should always have interrupts enabled.
|
|
+	 */
|
|
+	BUG_ON(irqs_disabled());
|
|
+
|
|
+	spin_lock_irq(&pending_free_lock);
|
|
+
|
|
+	/* tapfds[0] is always NULL */
|
|
+
|
|
+	for (minor = 1; minor < blktap_next_minor; minor++) {
|
|
+		info = tapfds[minor];
|
|
+		/*
|
|
+		 * We could have failed a previous attempt, leaving the slot
|
|
+		 * empty.  Reuse the minor, but do not touch 'info': it is
|
|
+		 * NULL here and gets allocated below.
|
|
+		 */
|
|
+		if (!info)
|
|
+			goto found;
|
|
+
|
|
+		if (!test_bit(0, &info->dev_inuse) &&
|
|
+		    info->dev_pending == 0) {
|
|
+			info->dev_pending = 1;
|
|
+			goto found;
|
|
+		}
|
|
+	}
|
|
+	info = NULL;
|
|
+	minor = -1;
|
|
+
|
|
+	/*
|
|
+	 * We didn't find free device. If we can still allocate
|
|
+	 * more, then we grab the next device minor that is
|
|
+	 * available.  This is done while we are still under
|
|
+	 * the protection of the pending_free_lock.
|
|
+	 */
|
|
+	if (blktap_next_minor < MAX_TAP_DEV)
|
|
+		minor = blktap_next_minor++;
|
|
+found:
|
|
+	spin_unlock_irq(&pending_free_lock);
|
|
+
|
|
+	if (!info && minor > 0) {
|
|
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
|
|
+		if (unlikely(!info)) {
|
|
+			/*
|
|
+			 * If we failed here, try to put back
|
|
+			 * the next minor number. But if one
|
|
+			 * was just taken, then we just lose this
|
|
+			 * minor.  We can try to allocate this
|
|
+			 * minor again later.
|
|
+			 */
|
|
+			spin_lock_irq(&pending_free_lock);
|
|
+			if (blktap_next_minor == minor+1)
|
|
+				blktap_next_minor--;
|
|
+			spin_unlock_irq(&pending_free_lock);
|
|
+			goto out;
|
|
+		}
|
|
+
|
|
+		info->minor = minor;
|
|
+		/*
|
|
+		 * Make sure that we have a minor before others can
|
|
+		 * see us.
|
|
+		 */
|
|
+		wmb();
|
|
+		tapfds[minor] = info;
|
|
+
|
|
+		if ((class = get_xen_class()) != NULL)
|
|
+			class_device_create(class, NULL,
|
|
+					    MKDEV(blktap_major, minor), NULL,
|
|
+					    "blktap%d", minor);
|
|
+	}
|
|
+
|
|
+out:
|
|
+	return info;
|
|
+}
|
|
+
|
|
+/*
+ * Look up the tap device registered for (domid, xenbus_id).
+ *
+ * On a match the device is bound to @blkif, its status is set to RUNNING
+ * and its minor index is returned; -1 is returned when no device matches.
+ */
+int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif)
+{
+	int minor;
+
+	for (minor = 1; minor < blktap_next_minor; minor++) {
+		tap_blkif_t *tap = tapfds[minor];
+
+		if (!tap)
+			continue;
+		if (tap->trans.domid != domid)
+			continue;
+		if (tap->trans.busid != xenbus_id)
+			continue;
+
+		tap->blkif = blkif;
+		tap->status = RUNNING;
+		return minor;
+	}
+
+	return -1;
+}
|
|
+
|
|
+/*
+ * Detach tap device @idx from its blkif.
+ *
+ * If the recorded pid still names a task the device is marked
+ * CLEANSHUTDOWN.  The blkif pointer is cleared and the reference held on
+ * the mapping process' mm (if any) is dropped.  Out-of-range indices and
+ * empty slots are ignored.
+ */
+void signal_tapdisk(int idx)
+{
+	struct mm_struct *old_mm;
+	tap_blkif_t *tap;
+
+	/*
+	 * if the userland tools set things up wrong, this could be negative;
+	 * just don't try to signal in this case
+	 */
+	if (idx < 0 || idx >= MAX_TAP_DEV)
+		return;
+
+	tap = tapfds[idx];
+	if (tap == NULL)
+		return;
+
+	if (tap->pid > 0 && find_task_by_pid(tap->pid) != NULL)
+		tap->status = CLEANSHUTDOWN;
+
+	tap->blkif = NULL;
+
+	old_mm = xchg(&tap->mm, NULL);
+	if (old_mm != NULL)
+		mmput(old_mm);
+}
|
|
+
|
|
+/*
+ * open() for the blktap character devices.
+ *
+ * Index 0 is the control device and needs no per-device state.  For a tap
+ * device, claim exclusive use, allocate the shared front ring page and the
+ * usr_idx translation table, and attach the device to @filp.
+ *
+ * Returns 0 on success, -ENODEV for an unknown device, -EBUSY if it is
+ * already open, or -ENOMEM.  On -ENOMEM everything acquired here is
+ * released again (release() is not called for a failed open), so the
+ * device can be opened again later.
+ */
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+	blkif_sring_t *sring;
+	int idx = iminor(inode) - BLKTAP_MINOR;
+	tap_blkif_t *info;
+	int i;
+
+	/* ctrl device, treat differently */
+	if (!idx)
+		return 0;
+	if (idx < 0 || idx >= MAX_TAP_DEV) {
+		WPRINTK("No device /dev/xen/blktap%d\n", idx);
+		return -ENODEV;
+	}
+
+	info = tapfds[idx];
+	if (!info) {
+		WPRINTK("Unable to open device /dev/xen/blktap%d\n",
+			idx);
+		return -ENODEV;
+	}
+
+	DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
+
+	/*Only one process can access device at a time*/
+	if (test_and_set_bit(0, &info->dev_inuse))
+		return -EBUSY;
+
+	/* Allocate the fe ring. */
+	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+	if (sring == NULL)
+		goto fail_inuse;
+
+	info->idx_map = kmalloc(sizeof(*info->idx_map) * MAX_PENDING_REQS,
+				GFP_KERNEL);
+	if (info->idx_map == NULL)
+		goto fail_ring;
+
+	/* Nothing below can fail: commit the new state. */
+	info->dev_pending = 0;
+
+	SetPageReserved(virt_to_page(sring));
+
+	SHARED_RING_INIT(sring);
+	FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
+
+	info->mm = NULL;
+
+	init_waitqueue_head(&info->wait);
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		info->idx_map[i].mem = INVALID_MIDX;
+		info->idx_map[i].req = ~0;
+	}
+
+	filp->private_data = info;
+
+	DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
+	return 0;
+
+ fail_ring:
+	free_page((unsigned long)sring);
+ fail_inuse:
+	/* Give the device back, otherwise it would stay EBUSY forever. */
+	clear_bit(0, &info->dev_inuse);
+	return -ENOMEM;
+}
|
|
+
|
|
+/*
+ * release() for the blktap character devices.
+ *
+ * The control device carries no private data and is ignored.  For a tap
+ * device: mark the ring unusable, drop the mm reference, free the foreign
+ * page map, the ring page and the usr_idx table, stop the blkif thread
+ * unless the device was already shut down cleanly, and finally clear the
+ * in-use bit.
+ */
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+	tap_blkif_t *tap = filp->private_data;
+	struct mm_struct *old_mm;
+
+	/* check for control device */
+	if (tap == NULL)
+		return 0;
+
+	tap->ring_ok = 0;
+	smp_wmb();
+
+	old_mm = xchg(&tap->mm, NULL);
+	if (old_mm != NULL)
+		mmput(old_mm);
+
+	kfree(tap->foreign_map.map);
+	tap->foreign_map.map = NULL;
+
+	/* Free the ring page. */
+	ClearPageReserved(virt_to_page(tap->ufe_ring.sring));
+	free_page((unsigned long) tap->ufe_ring.sring);
+
+	/* kfree() accepts NULL, so no guard is needed. */
+	kfree(tap->idx_map);
+	tap->idx_map = NULL;
+
+	if (tap->status != CLEANSHUTDOWN && tap->blkif != NULL) {
+		if (tap->blkif->xenblkd != NULL) {
+			kthread_stop(tap->blkif->xenblkd);
+			tap->blkif->xenblkd = NULL;
+		}
+		tap->status = CLEANSHUTDOWN;
+	}
+
+	clear_bit(0, &tap->dev_inuse);
+	DPRINTK("Freeing device [/dev/xen/blktap%d]\n",tap->minor);
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* Note on mmap:
|
|
+ * We need to map pages to user space in a way that will allow the block
|
|
+ * subsystem set up direct IO to them. This couldn't be done before, because
|
|
+ * there isn't really a sane way to translate a user virtual address down to a
|
|
+ * physical address when the page belongs to another domain.
|
|
+ *
|
|
+ * My first approach was to map the page in to kernel memory, add an entry
|
|
+ * for it in the physical frame list (using alloc_lomem_region as in blkback)
|
|
+ * and then attempt to map that page up to user space. This is disallowed
|
|
+ * by xen though, which realizes that we don't really own the machine frame
|
|
+ * underlying the physical page.
|
|
+ *
|
|
+ * The new approach is to provide explicit support for this in xen linux.
|
|
+ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
|
|
+ * mapped from other vms. vma->vm_private_data is set up as a mapping
|
|
+ * from pages to actual page structs. There is a new clause in get_user_pages
|
|
+ * that does the right thing for this sort of mapping.
|
|
+ */
|
|
+/*
+ * mmap() for a tap device.
+ *
+ * The mapping must cover exactly mmap_pages + RING_PAGES pages.  The ring
+ * page is mapped at the start of the region, a per-page foreign_map table
+ * is allocated and hung off vm_private_data, and the vma is flagged
+ * VM_FOREIGN | VM_DONTCOPY.  A reference on the caller's mm is taken and
+ * ring_ok is set last, after a write barrier.
+ *
+ * Returns 0, -EAGAIN for a wrongly sized mapping, or -ENOMEM.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	tap_blkif_t *tap = filp->private_data;
+	int nbytes;
+	int err;
+
+	if (tap == NULL) {
+		WPRINTK("blktap: mmap, retrieving idx failed\n");
+		return -ENOMEM;
+	}
+
+	vma->vm_flags |= VM_RESERVED;
+	vma->vm_ops = &blktap_vm_ops;
+
+	nbytes = vma->vm_end - vma->vm_start;
+	if (nbytes != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
+		WPRINTK("you _must_ map exactly %d pages!\n",
+			mmap_pages + RING_PAGES);
+		return -EAGAIN;
+	}
+
+	tap->rings_vstart = vma->vm_start;
+	tap->user_vstart = tap->rings_vstart + (RING_PAGES << PAGE_SHIFT);
+
+	/* Map the ring pages to the start of the region and reserve it. */
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		err = remap_pfn_range(vma, vma->vm_start,
+				      __pa(tap->ufe_ring.sring) >> PAGE_SHIFT,
+				      PAGE_SIZE, vma->vm_page_prot);
+	else
+		err = vm_insert_page(vma, vma->vm_start,
+				     virt_to_page(tap->ufe_ring.sring));
+	if (err) {
+		WPRINTK("Mapping user ring failed!\n");
+		goto fail;
+	}
+
+	/* Mark this VM as containing foreign pages, and set up mappings. */
+	tap->foreign_map.map =
+		kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) *
+			sizeof(*tap->foreign_map.map), GFP_KERNEL);
+	if (!tap->foreign_map.map) {
+		WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
+		goto fail;
+	}
+
+	vma->vm_private_data = &tap->foreign_map;
+	vma->vm_flags |= VM_FOREIGN | VM_DONTCOPY;
+
+#ifdef CONFIG_X86
+	vma->vm_mm->context.has_foreign_mappings = 1;
+#endif
+
+	tap->mm = get_task_mm(current);
+	smp_wmb();
+	tap->ring_ok = 1;
+	return 0;
+
+ fail:
+	/* Clear any active mappings. */
+	zap_page_range(vma, vma->vm_start,
+		       vma->vm_end - vma->vm_start, NULL);
+
+	return -ENOMEM;
+}
|
|
+
|
|
+
|
|
+/*
+ * ioctl() for the blktap control device and the tap devices.
+ *
+ * filp->private_data is NULL on the control device and the tap_blkif_t on
+ * a tap device; several commands are silently ignored without it.
+ * Returns a command-specific non-negative value, a negative errno, or
+ * -ENOIOCTLCMD for unknown commands.
+ */
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+	tap_blkif_t *info = filp->private_data;
+
+	switch(cmd) {
+	case BLKTAP_IOCTL_KICK_FE:
+	{
+		/* There are fe messages to process. */
+		return blktap_read_ufe_ring(info);
+	}
+	case BLKTAP_IOCTL_SETMODE:
+	{
+		if (info) {
+			if (BLKTAP_MODE_VALID(arg)) {
+				info->mode = arg;
+				/* XXX: may need to flush rings here. */
+				DPRINTK("blktap: set mode to %lx\n",
+				       arg);
+				return 0;
+			}
+		}
+		return 0;
+	}
+	case BLKTAP_IOCTL_PRINT_IDXS:
+	{
+		if (info) {
+			printk("User Rings: \n-----------\n");
+			printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
+				"| req_prod: %2d, rsp_prod: %2d\n",
+				info->ufe_ring.rsp_cons,
+				info->ufe_ring.req_prod_pvt,
+				info->ufe_ring.sring->req_prod,
+				info->ufe_ring.sring->rsp_prod);
+		}
+		return 0;
+	}
+	case BLKTAP_IOCTL_SENDPID:
+	{
+		if (info) {
+			info->pid = (pid_t)arg;
+			DPRINTK("blktap: pid received %d\n",
+			       info->pid);
+		}
+		return 0;
+	}
+	case BLKTAP_IOCTL_NEWINTF:
+	{
+		/* The translation record is packed into the argument itself. */
+		uint64_t val = (uint64_t)arg;
+		domid_translate_t *tr = (domid_translate_t *)&val;
+
+		DPRINTK("NEWINTF Req for domid %d and bus id %d\n",
+		       tr->domid, tr->busid);
+		info = get_next_free_dev();
+		if (!info) {
+			WPRINTK("Error initialising /dev/xen/blktap - "
+				"No more devices\n");
+			return -1;
+		}
+		info->trans.domid = tr->domid;
+		info->trans.busid = tr->busid;
+		return info->minor;
+	}
+	case BLKTAP_IOCTL_NEWINTF_EXT:
+	{
+		void __user *udata = (void __user *) arg;
+		domid_translate_ext_t tr;
+
+		if (copy_from_user(&tr, udata, sizeof(domid_translate_ext_t)))
+			return -EFAULT;
+
+		DPRINTK("NEWINTF_EXT Req for domid %d and bus id %d\n",
+		       tr.domid, tr.busid);
+		info = get_next_free_dev();
+		if (!info) {
+			WPRINTK("Error initialising /dev/xen/blktap - "
+				"No more devices\n");
+			return -1;
+		}
+		info->trans.domid = tr.domid;
+		info->trans.busid = tr.busid;
+		return info->minor;
+	}
+	case BLKTAP_IOCTL_FREEINTF:
+	{
+		unsigned long dev = arg;
+		unsigned long flags;
+
+		/* Only valid on the control device. */
+		if (info || dev >= MAX_TAP_DEV)
+			return -EINVAL;
+
+		info = tapfds[dev];
+		if (!info)
+			return 0; /* should this be an error? */
+
+		spin_lock_irqsave(&pending_free_lock, flags);
+		if (info->dev_pending)
+			info->dev_pending = 0;
+		spin_unlock_irqrestore(&pending_free_lock, flags);
+
+		return 0;
+	}
+	case BLKTAP_IOCTL_MINOR:
+		if (!info) {
+			unsigned long dev = arg;
+
+			if (dev >= MAX_TAP_DEV)
+				return -EINVAL;
+
+			info = tapfds[dev];
+			if (!info)
+				return -EINVAL;
+		}
+
+		return info->minor;
+
+	case BLKTAP_IOCTL_MAJOR:
+		return blktap_major;
+
+	case BLKTAP_QUERY_ALLOC_REQS:
+		WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%lu\n",
+			alloc_pending_reqs, MAX_PENDING_REQS);
+		/*
+		 * Percentage of requests in use.  Multiply before dividing:
+		 * with integer arithmetic, dividing first yields 0 for any
+		 * partial load.
+		 */
+		return (alloc_pending_reqs * 100) / MAX_PENDING_REQS;
+	}
+	return -ENOIOCTLCMD;
+}
|
|
+
|
|
+/*
+ * poll() for a tap device.
+ *
+ * Registers on the device wait queue.  If requests have been queued
+ * privately but not yet published on the shared ring, publish them and
+ * report the device readable.  The control device never reports events.
+ */
+static unsigned int blktap_poll(struct file *filp, poll_table *wait)
+{
+	tap_blkif_t *tap = filp->private_data;
+	unsigned int mask = 0;
+
+	/* do not work on the control device */
+	if (tap == NULL)
+		return 0;
+
+	poll_wait(filp, &tap->wait, wait);
+
+	if (tap->ufe_ring.req_prod_pvt != tap->ufe_ring.sring->req_prod) {
+		RING_PUSH_REQUESTS(&tap->ufe_ring);
+		mask = POLLIN | POLLRDNORM;
+	}
+
+	return mask;
+}
|
|
+
|
|
+/*
+ * Wake any process sleeping on tap device @idx's wait queue.
+ * Out-of-range indices and empty slots are ignored.
+ */
+static void blktap_kick_user(int idx)
+{
+	tap_blkif_t *tap = NULL;
+
+	if (idx >= 0 && idx < MAX_TAP_DEV)
+		tap = tapfds[idx];
+
+	if (tap != NULL)
+		wake_up_interruptible(&tap->wait);
+}
|
|
+
|
|
+static int do_block_io_op(blkif_t *blkif);
|
|
+static void dispatch_rw_block_io(blkif_t *blkif,
|
|
+ blkif_request_t *req,
|
|
+ pending_req_t *pending_req);
|
|
+static void make_response(blkif_t *blkif, u64 id,
|
|
+ unsigned short op, int st);
|
|
+
|
|
+/******************************************************************
|
|
+ * misc small helpers
|
|
+ */
|
|
+/*
+ * Add one more batch of MAX_PENDING_REQS pending requests together with
+ * its backing foreign page vector, and put the new requests on the
+ * pending_free list.
+ *
+ * Returns 0 on success, -EINVAL if no further batch may be added (table
+ * full or a shrink is in progress via mmap_lock), or -ENOMEM.
+ */
+static int req_increase(void)
+{
+	int i, j;
+
+	if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock)
+		return -EINVAL;
+
+	pending_reqs[mmap_alloc] = kzalloc(sizeof(pending_req_t)
+					   * MAX_PENDING_REQS, GFP_KERNEL);
+	foreign_pages[mmap_alloc] = alloc_empty_pages_and_pagevec(mmap_pages);
+
+	if (!pending_reqs[mmap_alloc] || !foreign_pages[mmap_alloc])
+		goto out_of_memory;
+
+	DPRINTK("%s: reqs=%lu, pages=%d\n",
+		__FUNCTION__, MAX_PENDING_REQS, mmap_pages);
+
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		list_add_tail(&pending_reqs[mmap_alloc][i].free_list,
+			      &pending_free);
+		pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
+		for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+			BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc,
+								 i, j));
+	}
+
+	mmap_alloc++;
+	DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
+	return 0;
+
+ out_of_memory:
+	/*
+	 * Release whichever half was allocated and reset both slots so no
+	 * stale pointer to freed memory stays in the tables (mmap_req_del()
+	 * resets them the same way).
+	 */
+	free_empty_pages_and_pagevec(foreign_pages[mmap_alloc], mmap_pages);
+	foreign_pages[mmap_alloc] = NULL;
+	kfree(pending_reqs[mmap_alloc]);
+	pending_reqs[mmap_alloc] = NULL;
+	WPRINTK("%s: out of memory\n", __FUNCTION__);
+	return -ENOMEM;
+}
|
|
+
|
|
+/*
+ * Release request batch @mmap: its pending_req array and its foreign page
+ * vector.  Clears mmap_lock and decrements mmap_alloc.
+ * Must be called with pending_free_lock held.
+ */
+static void mmap_req_del(int mmap)
+{
+	assert_spin_locked(&pending_free_lock);
+
+	kfree(pending_reqs[mmap]);
+	pending_reqs[mmap] = NULL;
+
+	/*
+	 * Free the page vector of the batch being deleted.  Indexing with
+	 * mmap_alloc here would hit the slot after the last batch and leak
+	 * the pages that actually belong to @mmap.
+	 */
+	free_empty_pages_and_pagevec(foreign_pages[mmap], mmap_pages);
+	foreign_pages[mmap] = NULL;
+
+	mmap_lock = 0;
+	mmap_alloc--;
+	DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
+}
|
|
+
|
|
+/*
+ * Take one request off the pending_free list.
+ *
+ * Marks it in use and bumps alloc_pending_reqs, all under
+ * pending_free_lock.  Returns NULL when the free list is empty.
+ */
+static pending_req_t* alloc_req(void)
+{
+	pending_req_t *taken = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&pending_free_lock, irqflags);
+
+	if (!list_empty(&pending_free)) {
+		taken = list_entry(pending_free.next, pending_req_t,
+				   free_list);
+		list_del(&taken->free_list);
+		taken->inuse = 1;
+		alloc_pending_reqs++;
+	}
+
+	spin_unlock_irqrestore(&pending_free_lock, irqflags);
+
+	return taken;
+}
|
|
+
|
|
+/*
+ * Give a request back.
+ *
+ * If the newest batch is being retired (mmap_lock set and the request
+ * belongs to batch mmap_alloc-1) the request is not re-listed; once the
+ * batch's in-use count reaches zero the batch is deleted.  Otherwise the
+ * request goes back on pending_free, and waiters are woken if the list
+ * was empty before.
+ */
+static void free_req(pending_req_t *req)
+{
+	unsigned long irqflags;
+	int retiring;
+	int wake = 0;
+
+	spin_lock_irqsave(&pending_free_lock, irqflags);
+
+	alloc_pending_reqs--;
+	req->inuse = 0;
+
+	retiring = mmap_lock && (req->mem_idx == mmap_alloc-1);
+	if (retiring) {
+		if (--mmap_inuse == 0)
+			mmap_req_del(mmap_alloc-1);
+	} else {
+		wake = list_empty(&pending_free);
+		list_add(&req->free_list, &pending_free);
+	}
+
+	spin_unlock_irqrestore(&pending_free_lock, irqflags);
+
+	/* Wake up outside the lock, and only on the empty -> non-empty edge. */
+	if (wake)
+		wake_up(&pending_free_wq);
+}
|
|
+
|
|
+/*
+ * Zap @nr_pages pages of user mappings in @mm starting at @uvaddr,
+ * walking every vma from the one found for @uvaddr onwards and clamping
+ * each zap to that vma's bounds.
+ */
+static void blktap_zap_page_range(struct mm_struct *mm,
+				  unsigned long uvaddr, int nr_pages)
+{
+	unsigned long end = uvaddr + (nr_pages << PAGE_SHIFT);
+	struct vm_area_struct *vma;
+
+	for (vma = find_vma(mm, uvaddr);
+	     vma && uvaddr < end;
+	     vma = vma->vm_next) {
+		unsigned long from = max(uvaddr, vma->vm_start);
+		unsigned long to = min(end, vma->vm_end);
+
+		zap_page_range(vma, from, to - from, NULL);
+
+		uvaddr = to;
+	}
+}
|
|
+
|
|
+static void fast_flush_area(pending_req_t *req, unsigned int k_idx,
|
|
+ unsigned int u_idx, int tapidx)
|
|
+{
|
|
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
|
|
+ unsigned int i, mmap_idx, invcount = 0, locked = 0;
|
|
+ struct grant_handle_pair *khandle;
|
|
+ uint64_t ptep;
|
|
+ int ret;
|
|
+ unsigned long uvaddr;
|
|
+ tap_blkif_t *info;
|
|
+ struct mm_struct *mm;
|
|
+
|
|
+
|
|
+ if ((tapidx < 0) || (tapidx >= MAX_TAP_DEV)
|
|
+ || !(info = tapfds[tapidx])) {
|
|
+ WPRINTK("fast_flush: Couldn't get info!\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ mm = info->mm;
|
|
+
|
|
+ if (mm != NULL && xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ down_write(&mm->mmap_sem);
|
|
+ blktap_zap_page_range(mm,
|
|
+ MMAP_VADDR(info->user_vstart, u_idx, 0),
|
|
+ req->nr_pages);
|
|
+ up_write(&mm->mmap_sem);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ mmap_idx = req->mem_idx;
|
|
+
|
|
+ for (i = 0; i < req->nr_pages; i++) {
|
|
+ uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
|
|
+
|
|
+ khandle = &pending_handle(mmap_idx, k_idx, i);
|
|
+
|
|
+ if (khandle->kernel != INVALID_GRANT_HANDLE) {
|
|
+ gnttab_set_unmap_op(&unmap[invcount],
|
|
+ idx_to_kaddr(mmap_idx, k_idx, i),
|
|
+ GNTMAP_host_map, khandle->kernel);
|
|
+ invcount++;
|
|
+
|
|
+ set_phys_to_machine(
|
|
+ page_to_pfn(idx_to_page(mmap_idx, k_idx, i)),
|
|
+ INVALID_P2M_ENTRY);
|
|
+ }
|
|
+
|
|
+ if (mm != NULL && khandle->user != INVALID_GRANT_HANDLE) {
|
|
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
|
+ if (!locked++)
|
|
+ down_write(&mm->mmap_sem);
|
|
+ if (create_lookup_pte_addr(
|
|
+ mm,
|
|
+ MMAP_VADDR(info->user_vstart, u_idx, i),
|
|
+ &ptep) !=0) {
|
|
+ up_write(&mm->mmap_sem);
|
|
+ WPRINTK("Couldn't get a pte addr!\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ gnttab_set_unmap_op(&unmap[invcount], ptep,
|
|
+ GNTMAP_host_map
|
|
+ | GNTMAP_application_map
|
|
+ | GNTMAP_contains_pte,
|
|
+ khandle->user);
|
|
+ invcount++;
|
|
+ }
|
|
+
|
|
+ BLKTAP_INVALIDATE_HANDLE(khandle);
|
|
+ }
|
|
+ ret = HYPERVISOR_grant_table_op(
|
|
+ GNTTABOP_unmap_grant_ref, unmap, invcount);
|
|
+ BUG_ON(ret);
|
|
+
|
|
+ if (mm != NULL && !xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ if (!locked++)
|
|
+ down_write(&mm->mmap_sem);
|
|
+ blktap_zap_page_range(mm,
|
|
+ MMAP_VADDR(info->user_vstart, u_idx, 0),
|
|
+ req->nr_pages);
|
|
+ }
|
|
+
|
|
+ if (locked)
|
|
+ up_write(&mm->mmap_sem);
|
|
+}
|
|
+
|
|
+/******************************************************************
|
|
+ * SCHEDULER FUNCTIONS
|
|
+ */
|
|
+
|
|
+/*
+ * Log the out-of-requests / read / write counters of @blkif, then reset
+ * them and schedule the next report ten seconds from now.
+ */
+static void print_stats(blkif_t *blkif)
+{
+	printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
+	       current->comm, blkif->st_oo_req,
+	       blkif->st_rd_req, blkif->st_wr_req);
+
+	blkif->st_oo_req = 0;
+	blkif->st_rd_req = 0;
+	blkif->st_wr_req = 0;
+
+	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+}
|
|
+
|
|
+/*
+ * Kernel thread body for one blkif.
+ *
+ * Sleeps until the interface has waiting requests and a free pending
+ * request is available (or the thread is asked to stop), then runs
+ * do_block_io_op(); waiting_reqs is set again if that reports more work.
+ * On exit the reference on the tap device's mm is dropped.
+ *
+ * Always returns 0.
+ */
+int tap_blkif_schedule(void *arg)
+{
+	blkif_t *blkif = arg;
+	tap_blkif_t *info = NULL;
+
+	blkif_get(blkif);
+
+	if (debug_lvl)
+		printk(KERN_DEBUG "%s: started\n", current->comm);
+
+	while (!kthread_should_stop()) {
+		if (try_to_freeze())
+			continue;
+
+		wait_event_interruptible(
+			blkif->wq,
+			blkif->waiting_reqs || kthread_should_stop());
+		wait_event_interruptible(
+			pending_free_wq,
+			!list_empty(&pending_free) || kthread_should_stop());
+
+		blkif->waiting_reqs = 0;
+		smp_mb(); /* clear flag *before* checking for work */
+
+		if (do_block_io_op(blkif))
+			blkif->waiting_reqs = 1;
+
+		if (log_stats && time_after(jiffies, blkif->st_print))
+			print_stats(blkif);
+	}
+
+	if (log_stats)
+		print_stats(blkif);
+	if (debug_lvl)
+		printk(KERN_DEBUG "%s: exiting\n", current->comm);
+
+	blkif->xenblkd = NULL;
+	/*
+	 * dev_num may be out of range (the other users of tapfds[] check for
+	 * that), so do not index tapfds[] blindly.  Read it before dropping
+	 * our blkif reference.
+	 */
+	if (blkif->dev_num >= 0 && blkif->dev_num < MAX_TAP_DEV)
+		info = tapfds[blkif->dev_num];
+	blkif_put(blkif);
+
+	if (info) {
+		struct mm_struct *mm = xchg(&info->mm, NULL);
+
+		if (mm)
+			mmput(mm);
+	}
+
+	return 0;
+}
|
|
+
|
|
+/******************************************************************
|
|
+ * COMPLETION CALLBACK -- Called by user level ioctl()
|
|
+ */
|
|
+
|
|
+/*
+ * Consume the responses user space has placed on the UFE ring.
+ *
+ * Each response id is translated back to its (batch, pending request)
+ * pair through idx_map.  The request's pages are un-reserved and removed
+ * from foreign_map, the grant mappings are flushed, a response is sent to
+ * the frontend and the pending request is freed.  Responses with an
+ * invalid id or a stale mapping are logged and skipped.
+ *
+ * Does nothing without a device or outside INTERCEPT_FE mode.
+ * Always returns 0.
+ */
+static int blktap_read_ufe_ring(tap_blkif_t *info)
+{
+	RING_IDX cons, prod;
+
+	if (!info)
+		return 0;
+
+	/* We currently only forward packets in INTERCEPT_FE mode. */
+	if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
+		return 0;
+
+	/* for each outstanding message on the UFEring  */
+	prod = info->ufe_ring.sring->rsp_prod;
+	rmb();
+
+	for (cons = info->ufe_ring.rsp_cons; cons != prod; cons++) {
+		unsigned int usr_idx, mmap_idx, pending_idx, seg;
+		pending_req_t *pending_req;
+		blkif_response_t res;
+		blkif_t *blkif;
+
+		/* Work on a private copy; the ring is shared with user space. */
+		memcpy(&res, RING_GET_RESPONSE(&info->ufe_ring, cons),
+		       sizeof(res));
+		mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
+		++info->ufe_ring.rsp_cons;
+
+		/*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
+		if (res.id >= MAX_PENDING_REQS) {
+			WPRINTK("incorrect req map [%llx]\n",
+				(unsigned long long)res.id);
+			continue;
+		}
+
+		usr_idx = (unsigned int)res.id;
+		pending_idx = info->idx_map[usr_idx].req;
+		mmap_idx = info->idx_map[usr_idx].mem;
+
+		if (mmap_idx >= mmap_alloc ||
+		    pending_idx >= MAX_PENDING_REQS) {
+			WPRINTK("incorrect req map [%d],"
+				" internal map [%d,%d]\n",
+				usr_idx, mmap_idx, pending_idx);
+			continue;
+		}
+
+		pending_req = &pending_reqs[mmap_idx][pending_idx];
+		blkif = pending_req->blkif;
+
+		for (seg = 0; seg < pending_req->nr_pages; seg++) {
+			unsigned long uvaddr =
+				MMAP_VADDR(info->user_vstart, usr_idx, seg);
+			struct page *pg =
+				idx_to_page(mmap_idx, pending_idx, seg);
+			int offset;
+
+			ClearPageReserved(pg);
+			offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT;
+			info->foreign_map.map[offset] = NULL;
+		}
+
+		fast_flush_area(pending_req, pending_idx, usr_idx, info->minor);
+		info->idx_map[usr_idx].mem = INVALID_MIDX;
+		make_response(blkif, pending_req->id, res.operation,
+			      res.status);
+		blkif_put(pending_req->blkif);
+		free_req(pending_req);
+	}
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/******************************************************************************
|
|
+ * NOTIFICATION FROM GUEST OS.
|
|
+ */
|
|
+
|
|
+static void blkif_notify_work(blkif_t *blkif)
|
|
+{
|
|
+ blkif->waiting_reqs = 1;
|
|
+ wake_up(&blkif->wq);
|
|
+}
|
|
+
|
|
+irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+{
|
|
+ blkif_notify_work(dev_id);
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/******************************************************************
|
|
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
|
|
+ */
|
|
+static int print_dbug = 1;
|
|
+static int do_block_io_op(blkif_t *blkif)
|
|
+{
|
|
+ blkif_back_rings_t *blk_rings = &blkif->blk_rings;
|
|
+ blkif_request_t req;
|
|
+ pending_req_t *pending_req;
|
|
+ RING_IDX rc, rp;
|
|
+ int more_to_do = 0;
|
|
+ tap_blkif_t *info;
|
|
+
|
|
+ rc = blk_rings->common.req_cons;
|
|
+ rp = blk_rings->common.sring->req_prod;
|
|
+ rmb(); /* Ensure we see queued requests up to 'rp'. */
|
|
+
|
|
+ /*Check blkif has corresponding UE ring*/
|
|
+ if (blkif->dev_num < 0 || blkif->dev_num >= MAX_TAP_DEV) {
|
|
+ /*oops*/
|
|
+ if (print_dbug) {
|
|
+ WPRINTK("Corresponding UE "
|
|
+ "ring does not exist!\n");
|
|
+ print_dbug = 0; /*We only print this message once*/
|
|
+ }
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ info = tapfds[blkif->dev_num];
|
|
+
|
|
+ if (!info || !test_bit(0, &info->dev_inuse)) {
|
|
+ if (print_dbug) {
|
|
+ WPRINTK("Can't get UE info!\n");
|
|
+ print_dbug = 0;
|
|
+ }
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ while (rc != rp) {
|
|
+
|
|
+ if (RING_FULL(&info->ufe_ring)) {
|
|
+ WPRINTK("RING_FULL! More to do\n");
|
|
+ more_to_do = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) {
|
|
+ WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
|
|
+ " More to do\n");
|
|
+ more_to_do = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (kthread_should_stop()) {
|
|
+ more_to_do = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ pending_req = alloc_req();
|
|
+ if (NULL == pending_req) {
|
|
+ blkif->st_oo_req++;
|
|
+ more_to_do = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ switch (blkif->blk_protocol) {
|
|
+ case BLKIF_PROTOCOL_NATIVE:
|
|
+ memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc),
|
|
+ sizeof(req));
|
|
+ break;
|
|
+ case BLKIF_PROTOCOL_X86_32:
|
|
+ blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc));
|
|
+ break;
|
|
+ case BLKIF_PROTOCOL_X86_64:
|
|
+ blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc));
|
|
+ break;
|
|
+ default:
|
|
+ BUG();
|
|
+ }
|
|
+ blk_rings->common.req_cons = ++rc; /* before make_response() */
|
|
+
|
|
+ /* Apply all sanity checks to /private copy/ of request. */
|
|
+ barrier();
|
|
+
|
|
+ switch (req.operation) {
|
|
+ case BLKIF_OP_READ:
|
|
+ blkif->st_rd_req++;
|
|
+ dispatch_rw_block_io(blkif, &req, pending_req);
|
|
+ break;
|
|
+
|
|
+ case BLKIF_OP_WRITE:
|
|
+ blkif->st_wr_req++;
|
|
+ dispatch_rw_block_io(blkif, &req, pending_req);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ /* A good sign something is wrong: sleep for a while to
|
|
+ * avoid excessive CPU consumption by a bad guest. */
|
|
+ msleep(1);
|
|
+ WPRINTK("unknown operation [%d]\n",
|
|
+ req.operation);
|
|
+ make_response(blkif, req.id, req.operation,
|
|
+ BLKIF_RSP_ERROR);
|
|
+ free_req(pending_req);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* Yield point for this unbounded loop. */
|
|
+ cond_resched();
|
|
+ }
|
|
+
|
|
+ blktap_kick_user(blkif->dev_num);
|
|
+
|
|
+ return more_to_do;
|
|
+}
|
|
+
|
|
+static void dispatch_rw_block_io(blkif_t *blkif,
|
|
+ blkif_request_t *req,
|
|
+ pending_req_t *pending_req)
|
|
+{
|
|
+ extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]);
|
|
+ int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
|
|
+ struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
|
|
+ unsigned int nseg;
|
|
+ int ret, i, nr_sects = 0;
|
|
+ tap_blkif_t *info;
|
|
+ blkif_request_t *target;
|
|
+ unsigned int mmap_idx = pending_req->mem_idx;
|
|
+ unsigned int pending_idx = RTN_PEND_IDX(pending_req, mmap_idx);
|
|
+ unsigned int usr_idx;
|
|
+ struct mm_struct *mm;
|
|
+ struct vm_area_struct *vma = NULL;
|
|
+
|
|
+ if (blkif->dev_num < 0 || blkif->dev_num >= MAX_TAP_DEV)
|
|
+ goto fail_response;
|
|
+
|
|
+ info = tapfds[blkif->dev_num];
|
|
+ if (info == NULL)
|
|
+ goto fail_response;
|
|
+
|
|
+ /* Check we have space on user ring - should never fail. */
|
|
+ usr_idx = GET_NEXT_REQ(info->idx_map);
|
|
+ if (usr_idx >= MAX_PENDING_REQS) {
|
|
+ WARN_ON(1);
|
|
+ goto fail_response;
|
|
+ }
|
|
+
|
|
+ /* Check that number of segments is sane. */
|
|
+ nseg = req->nr_segments;
|
|
+ if ( unlikely(nseg == 0) ||
|
|
+ unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
|
|
+ WPRINTK("Bad number of segments in request (%d)\n", nseg);
|
|
+ goto fail_response;
|
|
+ }
|
|
+
|
|
+ /* Make sure userspace is ready. */
|
|
+ if (!info->ring_ok) {
|
|
+ WPRINTK("blktap: ring not ready for requests!\n");
|
|
+ goto fail_response;
|
|
+ }
|
|
+ smp_rmb();
|
|
+
|
|
+ if (RING_FULL(&info->ufe_ring)) {
|
|
+ WPRINTK("blktap: fe_ring is full, can't add "
|
|
+ "IO Request will be dropped. %d %d\n",
|
|
+ RING_SIZE(&info->ufe_ring),
|
|
+ RING_SIZE(&blkif->blk_rings.common));
|
|
+ goto fail_response;
|
|
+ }
|
|
+
|
|
+ pending_req->blkif = blkif;
|
|
+ pending_req->id = req->id;
|
|
+ pending_req->operation = operation;
|
|
+ pending_req->status = BLKIF_RSP_OKAY;
|
|
+ pending_req->nr_pages = nseg;
|
|
+ op = 0;
|
|
+ mm = info->mm;
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
|
|
+ down_write(&mm->mmap_sem);
|
|
+ for (i = 0; i < nseg; i++) {
|
|
+ unsigned long uvaddr;
|
|
+ unsigned long kvaddr;
|
|
+ uint64_t ptep;
|
|
+ uint32_t flags;
|
|
+
|
|
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
|
|
+ kvaddr = idx_to_kaddr(mmap_idx, pending_idx, i);
|
|
+
|
|
+ flags = GNTMAP_host_map;
|
|
+ if (operation == WRITE)
|
|
+ flags |= GNTMAP_readonly;
|
|
+ gnttab_set_map_op(&map[op], kvaddr, flags,
|
|
+ req->seg[i].gref, blkif->domid);
|
|
+ op++;
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ /* Now map it to user. */
|
|
+ ret = create_lookup_pte_addr(mm, uvaddr, &ptep);
|
|
+ if (ret) {
|
|
+ up_write(&mm->mmap_sem);
|
|
+ WPRINTK("Couldn't get a pte addr!\n");
|
|
+ goto fail_flush;
|
|
+ }
|
|
+
|
|
+ flags = GNTMAP_host_map | GNTMAP_application_map
|
|
+ | GNTMAP_contains_pte;
|
|
+ if (operation == WRITE)
|
|
+ flags |= GNTMAP_readonly;
|
|
+ gnttab_set_map_op(&map[op], ptep, flags,
|
|
+ req->seg[i].gref, blkif->domid);
|
|
+ op++;
|
|
+ }
|
|
+
|
|
+ nr_sects += (req->seg[i].last_sect -
|
|
+ req->seg[i].first_sect + 1);
|
|
+ }
|
|
+
|
|
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
|
|
+ BUG_ON(ret);
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ up_write(&mm->mmap_sem);
|
|
+
|
|
+ for (i = 0; i < (nseg*2); i+=2) {
|
|
+ unsigned long uvaddr;
|
|
+ unsigned long offset;
|
|
+ struct page *pg;
|
|
+
|
|
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
|
|
+
|
|
+ gnttab_check_GNTST_eagain_while(GNTTABOP_map_grant_ref, &map[i]);
|
|
+
|
|
+ if (unlikely(map[i].status != GNTST_okay)) {
|
|
+ WPRINTK("invalid kernel buffer -- could not remap it\n");
|
|
+ ret = 1;
|
|
+ map[i].handle = INVALID_GRANT_HANDLE;
|
|
+ }
|
|
+
|
|
+ if (unlikely(map[i+1].status != GNTST_okay)) {
|
|
+ WPRINTK("invalid kernel buffer -- could not remap it\n");
|
|
+ ret = 1;
|
|
+ map[i+1].handle = INVALID_GRANT_HANDLE;
|
|
+ }
|
|
+
|
|
+ pending_handle(mmap_idx, pending_idx, i/2).kernel
|
|
+ = map[i].handle;
|
|
+ pending_handle(mmap_idx, pending_idx, i/2).user
|
|
+ = map[i+1].handle;
|
|
+
|
|
+ if (ret)
|
|
+ continue;
|
|
+
|
|
+ pg = idx_to_page(mmap_idx, pending_idx, i/2);
|
|
+ set_phys_to_machine(page_to_pfn(pg),
|
|
+ FOREIGN_FRAME(map[i].dev_bus_addr
|
|
+ >> PAGE_SHIFT));
|
|
+ offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT;
|
|
+ info->foreign_map.map[offset] = pg;
|
|
+ }
|
|
+ } else {
|
|
+ for (i = 0; i < nseg; i++) {
|
|
+ unsigned long uvaddr;
|
|
+ unsigned long offset;
|
|
+ struct page *pg;
|
|
+
|
|
+ uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
|
|
+
|
|
+ gnttab_check_GNTST_eagain_while(GNTTABOP_map_grant_ref, &map[i]);
|
|
+
|
|
+ if (unlikely(map[i].status != GNTST_okay)) {
|
|
+ WPRINTK("invalid kernel buffer -- could not remap it\n");
|
|
+ ret = 1;
|
|
+ map[i].handle = INVALID_GRANT_HANDLE;
|
|
+ }
|
|
+
|
|
+ pending_handle(mmap_idx, pending_idx, i).kernel
|
|
+ = map[i].handle;
|
|
+
|
|
+ if (ret)
|
|
+ continue;
|
|
+
|
|
+ offset = (uvaddr - info->rings_vstart) >> PAGE_SHIFT;
|
|
+ pg = idx_to_page(mmap_idx, pending_idx, i);
|
|
+ info->foreign_map.map[offset] = pg;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (ret)
|
|
+ goto fail_flush;
|
|
+
|
|
+ if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
+ down_write(&mm->mmap_sem);
|
|
+ /* Mark mapped pages as reserved: */
|
|
+ for (i = 0; i < req->nr_segments; i++) {
|
|
+ struct page *pg;
|
|
+
|
|
+ pg = idx_to_page(mmap_idx, pending_idx, i);
|
|
+ SetPageReserved(pg);
|
|
+ if (xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ unsigned long uvaddr = MMAP_VADDR(info->user_vstart,
|
|
+ usr_idx, i);
|
|
+ if (vma && uvaddr >= vma->vm_end) {
|
|
+ vma = vma->vm_next;
|
|
+ if (vma &&
|
|
+ (uvaddr < vma->vm_start ||
|
|
+ uvaddr >= vma->vm_end))
|
|
+ vma = NULL;
|
|
+ }
|
|
+ if (vma == NULL) {
|
|
+ vma = find_vma(mm, uvaddr);
|
|
+ /* this virtual area was already munmapped.
|
|
+ so skip to next page */
|
|
+ if (!vma)
|
|
+ continue;
|
|
+ }
|
|
+ ret = vm_insert_page(vma, uvaddr, pg);
|
|
+ if (ret) {
|
|
+ up_write(&mm->mmap_sem);
|
|
+ goto fail_flush;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
+ up_write(&mm->mmap_sem);
|
|
+
|
|
+ /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
|
|
+ info->idx_map[usr_idx].mem = mmap_idx;
|
|
+ info->idx_map[usr_idx].req = pending_idx;
|
|
+
|
|
+ blkif_get(blkif);
|
|
+ /* Finally, write the request message to the user ring. */
|
|
+ target = RING_GET_REQUEST(&info->ufe_ring,
|
|
+ info->ufe_ring.req_prod_pvt);
|
|
+ memcpy(target, req, sizeof(*req));
|
|
+ target->id = usr_idx;
|
|
+ wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
|
|
+ info->ufe_ring.req_prod_pvt++;
|
|
+
|
|
+ if (operation == READ)
|
|
+ blkif->st_rd_sect += nr_sects;
|
|
+ else if (operation == WRITE)
|
|
+ blkif->st_wr_sect += nr_sects;
|
|
+
|
|
+ return;
|
|
+
|
|
+ fail_flush:
|
|
+ WPRINTK("Reached Fail_flush\n");
|
|
+ fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
|
|
+ fail_response:
|
|
+ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
|
|
+ free_req(pending_req);
|
|
+ msleep(1); /* back off a bit */
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/******************************************************************
|
|
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
|
|
+ */
|
|
+
|
|
+
|
|
+/*
+ * make_response - post one response on the frontend's ring.
+ * @blkif: interface whose response ring is written
+ * @id:    request id echoed back in the response
+ * @op:    operation code echoed back in the response
+ * @st:    status stored in the response
+ *
+ * The response is assembled on the stack and copied into the slot at
+ * rsp_prod_pvt of the ring flavour selected by blkif->blk_protocol, all
+ * under blk_ring_lock.  After the push the request ring is re-checked;
+ * blkif_notify_work() is called if more requests are pending and the
+ * frontend is notified through blkif->irq when the push macro asks for it.
+ */
+static void make_response(blkif_t *blkif, u64 id,
+			  unsigned short op, int st)
+{
+	blkif_response_t resp;
+	unsigned long flags;
+	blkif_back_rings_t *blk_rings = &blkif->blk_rings;
+	int more_to_do = 0;
+	int notify;
+
+	/*
+	 * All sizeof(resp) bytes are copied into the ring below, so clear
+	 * the structure first: bytes not covered by the three assignments
+	 * (e.g. padding) must not carry stale stack contents to the
+	 * frontend.
+	 */
+	memset(&resp, 0, sizeof(resp));
+	resp.id        = id;
+	resp.operation = op;
+	resp.status    = st;
+
+	spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+	/* Place on the response ring for the relevant domain. */
+	switch (blkif->blk_protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+		memcpy(RING_GET_RESPONSE(&blk_rings->native,
+					 blk_rings->native.rsp_prod_pvt),
+		       &resp, sizeof(resp));
+		break;
+	case BLKIF_PROTOCOL_X86_32:
+		memcpy(RING_GET_RESPONSE(&blk_rings->x86_32,
+					 blk_rings->x86_32.rsp_prod_pvt),
+		       &resp, sizeof(resp));
+		break;
+	case BLKIF_PROTOCOL_X86_64:
+		memcpy(RING_GET_RESPONSE(&blk_rings->x86_64,
+					 blk_rings->x86_64.rsp_prod_pvt),
+		       &resp, sizeof(resp));
+		break;
+	default:
+		BUG();
+	}
+	blk_rings->common.rsp_prod_pvt++;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify);
+
+	if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) {
+		/*
+		 * Tail check for pending requests. Allows frontend to avoid
+		 * notifications if requests are already in flight (lower
+		 * overheads and promotes batching).
+		 */
+		RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do);
+	} else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) {
+		more_to_do = 1;
+	}
+
+	spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+	if (more_to_do)
+		blkif_notify_work(blkif);
+	if (notify)
+		notify_remote_via_irq(blkif->irq);
+}
|
|
+
|
|
+/*
+ * blkif_init - module entry point for the blktap backend.
+ *
+ * Bails out with -ENODEV when not running on Xen.  Otherwise it tries to
+ * grow the pending-request pool twice (failing only if the very first
+ * attempt fails), initialises the interface cache and the xenbus backend,
+ * registers the "blktap" character device with a dynamic major and, if
+ * the xen class exists, creates the blktap0 control node.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int __init blkif_init(void)
+{
+	int i, ret;
+	struct class *class;
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	INIT_LIST_HEAD(&pending_free);
+	for (i = 0; i < 2; i++) {
+		ret = req_increase();
+		if (ret)
+			break;
+	}
+	/* Only a failure of the first increase is treated as fatal. */
+	if (i == 0)
+		return ret;
+
+	tap_blkif_interface_init();
+
+	alloc_pending_reqs = 0;
+
+	tap_blkif_xenbus_init();
+
+	/* Dynamically allocate a major for this device */
+	ret = register_chrdev(0, "blktap", &blktap_fops);
+
+	if (ret < 0) {
+		WPRINTK("Couldn't register /dev/xen/blktap\n");
+		/* Hand back the real reason rather than a blanket -ENOMEM. */
+		return ret;
+	}
+
+	blktap_major = ret;
+
+	/* tapfds[0] is always NULL */
+	blktap_next_minor++;
+
+	DPRINTK("Created misc_dev %d:0 [/dev/xen/blktap0]\n", ret);
+
+	/* Make sure the xen class exists */
+	if ((class = get_xen_class()) != NULL) {
+		/*
+		 * This will allow udev to create the blktap ctrl device.
+		 * We only want to create blktap0 first.  We don't want
+		 * to flood the sysfs system with needless blktap devices.
+		 * We only create the device when a request of a new device is
+		 * made.
+		 */
+		class_device_create(class, NULL,
+				    MKDEV(blktap_major, 0), NULL,
+				    "blktap0");
+	} else {
+		/* this is bad, but not fatal */
+		WPRINTK("blktap: sysfs xen_class not created\n");
+	}
+
+	DPRINTK("Blktap device successfully created\n");
+
+	return 0;
+}
|
|
+
|
|
+module_init(blkif_init);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap/common.h 2008-09-15 13:40:15.000000000 +0200
|
|
@@ -0,0 +1,122 @@
|
|
+/*
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __BLKIF__BACKEND__COMMON_H__
|
|
+#define __BLKIF__BACKEND__COMMON_H__
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/blkdev.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/setup.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/blkif.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/driver_util.h>
|
|
+
|
|
+/* Debug output via pr_debug(), prefixed with the emitting file and line. */
+#define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \
+                                    __FILE__ , __LINE__ , ## _a )
+
+/* Warning-level printk() carrying the "blk_tap: " prefix. */
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args)
|
|
+
|
|
+struct backend_info;
|
|
+
|
|
+/*
+ * Per-interface state of one blktap backend instance.  Objects are
+ * allocated and zeroed by tap_alloc_blkif().
+ */
+typedef struct blkif_st {
+	/* Unique identifier for this interface. */
+	domid_t           domid;	/* set by tap_alloc_blkif() */
+	unsigned int      handle;
+	/* Physical parameters of the comms window. */
+	unsigned int      irq;		/* 0 until tap_blkif_map() binds one */
+	/* Comms information. */
+	enum blkif_protocol blk_protocol;	/* ring ABI in use */
+	blkif_back_rings_t blk_rings;
+	struct vm_struct *blk_ring_area;	/* area the ring page is mapped at */
+	/* Back pointer to the backend_info. */
+	struct backend_info *be;
+	/* Private fields. */
+	spinlock_t        blk_ring_lock;	/* taken while writing responses */
+	atomic_t          refcnt;		/* see blkif_get()/blkif_put() */
+
+	wait_queue_head_t wq;
+	struct task_struct *xenblkd;	/* thread started by kthread_run() */
+	unsigned int      waiting_reqs;
+	request_queue_t   *plug;
+
+	/* statistics */
+	unsigned long     st_print;	/* initialised to jiffies on allocation */
+	int               st_rd_req;
+	int               st_wr_req;
+	int               st_oo_req;
+	int               st_rd_sect;
+	int               st_wr_sect;
+
+	/* woken by blkif_put() when refcnt reaches zero */
+	wait_queue_head_t waiting_to_free;
+
+	/* grant handle and reference recorded by map_frontend_page() */
+	grant_handle_t    shmem_handle;
+	grant_ref_t       shmem_ref;
+
+	int               dev_num;	/* value returned by dom_to_devid() */
+	uint64_t          sectors;	/* read from the "sectors" xenstore node */
+} blkif_t;
|
|
+
|
|
+blkif_t *tap_alloc_blkif(domid_t domid);
|
|
+void tap_blkif_free(blkif_t *blkif);
|
|
+void tap_blkif_kmem_cache_free(blkif_t *blkif);
|
|
+int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
|
|
+ unsigned int evtchn);
|
|
+void tap_blkif_unmap(blkif_t *blkif);
|
|
+
|
|
+/* Take an additional reference on interface _b. */
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+/*
+ * Drop a reference on interface _b.  When the count reaches zero, wake
+ * whoever sleeps on waiting_to_free (tap_blkif_free() waits there).
+ */
+#define blkif_put(_b)						\
+	do {							\
+		if (atomic_dec_and_test(&(_b)->refcnt))		\
+			wake_up(&(_b)->waiting_to_free);	\
+	} while (0)
|
|
+
|
|
+
|
|
+/*
+ * Description of a request against a physical block device.
+ * NOTE(review): presumably carried over from blkback -- confirm whether
+ * blktap still uses it.
+ */
+struct phys_req {
+	unsigned short       dev;
+	unsigned short       nr_sects;
+	struct block_device *bdev;
+	blkif_sector_t       sector_number;
+};
|
|
+
|
|
+void tap_blkif_interface_init(void);
|
|
+
|
|
+void tap_blkif_xenbus_init(void);
|
|
+
|
|
+irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+int tap_blkif_schedule(void *arg);
|
|
+
|
|
+int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
|
|
+void signal_tapdisk(int idx);
|
|
+
|
|
+#endif /* __BLKIF__BACKEND__COMMON_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap/interface.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,183 @@
|
|
+/******************************************************************************
|
|
+ * drivers/xen/blktap/interface.c
|
|
+ *
|
|
+ * Block-device interface management.
|
|
+ *
|
|
+ * Copyright (c) 2004, Keir Fraser
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+#include <xen/evtchn.h>
|
|
+#include <linux/delay.h>
|
|
+
|
|
+static kmem_cache_t *blkif_cachep;
|
|
+
|
|
+/*
+ * Allocate an interface object from blkif_cachep for domain @domid.
+ *
+ * The object is zeroed and then given its non-zero initial state: the
+ * domain id, an initialised ring lock and wait queues, st_print set to
+ * the current jiffies and a reference count of one.
+ *
+ * Returns the new interface, or ERR_PTR(-ENOMEM) if allocation fails.
+ */
+blkif_t *tap_alloc_blkif(domid_t domid)
+{
+	blkif_t *new_if = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+
+	if (new_if == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	memset(new_if, 0, sizeof(*new_if));
+
+	new_if->domid = domid;
+	new_if->st_print = jiffies;
+	atomic_set(&new_if->refcnt, 1);
+	spin_lock_init(&new_if->blk_ring_lock);
+	init_waitqueue_head(&new_if->wq);
+	init_waitqueue_head(&new_if->waiting_to_free);
+
+	return new_if;
+}
|
|
+
|
|
+/*
+ * Map the frontend's grant @shared_page at the address of
+ * blkif->blk_ring_area.  On success the grant reference and the returned
+ * handle are stored in @blkif and 0 is returned; any status other than
+ * GNTST_okay yields -EINVAL.
+ */
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
+{
+	struct gnttab_map_grant_ref map_op;
+
+	gnttab_set_map_op(&map_op, (unsigned long)blkif->blk_ring_area->addr,
+			  GNTMAP_host_map, shared_page, blkif->domid);
+
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
+
+	if (map_op.status != GNTST_okay) {
+		DPRINTK("Grant table operation failure %d!\n",
+			(int)map_op.status);
+		return -EINVAL;
+	}
+
+	blkif->shmem_ref = shared_page;
+	blkif->shmem_handle = map_op.handle;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Undo map_frontend_page(): unmap the grant identified by
+ * blkif->shmem_handle from the ring area.  A failing hypercall is
+ * treated as a fatal bug.
+ */
+static void unmap_frontend_page(blkif_t *blkif)
+{
+	struct gnttab_unmap_grant_ref unmap_op;
+	int rc;
+
+	gnttab_set_unmap_op(&unmap_op,
+			    (unsigned long)blkif->blk_ring_area->addr,
+			    GNTMAP_host_map, blkif->shmem_handle);
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1);
+	if (rc)
+		BUG();
+}
|
|
+
|
|
+/*
+ * Connect @blkif to its frontend: reserve a one-page vm area, map the
+ * shared ring page @shared_page into it, initialise the back ring that
+ * matches blkif->blk_protocol and bind event channel @evtchn to
+ * tap_blkif_be_int().
+ *
+ * Returns 0 on success (or if an irq is already bound), -ENOMEM if the
+ * vm area cannot be allocated, or the error from the mapping / binding
+ * step.  Everything set up so far is released again on failure.
+ */
+int tap_blkif_map(blkif_t *blkif, unsigned long shared_page,
+		  unsigned int evtchn)
+{
+	int rc;
+
+	/* Already connected through? */
+	if (blkif->irq)
+		return 0;
+
+	blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE);
+	if (blkif->blk_ring_area == NULL)
+		return -ENOMEM;
+
+	rc = map_frontend_page(blkif, shared_page);
+	if (rc)
+		goto out_free_area;
+
+	switch (blkif->blk_protocol) {
+	case BLKIF_PROTOCOL_NATIVE:
+	{
+		blkif_sring_t *ring;
+
+		ring = (blkif_sring_t *)blkif->blk_ring_area->addr;
+		BACK_RING_INIT(&blkif->blk_rings.native, ring, PAGE_SIZE);
+		break;
+	}
+	case BLKIF_PROTOCOL_X86_32:
+	{
+		blkif_x86_32_sring_t *ring32;
+
+		ring32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr;
+		BACK_RING_INIT(&blkif->blk_rings.x86_32, ring32, PAGE_SIZE);
+		break;
+	}
+	case BLKIF_PROTOCOL_X86_64:
+	{
+		blkif_x86_64_sring_t *ring64;
+
+		ring64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr;
+		BACK_RING_INIT(&blkif->blk_rings.x86_64, ring64, PAGE_SIZE);
+		break;
+	}
+	default:
+		BUG();
+	}
+
+	rc = bind_interdomain_evtchn_to_irqhandler(
+		blkif->domid, evtchn, tap_blkif_be_int,
+		0, "blkif-backend", blkif);
+	if (rc < 0) {
+		unmap_frontend_page(blkif);
+		/* tap_blkif_unmap() keys off sring, so mark it unmapped. */
+		blkif->blk_rings.common.sring = NULL;
+		goto out_free_area;
+	}
+
+	/* A non-negative return value is the bound irq number. */
+	blkif->irq = rc;
+	return 0;
+
+out_free_area:
+	free_vm_area(blkif->blk_ring_area);
+	return rc;
+}
|
|
+
|
|
+/*
+ * Tear down what tap_blkif_map() set up: unbind the irq handler if one
+ * is bound, then unmap the shared ring and free its vm area if a ring is
+ * mapped.  Both steps reset their marker (irq / sring), so a second call
+ * finds nothing to do.
+ */
+void tap_blkif_unmap(blkif_t *blkif)
+{
+	if (blkif->irq) {
+		unbind_from_irqhandler(blkif->irq, blkif);
+		blkif->irq = 0;
+	}
+
+	if (blkif->blk_rings.common.sring == NULL)
+		return;
+
+	unmap_frontend_page(blkif);
+	free_vm_area(blkif->blk_ring_area);
+	blkif->blk_rings.common.sring = NULL;
+}
|
|
+
|
|
+/*
+ * Quiesce @blkif and disconnect it from the frontend.
+ *
+ * The caller's reference is dropped temporarily so that the count can
+ * reach zero once every other holder has called blkif_put(); the
+ * function sleeps until that happens, takes the reference back and then
+ * unmaps the ring and irq.  The object itself is not freed here.
+ */
+void tap_blkif_free(blkif_t *blkif)
+{
+	atomic_dec(&blkif->refcnt);
+	wait_event(blkif->waiting_to_free,
+		   atomic_read(&blkif->refcnt) == 0);
+	atomic_inc(&blkif->refcnt);
+
+	tap_blkif_unmap(blkif);
+}
|
|
+
|
|
+/*
+ * Drop the final reference on @blkif and return the object to
+ * blkif_cachep.  Any other outstanding reference is a fatal bug.
+ */
+void tap_blkif_kmem_cache_free(blkif_t *blkif)
+{
+	int was_last = atomic_dec_and_test(&blkif->refcnt);
+
+	if (!was_last)
+		BUG();
+
+	kmem_cache_free(blkif_cachep, blkif);
+}
|
|
+
|
|
+/*
+ * Create the slab cache ("blktapif_cache") that tap_alloc_blkif() draws
+ * blkif_t objects from.
+ * NOTE(review): the result of kmem_cache_create() is not checked here.
+ */
+void __init tap_blkif_interface_init(void)
+{
+	blkif_cachep = kmem_cache_create("blktapif_cache",
+					 sizeof(blkif_t),
+					 0, 0, NULL, NULL);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap/xenbus.c 2010-11-25 09:36:37.000000000 +0100
|
|
@@ -0,0 +1,508 @@
|
|
+/* drivers/xen/blktap/xenbus.c
|
|
+ *
|
|
+ * Xenbus code for blktap
|
|
+ *
|
|
+ * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
|
|
+ *
|
|
+ * Based on the blkback xenbus code:
|
|
+ *
|
|
+ * Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
|
|
+ * Copyright (C) 2005 XenSource Ltd
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <stdarg.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include "common.h"
|
|
+
|
|
+
|
|
+/* Per-device backend state, stored in dev->dev.driver_data by probe. */
+struct backend_info
+{
+	struct xenbus_device *dev;	/* owning xenbus device */
+	blkif_t *blkif;			/* interface from tap_alloc_blkif() */
+	struct xenbus_watch backend_watch;	/* watch on the "info" node */
+	int xenbus_id;			/* id parsed from the node name by get_id() */
+	int group_added;		/* non-zero while the statistics group exists */
+};
|
|
+
|
|
+static DEFINE_RWLOCK(sysfs_read_lock);
|
|
+
|
|
+static void connect(struct backend_info *);
|
|
+static int connect_ring(struct backend_info *);
|
|
+static int blktap_remove(struct xenbus_device *dev);
|
|
+static int blktap_probe(struct xenbus_device *dev,
|
|
+ const struct xenbus_device_id *id);
|
|
+static void tap_backend_changed(struct xenbus_watch *, const char **,
|
|
+ unsigned int);
|
|
+static void tap_frontend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state frontend_state);
|
|
+
|
|
+/*
+ * Return the index in @str of the occurrence of @c that is preceded by
+ * exactly @len earlier occurrences (so @len == 0 finds the first one).
+ * If the string ends after exactly @len occurrences, the string length
+ * is returned instead; with fewer occurrences the result is -ERANGE.
+ */
+static int strsep_len(const char *str, char c, unsigned int len)
+{
+	unsigned int pos;
+
+	for (pos = 0; str[pos] != '\0'; pos++) {
+		if (str[pos] != c)
+			continue;
+		if (len == 0)
+			return pos;
+		len--;
+	}
+
+	if (len != 0)
+		return -ERANGE;
+	return pos;
+}
|
|
+
|
|
+/*
+ * Parse the decimal number that follows the third '/' of @str, e.g. the
+ * "2049" of "backend/tap/1/2049".
+ *
+ * The number is parsed in place.  Copying it into a fixed 10-byte buffer,
+ * as was done before, overflowed the buffer for long components and, for
+ * paths with only two '/' characters, read past the terminator and wrote
+ * in front of the buffer.
+ *
+ * Returns the parsed value, or -1 if @str has no third '/'.
+ */
+static long get_id(const char *str)
+{
+	const char *id;
+	int sep;
+
+	sep = strsep_len(str, '/', 2);
+	/*
+	 * strsep_len() returns the string length when the path ends after
+	 * the second '/'; str[sep] is then the terminator and no id follows.
+	 */
+	if (sep < 0 || str[sep] == '\0')
+		return -1;
+
+	id = str + sep + 1;
+	DPRINTK("Get_id called for %s (%s)\n", str, id);
+
+	return simple_strtol(id, NULL, 10);
+}
|
|
+
|
|
+/*
+ * Build the worker thread name "blktap.<domid>.<device>" in @buf, which
+ * must hold at least TASK_COMM_LEN bytes.  <device> is the backend's
+ * "dev" xenstore value with everything up to and including "/dev/"
+ * stripped, when that prefix is present.
+ *
+ * Returns 0, or the error from xenbus_read().
+ */
+static int blktap_name(blkif_t *blkif, char *buf)
+{
+	struct xenbus_device *dev = blkif->be->dev;
+	char *path, *name;
+
+	path = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
+	if (IS_ERR(path))
+		return PTR_ERR(path);
+
+	name = strstr(path, "/dev/");
+	if (name == NULL)
+		name = path;
+	else
+		name += strlen("/dev/");
+
+	snprintf(buf, TASK_COMM_LEN, "blktap.%d.%s", blkif->domid, name);
+	kfree(path);
+
+	return 0;
+}
|
|
+
|
|
+/****************************************************************
|
|
+ * sysfs interface for I/O requests of blktap device
|
|
+ */
|
|
+
|
|
+/*
+ * VBD_SHOW(name, format, args...) defines show_<name>(), a read-only
+ * sysfs "show" callback, plus the matching dev_attr_<name>.
+ *
+ * The callback pins the device, looks up the backend_info in
+ * driver_data under sysfs_read_lock and, if one is still attached,
+ * formats @args (which may refer to the local "be") into the buffer.
+ * It returns -ENODEV when the device or its backend is gone.
+ */
+#define VBD_SHOW(name, format, args...)					\
+	static ssize_t show_##name(struct device *_dev,			\
+				   struct device_attribute *attr,	\
+				   char *buf)				\
+	{								\
+		struct xenbus_device *dev;				\
+		struct backend_info *be;				\
+		ssize_t ret = -ENODEV;					\
+									\
+		if (!get_device(_dev))					\
+			return ret;					\
+		dev = to_xenbus_device(_dev);				\
+		read_lock(&sysfs_read_lock);				\
+		be = dev->dev.driver_data;				\
+		if (be != NULL)						\
+			ret = sprintf(buf, format, ##args);		\
+		read_unlock(&sysfs_read_lock);				\
+		put_device(_dev);					\
+		return ret;						\
+	}								\
+	static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+
+/* One attribute per statistics counter kept in blkif_t. */
+VBD_SHOW(oo_req,  "%d\n", be->blkif->st_oo_req);
+VBD_SHOW(rd_req,  "%d\n", be->blkif->st_rd_req);
+VBD_SHOW(wr_req,  "%d\n", be->blkif->st_wr_req);
+VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect);
+VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect);
|
|
+
|
|
+static struct attribute *tapstat_attrs[] = {
|
|
+ &dev_attr_oo_req.attr,
|
|
+ &dev_attr_rd_req.attr,
|
|
+ &dev_attr_wr_req.attr,
|
|
+ &dev_attr_rd_sect.attr,
|
|
+ &dev_attr_wr_sect.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static struct attribute_group tapstat_group = {
|
|
+ .name = "statistics",
|
|
+ .attrs = tapstat_attrs,
|
|
+};
|
|
+
|
|
+/*
+ * Create the "statistics" attribute group below @dev's kobject and
+ * remember that in be->group_added.  Returns 0 or the error from
+ * sysfs_create_group().
+ */
+int xentap_sysfs_addif(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev->dev.driver_data;
+	int err = sysfs_create_group(&dev->dev.kobj, &tapstat_group);
+
+	if (err)
+		return err;
+
+	be->group_added = 1;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Remove the "statistics" attribute group from @dev and clear
+ * be->group_added.  Relies on dev->dev.driver_data still pointing at the
+ * backend_info.
+ */
+void xentap_sysfs_delif(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev->dev.driver_data;
+
+	sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
+	be->group_added = 0;
+}
|
|
+
|
|
+/*
+ * blktap_remove - release everything attached to @dev.
+ *
+ * Removes the statistics group, detaches the backend_info from the
+ * device, unregisters the backend watch, stops the worker thread,
+ * signals tapdisk, disconnects and frees the interface and finally frees
+ * the backend_info.  Also used by blktap_probe() to unwind a partially
+ * set-up device.  Always returns 0.
+ *
+ * sysfs_read_lock is a spinning rwlock, while kthread_stop(),
+ * unregister_xenbus_watch() and tap_blkif_free() (which uses
+ * wait_event()) can sleep.  The lock is therefore held only while
+ * driver_data is cleared: the sysfs show callbacks look the backend up
+ * under the read lock, so once the write lock has been taken and
+ * dropped, no callback can still hold or obtain a pointer to @be and the
+ * remaining teardown can run unlocked.
+ */
+static int blktap_remove(struct xenbus_device *dev)
+{
+	struct backend_info *be = dev->dev.driver_data;
+
+	/* xentap_sysfs_delif() reads driver_data, so it has to come first. */
+	if (be->group_added)
+		xentap_sysfs_delif(be->dev);
+
+	write_lock(&sysfs_read_lock);
+	dev->dev.driver_data = NULL;
+	write_unlock(&sysfs_read_lock);
+
+	if (be->backend_watch.node) {
+		unregister_xenbus_watch(&be->backend_watch);
+		kfree(be->backend_watch.node);
+		be->backend_watch.node = NULL;
+	}
+	if (be->blkif) {
+		if (be->blkif->xenblkd)
+			kthread_stop(be->blkif->xenblkd);
+		signal_tapdisk(be->blkif->dev_num);
+		tap_blkif_free(be->blkif);
+		tap_blkif_kmem_cache_free(be->blkif);
+		be->blkif = NULL;
+	}
+	kfree(be);
+	return 0;
+}
|
|
+
|
|
+/*
+ * tap_update_blkif_status - move the backend to Connected once ready.
+ *
+ * Does nothing until both the irq is bound and a non-zero sector count
+ * is known, or if the device is already Connected.  Otherwise it
+ * switches the xenbus state to Connected, creates the sysfs statistics
+ * group if it does not exist yet and starts the worker thread running
+ * tap_blkif_schedule().  Failures are reported through xenbus.
+ */
+static void tap_update_blkif_status(blkif_t *blkif)
+{
+	int err;
+	char name[TASK_COMM_LEN];
+
+	/* Not ready to connect? */
+	if (!blkif->irq || !blkif->sectors)
+		return;
+
+	/* Already connected? */
+	if (blkif->be->dev->state == XenbusStateConnected)
+		return;
+
+	/* Attempt to connect: exit if we fail to. */
+	connect(blkif->be);
+	if (blkif->be->dev->state != XenbusStateConnected)
+		return;
+
+	err = blktap_name(blkif, name);
+	if (err) {
+		xenbus_dev_error(blkif->be->dev, err, "get blktap dev name");
+		return;
+	}
+
+	if (!blkif->be->group_added) {
+		err = xentap_sysfs_addif(blkif->be->dev);
+		if (err) {
+			xenbus_dev_fatal(blkif->be->dev, err,
+					 "creating sysfs entries");
+			return;
+		}
+	}
+
+	/*
+	 * The name embeds a string read from xenstore, so it must be passed
+	 * as an argument and never as the format string itself.
+	 */
+	blkif->xenblkd = kthread_run(tap_blkif_schedule, blkif, "%s", name);
+	if (IS_ERR(blkif->xenblkd)) {
+		err = PTR_ERR(blkif->xenblkd);
+		blkif->xenblkd = NULL;
+		xenbus_dev_fatal(blkif->be->dev, err, "start xenblkd");
+		WPRINTK("Error starting thread %s\n", name);
+	} else
+		DPRINTK("Thread started for domid %d, connected disk %d\n",
+			blkif->domid, blkif->dev_num);
+
+}
|
|
+
|
|
+/**
|
|
+ * Entry point to this code when a new device is created. Allocate
|
|
+ * the basic structures, and watch the store waiting for the
|
|
+ * user-space program to tell us the physical device info. Switch to
|
|
+ * InitWait.
|
|
+ */
|
|
+static int blktap_probe(struct xenbus_device *dev,
|
|
+ const struct xenbus_device_id *id)
|
|
+{
|
|
+ int err;
|
|
+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
|
|
+ GFP_KERNEL);
|
|
+ if (!be) {
|
|
+ xenbus_dev_fatal(dev, -ENOMEM,
|
|
+ "allocating backend structure");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ be->dev = dev;
|
|
+ dev->dev.driver_data = be;
|
|
+ be->xenbus_id = get_id(dev->nodename);
|
|
+
|
|
+ be->blkif = tap_alloc_blkif(dev->otherend_id);
|
|
+ if (IS_ERR(be->blkif)) {
|
|
+ err = PTR_ERR(be->blkif);
|
|
+ be->blkif = NULL;
|
|
+ xenbus_dev_fatal(dev, err, "creating block interface");
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ /* setup back pointer */
|
|
+ be->blkif->be = be;
|
|
+ be->blkif->sectors = 0;
|
|
+
|
|
+ /* set a watch on disk info, waiting for userspace to update details*/
|
|
+ err = xenbus_watch_path2(dev, dev->nodename, "info",
|
|
+ &be->backend_watch, tap_backend_changed);
|
|
+ if (err)
|
|
+ goto fail;
|
|
+
|
|
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+ if (err)
|
|
+ goto fail;
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ DPRINTK("blktap probe failed\n");
|
|
+ blktap_remove(dev);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * Callback received when the user space code has placed the device
|
|
+ * information in xenstore.
|
|
+ */
|
|
+static void tap_backend_changed(struct xenbus_watch *watch,
|
|
+ const char **vec, unsigned int len)
|
|
+{
|
|
+ int err;
|
|
+ unsigned long info;
|
|
+ struct backend_info *be
|
|
+ = container_of(watch, struct backend_info, backend_watch);
|
|
+ struct xenbus_device *dev = be->dev;
|
|
+
|
|
+ /**
|
|
+ * Check to see whether userspace code has opened the image
|
|
+ * and written sector
|
|
+ * and disk info to xenstore
|
|
+ */
|
|
+ err = xenbus_gather(XBT_NIL, dev->nodename, "info", "%lu", &info,
|
|
+ NULL);
|
|
+ if (XENBUS_EXIST_ERR(err))
|
|
+ return;
|
|
+ if (err) {
|
|
+ xenbus_dev_error(dev, err, "getting info");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ DPRINTK("Userspace update on disk info, %lu\n",info);
|
|
+
|
|
+ err = xenbus_gather(XBT_NIL, dev->nodename, "sectors", "%llu",
|
|
+ &be->blkif->sectors, NULL);
|
|
+
|
|
+ /* Associate tap dev with domid*/
|
|
+ be->blkif->dev_num = dom_to_devid(be->blkif->domid, be->xenbus_id,
|
|
+ be->blkif);
|
|
+
|
|
+ tap_update_blkif_status(be->blkif);
|
|
+}
|
|
+
|
|
+
|
|
+/*
+ * Stop the worker thread of @blkif, if one is running, then wait for
+ * outstanding references and unmap the ring and irq through
+ * tap_blkif_free().
+ */
+static void blkif_disconnect(blkif_t *blkif)
+{
+	struct task_struct *worker = blkif->xenblkd;
+
+	if (worker) {
+		kthread_stop(worker);
+		blkif->xenblkd = NULL;
+	}
+
+	/* idempotent */
+	tap_blkif_free(blkif);
+}
|
|
+
|
|
+/**
|
|
+ * Callback received when the frontend's state changes.
|
|
+ */
|
|
+static void tap_frontend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state frontend_state)
|
|
+{
|
|
+ struct backend_info *be = dev->dev.driver_data;
|
|
+ int err;
|
|
+
|
|
+ DPRINTK("fe_changed(%s,%d)\n", dev->nodename, frontend_state);
|
|
+
|
|
+ switch (frontend_state) {
|
|
+ case XenbusStateInitialising:
|
|
+ if (dev->state == XenbusStateClosed) {
|
|
+ printk(KERN_INFO "%s: %s: prepare for reconnect\n",
|
|
+ __FUNCTION__, dev->nodename);
|
|
+ xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case XenbusStateInitialised:
|
|
+ case XenbusStateConnected:
|
|
+ /* Ensure we connect even when two watches fire in
|
|
+ close successsion and we miss the intermediate value
|
|
+ of frontend_state. */
|
|
+ if (dev->state == XenbusStateConnected)
|
|
+ break;
|
|
+
|
|
+ /* Enforce precondition before potential leak point.
|
|
+ * blkif_disconnect() is idempotent.
|
|
+ */
|
|
+ blkif_disconnect(be->blkif);
|
|
+
|
|
+ err = connect_ring(be);
|
|
+ if (err)
|
|
+ break;
|
|
+ tap_update_blkif_status(be->blkif);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ blkif_disconnect(be->blkif);
|
|
+ xenbus_switch_state(dev, XenbusStateClosing);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosed:
|
|
+ xenbus_switch_state(dev, XenbusStateClosed);
|
|
+ if (xenbus_dev_is_online(dev))
|
|
+ break;
|
|
+ /* fall through if not online */
|
|
+ case XenbusStateUnknown:
|
|
+ /* Implies the effects of blkif_disconnect() via
|
|
+ * blktap_remove().
|
|
+ */
|
|
+ device_unregister(&dev->dev);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
|
|
+ frontend_state);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/**
|
|
+ * Switch to Connected state.
|
|
+ */
|
|
+static void connect(struct backend_info *be)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ struct xenbus_device *dev = be->dev;
|
|
+
|
|
+ err = xenbus_switch_state(dev, XenbusStateConnected);
|
|
+ if (err)
|
|
+ xenbus_dev_fatal(dev, err, "switching to Connected state",
|
|
+ dev->nodename);
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+
|
|
+/*
+ * connect_ring - read the frontend's ring parameters and map the ring.
+ *
+ * Reads "ring-ref" and "event-channel" from the frontend's xenstore
+ * directory, determines the ring ABI from the optional "protocol" node
+ * (native when absent) and hands everything to tap_blkif_map().
+ *
+ * Returns 0 on success or a negative error code; every failure is also
+ * reported through xenbus_dev_fatal().
+ */
+static int connect_ring(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	unsigned long ring_ref;
+	unsigned int evtchn;
+	char protocol[64];
+	int err;
+
+	DPRINTK("%s\n", dev->otherend);
+
+	err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
+			    &ring_ref, "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 dev->otherend);
+		return err;
+	}
+
+	be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+	err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
+			    "%63s", protocol, NULL);
+	if (err)
+		strcpy(protocol, "unspecified, assuming native");
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+	else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
+		be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+	else {
+		/*
+		 * err is 0 on this path (the read itself succeeded), so
+		 * report and return a real error code instead.
+		 */
+		xenbus_dev_fatal(dev, -EINVAL, "unknown fe protocol %s",
+				 protocol);
+		return -EINVAL;
+	}
+	/* ring_ref and evtchn are unsigned; print them as such. */
+	printk(KERN_INFO
+	       "blktap: ring-ref %lu, event-channel %u, protocol %d (%s)\n",
+	       ring_ref, evtchn, be->blkif->blk_protocol, protocol);
+
+	/* Map the shared frame, irq etc. */
+	err = tap_blkif_map(be->blkif, ring_ref, evtchn);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
+				 ring_ref, evtchn);
+		return err;
+	}
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* ** Driver Registration ** */
|
|
+
|
|
+
|
|
+static const struct xenbus_device_id blktap_ids[] = {
|
|
+ { "tap" },
|
|
+ { "" }
|
|
+};
|
|
+
|
|
+
|
|
+static struct xenbus_driver blktap = {
|
|
+ .name = "tap",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = blktap_ids,
|
|
+ .probe = blktap_probe,
|
|
+ .remove = blktap_remove,
|
|
+ .otherend_changed = tap_frontend_changed
|
|
+};
|
|
+
|
|
+
|
|
+/*
+ * Register the "tap" backend driver with xenbus.
+ * NOTE(review): the result of xenbus_register_backend() is not
+ * propagated to the caller.
+ */
+void tap_blkif_xenbus_init(void)
+{
+	xenbus_register_backend(&blktap);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/Makefile 2009-05-29 10:25:53.000000000 +0200
|
|
@@ -0,0 +1,3 @@
|
|
+obj-$(CONFIG_XEN_BLKDEV_TAP2) := blktap.o
|
|
+
|
|
+blktap-objs := control.o ring.o wait_queue.o device.o request.o sysfs.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/blktap.h 2010-02-24 13:13:46.000000000 +0100
|
|
@@ -0,0 +1,254 @@
|
|
+#ifndef _BLKTAP_H_
|
|
+#define _BLKTAP_H_
|
|
+
|
|
+#include <linux/fs.h>
|
|
+#include <linux/poll.h>
|
|
+#include <linux/cdev.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/scatterlist.h>
|
|
+#include <xen/blkif.h>
|
|
+#include <xen/gnttab.h>
|
|
+
|
|
+//#define ENABLE_PASSTHROUGH
|
|
+
|
|
+extern int blktap_debug_level;
|
|
+
|
|
+#define BTPRINTK(level, tag, force, _f, _a...) \
|
|
+ do { \
|
|
+ if (blktap_debug_level > level && \
|
|
+ (force || printk_ratelimit())) \
|
|
+ printk(tag "%s: " _f, __func__, ##_a); \
|
|
+ } while (0) /* prints only when blktap_debug_level > level; subject to printk_ratelimit() unless force is non-zero */
|
|
+
|
|
+#define BTDBG(_f, _a...) BTPRINTK(8, KERN_DEBUG, 1, _f, ##_a) /* needs blktap_debug_level > 8; never rate-limited */
|
|
+#define BTINFO(_f, _a...) BTPRINTK(0, KERN_INFO, 0, _f, ##_a) /* needs blktap_debug_level > 0; rate-limited */
|
|
+#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a) /* needs blktap_debug_level > 0; rate-limited */
|
|
+#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a) /* errors are also suppressed when blktap_debug_level <= 0; rate-limited */
|
|
+
|
|
+#define MAX_BLKTAP_DEVICE 256
|
|
+
|
|
+#define BLKTAP_CONTROL 1 /* bit number in blktap.dev_inuse: set by blktap_control_initialize_tap(), cleared when the tap is released */
|
|
+#define BLKTAP_RING_FD 2
|
|
+#define BLKTAP_RING_VMA 3 /* bit number in blktap.dev_inuse: tested by blktap_active() */
|
|
+#define BLKTAP_DEVICE 4 /* bit number in blktap.dev_inuse: tested by blktap_device_fail_pending_requests() */
|
|
+#define BLKTAP_SYSFS 5
|
|
+#define BLKTAP_PAUSE_REQUESTED 6
|
|
+#define BLKTAP_PAUSED 7
|
|
+#define BLKTAP_SHUTDOWN_REQUESTED 8 /* bit number in blktap.dev_inuse: set while blktap_control_destroy_device() runs */
|
|
+#define BLKTAP_PASSTHROUGH 9
|
|
+#define BLKTAP_DEFERRED 10 /* the remaining values in this group are presumably dev_inuse bit numbers as well -- confirm against their users */
|
|
+
|
|
+/* blktap IOCTLs: */
|
|
+#define BLKTAP2_IOCTL_KICK_FE 1
|
|
+#define BLKTAP2_IOCTL_ALLOC_TAP 200
|
|
+#define BLKTAP2_IOCTL_FREE_TAP 201
|
|
+#define BLKTAP2_IOCTL_CREATE_DEVICE 202
|
|
+#define BLKTAP2_IOCTL_SET_PARAMS 203
|
|
+#define BLKTAP2_IOCTL_PAUSE 204
|
|
+#define BLKTAP2_IOCTL_REOPEN 205
|
|
+#define BLKTAP2_IOCTL_RESUME 206
|
|
+
|
|
+#define BLKTAP2_MAX_MESSAGE_LEN 256
|
|
+
|
|
+#define BLKTAP2_RING_MESSAGE_PAUSE 1
|
|
+#define BLKTAP2_RING_MESSAGE_RESUME 2
|
|
+#define BLKTAP2_RING_MESSAGE_CLOSE 3
|
|
+
|
|
+#define BLKTAP_REQUEST_FREE 0
|
|
+#define BLKTAP_REQUEST_PENDING 1
|
|
+
|
|
+/*
|
|
+ * The maximum number of requests that can be outstanding at any time
|
|
+ * is determined by
|
|
+ *
|
|
+ * [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST]
|
|
+ *
|
|
+ * where mmap_alloc < MAX_DYNAMIC_MEM.
|
|
+ *
|
|
+ * TODO:
|
|
+ * mmap_alloc is initialised to 2 and should be adjustable on the fly via
|
|
+ * sysfs.
|
|
+ */
|
|
+#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
|
|
+#define MAX_DYNAMIC_MEM BLK_RING_SIZE
|
|
+#define MAX_PENDING_REQS BLK_RING_SIZE
|
|
+#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
|
|
+#define MMAP_VADDR(_start, _req, _seg) \
|
|
+ ((_start) + \
|
|
+ ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) + \
|
|
+ ((_seg) * PAGE_SIZE)) /* address of page _seg of request slot _req in a mapping that starts at _start; every argument is parenthesized */
|
|
+
|
|
+#define blktap_get(_b) (atomic_inc(&(_b)->refcnt)) /* take a reference on tap _b */
|
|
+#define blktap_put(_b) \
|
|
+ do { \
|
|
+ if (atomic_dec_and_test(&(_b)->refcnt)) \
|
|
+ wake_up(&(_b)->wq); \
|
|
+ } while (0) /* drop a reference on tap _b; wakes sleepers on _b->wq when the count reaches zero */
|
|
+
|
|
+struct blktap;
|
|
+
|
|
+struct grant_handle_pair { /* grant handles kept per request segment (see blktap_request.handles) */
|
|
+ grant_handle_t kernel; /* handle used when unmapping the segment's kernel address */
|
|
+ grant_handle_t user; /* handle used when unmapping the segment's user-space pte */
|
|
+};
|
|
+#define INVALID_GRANT_HANDLE 0xFFFF
|
|
+
|
|
+struct blktap_handle { /* result copied to user space by BLKTAP2_IOCTL_ALLOC_TAP */
|
|
+ unsigned int ring; /* filled from ring_major */
|
|
+ unsigned int device; /* filled from device_major */
|
|
+ unsigned int minor; /* filled from the allocated tap's minor */
|
|
+};
|
|
+
|
|
+struct blktap_params { /* device parameters; checked (minimally) by blktap_validate_params() */
|
|
+ char name[BLKTAP2_MAX_MESSAGE_LEN]; /* forced to be NUL-terminated by blktap_validate_params() */
|
|
+ unsigned long long capacity;
|
|
+ unsigned long sector_size;
|
|
+};
|
|
+
|
|
+struct blktap_device { /* block-device side of a tap (embedded as blktap.device) */
|
|
+ int users; /* incremented in blktap_device_open(), decremented in blktap_device_release() */
|
|
+ spinlock_t lock; /* held around end_that_request_last() and when restarting the queue */
|
|
+ struct gendisk *gd; /* its ->queue is accessed when failing pending requests */
|
|
+
|
|
+#ifdef ENABLE_PASSTHROUGH
|
|
+ struct block_device *bdev;
|
|
+#endif
|
|
+};
|
|
+
|
|
+struct blktap_ring { /* ring / user-mapping side of a tap (embedded as blktap.ring) */
|
|
+ struct vm_area_struct *vma; /* NULL-checked before unmapping; its vm_mm is used for user page-table updates */
|
|
+ blkif_front_ring_t ring;
|
|
+ struct vm_foreign_map foreign_map; /* .map[] is indexed by page offset from vma->vm_start */
|
|
+ unsigned long ring_vstart;
|
|
+ unsigned long user_vstart; /* base address passed to MMAP_VADDR() for per-request user pages */
|
|
+
|
|
+ int response;
|
|
+
|
|
+ wait_queue_head_t poll_wait;
|
|
+
|
|
+ dev_t devno;
|
|
+ struct class_device *dev;
|
|
+ atomic_t sysfs_refcnt;
|
|
+ struct mutex sysfs_mutex;
|
|
+};
|
|
+
|
|
+struct blktap_statistics { /* per-tap counters (embedded as blktap.stats); not updated by the code visible in this chunk */
|
|
+ unsigned long st_print;
|
|
+ int st_rd_req;
|
|
+ int st_wr_req;
|
|
+ int st_oo_req;
|
|
+ int st_rd_sect;
|
|
+ int st_wr_sect;
|
|
+ s64 st_rd_cnt;
|
|
+ s64 st_rd_sum_usecs;
|
|
+ s64 st_rd_max_usecs;
|
|
+ s64 st_wr_cnt;
|
|
+ s64 st_wr_sum_usecs;
|
|
+ s64 st_wr_max_usecs;
|
|
+};
|
|
+
|
|
+struct blktap_request { /* bookkeeping for one in-flight block request */
|
|
+ uint64_t id; /* holds the struct request pointer, cast to an integer */
|
|
+ uint16_t usr_idx; /* request slot number passed to MMAP_VADDR() */
|
|
+
|
|
+ uint8_t status; /* compared against BLKTAP_REQUEST_PENDING */
|
|
+ atomic_t pendcnt;
|
|
+ uint8_t nr_pages; /* number of segments walked when mapping/unmapping */
|
|
+ unsigned short operation; /* BLKIF_OP_READ or BLKIF_OP_WRITE */
|
|
+
|
|
+ struct timeval time;
|
|
+ struct grant_handle_pair handles[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* one pair per segment */
|
|
+ struct list_head free_list;
|
|
+};
|
|
+
|
|
+struct blktap { /* one tap instance; instances are stored in blktaps[] */
|
|
+ int minor; /* index of this tap in blktaps[] */
|
|
+ pid_t pid;
|
|
+ atomic_t refcnt; /* changed through blktap_get()/blktap_put() */
|
|
+ unsigned long dev_inuse; /* bitmask of BLKTAP_* bit numbers; zero means the slot can be reused */
|
|
+
|
|
+ struct blktap_params params;
|
|
+
|
|
+ struct rw_semaphore tap_sem; /* several device.c helpers expect it to be held on entry */
|
|
+
|
|
+ struct blktap_ring ring;
|
|
+ struct blktap_device device;
|
|
+
|
|
+ int pending_cnt;
|
|
+ struct blktap_request *pending_requests[MAX_PENDING_REQS]; /* scanned when failing pending requests */
|
|
+ struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* scratch list filled by blk_rq_map_sg() */
|
|
+
|
|
+ wait_queue_head_t wq; /* woken by blktap_put(); waited on during device destruction */
|
|
+ struct list_head deferred_queue;
|
|
+
|
|
+ struct blktap_statistics stats;
|
|
+};
|
|
+
|
|
+extern struct blktap *blktaps[MAX_BLKTAP_DEVICE];
|
|
+
|
|
+static inline int
|
|
+blktap_active(struct blktap *tap) /* non-zero while BLKTAP_RING_VMA is set in tap->dev_inuse */
|
|
+{
|
|
+ return test_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
|
|
+}
|
|
+
|
|
+static inline int
|
|
+blktap_validate_params(struct blktap *tap, struct blktap_params *params) /* tap is currently unused */
|
|
+{
|
|
+ /* TODO: sanity check */
|
|
+ params->name[sizeof(params->name) - 1] = '\0'; /* force termination before the name is printed */
|
|
+ BTINFO("%s: capacity: %llu, sector-size: %lu\n",
|
|
+ params->name, params->capacity, params->sector_size);
|
|
+ return 0; /* always succeeds: nothing else is validated yet */
|
|
+}
|
|
+
|
|
+int blktap_control_destroy_device(struct blktap *);
|
|
+
|
|
+int blktap_ring_init(int *);
|
|
+int blktap_ring_free(void);
|
|
+int blktap_ring_create(struct blktap *);
|
|
+int blktap_ring_destroy(struct blktap *);
|
|
+int blktap_ring_pause(struct blktap *);
|
|
+int blktap_ring_resume(struct blktap *);
|
|
+void blktap_ring_kick_user(struct blktap *);
|
|
+
|
|
+int blktap_sysfs_init(void);
|
|
+void blktap_sysfs_free(void);
|
|
+int blktap_sysfs_create(struct blktap *);
|
|
+int blktap_sysfs_destroy(struct blktap *);
|
|
+
|
|
+int blktap_device_init(int *);
|
|
+void blktap_device_free(void);
|
|
+int blktap_device_create(struct blktap *);
|
|
+int blktap_device_destroy(struct blktap *);
|
|
+int blktap_device_pause(struct blktap *);
|
|
+int blktap_device_resume(struct blktap *);
|
|
+void blktap_device_restart(struct blktap *);
|
|
+void blktap_device_finish_request(struct blktap *,
|
|
+ blkif_response_t *,
|
|
+ struct blktap_request *);
|
|
+void blktap_device_fail_pending_requests(struct blktap *);
|
|
+#ifdef ENABLE_PASSTHROUGH
|
|
+int blktap_device_enable_passthrough(struct blktap *,
|
|
+ unsigned, unsigned);
|
|
+#endif
|
|
+
|
|
+void blktap_defer(struct blktap *);
|
|
+void blktap_run_deferred(void);
|
|
+
|
|
+int blktap_request_pool_init(void);
|
|
+void blktap_request_pool_free(void);
|
|
+int blktap_request_pool_grow(void);
|
|
+int blktap_request_pool_shrink(void);
|
|
+struct blktap_request *blktap_request_allocate(struct blktap *);
|
|
+void blktap_request_free(struct blktap *, struct blktap_request *);
|
|
+struct page *request_to_page(struct blktap_request *, int);
|
|
+
|
|
+static inline unsigned long
|
|
+request_to_kaddr(struct blktap_request *req, int seg) /* kernel address of the page backing segment seg of req */
|
|
+{
|
|
+ unsigned long pfn = page_to_pfn(request_to_page(req, seg));
|
|
+ return (unsigned long)pfn_to_kaddr(pfn);
|
|
+}
|
|
+
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/control.c 2010-04-29 09:34:47.000000000 +0200
|
|
@@ -0,0 +1,277 @@
|
|
+#include <linux/module.h>
|
|
+#include <linux/miscdevice.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+static DEFINE_SPINLOCK(blktap_control_lock);
|
|
+struct blktap *blktaps[MAX_BLKTAP_DEVICE];
|
|
+
|
|
+static int ring_major;
|
|
+static int device_major;
|
|
+static int blktap_control_registered;
|
|
+
|
|
+static void
|
|
+blktap_control_initialize_tap(struct blktap *tap) /* reset *tap to a fresh, allocated state, keeping only its minor */
|
|
+{
|
|
+ int minor = tap->minor; /* saved because the memset() below wipes it */
|
|
+
|
|
+ memset(tap, 0, sizeof(*tap));
|
|
+ set_bit(BLKTAP_CONTROL, &tap->dev_inuse); /* dev_inuse != 0 marks the slot as taken */
|
|
+ init_rwsem(&tap->tap_sem);
|
|
+ init_waitqueue_head(&tap->wq);
|
|
+ atomic_set(&tap->refcnt, 0);
|
|
+
|
|
+ tap->minor = minor;
|
|
+}
|
|
+
|
|
+static struct blktap *
|
|
+blktap_control_create_tap(void) /* allocate a new tap and publish it in the first free blktaps[] slot; NULL on failure */
|
|
+{
|
|
+ int minor;
|
|
+ struct blktap *tap;
|
|
+
|
|
+ tap = kzalloc(sizeof(*tap), GFP_KERNEL); /* zeroed: blktap_control_initialize_tap() reads tap->minor before its memset() */
|
|
+ if (unlikely(!tap))
|
|
+ return NULL;
|
|
+
|
|
+ blktap_control_initialize_tap(tap);
|
|
+
|
|
+ spin_lock_irq(&blktap_control_lock);
|
|
+ for (minor = 0; minor < MAX_BLKTAP_DEVICE; minor++) /* first empty slot wins */
|
|
+ if (!blktaps[minor])
|
|
+ break;
|
|
+
|
|
+ if (minor == MAX_BLKTAP_DEVICE) { /* table full: drop the new tap */
|
|
+ kfree(tap);
|
|
+ tap = NULL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ tap->minor = minor;
|
|
+ blktaps[minor] = tap;
|
|
+
|
|
+out:
|
|
+ spin_unlock_irq(&blktap_control_lock);
|
|
+ return tap;
|
|
+}
|
|
+
|
|
+static struct blktap *
|
|
+blktap_control_allocate_tap(void) /* reuse an idle tap or create one, then create its ring; NULL on failure */
|
|
+{
|
|
+ int err, minor;
|
|
+ struct blktap *tap;
|
|
+
|
|
+ /*
|
|
+ * This is called only from the ioctl, which
|
|
+ * means we should always have interrupts enabled.
|
|
+ */
|
|
+ BUG_ON(irqs_disabled());
|
|
+
|
|
+ spin_lock_irq(&blktap_control_lock);
|
|
+
|
|
+ for (minor = 0; minor < MAX_BLKTAP_DEVICE; minor++) {
|
|
+ tap = blktaps[minor];
|
|
+ if (!tap) /* empty slot: leave with tap == NULL so a new tap is created below */
|
|
+ goto found;
|
|
+
|
|
+ if (!tap->dev_inuse) { /* existing but idle tap: recycle it in place */
|
|
+ blktap_control_initialize_tap(tap);
|
|
+ goto found;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ tap = NULL; /* every slot is occupied and busy */
|
|
+
|
|
+found:
|
|
+ spin_unlock_irq(&blktap_control_lock);
|
|
+
|
|
+ if (!tap) {
|
|
+ tap = blktap_control_create_tap(); /* rescans for a free slot under the lock */
|
|
+ if (!tap)
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ err = blktap_ring_create(tap);
|
|
+ if (err) {
|
|
+ BTERR("ring creation failed: %d\n", err);
|
|
+ clear_bit(BLKTAP_CONTROL, &tap->dev_inuse); /* tap stays in blktaps[] and becomes reusable by a later call */
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ BTINFO("allocated tap %p\n", tap);
|
|
+ return tap;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_control_ioctl(struct inode *inode, struct file *filp,
|
|
+ unsigned int cmd, unsigned long arg) /* ioctl handler of the control device: allocate or free a tap */
|
|
+{
|
|
+ unsigned long dev;
|
|
+ struct blktap *tap;
|
|
+
|
|
+ switch (cmd) {
|
|
+ case BLKTAP2_IOCTL_ALLOC_TAP: { /* arg: user pointer to a struct blktap_handle to fill in */
|
|
+ struct blktap_handle h;
|
|
+
|
|
+ tap = blktap_control_allocate_tap();
|
|
+ if (!tap) {
|
|
+ BTERR("error allocating device\n");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ h.ring = ring_major;
|
|
+ h.device = device_major;
|
|
+ h.minor = tap->minor;
|
|
+
|
|
+ if (copy_to_user((struct blktap_handle __user *)arg,
|
|
+ &h, sizeof(h))) {
|
|
+ blktap_control_destroy_device(tap); /* undo the allocation if the caller cannot receive the handle */
|
|
+ return -EFAULT;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ case BLKTAP2_IOCTL_FREE_TAP: /* arg: minor number of the tap to destroy */
|
|
+ dev = arg;
|
|
+
|
|
+ if (dev >= MAX_BLKTAP_DEVICE || !blktaps[dev])
|
|
+ return -EINVAL;
|
|
+
|
|
+ blktap_control_destroy_device(blktaps[dev]); /* NOTE(review): its error return is ignored; 0 is reported regardless */
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ return -ENOIOCTLCMD; /* any other command */
|
|
+}
|
|
+
|
|
+static struct file_operations blktap_control_file_operations = { /* the control device only implements ioctl */
|
|
+ .owner = THIS_MODULE,
|
|
+ .ioctl = blktap_control_ioctl,
|
|
+};
|
|
+
|
|
+static struct miscdevice blktap_misc = { /* misc device registered by blktap_control_init() */
|
|
+ .minor = MISC_DYNAMIC_MINOR,
|
|
+ .name = "blktap-control",
|
|
+ .fops = &blktap_control_file_operations,
|
|
+};
|
|
+
|
|
+int
|
|
+blktap_control_destroy_device(struct blktap *tap) /* tear down device, ring and sysfs parts of tap; NULL tap is a no-op returning 0 */
|
|
+{
|
|
+ int err;
|
|
+ unsigned long inuse;
|
|
+
|
|
+ if (!tap)
|
|
+ return 0;
|
|
+
|
|
+ set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
|
|
+
|
|
+ for (;;) { /* retry the teardown steps, in order, until all succeed or the wait is interrupted */
|
|
+ inuse = tap->dev_inuse; /* snapshot compared against in the wait below */
|
|
+ err = blktap_device_destroy(tap);
|
|
+ if (err)
|
|
+ goto wait;
|
|
+
|
|
+ inuse = tap->dev_inuse;
|
|
+ err = blktap_ring_destroy(tap);
|
|
+ if (err)
|
|
+ goto wait;
|
|
+
|
|
+ inuse = tap->dev_inuse;
|
|
+ err = blktap_sysfs_destroy(tap);
|
|
+ if (err)
|
|
+ goto wait;
|
|
+
|
|
+ break; /* all three steps succeeded; err is 0 here */
|
|
+
|
|
+ wait:
|
|
+ BTDBG("inuse: 0x%lx, dev_inuse: 0x%lx\n",
|
|
+ inuse, tap->dev_inuse);
|
|
+ if (wait_event_interruptible(tap->wq, tap->dev_inuse != inuse)) /* sleep until dev_inuse changes */
|
|
+ break; /* interrupted: give up and keep err from the failed step */
|
|
+ }
|
|
+
|
|
+ clear_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse);
|
|
+
|
|
+ if (tap->dev_inuse == (1UL << BLKTAP_CONTROL)) { /* only the allocation bit is left: release the slot */
|
|
+ err = 0;
|
|
+ clear_bit(BLKTAP_CONTROL, &tap->dev_inuse);
|
|
+ }
|
|
+
|
|
+ return err; /* NOTE(review): the struct blktap itself is not freed in this function */
|
|
+}
|
|
+
|
|
+static int __init
|
|
+blktap_control_init(void) /* register the "blktap-control" misc device; returns 0 or the misc_register() error */
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ err = misc_register(&blktap_misc);
|
|
+ if (err) {
|
|
+ BTERR("misc_register failed for control device\n");
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ blktap_control_registered = 1; /* lets blktap_control_free() know it must deregister */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_control_free(void) /* destroy every tap in blktaps[] and deregister the control device if it was registered */
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < MAX_BLKTAP_DEVICE; i++)
|
|
+ blktap_control_destroy_device(blktaps[i]); /* NULL slots are ignored by the callee; NOTE(review): taps are not kfree()d here */
|
|
+
|
|
+ if (blktap_control_registered)
|
|
+ if (misc_deregister(&blktap_misc) < 0)
|
|
+ BTERR("misc_deregister failed for control device\n");
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_exit(void) /* module teardown; also called by blktap_init() on its failure path */
|
|
+{
|
|
+ blktap_control_free();
|
|
+ blktap_ring_free();
|
|
+ blktap_sysfs_free();
|
|
+ blktap_device_free();
|
|
+ blktap_request_pool_free();
|
|
+}
|
|
+
|
|
+static int __init
|
|
+blktap_init(void) /* module init: request pool, block device, ring, sysfs, then control device */
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ err = blktap_request_pool_init();
|
|
+ if (err)
|
|
+ return err; /* nothing has been set up yet, so no cleanup is run */
|
|
+
|
|
+ err = blktap_device_init(&device_major); /* stores the block-device major reported to user space */
|
|
+ if (err)
|
|
+ goto fail;
|
|
+
|
|
+ err = blktap_ring_init(&ring_major); /* stores the ring-device major reported to user space */
|
|
+ if (err)
|
|
+ goto fail;
|
|
+
|
|
+ err = blktap_sysfs_init();
|
|
+ if (err)
|
|
+ goto fail;
|
|
+
|
|
+ err = blktap_control_init();
|
|
+ if (err)
|
|
+ goto fail;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ blktap_exit(); /* runs the full teardown, including parts that were never initialized */
|
|
+ return err;
|
|
+}
|
|
+
|
|
+module_init(blktap_init);
|
|
+module_exit(blktap_exit);
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/device.c 2010-11-25 09:36:37.000000000 +0100
|
|
@@ -0,0 +1,1191 @@
|
|
+#include <linux/fs.h>
|
|
+#include <linux/blkdev.h>
|
|
+#include <linux/cdrom.h>
|
|
+#include <linux/hdreg.h>
|
|
+#include <linux/module.h>
|
|
+#include <asm/tlbflush.h>
|
|
+
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_ioctl.h>
|
|
+
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/io/blkif.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+#include "../blkback/blkback-pagemap.h"
|
|
+
|
|
+#if 0
|
|
+#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
|
|
+#else
|
|
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
|
|
+#endif
|
|
+
|
|
+struct blktap_grant_table { /* batch of grant-map operations built up for one request */
|
|
+ int cnt; /* number of entries of grants[] filled in so far */
|
|
+ struct gnttab_map_grant_ref grants[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2]; /* up to two ops (kernel + user mapping) per segment */
|
|
+};
|
|
+
|
|
+static int blktap_device_major;
|
|
+
|
|
+static inline struct blktap *
|
|
+dev_to_blktap(struct blktap_device *dev) /* dev must be the .device member embedded in a struct blktap */
|
|
+{
|
|
+ return container_of(dev, struct blktap, device);
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_open(struct inode *inode, struct file *filep) /* block-device open: count a user if the tap is usable */
|
|
+{
|
|
+ struct blktap *tap;
|
|
+ struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
|
|
+
|
|
+ if (!dev)
|
|
+ return -ENOENT;
|
|
+
|
|
+ tap = dev_to_blktap(dev);
|
|
+ if (!blktap_active(tap) ||
|
|
+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) /* refuse opens on an inactive tap or one being shut down */
|
|
+ return -ENOENT;
|
|
+
|
|
+ dev->users++; /* NOTE(review): updated without taking dev->lock here */
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_release(struct inode *inode, struct file *filep) /* block-device release: drop a user, finish a pending shutdown */
|
|
+{
|
|
+ struct blktap_device *dev = inode->i_bdev->bd_disk->private_data; /* unlike open(), not checked for NULL */
|
|
+ struct blktap *tap = dev_to_blktap(dev);
|
|
+
|
|
+ dev->users--;
|
|
+ if (test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
|
|
+ blktap_device_destroy(tap); /* retry the teardown that was requested while the device was open */
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_getgeo(struct block_device *bd, struct hd_geometry *hg) /* fill hg with a synthetic geometry; always returns 0 */
|
|
+{
|
|
+ /* We don't have real geometry info, but let's at least return
|
|
+ values consistent with the size of the device */
|
|
+ sector_t nsect = get_capacity(bd->bd_disk);
|
|
+ sector_t cylinders = nsect;
|
|
+
|
|
+ hg->heads = 0xff;
|
|
+ hg->sectors = 0x3f;
|
|
+ sector_div(cylinders, hg->heads * hg->sectors); /* cylinders = nsect / (heads * sectors), computed in place */
|
|
+ hg->cylinders = cylinders;
|
|
+ if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect) /* stored count cannot cover the device: report the maximum */
|
|
+ hg->cylinders = 0xffff;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_ioctl(struct inode *inode, struct file *filep,
|
|
+ unsigned command, unsigned long argument) /* block-device ioctl: a few commands are answered with stub data */
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
|
|
+ command, (long)argument, inode->i_rdev);
|
|
+
|
|
+ switch (command) {
|
|
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
|
|
+ case HDIO_GETGEO: { /* on newer kernels this is served through .getgeo instead */
|
|
+ struct block_device *bd = inode->i_bdev;
|
|
+ struct hd_geometry geo;
|
|
+ int ret;
|
|
+
|
|
+ if (!argument)
|
|
+ return -EINVAL;
|
|
+
|
|
+ geo.start = get_start_sect(bd);
|
|
+ ret = blktap_device_getgeo(bd, &geo);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if (copy_to_user((struct hd_geometry __user *)argument, &geo,
|
|
+ sizeof(geo)))
|
|
+ return -EFAULT;
|
|
+
|
|
+ return 0;
|
|
+ }
|
|
+#endif
|
|
+ case CDROMMULTISESSION:
|
|
+ BTDBG("FIXME: support multisession CDs later\n");
|
|
+ for (i = 0; i < sizeof(struct cdrom_multisession); i++) /* zero the caller's structure one byte at a time */
|
|
+ if (put_user(0, (char __user *)(argument + i)))
|
|
+ return -EFAULT;
|
|
+ return 0;
|
|
+
|
|
+ case SCSI_IOCTL_GET_IDLUN:
|
|
+ if (!access_ok(VERIFY_WRITE, argument,
|
|
+ sizeof(struct scsi_idlun)))
|
|
+ return -EFAULT;
|
|
+
|
|
+ /* return 0 for now. */
|
|
+ __put_user(0, &((struct scsi_idlun __user *)argument)->dev_id); /* unchecked variant: the range was verified by access_ok() above */
|
|
+ __put_user(0,
|
|
+ &((struct scsi_idlun __user *)argument)->host_unique_id);
|
|
+ return 0;
|
|
+
|
|
+ default:
|
|
+ /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
|
|
+ command);*/
|
|
+ return -EINVAL; /* same return as native Linux */
|
|
+ }
|
|
+
|
|
+ return 0; /* not reached: every switch arm returns */
|
|
+}
|
|
+
|
|
+static struct block_device_operations blktap_device_file_operations = { /* operations of the tap block device */
|
|
+ .owner = THIS_MODULE,
|
|
+ .open = blktap_device_open,
|
|
+ .release = blktap_device_release,
|
|
+ .ioctl = blktap_device_ioctl,
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
|
|
+ .getgeo = blktap_device_getgeo
|
|
+#endif
|
|
+};
|
|
+
|
|
+static int
|
|
+blktap_map_uaddr_fn(pte_t *ptep, struct page *pmd_page,
|
|
+ unsigned long addr, void *data) /* apply_to_page_range() callback: install the pte passed via data */
|
|
+{
|
|
+ pte_t *pte = (pte_t *)data; /* data points at the pte value to install */
|
|
+
|
|
+ BTDBG("ptep %p -> %012llx\n", ptep, (unsigned long long)pte_val(*pte));
|
|
+ set_pte(ptep, *pte);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_map_uaddr(struct mm_struct *mm, unsigned long address, pte_t pte) /* set the pte for the single page at address in mm */
|
|
+{
|
|
+ return apply_to_page_range(mm, address,
|
|
+ PAGE_SIZE, blktap_map_uaddr_fn, &pte);
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_umap_uaddr_fn(pte_t *ptep, struct page *pmd_page,
|
|
+ unsigned long addr, void *data) /* apply_to_page_range() callback: clear the pte at addr */
|
|
+{
|
|
+ struct mm_struct *mm = (struct mm_struct *)data; /* data carries the mm that owns the pte */
|
|
+
|
|
+ BTDBG("ptep %p\n", ptep);
|
|
+ pte_clear(mm, addr, ptep);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_umap_uaddr(struct mm_struct *mm, unsigned long address) /* clear the pte for the single page at address in mm */
|
|
+{
|
|
+ return apply_to_page_range(mm, address,
|
|
+ PAGE_SIZE, blktap_umap_uaddr_fn, mm);
|
|
+}
|
|
+
|
|
+static inline void
|
|
+flush_tlb_kernel_page(unsigned long kvaddr) /* flush the TLB entry for the one kernel page at kvaddr */
|
|
+{
|
|
+#ifdef CONFIG_X86
|
|
+ xen_invlpg_all(kvaddr);
|
|
+#else
|
|
+ flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
|
|
+#endif
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_device_end_dequeued_request(struct blktap_device *dev,
|
|
+ struct request *req, int uptodate) /* complete all of req; uptodate is passed through to the block layer */
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ ret = end_that_request_first(req, uptodate, req->hard_nr_sectors);
|
|
+ BUG_ON(ret); /* the whole request was ended, so nothing may be left over */
|
|
+
|
|
+ spin_lock_irq(&dev->lock); /* end_that_request_last() is called under dev->lock */
|
|
+ end_that_request_last(req, uptodate);
|
|
+ spin_unlock_irq(&dev->lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * tap->tap_sem held on entry
|
|
+ */
|
|
+static void
|
|
+blktap_device_fast_flush(struct blktap *tap, struct blktap_request *request) /* undo the grant mappings and user ptes of request */
|
|
+{
|
|
+ uint64_t ptep;
|
|
+ int ret, usr_idx;
|
|
+ unsigned int i, cnt;
|
|
+ struct page **map, *page;
|
|
+ struct blktap_ring *ring;
|
|
+ struct grant_handle_pair *khandle;
|
|
+ unsigned long kvaddr, uvaddr, offset;
|
|
+ struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST * 2]; /* at most two unmap ops per segment */
|
|
+ grant_handle_t self_gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
|
|
+ int self_gref_nr = 0;
|
|
+
|
|
+ cnt = 0;
|
|
+ ring = &tap->ring;
|
|
+ usr_idx = request->usr_idx;
|
|
+ map = ring->foreign_map.map;
|
|
+
|
|
+ if (!ring->vma) /* no user mapping: nothing to undo */
|
|
+ return;
|
|
+
|
|
+ if (xen_feature(XENFEAT_auto_translated_physmap)) /* auto-translated guests zap the user range before the grant unmap */
|
|
+ zap_page_range(ring->vma,
|
|
+ MMAP_VADDR(ring->user_vstart, usr_idx, 0),
|
|
+ request->nr_pages << PAGE_SHIFT, NULL);
|
|
+
|
|
+ for (i = 0; i < request->nr_pages; i++) {
|
|
+ kvaddr = request_to_kaddr(request, i);
|
|
+ uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
|
|
+
|
|
+ khandle = request->handles + i;
|
|
+
|
|
+ if (khandle->kernel != INVALID_GRANT_HANDLE) { /* queue the unmap of the kernel-side grant mapping */
|
|
+ gnttab_set_unmap_op(&unmap[cnt], kvaddr,
|
|
+ GNTMAP_host_map, khandle->kernel);
|
|
+ cnt++;
|
|
+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
|
|
+ INVALID_P2M_ENTRY);
|
|
+ }
|
|
+
|
|
+ if (khandle->user != INVALID_GRANT_HANDLE) { /* queue the unmap of the user-side grant mapping */
|
|
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
|
+ if (create_lookup_pte_addr(ring->vma->vm_mm,
|
|
+ uvaddr, &ptep) != 0) {
|
|
+ BTERR("Couldn't get a pte addr!\n");
|
|
+ return; /* NOTE(review): unmap ops queued so far are never issued on this path */
|
|
+ }
|
|
+
|
|
+ gnttab_set_unmap_op(&unmap[cnt], ptep,
|
|
+ GNTMAP_host_map
|
|
+ | GNTMAP_application_map
|
|
+ | GNTMAP_contains_pte,
|
|
+ khandle->user);
|
|
+ cnt++;
|
|
+ }
|
|
+
|
|
+ offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT; /* page index of uvaddr within the vma, used to index map[] */
|
|
+
|
|
+ BTDBG("offset: 0x%08lx, page: %p, request: %p, usr_idx: %d, "
|
|
+ "seg: %d, kvaddr: 0x%08lx, khandle: %u, uvaddr: "
|
|
+ "0x%08lx, handle: %u\n", offset, map[offset], request,
|
|
+ usr_idx, i, kvaddr, khandle->kernel, uvaddr,
|
|
+ khandle->user);
|
|
+
|
|
+ page = map[offset];
|
|
+ if (page) {
|
|
+ if (PageBlkback(page)) { /* drop the blkback marking set when the segment was mapped */
|
|
+ ClearPageBlkback(page);
|
|
+ set_page_private(page, 0);
|
|
+ } else if (
|
|
+ xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ self_gref[self_gref_nr] = khandle->kernel; /* remembered so foreign access can be ended after the unmap */
|
|
+ self_gref_nr++;
|
|
+ }
|
|
+ }
|
|
+ map[offset] = NULL;
|
|
+
|
|
+ khandle->kernel = INVALID_GRANT_HANDLE;
|
|
+ khandle->user = INVALID_GRANT_HANDLE;
|
|
+ }
|
|
+
|
|
+ if (cnt) { /* issue all queued unmap operations in one hypercall */
|
|
+ ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
|
|
+ unmap, cnt);
|
|
+ BUG_ON(ret);
|
|
+ }
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) /* otherwise the user range is zapped after the grant unmap */
|
|
+ zap_page_range(ring->vma,
|
|
+ MMAP_VADDR(ring->user_vstart, usr_idx, 0),
|
|
+ request->nr_pages << PAGE_SHIFT, NULL);
|
|
+ else {
|
|
+ for (i = 0; i < self_gref_nr; i++) {
|
|
+ gnttab_end_foreign_access_ref(self_gref[i]);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * tap->tap_sem held on entry
|
|
+ */
|
|
+static void
|
|
+blktap_unmap(struct blktap *tap, struct blktap_request *request) /* remove all kernel and user mappings of request */
|
|
+{
|
|
+ int i, usr_idx;
|
|
+ unsigned long kvaddr;
|
|
+
|
|
+ usr_idx = request->usr_idx;
|
|
+ down_write(&tap->ring.vma->vm_mm->mmap_sem); /* NOTE(review): ring.vma is dereferenced here without a NULL check */
|
|
+
|
|
+ for (i = 0; i < request->nr_pages; i++) {
|
|
+ kvaddr = request_to_kaddr(request, i);
|
|
+ BTDBG("request: %p, seg: %d, kvaddr: 0x%08lx, khandle: %u, "
|
|
+ "uvaddr: 0x%08lx, uhandle: %u\n", request, i,
|
|
+ kvaddr, request->handles[i].kernel,
|
|
+ MMAP_VADDR(tap->ring.user_vstart, usr_idx, i),
|
|
+ request->handles[i].user);
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
|
|
+ request->handles[i].kernel == INVALID_GRANT_HANDLE) { /* segment was mapped by blktap_map() with a plain pte, not a grant */
|
|
+ blktap_umap_uaddr(&init_mm, kvaddr);
|
|
+ flush_tlb_kernel_page(kvaddr);
|
|
+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
|
|
+ INVALID_P2M_ENTRY);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ blktap_device_fast_flush(tap, request); /* handles grant-mapped segments and the user range */
|
|
+ up_write(&tap->ring.vma->vm_mm->mmap_sem);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * called if the tapdisk process dies unexpectedly.
|
|
+ * fail and release any pending requests and disable queue.
|
|
+ */
|
|
+void
|
|
+blktap_device_fail_pending_requests(struct blktap *tap) /* does nothing unless BLKTAP_DEVICE is set in tap->dev_inuse */
|
|
+{
|
|
+ int usr_idx;
|
|
+ struct request *req;
|
|
+ struct blktap_device *dev;
|
|
+ struct blktap_request *request;
|
|
+
|
|
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
|
|
+ return;
|
|
+
|
|
+ down_write(&tap->tap_sem); /* blktap_unmap() expects tap_sem to be held */
|
|
+
|
|
+ dev = &tap->device;
|
|
+ for (usr_idx = 0; usr_idx < MAX_PENDING_REQS; usr_idx++) {
|
|
+ request = tap->pending_requests[usr_idx];
|
|
+ if (!request || request->status != BLKTAP_REQUEST_PENDING) /* skip empty slots and requests that are not in flight */
|
|
+ continue;
|
|
+
|
|
+ BTERR("%u:%u: failing pending %s of %d pages\n",
|
|
+ blktap_device_major, tap->minor,
|
|
+ (request->operation == BLKIF_OP_READ ?
|
|
+ "read" : "write"), request->nr_pages);
|
|
+
|
|
+ blktap_unmap(tap, request);
|
|
+ req = (struct request *)(unsigned long)request->id; /* id holds the struct request pointer */
|
|
+ blktap_device_end_dequeued_request(dev, req, 0); /* uptodate == 0: complete as failed */
|
|
+ blktap_request_free(tap, request);
|
|
+ }
|
|
+
|
|
+ up_write(&tap->tap_sem);
|
|
+
|
|
+ spin_lock_irq(&dev->lock);
|
|
+
|
|
+ /* fail any future requests */
|
|
+ dev->gd->queue->queuedata = NULL;
|
|
+ blk_start_queue(dev->gd->queue);
|
|
+
|
|
+ spin_unlock_irq(&dev->lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * tap->tap_sem held on entry
|
|
+ */
|
|
+void
|
|
+blktap_device_finish_request(struct blktap *tap,
|
|
+ blkif_response_t *res,
|
|
+ struct blktap_request *request) /* unmap request, complete its block request per res->status, then free it */
|
|
+{
|
|
+ int uptodate;
|
|
+ struct request *req;
|
|
+ struct blktap_device *dev;
|
|
+
|
|
+ dev = &tap->device;
|
|
+
|
|
+ blktap_unmap(tap, request);
|
|
+
|
|
+ req = (struct request *)(unsigned long)request->id; /* id holds the struct request pointer */
|
|
+ uptodate = (res->status == BLKIF_RSP_OKAY); /* 1 on success, 0 on any other status */
|
|
+
|
|
+ BTDBG("req %p res status %d operation %d/%d id %lld\n", req,
|
|
+ res->status, res->operation, request->operation,
|
|
+ (unsigned long long)res->id);
|
|
+
|
|
+ switch (request->operation) {
|
|
+ case BLKIF_OP_READ:
|
|
+ case BLKIF_OP_WRITE:
|
|
+ if (unlikely(res->status != BLKIF_RSP_OKAY))
|
|
+ BTERR("Bad return from device data "
|
|
+ "request: %x\n", res->status);
|
|
+ blktap_device_end_dequeued_request(dev, req, uptodate);
|
|
+ break;
|
|
+ default:
|
|
+ BUG(); /* only reads and writes are ever issued */
|
|
+ }
|
|
+
|
|
+ blktap_request_free(tap, request);
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_prep_foreign(struct blktap *tap,
|
|
+ struct blktap_request *request,
|
|
+ blkif_request_t *blkif_req,
|
|
+ unsigned int seg, struct page *page,
|
|
+ struct blktap_grant_table *table) /* queue grant-map ops for a blkback page; 0 on success, -1 if the user pte lookup fails */
|
|
+{
|
|
+ uint64_t ptep;
|
|
+ uint32_t flags;
|
|
+ struct page *tap_page;
|
|
+ struct blktap_ring *ring;
|
|
+ struct blkback_pagemap map;
|
|
+ unsigned long uvaddr, kvaddr;
|
|
+
|
|
+ ring = &tap->ring;
|
|
+ map = blkback_pagemap_read(page); /* supplies the gref and domid used for both map ops below */
|
|
+ blkif_req->seg[seg].gref = map.gref; /* a non-zero gref is how blktap_map_foreign() recognizes this segment */
|
|
+
|
|
+ uvaddr = MMAP_VADDR(ring->user_vstart, request->usr_idx, seg);
|
|
+ kvaddr = request_to_kaddr(request, seg);
|
|
+ flags = GNTMAP_host_map |
|
|
+ (request->operation == BLKIF_OP_WRITE ? GNTMAP_readonly : 0); /* data being written is only mapped read-only */
|
|
+
|
|
+ gnttab_set_map_op(&table->grants[table->cnt],
|
|
+ kvaddr, flags, map.gref, map.domid); /* first op: kernel-address mapping */
|
|
+ table->cnt++;
|
|
+
|
|
+ /* enable chained tap devices */
|
|
+ tap_page = request_to_page(request, seg);
|
|
+ set_page_private(tap_page, page_private(page));
|
|
+ SetPageBlkback(tap_page);
|
|
+
|
|
+ if (xen_feature(XENFEAT_auto_translated_physmap)) /* auto-translated guests queue no user-pte op */
|
|
+ return 0;
|
|
+
|
|
+ if (create_lookup_pte_addr(ring->vma->vm_mm, uvaddr, &ptep)) {
|
|
+ BTERR("couldn't get a pte addr!\n");
|
|
+ return -1; /* the kernel-mapping op queued above stays in the table */
|
|
+ }
|
|
+
|
|
+ flags |= GNTMAP_application_map | GNTMAP_contains_pte;
|
|
+ gnttab_set_map_op(&table->grants[table->cnt],
|
|
+ ptep, flags, map.gref, map.domid); /* second op: user mapping, addressed by pte */
|
|
+ table->cnt++;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_map_foreign(struct blktap *tap,
|
|
+ struct blktap_request *request,
|
|
+ blkif_request_t *blkif_req,
|
|
+ struct blktap_grant_table *table) /* issue the queued grant-map ops and record the handles; non-zero if any mapping failed */
|
|
+{
|
|
+ struct page *page;
|
|
+ int i, grant, err, usr_idx;
|
|
+ struct blktap_ring *ring;
|
|
+ unsigned long uvaddr, foreign_mfn;
|
|
+
|
|
+ if (!table->cnt) /* no foreign segments were queued */
|
|
+ return 0;
|
|
+
|
|
+ err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
|
|
+ table->grants, table->cnt);
|
|
+ BUG_ON(err);
|
|
+
|
|
+ grant = 0; /* walks table->grants in the order blktap_prep_foreign() queued the ops */
|
|
+ usr_idx = request->usr_idx;
|
|
+ ring = &tap->ring;
|
|
+
|
|
+ for (i = 0; i < request->nr_pages; i++) {
|
|
+ if (!blkif_req->seg[i].gref) /* segment has no gref, so it queued no ops */
|
|
+ continue;
|
|
+
|
|
+ uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
|
|
+
|
|
+ if (unlikely(table->grants[grant].status != GNTST_okay)) {
|
|
+ BTERR("invalid kernel buffer: could not remap it\n");
|
|
+ /* This should never happen: blkback should handle eagain first */
|
|
+ BUG_ON(table->grants[grant].status == GNTST_eagain);
|
|
+ err |= 1;
|
|
+ table->grants[grant].handle = INVALID_GRANT_HANDLE;
|
|
+ }
|
|
+
|
|
+ request->handles[i].kernel = table->grants[grant].handle;
|
|
+ foreign_mfn = table->grants[grant].dev_bus_addr >> PAGE_SHIFT;
|
|
+ grant++;
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* a second, user-side op exists only in this mode */
|
|
+ if (unlikely(table->grants[grant].status != GNTST_okay)) {
|
|
+ /* This should never happen: blkback should handle eagain first */
|
|
+ WARN_ON(table->grants[grant].status == GNTST_eagain); /* only warns, whereas the kernel-side check above uses BUG_ON */
|
|
+ BTERR("invalid user buffer: could not remap it\n");
|
|
+ err |= 1;
|
|
+ table->grants[grant].handle = INVALID_GRANT_HANDLE;
|
|
+ }
|
|
+ request->handles[i].user = table->grants[grant].handle;
|
|
+ grant++;
|
|
+ }
|
|
+
|
|
+ if (err) /* after the first failure, handles are still recorded but no further pages are installed */
|
|
+ continue;
|
|
+
|
|
+ page = request_to_page(request, i);
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
|
|
+ set_phys_to_machine(page_to_pfn(page),
|
|
+ FOREIGN_FRAME(foreign_mfn));
|
|
+ else if (vm_insert_page(ring->vma, uvaddr, page))
|
|
+ err |= 1;
|
|
+
|
|
+ BTDBG("pending_req: %p, seg: %d, page: %p, "
|
|
+ "kvaddr: 0x%p, khandle: %u, uvaddr: 0x%08lx, "
|
|
+ "uhandle: %u\n", request, i, page,
|
|
+ pfn_to_kaddr(page_to_pfn(page)),
|
|
+ request->handles[i].kernel,
|
|
+ uvaddr, request->handles[i].user);
|
|
+ }
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_map(struct blktap *tap,
|
|
+ struct blktap_request *request,
|
|
+ unsigned int seg, struct page *page) /* map a local page into the slot of segment seg; returns 0 or the vm_insert_page() error */
|
|
+{
|
|
+ pte_t pte;
|
|
+ int usr_idx;
|
|
+ struct blktap_ring *ring;
|
|
+ unsigned long uvaddr, kvaddr;
|
|
+ int err = 0;
|
|
+
|
|
+ ring = &tap->ring;
|
|
+ usr_idx = request->usr_idx;
|
|
+ uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, seg);
|
|
+ kvaddr = request_to_kaddr(request, seg);
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) { /* write the user and kernel ptes directly */
|
|
+ pte = mk_pte(page, ring->vma->vm_page_prot);
|
|
+ blktap_map_uaddr(ring->vma->vm_mm, uvaddr, pte_mkwrite(pte)); /* return value is not checked */
|
|
+ flush_tlb_page(ring->vma, uvaddr);
|
|
+ blktap_map_uaddr(&init_mm, kvaddr, mk_pte(page, PAGE_KERNEL));
|
|
+ flush_tlb_kernel_page(kvaddr);
|
|
+
|
|
+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT, pte_mfn(pte));
|
|
+ request->handles[seg].kernel = INVALID_GRANT_HANDLE; /* tells blktap_unmap() this segment used a plain pte */
|
|
+ } else {
|
|
+ /* grant this page access to self domain and map it. */
|
|
+ domid_t domid = 0; /* XXX my domian id: grant table hypercall
|
|
+ doesn't understand DOMID_SELF */
|
|
+ int gref;
|
|
+ uint32_t flags;
|
|
+ struct gnttab_map_grant_ref map;
|
|
+ struct page *tap_page;
|
|
+
|
|
+ gref = gnttab_grant_foreign_access(
|
|
+ domid, page_to_pfn(page),
|
|
+ (request->operation == BLKIF_OP_WRITE)?
|
|
+ GTF_readonly: 0); /* NOTE(review): gref is not checked for an error value before use */
|
|
+
|
|
+ flags = GNTMAP_host_map |
|
|
+ (request->operation == BLKIF_OP_WRITE ?
|
|
+ GNTMAP_readonly : 0);
|
|
+
|
|
+ gnttab_set_map_op(&map, kvaddr, flags, gref, domid);
|
|
+
|
|
+ /* enable chained tap devices */
|
|
+ tap_page = request_to_page(request, seg);
|
|
+ set_page_private(tap_page, page_private(page));
|
|
+ SetPageBlkback(tap_page);
|
|
+
|
|
+ gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map);
|
|
+
|
|
+ /* We are not expecting the grant op to fail */
|
|
+ BUG_ON(map.status != GNTST_okay);
|
|
+
|
|
+ err = vm_insert_page(ring->vma, uvaddr, tap_page);
|
|
+ if (err) {
|
|
+ struct gnttab_unmap_grant_ref unmap;
|
|
+ gnttab_set_unmap_op(&unmap, kvaddr,
|
|
+ GNTMAP_host_map, gref); /* NOTE(review): passes the grant reference where other callers pass a grant handle (map.handle?) -- confirm */
|
|
+ VOID(HYPERVISOR_grant_table_op(
|
|
+ GNTTABOP_unmap_grant_ref, &unmap, 1));
|
|
+ } else
|
|
+ request->handles[seg].kernel = gref; /* stores the grant reference; blktap_device_fast_flush() later uses it for both unmap and end-foreign-access */
|
|
+ }
|
|
+ request->handles[seg].user = INVALID_GRANT_HANDLE; /* this path never creates a user-side grant mapping */
|
|
+
|
|
+ BTDBG("pending_req: %p, seg: %d, page: %p, kvaddr: 0x%08lx, "
|
|
+ "uvaddr: 0x%08lx\n", request, seg, page, kvaddr,
|
|
+ uvaddr);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_device_process_request(struct blktap *tap,
|
|
+ struct blktap_request *request,
|
|
+ struct request *req)
|
|
+{
|
|
+ struct page *page;
|
|
+ int i, usr_idx, err;
|
|
+ struct blktap_ring *ring;
|
|
+ struct scatterlist *sg;
|
|
+ struct blktap_grant_table table;
|
|
+ unsigned int fsect, lsect, nr_sects;
|
|
+ unsigned long offset, uvaddr;
|
|
+ struct blkif_request blkif_req, *target;
|
|
+
|
|
+ err = -1;
|
|
+ memset(&table, 0, sizeof(table));
|
|
+
|
|
+ if (!blktap_active(tap))
|
|
+ goto out;
|
|
+
|
|
+ ring = &tap->ring;
|
|
+ usr_idx = request->usr_idx;
|
|
+ blkif_req.id = usr_idx;
|
|
+ blkif_req.sector_number = (blkif_sector_t)req->sector;
|
|
+ blkif_req.handle = 0;
|
|
+ blkif_req.operation = rq_data_dir(req) ?
|
|
+ BLKIF_OP_WRITE : BLKIF_OP_READ;
|
|
+
|
|
+ request->id = (unsigned long)req;
|
|
+ request->operation = blkif_req.operation;
|
|
+ request->status = BLKTAP_REQUEST_PENDING;
|
|
+ do_gettimeofday(&request->time);
|
|
+
|
|
+ nr_sects = 0;
|
|
+ request->nr_pages = 0;
|
|
+ blkif_req.nr_segments = blk_rq_map_sg(req->q, req, tap->sg);
|
|
+ BUG_ON(blkif_req.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+ for (i = 0; i < blkif_req.nr_segments; ++i) {
|
|
+ sg = tap->sg + i;
|
|
+ fsect = sg->offset >> 9;
|
|
+ lsect = fsect + (sg->length >> 9) - 1;
|
|
+ nr_sects += sg->length >> 9;
|
|
+
|
|
+ blkif_req.seg[i] =
|
|
+ (struct blkif_request_segment) {
|
|
+ .gref = 0,
|
|
+ .first_sect = fsect,
|
|
+ .last_sect = lsect };
|
|
+
|
|
+ if (PageBlkback(sg->page)) {
|
|
+ /* foreign page -- use xen */
|
|
+ if (blktap_prep_foreign(tap,
|
|
+ request,
|
|
+ &blkif_req,
|
|
+ i,
|
|
+ sg->page,
|
|
+ &table))
|
|
+ goto out;
|
|
+ } else {
|
|
+ /* do it the old fashioned way */
|
|
+ if (blktap_map(tap,
|
|
+ request,
|
|
+ i,
|
|
+ sg->page))
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ uvaddr = MMAP_VADDR(ring->user_vstart, usr_idx, i);
|
|
+ offset = (uvaddr - ring->vma->vm_start) >> PAGE_SHIFT;
|
|
+ page = request_to_page(request, i);
|
|
+ ring->foreign_map.map[offset] = page;
|
|
+ SetPageReserved(page);
|
|
+
|
|
+ BTDBG("mapped uaddr %08lx to page %p pfn 0x%lx\n",
|
|
+ uvaddr, page, page_to_pfn(page));
|
|
+ BTDBG("offset: 0x%08lx, pending_req: %p, seg: %d, "
|
|
+ "page: %p, kvaddr: %p, uvaddr: 0x%08lx\n",
|
|
+ offset, request, i,
|
|
+ page, pfn_to_kaddr(page_to_pfn(page)), uvaddr);
|
|
+
|
|
+ request->nr_pages++;
|
|
+ }
|
|
+
|
|
+ if (blktap_map_foreign(tap, request, &blkif_req, &table))
|
|
+ goto out;
|
|
+
|
|
+ /* Finally, write the request message to the user ring. */
|
|
+ target = RING_GET_REQUEST(&ring->ring, ring->ring.req_prod_pvt);
|
|
+ memcpy(target, &blkif_req, sizeof(blkif_req));
|
|
+ target->id = request->usr_idx;
|
|
+ wmb(); /* blktap_poll() reads req_prod_pvt asynchronously */
|
|
+ ring->ring.req_prod_pvt++;
|
|
+
|
|
+ if (rq_data_dir(req)) {
|
|
+ tap->stats.st_wr_sect += nr_sects;
|
|
+ tap->stats.st_wr_req++;
|
|
+ } else {
|
|
+ tap->stats.st_rd_sect += nr_sects;
|
|
+ tap->stats.st_rd_req++;
|
|
+ }
|
|
+
|
|
+ err = 0;
|
|
+
|
|
+out:
|
|
+ if (err)
|
|
+ blktap_device_fast_flush(tap, request);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+#ifdef ENABLE_PASSTHROUGH
|
|
+#define rq_for_each_bio_safe(_bio, _tmp, _req) \
|
|
+ if ((_req)->bio) \
|
|
+ for (_bio = (_req)->bio; \
|
|
+ _bio && ((_tmp = _bio->bi_next) || 1); \
|
|
+ _bio = _tmp)
|
|
+
|
|
+static void
|
|
+blktap_device_forward_request(struct blktap *tap, struct request *req)
|
|
+{
|
|
+ struct bio *bio, *tmp;
|
|
+ struct blktap_device *dev;
|
|
+
|
|
+ dev = &tap->device;
|
|
+
|
|
+ rq_for_each_bio_safe(bio, tmp, req) {
|
|
+ bio->bi_bdev = dev->bdev;
|
|
+ submit_bio(bio->bi_rw, bio);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_device_close_bdev(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_device *dev;
|
|
+
|
|
+ dev = &tap->device;
|
|
+
|
|
+ if (dev->bdev)
|
|
+ blkdev_put(dev->bdev);
|
|
+
|
|
+ dev->bdev = NULL;
|
|
+ clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Open block device @pdev for writing and make it the passthrough target
|
|
+ * of @tap.  On success the device is stored in tap->device.bdev and the
|
|
+ * BLKTAP_PASSTHROUGH bit is set.
|
|
+ *
|
|
+ * Returns 0 on success, the error from open_by_devnum() if the open fails,
|
|
+ * or -ENOENT if the opened device has no gendisk attached.
|
|
+ */
|
|
+static int
|
|
+blktap_device_open_bdev(struct blktap *tap, u32 pdev)
|
|
+{
|
|
+	struct block_device *bdev;
|
|
+	struct blktap_device *dev;
|
|
+
|
|
+	dev = &tap->device;
|
|
+
|
|
+	bdev = open_by_devnum(pdev, FMODE_WRITE);
|
|
+	if (IS_ERR(bdev)) {
|
|
+		BTERR("opening device %x:%x failed: %ld\n",
|
|
+		      MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev));
|
|
+		return PTR_ERR(bdev);
|
|
+	}
|
|
+
|
|
+	if (!bdev->bd_disk) {
|
|
+		BTERR("device %x:%x doesn't exist\n",
|
|
+		      MAJOR(pdev), MINOR(pdev));
|
|
+		/*
|
|
+		 * Release the device we just opened.  dev->bdev has not
|
|
+		 * been assigned yet, so it must not be used here.
|
|
+		 */
|
|
+		blkdev_put(bdev);
|
|
+		return -ENOENT;
|
|
+	}
|
|
+
|
|
+	dev->bdev = bdev;
|
|
+	set_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
|
|
+
|
|
+	/* TODO: readjust queue parameters */
|
|
+
|
|
+	BTINFO("set device %d to passthrough on %x:%x\n",
|
|
+	       tap->minor, MAJOR(pdev), MINOR(pdev));
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_device_enable_passthrough(struct blktap *tap,
|
|
+ unsigned major, unsigned minor)
|
|
+{
|
|
+ u32 pdev;
|
|
+ struct blktap_device *dev;
|
|
+
|
|
+ dev = &tap->device;
|
|
+ pdev = MKDEV(major, minor);
|
|
+
|
|
+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (dev->bdev) {
|
|
+ if (pdev)
|
|
+ return -EINVAL;
|
|
+ blktap_device_close_bdev(tap);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ return blktap_device_open_bdev(tap, pdev);
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * dev->lock held on entry
|
|
+ */
|
|
+static void
|
|
+blktap_device_run_queue(struct blktap *tap)
|
|
+{
|
|
+ int queued, err;
|
|
+ request_queue_t *rq;
|
|
+ struct request *req;
|
|
+ struct blktap_ring *ring;
|
|
+ struct blktap_device *dev;
|
|
+ struct blktap_request *request;
|
|
+
|
|
+ queued = 0;
|
|
+ ring = &tap->ring;
|
|
+ dev = &tap->device;
|
|
+ rq = dev->gd->queue;
|
|
+
|
|
+ BTDBG("running queue for %d\n", tap->minor);
|
|
+
|
|
+ while ((req = elv_next_request(rq)) != NULL) {
|
|
+ if (!blk_fs_request(req)) {
|
|
+ end_request(req, 0);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (blk_barrier_rq(req)) {
|
|
+ end_request(req, 0);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+#ifdef ENABLE_PASSTHROUGH
|
|
+ if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
|
|
+ blkdev_dequeue_request(req);
|
|
+ blktap_device_forward_request(tap, req);
|
|
+ continue;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ if (RING_FULL(&ring->ring)) {
|
|
+ wait:
|
|
+ /* Avoid pointless unplugs. */
|
|
+ blk_stop_queue(rq);
|
|
+ blktap_defer(tap);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ request = blktap_request_allocate(tap);
|
|
+ if (!request) {
|
|
+ tap->stats.st_oo_req++;
|
|
+ goto wait;
|
|
+ }
|
|
+
|
|
+ BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
|
|
+ "buffer:%p [%s], pending: %p\n", req, tap->minor,
|
|
+ req->cmd, (unsigned long long)req->sector,
|
|
+ req->current_nr_sectors, req->nr_sectors, req->buffer,
|
|
+ rq_data_dir(req) ? "write" : "read", request);
|
|
+
|
|
+ blkdev_dequeue_request(req);
|
|
+
|
|
+ spin_unlock_irq(&dev->lock);
|
|
+ down_write(&tap->tap_sem);
|
|
+
|
|
+ err = blktap_device_process_request(tap, request, req);
|
|
+ if (!err)
|
|
+ queued++;
|
|
+ else {
|
|
+ blktap_device_end_dequeued_request(dev, req, 0);
|
|
+ blktap_request_free(tap, request);
|
|
+ }
|
|
+
|
|
+ up_write(&tap->tap_sem);
|
|
+ spin_lock_irq(&dev->lock);
|
|
+ }
|
|
+
|
|
+ if (queued)
|
|
+ blktap_ring_kick_user(tap);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * dev->lock held on entry
|
|
+ */
|
|
+static void
|
|
+blktap_device_do_request(request_queue_t *rq)
|
|
+{
|
|
+ struct request *req;
|
|
+ struct blktap *tap;
|
|
+ struct blktap_device *dev;
|
|
+
|
|
+ dev = rq->queuedata;
|
|
+ if (!dev)
|
|
+ goto fail;
|
|
+
|
|
+ tap = dev_to_blktap(dev);
|
|
+ if (!blktap_active(tap))
|
|
+ goto fail;
|
|
+
|
|
+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
|
|
+ test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
|
|
+ blktap_defer(tap);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ blktap_device_run_queue(tap);
|
|
+ return;
|
|
+
|
|
+fail:
|
|
+ while ((req = elv_next_request(rq))) {
|
|
+ BTERR("device closed: failing secs %llu - %llu\n",
|
|
+ (unsigned long long)req->sector,
|
|
+ (unsigned long long)req->sector + req->nr_sectors);
|
|
+ end_request(req, 0);
|
|
+ }
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_device_restart(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_device *dev;
|
|
+
|
|
+ dev = &tap->device;
|
|
+
|
|
+ if (blktap_active(tap) && RING_FULL(&tap->ring.ring)) {
|
|
+ blktap_defer(tap);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse) ||
|
|
+ test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
|
|
+ blktap_defer(tap);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ spin_lock_irq(&dev->lock);
|
|
+
|
|
+ /* Re-enable calldowns. */
|
|
+ if (dev->gd) {
|
|
+ struct request_queue *rq = dev->gd->queue;
|
|
+
|
|
+ if (blk_queue_stopped(rq))
|
|
+ blk_start_queue(rq);
|
|
+
|
|
+ /* Kick things off immediately. */
|
|
+ blktap_device_do_request(rq);
|
|
+ }
|
|
+
|
|
+ spin_unlock_irq(&dev->lock);
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_device_configure(struct blktap *tap)
|
|
+{
|
|
+ struct request_queue *rq;
|
|
+ struct blktap_device *dev = &tap->device;
|
|
+
|
|
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !dev->gd)
|
|
+ return;
|
|
+
|
|
+ dev = &tap->device;
|
|
+ rq = dev->gd->queue;
|
|
+
|
|
+ spin_lock_irq(&dev->lock);
|
|
+
|
|
+ set_capacity(dev->gd, tap->params.capacity);
|
|
+
|
|
+ /* Hard sector size and max sectors impersonate the equiv. hardware. */
|
|
+ blk_queue_hardsect_size(rq, tap->params.sector_size);
|
|
+ blk_queue_max_sectors(rq, 512);
|
|
+
|
|
+ /* Each segment in a request is up to an aligned page in size. */
|
|
+ blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
|
|
+ blk_queue_max_segment_size(rq, PAGE_SIZE);
|
|
+
|
|
+ /* Ensure a merged request will fit in a single I/O ring slot. */
|
|
+ blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+ blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
|
|
+
|
|
+ /* Make sure buffer addresses are sector-aligned. */
|
|
+ blk_queue_dma_alignment(rq, 511);
|
|
+
|
|
+ spin_unlock_irq(&dev->lock);
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_device_resume(struct blktap *tap)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
|
|
+ return -ENODEV;
|
|
+
|
|
+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+ return 0;
|
|
+
|
|
+ err = blktap_ring_resume(tap);
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ /* device size may have changed */
|
|
+ blktap_device_configure(tap);
|
|
+
|
|
+ BTDBG("restarting device\n");
|
|
+ blktap_device_restart(tap);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_device_pause(struct blktap *tap)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ struct blktap_device *dev = &tap->device;
|
|
+
|
|
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse) || !blktap_active(tap))
|
|
+ return -ENODEV;
|
|
+
|
|
+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+ return 0;
|
|
+
|
|
+ spin_lock_irqsave(&dev->lock, flags);
|
|
+
|
|
+ blk_stop_queue(dev->gd->queue);
|
|
+ set_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
|
|
+
|
|
+ spin_unlock_irqrestore(&dev->lock, flags);
|
|
+
|
|
+ return blktap_ring_pause(tap);
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_device_destroy(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_device *dev = &tap->device;
|
|
+ struct gendisk *gd = dev->gd;
|
|
+
|
|
+ if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
|
|
+ return 0;
|
|
+
|
|
+ BTINFO("destroy device %d users %d\n", tap->minor, dev->users);
|
|
+
|
|
+ if (dev->users)
|
|
+ return -EBUSY;
|
|
+
|
|
+ spin_lock_irq(&dev->lock);
|
|
+ /* No more blktap_device_do_request(). */
|
|
+ blk_stop_queue(gd->queue);
|
|
+ clear_bit(BLKTAP_DEVICE, &tap->dev_inuse);
|
|
+ dev->gd = NULL;
|
|
+ spin_unlock_irq(&dev->lock);
|
|
+
|
|
+#ifdef ENABLE_PASSTHROUGH
|
|
+ if (dev->bdev)
|
|
+ blktap_device_close_bdev(tap);
|
|
+#endif
|
|
+
|
|
+ del_gendisk(gd);
|
|
+ blk_cleanup_queue(gd->queue);
|
|
+ put_disk(gd);
|
|
+
|
|
+ wake_up(&tap->wq);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Create the gendisk and request queue backing @tap and register the disk.
|
|
+ *
|
|
+ * Returns 0 on success, -EEXIST if the device already exists, -EINVAL if
|
|
+ * tap->params fails validation, or -ENODEV if the disk or its queue cannot
|
|
+ * be allocated.
|
|
+ */
|
|
+int
|
|
+blktap_device_create(struct blktap *tap)
|
|
+{
|
|
+	int minor, err;
|
|
+	struct gendisk *gd;
|
|
+	struct request_queue *rq;
|
|
+	struct blktap_device *dev;
|
|
+
|
|
+	gd    = NULL;
|
|
+	rq    = NULL;
|
|
+	dev   = &tap->device;
|
|
+	minor = tap->minor;
|
|
+
|
|
+	if (test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
|
|
+		return -EEXIST;
|
|
+
|
|
+	if (blktap_validate_params(tap, &tap->params))
|
|
+		return -EINVAL;
|
|
+
|
|
+	BTINFO("minor %d sectors %Lu sector-size %lu\n",
|
|
+	       minor, tap->params.capacity, tap->params.sector_size);
|
|
+
|
|
+	err = -ENODEV;
|
|
+
|
|
+	gd = alloc_disk(1);
|
|
+	if (!gd)
|
|
+		goto error;
|
|
+
|
|
+	/* tapdeva..tapdevz, then two-letter suffixes. */
|
|
+	if (minor < 26)
|
|
+		sprintf(gd->disk_name, "tapdev%c", 'a' + minor);
|
|
+	else
|
|
+		sprintf(gd->disk_name, "tapdev%c%c",
|
|
+			'a' + ((minor / 26) - 1), 'a' + (minor % 26));
|
|
+
|
|
+	gd->major = blktap_device_major;
|
|
+	gd->first_minor = minor;
|
|
+	gd->fops = &blktap_device_file_operations;
|
|
+	gd->private_data = dev;
|
|
+
|
|
+	spin_lock_init(&dev->lock);
|
|
+	rq = blk_init_queue(blktap_device_do_request, &dev->lock);
|
|
+	if (!rq)
|
|
+		goto error;
|
|
+
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
|
|
+	elevator_init(rq, "noop");
|
|
+#else
|
|
+	elevator_init(rq, &elevator_noop);
|
|
+#endif
|
|
+
|
|
+	gd->queue = rq;
|
|
+	rq->queuedata = dev;
|
|
+	dev->gd = gd;
|
|
+
|
|
+	set_bit(BLKTAP_DEVICE, &tap->dev_inuse);
|
|
+	blktap_device_configure(tap);
|
|
+
|
|
+	add_disk(gd);
|
|
+
|
|
+	err = 0;
|
|
+	goto out;
|
|
+
|
|
+ error:
|
|
+	/*
|
|
+	 * The disk was never registered with add_disk() on this path, so
|
|
+	 * drop the alloc_disk() reference instead of calling del_gendisk().
|
|
+	 */
|
|
+	if (gd)
|
|
+		put_disk(gd);
|
|
+	if (rq)
|
|
+		blk_cleanup_queue(rq);
|
|
+
|
|
+ out:
|
|
+	BTINFO("creation of %u:%u: %d\n", blktap_device_major, tap->minor, err);
|
|
+	return err;
|
|
+}
|
|
+
|
|
+int __init
|
|
+blktap_device_init(int *maj)
|
|
+{
|
|
+ int major;
|
|
+
|
|
+ /* Dynamically allocate a major for this device */
|
|
+ major = register_blkdev(0, "tapdev");
|
|
+ if (major < 0) {
|
|
+ BTERR("Couldn't register blktap device\n");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ blktap_device_major = *maj = major;
|
|
+ BTINFO("blktap device major %d\n", major);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_device_free(void)
|
|
+{
|
|
+ if (blktap_device_major)
|
|
+ if (unregister_blkdev(blktap_device_major, "tapdev"))
|
|
+ BTERR("blktap device unregister failed\n");
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/request.c 2010-01-04 11:56:34.000000000 +0100
|
|
@@ -0,0 +1,296 @@
|
|
+#include <linux/spinlock.h>
|
|
+#include <xen/balloon.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+#define MAX_BUCKETS 8
|
|
+#define BUCKET_SIZE MAX_PENDING_REQS
|
|
+
|
|
+#define BLKTAP_POOL_CLOSING 1
|
|
+
|
|
+struct blktap_request_bucket;
|
|
+
|
|
+struct blktap_request_handle {
|
|
+ int slot;
|
|
+ uint8_t inuse;
|
|
+ struct blktap_request request;
|
|
+ struct blktap_request_bucket *bucket;
|
|
+};
|
|
+
|
|
+struct blktap_request_bucket {
|
|
+ atomic_t reqs_in_use;
|
|
+ struct blktap_request_handle handles[BUCKET_SIZE];
|
|
+ struct page **foreign_pages;
|
|
+};
|
|
+
|
|
+struct blktap_request_pool {
|
|
+ spinlock_t lock;
|
|
+ uint8_t status;
|
|
+ struct list_head free_list;
|
|
+ atomic_t reqs_in_use;
|
|
+ wait_queue_head_t wait_queue;
|
|
+ struct blktap_request_bucket *buckets[MAX_BUCKETS];
|
|
+};
|
|
+
|
|
+static struct blktap_request_pool pool;
|
|
+
|
|
+static inline struct blktap_request_handle *
|
|
+blktap_request_to_handle(struct blktap_request *req)
|
|
+{
|
|
+ return container_of(req, struct blktap_request_handle, request);
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_request_pool_init_request(struct blktap_request *request)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ request->usr_idx = -1;
|
|
+ request->nr_pages = 0;
|
|
+ request->status = BLKTAP_REQUEST_FREE;
|
|
+ INIT_LIST_HEAD(&request->free_list);
|
|
+ for (i = 0; i < ARRAY_SIZE(request->handles); i++) {
|
|
+ request->handles[i].user = INVALID_GRANT_HANDLE;
|
|
+ request->handles[i].kernel = INVALID_GRANT_HANDLE;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_request_pool_allocate_bucket(void)
|
|
+{
|
|
+ int i, idx;
|
|
+ unsigned long flags;
|
|
+ struct blktap_request *request;
|
|
+ struct blktap_request_handle *handle;
|
|
+ struct blktap_request_bucket *bucket;
|
|
+
|
|
+ bucket = kzalloc(sizeof(struct blktap_request_bucket), GFP_KERNEL);
|
|
+ if (!bucket)
|
|
+ goto fail;
|
|
+
|
|
+ bucket->foreign_pages = alloc_empty_pages_and_pagevec(MMAP_PAGES);
|
|
+ if (!bucket->foreign_pages)
|
|
+ goto fail;
|
|
+
|
|
+ spin_lock_irqsave(&pool.lock, flags);
|
|
+
|
|
+ idx = -1;
|
|
+ for (i = 0; i < MAX_BUCKETS; i++) {
|
|
+ if (!pool.buckets[i]) {
|
|
+ idx = i;
|
|
+ pool.buckets[idx] = bucket;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (idx == -1) {
|
|
+ spin_unlock_irqrestore(&pool.lock, flags);
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < BUCKET_SIZE; i++) {
|
|
+ handle = bucket->handles + i;
|
|
+ request = &handle->request;
|
|
+
|
|
+ handle->slot = i;
|
|
+ handle->inuse = 0;
|
|
+ handle->bucket = bucket;
|
|
+
|
|
+ blktap_request_pool_init_request(request);
|
|
+ list_add_tail(&request->free_list, &pool.free_list);
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&pool.lock, flags);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ if (bucket && bucket->foreign_pages)
|
|
+ free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
|
|
+ kfree(bucket);
|
|
+ return -ENOMEM;
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_request_pool_free_bucket(struct blktap_request_bucket *bucket)
|
|
+{
|
|
+ if (!bucket)
|
|
+ return;
|
|
+
|
|
+ BTDBG("freeing bucket %p\n", bucket);
|
|
+
|
|
+ free_empty_pages_and_pagevec(bucket->foreign_pages, MMAP_PAGES);
|
|
+ kfree(bucket);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Return the foreign page backing segment @seg of @req.
|
|
+ *
|
|
+ * The page is looked up in the foreign_pages array of the bucket that owns
|
|
+ * the request, at index slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg.
|
|
+ * Neither @req nor @seg is validated here.
|
|
+ */
|
|
+struct page *
|
|
|
|
+request_to_page(struct blktap_request *req, int seg)
|
|
|
|
+{
|
|
|
|
+ struct blktap_request_handle *handle = blktap_request_to_handle(req);
|
|
|
|
+ int idx = handle->slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
|
|
|
|
+ return handle->bucket->foreign_pages[idx];
|
|
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Release one idle request bucket back to the system.
|
|
+ *
|
|
+ * Bucket 0 is never released.  Returns 0 if a bucket was freed, or -EAGAIN
|
|
+ * if every remaining bucket still has requests in use.
|
|
+ */
|
|
+int
|
|
+blktap_request_pool_shrink(void)
|
|
+{
|
|
+	int i, j, err;
|
|
+	unsigned long flags;
|
|
+	struct blktap_request_bucket *bucket;
|
|
+
|
|
+	err = -EAGAIN;
|
|
+
|
|
+	spin_lock_irqsave(&pool.lock, flags);
|
|
+
|
|
+	/* always keep at least one bucket */
|
|
+	for (i = 1; i < MAX_BUCKETS; i++) {
|
|
+		bucket = pool.buckets[i];
|
|
+		if (!bucket)
|
|
+			continue;
|
|
+
|
|
+		if (atomic_read(&bucket->reqs_in_use))
|
|
+			continue;
|
|
+
|
|
+		/*
|
|
+		 * With no request in use, every request embedded in this
|
|
+		 * bucket is linked on pool.free_list.  Unlink them before
|
|
+		 * the bucket memory goes away, otherwise the free list
|
|
+		 * keeps pointers into freed memory.
|
|
+		 */
|
|
+		for (j = 0; j < BUCKET_SIZE; j++)
|
|
+			list_del(&bucket->handles[j].request.free_list);
|
|
+
|
|
+		blktap_request_pool_free_bucket(bucket);
|
|
+		pool.buckets[i] = NULL;
|
|
+		err = 0;
|
|
+		break;
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&pool.lock, flags);
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_request_pool_grow(void)
|
|
+{
|
|
+ return blktap_request_pool_allocate_bucket();
|
|
+}
|
|
+
|
|
+struct blktap_request *
|
|
+blktap_request_allocate(struct blktap *tap)
|
|
+{
|
|
+ int i;
|
|
+ uint16_t usr_idx;
|
|
+ unsigned long flags;
|
|
+ struct blktap_request *request;
|
|
+
|
|
+ usr_idx = -1;
|
|
+ request = NULL;
|
|
+
|
|
+ spin_lock_irqsave(&pool.lock, flags);
|
|
+
|
|
+ if (pool.status == BLKTAP_POOL_CLOSING)
|
|
+ goto out;
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(tap->pending_requests); i++)
|
|
+ if (!tap->pending_requests[i]) {
|
|
+ usr_idx = i;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (usr_idx == (uint16_t)-1)
|
|
+ goto out;
|
|
+
|
|
+ if (!list_empty(&pool.free_list)) {
|
|
+ request = list_entry(pool.free_list.next,
|
|
+ struct blktap_request, free_list);
|
|
+ list_del(&request->free_list);
|
|
+ }
|
|
+
|
|
+ if (request) {
|
|
+ struct blktap_request_handle *handle;
|
|
+
|
|
+ atomic_inc(&pool.reqs_in_use);
|
|
+
|
|
+ handle = blktap_request_to_handle(request);
|
|
+ atomic_inc(&handle->bucket->reqs_in_use);
|
|
+ handle->inuse = 1;
|
|
+
|
|
+ request->usr_idx = usr_idx;
|
|
+
|
|
+ tap->pending_requests[usr_idx] = request;
|
|
+ tap->pending_cnt++;
|
|
+ }
|
|
+
|
|
+out:
|
|
+ spin_unlock_irqrestore(&pool.lock, flags);
|
|
+ return request;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Return @request to the pool and clear its slot in tap->pending_requests.
|
|
+ *
|
|
+ * Wakes tap->wq when @tap has no pending requests left, and wakes
|
|
+ * pool.wait_queue when the pool as a whole has no requests in use.
|
|
+ */
|
|
+void
|
|
+blktap_request_free(struct blktap *tap, struct blktap_request *request)
|
|
+{
|
|
+	int free, idle;
|
|
+	unsigned long flags;
|
|
+	struct blktap_request_handle *handle;
|
|
+
|
|
+	BUG_ON(request->usr_idx >= ARRAY_SIZE(tap->pending_requests));
|
|
+	handle = blktap_request_to_handle(request);
|
|
+
|
|
+	spin_lock_irqsave(&pool.lock, flags);
|
|
+
|
|
+	handle->inuse = 0;
|
|
+	tap->pending_requests[request->usr_idx] = NULL;
|
|
+	blktap_request_pool_init_request(request);
|
|
+	list_add(&request->free_list, &pool.free_list);
|
|
+	atomic_dec(&handle->bucket->reqs_in_use);
|
|
+	free = atomic_dec_and_test(&pool.reqs_in_use);
|
|
+	/*
|
|
+	 * pending_cnt is incremented under pool.lock by
|
|
+	 * blktap_request_allocate(), so decrement it under the same lock.
|
|
+	 */
|
|
+	idle = (--tap->pending_cnt == 0);
|
|
+
|
|
+	spin_unlock_irqrestore(&pool.lock, flags);
|
|
+
|
|
+	if (idle)
|
|
+		wake_up_interruptible(&tap->wq);
|
|
+
|
|
+	if (free)
|
|
+		wake_up(&pool.wait_queue);
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_request_pool_free(void)
|
|
+{
|
|
+ int i;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&pool.lock, flags);
|
|
+
|
|
+ pool.status = BLKTAP_POOL_CLOSING;
|
|
+ while (atomic_read(&pool.reqs_in_use)) {
|
|
+ spin_unlock_irqrestore(&pool.lock, flags);
|
|
+ wait_event(pool.wait_queue, !atomic_read(&pool.reqs_in_use));
|
|
+ spin_lock_irqsave(&pool.lock, flags);
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < MAX_BUCKETS; i++) {
|
|
+ blktap_request_pool_free_bucket(pool.buckets[i]);
|
|
+ pool.buckets[i] = NULL;
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&pool.lock, flags);
|
|
+}
|
|
+
|
|
+int __init
|
|
+blktap_request_pool_init(void)
|
|
+{
|
|
+ int i, err;
|
|
+
|
|
+ memset(&pool, 0, sizeof(pool));
|
|
+
|
|
+ spin_lock_init(&pool.lock);
|
|
+ INIT_LIST_HEAD(&pool.free_list);
|
|
+ atomic_set(&pool.reqs_in_use, 0);
|
|
+ init_waitqueue_head(&pool.wait_queue);
|
|
+
|
|
+ for (i = 0; i < 2; i++) {
|
|
+ err = blktap_request_pool_allocate_bucket();
|
|
+ if (err)
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+fail:
|
|
+ blktap_request_pool_free();
|
|
+ return err;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/ring.c 2010-08-31 09:24:21.000000000 +0200
|
|
@@ -0,0 +1,610 @@
|
|
+#include <linux/module.h>
|
|
+#include <linux/signal.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+static int blktap_ring_major;
|
|
+
|
|
+static inline struct blktap *
|
|
+vma_to_blktap(struct vm_area_struct *vma)
|
|
+{
|
|
+ struct vm_foreign_map *m = vma->vm_private_data;
|
|
+ struct blktap_ring *r = container_of(m, struct blktap_ring, foreign_map);
|
|
+ return container_of(r, struct blktap, ring);
|
|
+}
|
|
+
|
|
+ /*
|
|
+ * BLKTAP - immediately before the mmap area,
|
|
+ * we have a bunch of pages reserved for shared memory rings.
|
|
+ */
|
|
+#define RING_PAGES 1
|
|
+
|
|
+/*
|
|
+ * Consume every response posted on the ring of @tap and complete the
|
|
+ * matching pending request through blktap_device_finish_request().
|
|
+ *
|
|
+ * Responses whose id does not name a pending request are logged and
|
|
+ * skipped.  Deferred work is run afterwards.  Always returns 0.
|
|
+ */
|
|
+static int
|
|
+blktap_read_ring(struct blktap *tap)
|
|
+{
|
|
+	/* This is called to read responses from the ring. */
|
|
+	int usr_idx;
|
|
+	RING_IDX rc, rp;
|
|
+	blkif_response_t res;
|
|
+	struct blktap_ring *ring;
|
|
+	struct blktap_request *request;
|
|
+
|
|
+	down_read(&tap->tap_sem);
|
|
+
|
|
+	ring = &tap->ring;
|
|
+	if (!ring->vma) {
|
|
+		up_read(&tap->tap_sem);
|
|
+		return 0;
|
|
+	}
|
|
+
|
|
+	/* for each outstanding message on the ring */
|
|
+	rp = ring->ring.sring->rsp_prod;
|
|
+	rmb();
|
|
+
|
|
+	for (rc = ring->ring.rsp_cons; rc != rp; rc++) {
|
|
+		memcpy(&res, RING_GET_RESPONSE(&ring->ring, rc), sizeof(res));
|
|
+		mb(); /* rsp_cons read by RING_FULL() in do_block_io_op(). */
|
|
+		++ring->ring.rsp_cons;
|
|
+
|
|
+		/*
|
|
+		 * The id is read from the ring shared with user space.
|
|
+		 * Range-check the raw value: once narrowed to int it may be
|
|
+		 * negative or truncated and would slip past a signed
|
|
+		 * upper-bound test, indexing pending_requests[] out of range.
|
|
+		 */
|
|
+		usr_idx = (int)res.id;
|
|
+		if ((uint64_t)res.id >= MAX_PENDING_REQS ||
|
|
+		    !tap->pending_requests[usr_idx]) {
|
|
+			BTWARN("Request %d/%d invalid [%x], tapdisk %d%p\n",
|
|
+			       rc, rp, usr_idx, tap->pid, ring->vma);
|
|
+			continue;
|
|
+		}
|
|
+
|
|
+		request = tap->pending_requests[usr_idx];
|
|
+		BTDBG("request %p response #%d id %x\n", request, rc, usr_idx);
|
|
+		blktap_device_finish_request(tap, &res, request);
|
|
+	}
|
|
+
|
|
+	up_read(&tap->tap_sem);
|
|
+
|
|
+	blktap_run_deferred();
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+static struct page *
|
|
+blktap_ring_nopage(struct vm_area_struct *vma,
|
|
+ unsigned long address, int *type)
|
|
+{
|
|
+ /*
|
|
+ * if the page has not been mapped in by the driver then return
|
|
+ * NOPAGE_SIGBUS to the domain.
|
|
+ */
|
|
+
|
|
+ return NOPAGE_SIGBUS;
|
|
+}
|
|
+
|
|
+static pte_t
|
|
+blktap_ring_clear_pte(struct vm_area_struct *vma,
|
|
+ unsigned long uvaddr,
|
|
+ pte_t *ptep, int is_fullmm)
|
|
+{
|
|
+ pte_t copy;
|
|
+ struct blktap *tap;
|
|
+ unsigned long kvaddr;
|
|
+ struct page **map, *page;
|
|
+ struct blktap_ring *ring;
|
|
+ struct blktap_request *request;
|
|
+ struct grant_handle_pair *khandle;
|
|
+ struct gnttab_unmap_grant_ref unmap[2];
|
|
+ int offset, seg, usr_idx, count = 0;
|
|
+
|
|
+ tap = vma_to_blktap(vma);
|
|
+ ring = &tap->ring;
|
|
+ map = ring->foreign_map.map;
|
|
+ BUG_ON(!map); /* TODO Should this be changed to if statement? */
|
|
+
|
|
+ /*
|
|
+ * Zap entry if the address is before the start of the grant
|
|
+ * mapped region.
|
|
+ */
|
|
+ if (uvaddr < ring->user_vstart)
|
|
+ return ptep_get_and_clear_full(vma->vm_mm, uvaddr,
|
|
+ ptep, is_fullmm);
|
|
+
|
|
+ offset = (int)((uvaddr - ring->user_vstart) >> PAGE_SHIFT);
|
|
+ usr_idx = offset / BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
|
+ seg = offset % BLKIF_MAX_SEGMENTS_PER_REQUEST;
|
|
+
|
|
+ offset = (int)((uvaddr - vma->vm_start) >> PAGE_SHIFT);
|
|
+ page = map[offset];
|
|
+ if (page && PageBlkback(page)) {
|
|
+ ClearPageBlkback(page);
|
|
+ set_page_private(page, 0);
|
|
+ }
|
|
+ map[offset] = NULL;
|
|
+
|
|
+ request = tap->pending_requests[usr_idx];
|
|
+ kvaddr = request_to_kaddr(request, seg);
|
|
+ khandle = request->handles + seg;
|
|
+
|
|
+ if (khandle->kernel != INVALID_GRANT_HANDLE) {
|
|
+ gnttab_set_unmap_op(&unmap[count], kvaddr,
|
|
+ GNTMAP_host_map, khandle->kernel);
|
|
+ count++;
|
|
+
|
|
+ set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
|
|
+ INVALID_P2M_ENTRY);
|
|
+ }
|
|
+
|
|
+
|
|
+ if (khandle->user != INVALID_GRANT_HANDLE) {
|
|
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
|
+
|
|
+ copy = *ptep;
|
|
+ gnttab_set_unmap_op(&unmap[count], virt_to_machine(ptep),
|
|
+ GNTMAP_host_map
|
|
+ | GNTMAP_application_map
|
|
+ | GNTMAP_contains_pte,
|
|
+ khandle->user);
|
|
+ count++;
|
|
+ } else
|
|
+ copy = ptep_get_and_clear_full(vma->vm_mm, uvaddr, ptep,
|
|
+ is_fullmm);
|
|
+
|
|
+ if (count)
|
|
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
|
|
+ unmap, count))
|
|
+ BUG();
|
|
+
|
|
+ khandle->kernel = INVALID_GRANT_HANDLE;
|
|
+ khandle->user = INVALID_GRANT_HANDLE;
|
|
+
|
|
+ return copy;
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_ring_vm_unmap(struct vm_area_struct *vma)
|
|
+{
|
|
+ struct blktap *tap = vma_to_blktap(vma);
|
|
+
|
|
+ down_write(&tap->tap_sem);
|
|
+ clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
|
|
+ clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
|
|
+ clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
|
|
+ up_write(&tap->tap_sem);
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_ring_vm_close(struct vm_area_struct *vma)
|
|
+{
|
|
+ struct blktap *tap = vma_to_blktap(vma);
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+
|
|
+ blktap_ring_vm_unmap(vma); /* fail future requests */
|
|
+ blktap_device_fail_pending_requests(tap); /* fail pending requests */
|
|
+ blktap_device_restart(tap); /* fail deferred requests */
|
|
+
|
|
+ down_write(&tap->tap_sem);
|
|
+
|
|
+ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
|
|
+
|
|
+ kfree(ring->foreign_map.map);
|
|
+ ring->foreign_map.map = NULL;
|
|
+
|
|
+ /* Free the ring page. */
|
|
+ ClearPageReserved(virt_to_page(ring->ring.sring));
|
|
+ free_page((unsigned long)ring->ring.sring);
|
|
+
|
|
+ BTINFO("unmapping ring %d\n", tap->minor);
|
|
+ ring->ring.sring = NULL;
|
|
+ ring->vma = NULL;
|
|
+
|
|
+ up_write(&tap->tap_sem);
|
|
+
|
|
+ wake_up(&tap->wq);
|
|
+}
|
|
+
|
|
+static struct vm_operations_struct blktap_ring_vm_operations = {
|
|
+ .close = blktap_ring_vm_close,
|
|
+ .unmap = blktap_ring_vm_unmap,
|
|
+ .nopage = blktap_ring_nopage,
|
|
+ .zap_pte = blktap_ring_clear_pte,
|
|
+};
|
|
+
|
|
+static int
|
|
+blktap_ring_open(struct inode *inode, struct file *filp)
|
|
+{
|
|
+ int idx;
|
|
+ struct blktap *tap;
|
|
+
|
|
+ idx = iminor(inode);
|
|
+ if (idx < 0 || idx >= MAX_BLKTAP_DEVICE || blktaps[idx] == NULL) {
|
|
+ BTERR("unable to open device blktap%d\n", idx);
|
|
+ return -ENODEV;
|
|
+ }
|
|
+
|
|
+ tap = blktaps[idx];
|
|
+
|
|
+ BTINFO("opening device blktap%d\n", idx);
|
|
+
|
|
+ if (!test_bit(BLKTAP_CONTROL, &tap->dev_inuse))
|
|
+ return -ENODEV;
|
|
+
|
|
+ /* Only one process can access ring at a time */
|
|
+ if (test_and_set_bit(BLKTAP_RING_FD, &tap->dev_inuse))
|
|
+ return -EBUSY;
|
|
+
|
|
+ filp->private_data = tap;
|
|
+ BTINFO("opened device %d\n", tap->minor);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+blktap_ring_release(struct inode *inode, struct file *filp)
|
|
+{
|
|
+ struct blktap *tap = filp->private_data;
|
|
+
|
|
+ BTINFO("freeing device %d\n", tap->minor);
|
|
+ clear_bit(BLKTAP_RING_FD, &tap->dev_inuse);
|
|
+ filp->private_data = NULL;
|
|
+ wake_up(&tap->wq);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Note on mmap:
|
|
+ * We need to map pages to user space in a way that will allow the block
|
|
+ * subsystem set up direct IO to them. This couldn't be done before, because
|
|
+ * there isn't really a sane way to translate a user virtual address down to a
|
|
+ * physical address when the page belongs to another domain.
|
|
+ *
|
|
+ * My first approach was to map the page in to kernel memory, add an entry
|
|
+ * for it in the physical frame list (using alloc_lomem_region as in blkback)
|
|
+ * and then attempt to map that page up to user space. This is disallowed
|
|
+ * by xen though, which realizes that we don't really own the machine frame
|
|
+ * underlying the physical page.
|
|
+ *
|
|
+ * The new approach is to provide explicit support for this in xen linux.
|
|
+ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
|
|
+ * mapped from other vms. vma->vm_private_data is set up as a mapping
|
|
+ * from pages to actual page structs. There is a new clause in get_user_pages
|
|
+ * that does the right thing for this sort of mapping.
|
|
+ */
|
|
+/*
|
|
+ * mmap handler for the ring device.
|
|
+ *
|
|
+ * The mapping must cover exactly MMAP_PAGES + RING_PAGES pages.  The first
|
|
+ * page is a freshly allocated shared ring; the rest of the VMA is marked
|
|
+ * VM_FOREIGN and tracked through ring->foreign_map.
|
|
+ *
|
|
+ * Returns 0 on success, -EAGAIN if the size is wrong, and -ENOMEM if the
|
|
+ * ring is already mapped or any allocation or mapping step fails.  On every
|
|
+ * failure after the BLKTAP_RING_VMA bit was claimed, the bit is released so
|
|
+ * that a later mmap can succeed.
|
|
+ */
|
|
+static int
|
|
+blktap_ring_mmap(struct file *filp, struct vm_area_struct *vma)
|
|
+{
|
|
+	int size, err;
|
|
+	struct page **map;
|
|
+	struct blktap *tap;
|
|
+	blkif_sring_t *sring;
|
|
+	struct blktap_ring *ring;
|
|
+
|
|
+	tap   = filp->private_data;
|
|
+	map   = NULL;
|
|
+	sring = NULL;
|
|
+
|
|
+	if (!tap || test_and_set_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
|
|
+		return -ENOMEM;
|
|
+
|
|
+	/* Only derive the ring pointer once tap is known to be valid. */
|
|
+	ring = &tap->ring;
|
|
+
|
|
+	size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
|
|
+	if (size != (MMAP_PAGES + RING_PAGES)) {
|
|
+		BTERR("you _must_ map exactly %lu pages!\n",
|
|
+		      MMAP_PAGES + RING_PAGES);
|
|
+		err = -EAGAIN;
|
|
+		goto fail_bit;
|
|
+	}
|
|
+
|
|
+	/* Allocate the fe ring. */
|
|
+	sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
|
|
+	if (!sring) {
|
|
+		BTERR("Couldn't alloc sring.\n");
|
|
+		goto fail_mem;
|
|
+	}
|
|
+
|
|
+	map = kzalloc(size * sizeof(struct page *), GFP_KERNEL);
|
|
+	if (!map) {
|
|
+		BTERR("Couldn't alloc VM_FOREIGN map.\n");
|
|
+		goto fail_mem;
|
|
+	}
|
|
+
|
|
+	SetPageReserved(virt_to_page(sring));
|
|
+
|
|
+	SHARED_RING_INIT(sring);
|
|
+	FRONT_RING_INIT(&ring->ring, sring, PAGE_SIZE);
|
|
+
|
|
+	ring->ring_vstart = vma->vm_start;
|
|
+	ring->user_vstart = ring->ring_vstart + (RING_PAGES << PAGE_SHIFT);
|
|
+
|
|
+	/* Map the ring pages to the start of the region and reserve it. */
|
|
+	if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
+		err = vm_insert_page(vma, vma->vm_start,
|
|
+				     virt_to_page(ring->ring.sring));
|
|
+	else
|
|
+		err = remap_pfn_range(vma, vma->vm_start,
|
|
+				      __pa(ring->ring.sring) >> PAGE_SHIFT,
|
|
+				      PAGE_SIZE, vma->vm_page_prot);
|
|
+	if (err) {
|
|
+		BTERR("Mapping user ring failed: %d\n", err);
|
|
+		goto fail;
|
|
+	}
|
|
+
|
|
+	/* Mark this VM as containing foreign pages, and set up mappings. */
|
|
+	ring->foreign_map.map = map;
|
|
+	vma->vm_private_data = &ring->foreign_map;
|
|
+	vma->vm_flags |= VM_FOREIGN;
|
|
+	vma->vm_flags |= VM_DONTCOPY;
|
|
+	vma->vm_flags |= VM_RESERVED;
|
|
+	vma->vm_ops = &blktap_ring_vm_operations;
|
|
+
|
|
+#ifdef CONFIG_X86
|
|
+	vma->vm_mm->context.has_foreign_mappings = 1;
|
|
+#endif
|
|
+
|
|
+	tap->pid = current->pid;
|
|
+	BTINFO("blktap: mapping pid is %d\n", tap->pid);
|
|
+
|
|
+	ring->vma = vma;
|
|
+	return 0;
|
|
+
|
|
+ fail:
|
|
+	/* Clear any active mappings. */
|
|
+	zap_page_range(vma, vma->vm_start,
|
|
+		       vma->vm_end - vma->vm_start, NULL);
|
|
+	ClearPageReserved(virt_to_page(sring));
|
|
+	/* FRONT_RING_INIT published sring; do not leave it dangling. */
|
|
+	ring->ring.sring = NULL;
|
|
+ fail_mem:
|
|
+	free_page((unsigned long)sring);
|
|
+	kfree(map);
|
|
+	err = -ENOMEM;
|
|
+ fail_bit:
|
|
+	/* Give the ring back so that a later mmap attempt can succeed. */
|
|
+	clear_bit(BLKTAP_RING_VMA, &tap->dev_inuse);
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+static inline void
|
|
+blktap_ring_set_message(struct blktap *tap, int msg)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+
|
|
+ down_read(&tap->tap_sem);
|
|
+ if (ring->ring.sring)
|
|
+ ring->ring.sring->private.tapif_user.msg = msg;
|
|
+ up_read(&tap->tap_sem);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * ioctl() handler for the blktap ring device.
|
|
+ *
|
|
+ *   BLKTAP2_IOCTL_KICK_FE        process pending ring messages
|
|
+ *   BLKTAP2_IOCTL_CREATE_DEVICE  copy in and validate params, create device
|
|
+ *   BLKTAP2_IOCTL_SET_PARAMS     copy in and validate params (paused only)
|
|
+ *   BLKTAP2_IOCTL_PAUSE          acknowledge a previously requested pause
|
|
+ *   BLKTAP2_IOCTL_REOPEN         copy the current name out to arg (paused only)
|
|
+ *   BLKTAP2_IOCTL_RESUME         record arg as the resume result (paused only)
|
|
+ *
|
|
+ * PAUSE, REOPEN and RESUME clear the ring message and wake up tap->wq.
|
|
+ * Returns the command's result, -EINVAL for a missing argument or wrong
|
|
+ * state, -EFAULT when a user copy fails and -ENOIOCTLCMD for any other cmd.
|
|
+ */
|
|
+static int
|
|
+blktap_ring_ioctl(struct inode *inode, struct file *filp,
|
|
+		  unsigned int cmd, unsigned long arg)
|
|
+{
|
|
+	struct blktap_params params;
|
|
+	struct blktap *tap = filp->private_data;
|
|
+
|
|
+	BTDBG("%d: cmd: %u, arg: %lu\n", tap->minor, cmd, arg);
|
|
+
|
|
+	switch(cmd) {
|
|
+	case BLKTAP2_IOCTL_KICK_FE:
|
|
+		/* There are fe messages to process. */
|
|
+		return blktap_read_ring(tap);
|
|
+
|
|
+	case BLKTAP2_IOCTL_CREATE_DEVICE:
|
|
+		if (!arg)
|
|
+			return -EINVAL;
|
|
+
|
|
+		if (copy_from_user(&params, (struct blktap_params __user *)arg,
|
|
+				   sizeof(params))) {
|
|
+			BTERR("failed to get params\n");
|
|
+			return -EFAULT;
|
|
+		}
|
|
+
|
|
+		if (blktap_validate_params(tap, &params)) {
|
|
+			BTERR("invalid params\n");
|
|
+			return -EINVAL;
|
|
+		}
|
|
+
|
|
+		tap->params = params;
|
|
+		return blktap_device_create(tap);
|
|
+
|
|
+	case BLKTAP2_IOCTL_SET_PARAMS:
|
|
+		if (!arg)
|
|
+			return -EINVAL;
|
|
+
|
|
+		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+			return -EINVAL;
|
|
+
|
|
+		if (copy_from_user(&params, (struct blktap_params __user *)arg,
|
|
+				   sizeof(params))) {
|
|
+			BTERR("failed to get params\n");
|
|
+			return -EFAULT;
|
|
+		}
|
|
+
|
|
+		if (blktap_validate_params(tap, &params)) {
|
|
+			BTERR("invalid params\n");
|
|
+			return -EINVAL;
|
|
+		}
|
|
+
|
|
+		tap->params = params;
|
|
+		return 0;
|
|
+
|
|
+	case BLKTAP2_IOCTL_PAUSE:
|
|
+		if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
|
|
+			return -EINVAL;
|
|
+
|
|
+		set_bit(BLKTAP_PAUSED, &tap->dev_inuse);
|
|
+		clear_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse);
|
|
+
|
|
+		blktap_ring_set_message(tap, 0);
|
|
+		wake_up_interruptible(&tap->wq);
|
|
+
|
|
+		return 0;
|
|
+
|
|
+
|
|
+	case BLKTAP2_IOCTL_REOPEN:
|
|
+		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+			return -EINVAL;
|
|
+
|
|
+		if (!arg)
|
|
+			return -EINVAL;
|
|
+
|
|
+		/* The copy includes the terminating NUL of the name. */
|
|
+		if (copy_to_user((char __user *)arg,
|
|
+				 tap->params.name,
|
|
+				 strlen(tap->params.name) + 1))
|
|
+			return -EFAULT;
|
|
+
|
|
+		blktap_ring_set_message(tap, 0);
|
|
+		wake_up_interruptible(&tap->wq);
|
|
+
|
|
+		return 0;
|
|
+
|
|
+	case BLKTAP2_IOCTL_RESUME:
|
|
+		if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+			return -EINVAL;
|
|
+
|
|
+		/* A zero response means the resume succeeded: leave pause. */
|
|
+		tap->ring.response = (int)arg;
|
|
+		if (!tap->ring.response)
|
|
+			clear_bit(BLKTAP_PAUSED, &tap->dev_inuse);
|
|
+
|
|
+		blktap_ring_set_message(tap, 0);
|
|
+		wake_up_interruptible(&tap->wq);
|
|
+
|
|
+		return 0;
|
|
+	}
|
|
+
|
|
+	return -ENOIOCTLCMD;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * poll() handler for the blktap ring device.
|
|
+ *
|
|
+ * Registers the caller on ring->poll_wait and reports POLLIN | POLLRDNORM
|
|
+ * when a ring message is pending or privately produced requests have not
|
|
+ * been published yet; in that case the requests are pushed to the shared
|
|
+ * ring first. Returns 0 when there is nothing to read, including when no
|
|
+ * shared ring is attached (sring is NULL until the ring has been mmap()ed).
|
|
+ */
|
|
+static unsigned int blktap_ring_poll(struct file *filp, poll_table *wait)
|
|
+{
|
|
+	struct blktap *tap = filp->private_data;
|
|
+	struct blktap_ring *ring = &tap->ring;
|
|
+	blkif_sring_t *sring;
|
|
+
|
|
+	poll_wait(filp, &ring->poll_wait, wait);
|
|
+
|
|
+	/* Without a shared ring there is nothing to inspect or push. */
|
|
+	sring = ring->ring.sring;
|
|
+	if (!sring)
|
|
+		return 0;
|
|
+
|
|
+	if (sring->private.tapif_user.msg ||
|
|
+	    ring->ring.req_prod_pvt != sring->req_prod) {
|
|
+		RING_PUSH_REQUESTS(&ring->ring);
|
|
+		return POLLIN | POLLRDNORM;
|
|
+	}
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+static struct file_operations blktap_ring_file_operations = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .open = blktap_ring_open,
|
|
+ .release = blktap_ring_release,
|
|
+ .ioctl = blktap_ring_ioctl,
|
|
+ .mmap = blktap_ring_mmap,
|
|
+ .poll = blktap_ring_poll,
|
|
+};
|
|
+
|
|
+void
|
|
+blktap_ring_kick_user(struct blktap *tap)
|
|
+{
|
|
+ wake_up_interruptible(&tap->ring.poll_wait);
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_resume(struct blktap *tap)
|
|
+{
|
|
+ int err;
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+
|
|
+ if (!blktap_active(tap))
|
|
+ return -ENODEV;
|
|
+
|
|
+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+ return -EINVAL;
|
|
+
|
|
+ /* set shared flag for resume */
|
|
+ ring->response = 0;
|
|
+
|
|
+ blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_RESUME);
|
|
+ blktap_ring_kick_user(tap);
|
|
+
|
|
+ wait_event_interruptible(tap->wq, ring->response ||
|
|
+ !test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
|
|
+
|
|
+ err = ring->response;
|
|
+ ring->response = 0;
|
|
+
|
|
+ BTDBG("err: %d\n", err);
|
|
+
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+ return -EAGAIN;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_pause(struct blktap *tap)
|
|
+{
|
|
+ if (!blktap_active(tap))
|
|
+ return -ENODEV;
|
|
+
|
|
+ if (!test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse))
|
|
+ return -EINVAL;
|
|
+
|
|
+ BTDBG("draining queue\n");
|
|
+ wait_event_interruptible(tap->wq, !tap->pending_cnt);
|
|
+ if (tap->pending_cnt)
|
|
+ return -EAGAIN;
|
|
+
|
|
+ blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_PAUSE);
|
|
+ blktap_ring_kick_user(tap);
|
|
+
|
|
+ BTDBG("waiting for tapdisk response\n");
|
|
+ wait_event_interruptible(tap->wq, test_bit(BLKTAP_PAUSED, &tap->dev_inuse));
|
|
+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse))
|
|
+ return -EAGAIN;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_destroy(struct blktap *tap)
|
|
+{
|
|
+ if (!test_bit(BLKTAP_RING_FD, &tap->dev_inuse) &&
|
|
+ !test_bit(BLKTAP_RING_VMA, &tap->dev_inuse))
|
|
+ return 0;
|
|
+
|
|
+ BTDBG("sending tapdisk close message\n");
|
|
+ blktap_ring_set_message(tap, BLKTAP2_RING_MESSAGE_CLOSE);
|
|
+ blktap_ring_kick_user(tap);
|
|
+
|
|
+ return -EAGAIN;
|
|
+}
|
|
+
|
|
+static void
|
|
+blktap_ring_initialize(struct blktap_ring *ring, int minor)
|
|
+{
|
|
+ memset(ring, 0, sizeof(*ring));
|
|
+ init_waitqueue_head(&ring->poll_wait);
|
|
+ ring->devno = MKDEV(blktap_ring_major, minor);
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_create(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring = &tap->ring;
|
|
+ blktap_ring_initialize(ring, tap->minor);
|
|
+ return blktap_sysfs_create(tap);
|
|
+}
|
|
+
|
|
+int __init
|
|
+blktap_ring_init(int *major)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ err = register_chrdev(0, "blktap2", &blktap_ring_file_operations);
|
|
+ if (err < 0) {
|
|
+ BTERR("error registering blktap ring device: %d\n", err);
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ blktap_ring_major = *major = err;
|
|
+ BTINFO("blktap ring major: %d\n", blktap_ring_major);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_ring_free(void)
|
|
+{
|
|
+ if (blktap_ring_major)
|
|
+ unregister_chrdev(blktap_ring_major, "blktap2");
|
|
+
|
|
+ return 0;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/sysfs.c 2011-03-02 12:00:16.000000000 +0100
|
|
@@ -0,0 +1,425 @@
|
|
+#include <linux/types.h>
|
|
+#include <linux/device.h>
|
|
+#include <linux/module.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+int blktap_debug_level = 1;
|
|
+
|
|
+static struct class *class;
|
|
+static DECLARE_WAIT_QUEUE_HEAD(sysfs_wq);
|
|
+
|
|
+static inline void
|
|
+blktap_sysfs_get(struct blktap *tap)
|
|
+{
|
|
+ atomic_inc(&tap->ring.sysfs_refcnt);
|
|
+}
|
|
+
|
|
+static inline void
|
|
+blktap_sysfs_put(struct blktap *tap)
|
|
+{
|
|
+ if (atomic_dec_and_test(&tap->ring.sysfs_refcnt))
|
|
+ wake_up(&sysfs_wq);
|
|
+}
|
|
+
|
|
+static inline void
|
|
+blktap_sysfs_enter(struct blktap *tap)
|
|
+{
|
|
+ blktap_sysfs_get(tap); /* pin sysfs device */
|
|
+ mutex_lock(&tap->ring.sysfs_mutex); /* serialize sysfs operations */
|
|
+}
|
|
+
|
|
+static inline void
|
|
+blktap_sysfs_exit(struct blktap *tap)
|
|
+{
|
|
+ mutex_unlock(&tap->ring.sysfs_mutex);
|
|
+ blktap_sysfs_put(tap);
|
|
+}
|
|
+
|
|
+static ssize_t blktap_sysfs_pause_device(struct class_device *, const char *, size_t);
|
|
+static CLASS_DEVICE_ATTR(pause, S_IWUSR, NULL, blktap_sysfs_pause_device);
|
|
+static ssize_t blktap_sysfs_resume_device(struct class_device *, const char *, size_t);
|
|
+static CLASS_DEVICE_ATTR(resume, S_IWUSR, NULL, blktap_sysfs_resume_device);
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_set_name(struct class_device *dev, const char *buf, size_t size)
|
|
+{
|
|
+ int err;
|
|
+ struct blktap *tap = (struct blktap *)dev->class_data;
|
|
+
|
|
+ blktap_sysfs_enter(tap);
|
|
+
|
|
+ if (!tap->ring.dev ||
|
|
+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
|
|
+ err = -ENODEV;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
|
|
+ err = -EPERM;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (size > BLKTAP2_MAX_MESSAGE_LEN) {
|
|
+ err = -ENAMETOOLONG;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (strnlen(buf, BLKTAP2_MAX_MESSAGE_LEN) >= BLKTAP2_MAX_MESSAGE_LEN) {
|
|
+ err = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ snprintf(tap->params.name, sizeof(tap->params.name) - 1, "%s", buf);
|
|
+ err = size;
|
|
+
|
|
+out:
|
|
+ blktap_sysfs_exit(tap);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_get_name(struct class_device *dev, char *buf)
|
|
+{
|
|
+ ssize_t size;
|
|
+ struct blktap *tap = (struct blktap *)dev->class_data;
|
|
+
|
|
+ blktap_sysfs_enter(tap);
|
|
+
|
|
+ if (!tap->ring.dev)
|
|
+ size = -ENODEV;
|
|
+ else if (tap->params.name[0])
|
|
+ size = sprintf(buf, "%s\n", tap->params.name);
|
|
+ else
|
|
+ size = sprintf(buf, "%d\n", tap->minor);
|
|
+
|
|
+ blktap_sysfs_exit(tap);
|
|
+
|
|
+ return size;
|
|
+}
|
|
+static CLASS_DEVICE_ATTR(name, S_IRUSR | S_IWUSR,
|
|
+ blktap_sysfs_get_name, blktap_sysfs_set_name);
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_remove_device(struct class_device *dev,
|
|
+ const char *buf, size_t size)
|
|
+{
|
|
+ int err;
|
|
+ struct blktap *tap = (struct blktap *)dev->class_data;
|
|
+
|
|
+ if (!tap->ring.dev)
|
|
+ return size;
|
|
+
|
|
+ if (test_and_set_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse))
|
|
+ return -EBUSY;
|
|
+
|
|
+ err = blktap_control_destroy_device(tap);
|
|
+
|
|
+ return (err ? : size);
|
|
+}
|
|
+static CLASS_DEVICE_ATTR(remove, S_IWUSR, NULL, blktap_sysfs_remove_device);
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_pause_device(struct class_device *dev,
|
|
+ const char *buf, size_t size)
|
|
+{
|
|
+ int err;
|
|
+ struct blktap *tap = (struct blktap *)dev->class_data;
|
|
+
|
|
+ blktap_sysfs_enter(tap);
|
|
+
|
|
+ BTDBG("pausing %u:%u: dev_inuse: %lu\n",
|
|
+ MAJOR(tap->ring.devno), MINOR(tap->ring.devno), tap->dev_inuse);
|
|
+
|
|
+ if (!tap->ring.dev ||
|
|
+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
|
|
+ err = -ENODEV;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (test_bit(BLKTAP_PAUSE_REQUESTED, &tap->dev_inuse)) {
|
|
+ err = -EBUSY;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
|
|
+ err = 0;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = blktap_device_pause(tap);
|
|
+ if (!err) {
|
|
+ class_device_remove_file(dev, &class_device_attr_pause);
|
|
+ class_device_create_file(dev, &class_device_attr_resume);
|
|
+ }
|
|
+
|
|
+out:
|
|
+ blktap_sysfs_exit(tap);
|
|
+
|
|
+ return (err ? err : size);
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_resume_device(struct class_device *dev,
|
|
+ const char *buf, size_t size)
|
|
+{
|
|
+ int err;
|
|
+ struct blktap *tap = (struct blktap *)dev->class_data;
|
|
+
|
|
+ blktap_sysfs_enter(tap);
|
|
+
|
|
+ if (!tap->ring.dev ||
|
|
+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
|
|
+ err = -ENODEV;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
|
|
+ err = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = blktap_device_resume(tap);
|
|
+ if (!err) {
|
|
+ class_device_remove_file(dev, &class_device_attr_resume);
|
|
+ class_device_create_file(dev, &class_device_attr_pause);
|
|
+ }
|
|
+
|
|
+out:
|
|
+ blktap_sysfs_exit(tap);
|
|
+
|
|
+ BTDBG("returning %zd\n", (err ? err : size));
|
|
+ return (err ? err : size);
|
|
+}
|
|
+
|
|
+#ifdef ENABLE_PASSTHROUGH
|
|
+static ssize_t
|
|
+blktap_sysfs_enable_passthrough(struct class_device *dev,
|
|
+ const char *buf, size_t size)
|
|
+{
|
|
+ int err;
|
|
+ unsigned major, minor;
|
|
+ struct blktap *tap = (struct blktap *)dev->class_data;
|
|
+
|
|
+ BTINFO("passthrough request enabled\n");
|
|
+
|
|
+ blktap_sysfs_enter(tap);
|
|
+
|
|
+ if (!tap->ring.dev ||
|
|
+ test_bit(BLKTAP_SHUTDOWN_REQUESTED, &tap->dev_inuse)) {
|
|
+ err = -ENODEV;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (!test_bit(BLKTAP_PAUSED, &tap->dev_inuse)) {
|
|
+ err = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
|
|
+ err = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = sscanf(buf, "%x:%x", &major, &minor);
|
|
+ if (err != 2) {
|
|
+ err = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = blktap_device_enable_passthrough(tap, major, minor);
|
|
+
|
|
+out:
|
|
+ blktap_sysfs_exit(tap);
|
|
+ BTDBG("returning %d\n", (err ? err : size));
|
|
+ return (err ? err : size);
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * sysfs "debug" attribute: dump the tap's state into buf.
|
|
+ *
|
|
+ * Prints the name, device number, reference count, dev_inuse flags,
|
|
+ * capacity, sector size and user count, followed by one line per pending
|
|
+ * request (read under tap->tap_sem). Prints "no device" when the sysfs
|
|
+ * device has already been detached.
|
|
+ *
|
|
+ * Returns the number of bytes written to buf, not counting the
|
|
+ * terminating NUL.
|
|
+ *
|
|
+ * NOTE(review): the output is built with unbounded sprintf(); confirm that
|
|
+ * MAX_PENDING_REQS request lines always fit the sysfs buffer.
|
|
+ */
|
|
+static ssize_t
|
|
+blktap_sysfs_debug_device(struct class_device *dev, char *buf)
|
|
+{
|
|
+	char *tmp;
|
|
+	int i, ret;
|
|
+	struct blktap *tap = (struct blktap *)dev->class_data;
|
|
+
|
|
+	tmp = buf;
|
|
+	blktap_sysfs_get(tap);
|
|
+
|
|
+	if (!tap->ring.dev) {
|
|
+		ret = sprintf(tmp, "no device\n");
|
|
+		goto out;
|
|
+	}
|
|
+
|
|
+	tmp += sprintf(tmp, "%s (%u:%u), refcnt: %d, dev_inuse: 0x%08lx\n",
|
|
+		       tap->params.name, MAJOR(tap->ring.devno),
|
|
+		       MINOR(tap->ring.devno), atomic_read(&tap->refcnt),
|
|
+		       tap->dev_inuse);
|
|
+	tmp += sprintf(tmp, "capacity: 0x%llx, sector size: 0x%lx, "
|
|
+		       "device users: %d\n", tap->params.capacity,
|
|
+		       tap->params.sector_size, tap->device.users);
|
|
+
|
|
+	down_read(&tap->tap_sem);
|
|
+
|
|
+	tmp += sprintf(tmp, "pending requests: %d\n", tap->pending_cnt);
|
|
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
|
|
+		struct blktap_request *req = tap->pending_requests[i];
|
|
+		if (!req)
|
|
+			continue;
|
|
+
|
|
+		tmp += sprintf(tmp, "req %d: id: %llu, usr_idx: %d, "
|
|
+			       "status: 0x%02x, pendcnt: %d, "
|
|
+			       "nr_pages: %u, op: %d, time: %lu:%lu\n",
|
|
+			       i, (unsigned long long)req->id, req->usr_idx,
|
|
+			       req->status, atomic_read(&req->pendcnt),
|
|
+			       req->nr_pages, req->operation, req->time.tv_sec,
|
|
+			       req->time.tv_usec);
|
|
+	}
|
|
+
|
|
+	up_read(&tap->tap_sem);
|
|
+	/* Report only the formatted bytes; the NUL is not part of the data. */
|
|
+	ret = tmp - buf;
|
|
+
|
|
+out:
|
|
+	blktap_sysfs_put(tap);
|
|
+	BTDBG("%s\n", buf);
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+static CLASS_DEVICE_ATTR(debug, S_IRUSR, blktap_sysfs_debug_device, NULL);
|
|
+
|
|
+int
|
|
+blktap_sysfs_create(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring;
|
|
+ struct class_device *dev;
|
|
+
|
|
+ if (!class)
|
|
+ return -ENODEV;
|
|
+
|
|
+ ring = &tap->ring;
|
|
+
|
|
+ dev = class_device_create(class, NULL, ring->devno,
|
|
+ NULL, "blktap%d", tap->minor);
|
|
+ if (IS_ERR(dev))
|
|
+ return PTR_ERR(dev);
|
|
+
|
|
+ ring->dev = dev;
|
|
+ dev->class_data = tap;
|
|
+
|
|
+ mutex_init(&ring->sysfs_mutex);
|
|
+ atomic_set(&ring->sysfs_refcnt, 0);
|
|
+ set_bit(BLKTAP_SYSFS, &tap->dev_inuse);
|
|
+
|
|
+ class_device_create_file(dev, &class_device_attr_name);
|
|
+ class_device_create_file(dev, &class_device_attr_remove);
|
|
+ class_device_create_file(dev, &class_device_attr_pause);
|
|
+ class_device_create_file(dev, &class_device_attr_debug);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int
|
|
+blktap_sysfs_destroy(struct blktap *tap)
|
|
+{
|
|
+ struct blktap_ring *ring;
|
|
+ struct class_device *dev;
|
|
+
|
|
+ ring = &tap->ring;
|
|
+ dev = ring->dev;
|
|
+ if (!class || !dev)
|
|
+ return 0;
|
|
+
|
|
+ ring->dev = NULL;
|
|
+ if (wait_event_interruptible(sysfs_wq,
|
|
+ !atomic_read(&tap->ring.sysfs_refcnt)))
|
|
+ return -EAGAIN;
|
|
+
|
|
+ /* XXX: is it safe to remove the class from a sysfs attribute? */
|
|
+ class_device_remove_file(dev, &class_device_attr_name);
|
|
+ class_device_remove_file(dev, &class_device_attr_remove);
|
|
+ class_device_remove_file(dev, &class_device_attr_pause);
|
|
+ class_device_remove_file(dev, &class_device_attr_resume);
|
|
+ class_device_remove_file(dev, &class_device_attr_debug);
|
|
+ class_device_destroy(class, ring->devno);
|
|
+
|
|
+ clear_bit(BLKTAP_SYSFS, &tap->dev_inuse);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_show_verbosity(struct class *class, char *buf)
|
|
+{
|
|
+ return sprintf(buf, "%d\n", blktap_debug_level);
|
|
+}
|
|
+
|
|
+static ssize_t
|
|
+blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size)
|
|
+{
|
|
+ int level;
|
|
+
|
|
+ if (sscanf(buf, "%d", &level) == 1) {
|
|
+ blktap_debug_level = level;
|
|
+ return size;
|
|
+ }
|
|
+
|
|
+ return -EINVAL;
|
|
+}
|
|
+static CLASS_ATTR(verbosity, S_IRUSR | S_IWUSR,
|
|
+ blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity);
|
|
+
|
|
+/*
|
|
+ * Class attribute "devices": list every tap that has BLKTAP_DEVICE set,
|
|
+ * one "<minor> <name>" line per tap.
|
|
+ *
|
|
+ * Returns the number of bytes written to buf.
|
|
+ *
|
|
+ * NOTE(review): the total output is not bounded against the size of buf;
|
|
+ * confirm MAX_BLKTAP_DEVICE entries always fit.
|
|
+ */
|
|
+static ssize_t
|
|
+blktap_sysfs_show_devices(struct class *class, char *buf)
|
|
+{
|
|
+	int i, ret;
|
|
+	struct blktap *tap;
|
|
+
|
|
+	ret = 0;
|
|
+	for (i = 0; i < MAX_BLKTAP_DEVICE; i++) {
|
|
+		tap = blktaps[i];
|
|
+		if (!tap)
|
|
+			continue;
|
|
+
|
|
+		if (!test_bit(BLKTAP_DEVICE, &tap->dev_inuse))
|
|
+			continue;
|
|
+
|
|
+		ret += sprintf(buf + ret, "%d ", tap->minor);
|
|
+		/* The name is user-supplied: never use it as the format. */
|
|
+		ret += snprintf(buf + ret, sizeof(tap->params.name) - 1,
|
|
+				"%s", tap->params.name);
|
|
+		ret += sprintf(buf + ret, "\n");
|
|
+	}
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+static CLASS_ATTR(devices, S_IRUSR, blktap_sysfs_show_devices, NULL);
|
|
+
|
|
+void
|
|
+blktap_sysfs_free(void)
|
|
+{
|
|
+ if (!class)
|
|
+ return;
|
|
+
|
|
+ class_remove_file(class, &class_attr_verbosity);
|
|
+ class_remove_file(class, &class_attr_devices);
|
|
+
|
|
+ class_destroy(class);
|
|
+}
|
|
+
|
|
+int __init
|
|
+blktap_sysfs_init(void)
|
|
+{
|
|
+ struct class *cls;
|
|
+
|
|
+ if (class)
|
|
+ return -EEXIST;
|
|
+
|
|
+ cls = class_create(THIS_MODULE, "blktap2");
|
|
+ if (IS_ERR(cls))
|
|
+ return PTR_ERR(cls);
|
|
+
|
|
+ class_create_file(cls, &class_attr_verbosity);
|
|
+ class_create_file(cls, &class_attr_devices);
|
|
+
|
|
+ class = cls;
|
|
+ return 0;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/blktap2/wait_queue.c 2009-05-29 10:25:53.000000000 +0200
|
|
@@ -0,0 +1,40 @@
|
|
+#include <linux/list.h>
|
|
+#include <linux/spinlock.h>
|
|
+
|
|
+#include "blktap.h"
|
|
+
|
|
+static LIST_HEAD(deferred_work_queue);
|
|
+static DEFINE_SPINLOCK(deferred_work_lock);
|
|
+
|
|
+void
|
|
+blktap_run_deferred(void)
|
|
+{
|
|
+ LIST_HEAD(queue);
|
|
+ struct blktap *tap;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&deferred_work_lock, flags);
|
|
+ list_splice_init(&deferred_work_queue, &queue);
|
|
+ list_for_each_entry(tap, &queue, deferred_queue)
|
|
+ clear_bit(BLKTAP_DEFERRED, &tap->dev_inuse);
|
|
+ spin_unlock_irqrestore(&deferred_work_lock, flags);
|
|
+
|
|
+ while (!list_empty(&queue)) {
|
|
+ tap = list_entry(queue.next, struct blktap, deferred_queue);
|
|
+ list_del_init(&tap->deferred_queue);
|
|
+ blktap_device_restart(tap);
|
|
+ }
|
|
+}
|
|
+
|
|
+void
|
|
+blktap_defer(struct blktap *tap)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&deferred_work_lock, flags);
|
|
+ if (!test_bit(BLKTAP_DEFERRED, &tap->dev_inuse)) {
|
|
+ set_bit(BLKTAP_DEFERRED, &tap->dev_inuse);
|
|
+ list_add_tail(&tap->deferred_queue, &deferred_work_queue);
|
|
+ }
|
|
+ spin_unlock_irqrestore(&deferred_work_lock, flags);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/char/Makefile 2007-07-10 09:42:30.000000000 +0200
|
|
@@ -0,0 +1 @@
|
|
+obj-$(CONFIG_XEN_DEVMEM) := mem.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200
|
|
@@ -0,0 +1,190 @@
|
|
+/*
|
|
+ * Originally from linux/drivers/char/mem.c
|
|
+ *
|
|
+ * Copyright (C) 1991, 1992 Linus Torvalds
|
|
+ *
|
|
+ * Added devfs support.
|
|
+ * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
|
|
+ * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
|
|
+ */
|
|
+
|
|
+#include <linux/mm.h>
|
|
+#include <linux/miscdevice.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/mman.h>
|
|
+#include <linux/random.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/raw.h>
|
|
+#include <linux/tty.h>
|
|
+#include <linux/capability.h>
|
|
+#include <linux/smp_lock.h>
|
|
+#include <linux/ptrace.h>
|
|
+#include <linux/device.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/hypervisor.h>
|
|
+
|
|
+static inline int uncached_access(struct file *file)
|
|
+{
|
|
+ if (file->f_flags & O_SYNC)
|
|
+ return 1;
|
|
+ /* Xen sets correct MTRR type on non-RAM for us. */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This function reads the *physical* memory. The f_pos points directly to the
|
|
+ * memory location.
|
|
+ */
|
|
+static ssize_t read_mem(struct file * file, char __user * buf,
|
|
+ size_t count, loff_t *ppos)
|
|
+{
|
|
+ unsigned long p = *ppos, ignored;
|
|
+ ssize_t read = 0, sz;
|
|
+ void __iomem *v;
|
|
+
|
|
+ while (count > 0) {
|
|
+ /*
|
|
+ * Handle first page in case it's not aligned
|
|
+ */
|
|
+ if (-p & (PAGE_SIZE - 1))
|
|
+ sz = -p & (PAGE_SIZE - 1);
|
|
+ else
|
|
+ sz = PAGE_SIZE;
|
|
+
|
|
+ sz = min_t(unsigned long, sz, count);
|
|
+
|
|
+ v = ioremap(p, sz);
|
|
+ if (IS_ERR(v) || v == NULL) {
|
|
+ /*
|
|
+ * Some programs (e.g., dmidecode) groove off into
|
|
+ * weird RAM areas where no tables can possibly exist
|
|
+ * (because Xen will have stomped on them!). These
|
|
+ * programs get rather upset if we let them know that
|
|
+ * Xen failed their access, so we fake out a read of
|
|
+ * all zeroes.
|
|
+ */
|
|
+ if (clear_user(buf, count))
|
|
+ return -EFAULT;
|
|
+ read += count;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ ignored = copy_to_user(buf, v, sz);
|
|
+ iounmap(v);
|
|
+ if (ignored)
|
|
+ return -EFAULT;
|
|
+ buf += sz;
|
|
+ p += sz;
|
|
+ count -= sz;
|
|
+ read += sz;
|
|
+ }
|
|
+
|
|
+ *ppos += read;
|
|
+ return read;
|
|
+}
|
|
+
|
|
+static ssize_t write_mem(struct file * file, const char __user * buf,
|
|
+ size_t count, loff_t *ppos)
|
|
+{
|
|
+ unsigned long p = *ppos, ignored;
|
|
+ ssize_t written = 0, sz;
|
|
+ void __iomem *v;
|
|
+
|
|
+ while (count > 0) {
|
|
+ /*
|
|
+ * Handle first page in case it's not aligned
|
|
+ */
|
|
+ if (-p & (PAGE_SIZE - 1))
|
|
+ sz = -p & (PAGE_SIZE - 1);
|
|
+ else
|
|
+ sz = PAGE_SIZE;
|
|
+
|
|
+ sz = min_t(unsigned long, sz, count);
|
|
+
|
|
+ v = ioremap(p, sz);
|
|
+ if (v == NULL)
|
|
+ break;
|
|
+ if (IS_ERR(v)) {
|
|
+ if (written == 0)
|
|
+ return PTR_ERR(v);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ ignored = copy_from_user(v, buf, sz);
|
|
+ iounmap(v);
|
|
+ if (ignored) {
|
|
+ written += sz - ignored;
|
|
+ if (written)
|
|
+ break;
|
|
+ return -EFAULT;
|
|
+ }
|
|
+ buf += sz;
|
|
+ p += sz;
|
|
+ count -= sz;
|
|
+ written += sz;
|
|
+ }
|
|
+
|
|
+ *ppos += written;
|
|
+ return written;
|
|
+}
|
|
+
|
|
+#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
|
|
+static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
|
|
+{
|
|
+ size_t size = vma->vm_end - vma->vm_start;
|
|
+
|
|
+ if (uncached_access(file))
|
|
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
|
|
+
|
|
+ /* We want to return the real error code, not EAGAIN. */
|
|
+ return direct_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
|
|
+ size, vma->vm_page_prot, DOMID_IO);
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * The memory devices use the full 32/64 bits of the offset, and so we cannot
|
|
+ * check against negative addresses: they are ok. The return value is weird,
|
|
+ * though, in that case (0).
|
|
+ *
|
|
+ * also note that seeking relative to the "end of file" isn't supported:
|
|
+ * it has no meaning, so it returns -EINVAL.
|
|
+ */
|
|
+static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
|
|
+{
|
|
+ loff_t ret;
|
|
+
|
|
+ mutex_lock(&file->f_dentry->d_inode->i_mutex);
|
|
+ switch (orig) {
|
|
+ case 0:
|
|
+ file->f_pos = offset;
|
|
+ ret = file->f_pos;
|
|
+ force_successful_syscall_return();
|
|
+ break;
|
|
+ case 1:
|
|
+ file->f_pos += offset;
|
|
+ ret = file->f_pos;
|
|
+ force_successful_syscall_return();
|
|
+ break;
|
|
+ default:
|
|
+ ret = -EINVAL;
|
|
+ }
|
|
+ mutex_unlock(&file->f_dentry->d_inode->i_mutex);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int open_mem(struct inode * inode, struct file * filp)
|
|
+{
|
|
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
|
|
+}
|
|
+
|
|
+const struct file_operations mem_fops = {
|
|
+ .llseek = memory_lseek,
|
|
+ .read = read_mem,
|
|
+ .write = write_mem,
|
|
+ .mmap = xen_mmap_mem,
|
|
+ .open = open_mem,
|
|
+};
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/console/Makefile 2007-06-12 13:13:44.000000000 +0200
|
|
@@ -0,0 +1,2 @@
|
|
+
|
|
+obj-y := console.o xencons_ring.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/console/console.c 2009-03-18 10:39:31.000000000 +0100
|
|
@@ -0,0 +1,753 @@
|
|
+/******************************************************************************
|
|
+ * console.c
|
|
+ *
|
|
+ * Virtual console driver.
|
|
+ *
|
|
+ * Copyright (c) 2002-2004, K A Fraser.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/signal.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/tty.h>
|
|
+#include <linux/tty_flip.h>
|
|
+#include <linux/serial.h>
|
|
+#include <linux/major.h>
|
|
+#include <linux/ptrace.h>
|
|
+#include <linux/ioport.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/console.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/sysrq.h>
|
|
+#include <linux/screen_info.h>
|
|
+#include <linux/vt.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/irq.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/event_channel.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/xencons.h>
|
|
+
|
|
+/*
|
|
+ * Modes:
|
|
+ * 'xencons=off' [XC_OFF]: Console is disabled.
|
|
+ * 'xencons=tty' [XC_TTY]: Console attached to '/dev/tty[0-9]+'.
|
|
+ * 'xencons=ttyS' [XC_SERIAL]: Console attached to '/dev/ttyS[0-9]+'.
|
|
+ * 'xencons=xvc' [XC_XVC]: Console attached to '/dev/xvc0'.
|
|
+ * 'xencons=hvc' [XC_HVC]: Console attached to '/dev/hvc0'.
|
|
+ * default: XC_XVC
|
|
+ *
|
|
+ * NB. In mode XC_TTY, we create dummy consoles for tty2-63. This suppresses
|
|
+ * warnings from standard distro startup scripts.
|
|
+ */
|
|
+static enum {
|
|
+ XC_OFF, XC_TTY, XC_SERIAL, XC_XVC, XC_HVC
|
|
+} xc_mode = XC_XVC;
|
|
+static int xc_num = -1;
|
|
+
|
|
+/* /dev/xvc0 device number allocated by lanana.org. */
|
|
+#define XEN_XVC_MAJOR 204
|
|
+#define XEN_XVC_MINOR 191
|
|
+
|
|
+/* /dev/hvc0 device number */
|
|
+#define XEN_HVC_MAJOR 229
|
|
+#define XEN_HVC_MINOR 0
|
|
+
|
|
+#ifdef CONFIG_MAGIC_SYSRQ
|
|
+static unsigned long sysrq_requested;
|
|
+extern int sysrq_enabled;
|
|
+#endif
|
|
+
|
|
+static int __init xencons_setup(char *str)
|
|
+{ /* parses the "xencons=" boot option into xc_mode and xc_num; always returns 1 */
|
|
+ char *q;
|
|
+ int n;
|
|
+ extern int console_use_vt;
|
|
+
|
|
+ console_use_vt = 1; /* set for every mode; cleared again only for "tty" below */
|
|
+ if (!strncmp(str, "ttyS", 4)) { /* tested before "tty", which is a prefix of "ttyS" */
|
|
+ xc_mode = XC_SERIAL;
|
|
+ str += 4;
|
|
+ } else if (!strncmp(str, "tty", 3)) {
|
|
+ xc_mode = XC_TTY;
|
|
+ str += 3;
|
|
+ console_use_vt = 0;
|
|
+ } else if (!strncmp(str, "xvc", 3)) {
|
|
+ xc_mode = XC_XVC;
|
|
+ str += 3;
|
|
+ } else if (!strncmp(str, "hvc", 3)) {
|
|
+ xc_mode = XC_HVC;
|
|
+ str += 3;
|
|
+ } else if (!strncmp(str, "off", 3)) {
|
|
+ xc_mode = XC_OFF;
|
|
+ str += 3;
|
|
+ } /* an unrecognized prefix leaves xc_mode at its current value */
|
|
+
|
|
+ n = simple_strtol(str, &q, 10); /* optional decimal suffix after the mode name */
|
|
+ if (q != str) /* at least one digit was consumed */
|
|
+ xc_num = n;
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+__setup("xencons=", xencons_setup);
|
|
+
|
|
+/* The kernel and user-land drivers share a common transmit buffer. */
|
|
+static unsigned int wbuf_size = 4096;
|
|
+#define WBUF_MASK(_i) ((_i)&(wbuf_size-1))
|
|
+static char *wbuf;
|
|
+static unsigned int wc, wp; /* write_cons, write_prod */
|
|
+
|
|
+static int __init xencons_bufsz_setup(char *str)
|
|
+{ /* parses the "xencons_bufsz=" boot option; can only enlarge wbuf_size; always returns 1 */
|
|
+ unsigned int goal;
|
|
+ goal = simple_strtoul(str, NULL, 0); /* base 0: the number base is taken from the prefix */
|
|
+ if (goal) {
|
|
+ goal = roundup_pow_of_two(goal); /* WBUF_MASK() relies on a power-of-two size */
|
|
+ if (wbuf_size < goal) /* smaller requests are ignored */
|
|
+ wbuf_size = goal;
|
|
+ }
|
|
+ return 1;
|
|
+}
|
|
+__setup("xencons_bufsz=", xencons_bufsz_setup);
|
|
+
|
|
+/* This lock protects accesses to the common transmit buffer. */
|
|
+static DEFINE_SPINLOCK(xencons_lock);
|
|
+
|
|
+/* Common transmit-kick routine. */
|
|
+static void __xencons_tx_flush(void);
|
|
+
|
|
+static struct tty_driver *xencons_driver;
|
|
+
|
|
+/******************** Kernel console driver ********************************/
|
|
+
|
|
+static void kcons_write(struct console *c, const char *s, unsigned int count)
|
|
+{ /* console write hook used when not the initial domain: queues s[0..count) in wbuf and flushes */
|
|
+ int i = 0;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+
|
|
+ while (i < count) { /* loops until every byte has been queued */
|
|
+ for (; i < count; i++) {
|
|
+ if ((wp - wc) >= (wbuf_size - 1)) /* keep one slot spare: a newline below takes two */
|
|
+ break;
|
|
+ if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n')
|
|
+ wbuf[WBUF_MASK(wp++)] = '\r'; /* each newline is followed by a carriage return */
|
|
+ }
|
|
+
|
|
+ __xencons_tx_flush(); /* drain the buffer to make room for the remainder */
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
|
|
+{ /* writes s[0..count) with the CONSOLEIO_write hypercall; c is unused */
|
|
+
|
|
+ while (count > 0) {
|
|
+ int rc;
|
|
+ rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
|
|
+ if (rc <= 0) /* error or no progress: the remaining bytes are dropped */
|
|
+ break;
|
|
+ count -= rc; /* rc is treated as the number of bytes accepted */
|
|
+ s += rc;
|
|
+ }
|
|
+}
|
|
+
|
|
+static struct tty_driver *kcons_device(struct console *c, int *index)
|
|
+{
|
|
+ *index = 0;
|
|
+ return xencons_driver;
|
|
+}
|
|
+
|
|
+static struct console kcons_info = {
|
|
+ .device = kcons_device,
|
|
+ .flags = CON_PRINTBUFFER | CON_ENABLED,
|
|
+ .index = -1,
|
|
+};
|
|
+
|
|
+static int __init xen_console_init(void)
|
|
+{
|
|
+ if (!is_running_on_xen())
|
|
+ goto out;
|
|
+
|
|
+ if (is_initial_xendomain()) {
|
|
+ kcons_info.write = kcons_write_dom0;
|
|
+ } else {
|
|
+ if (!xen_start_info->console.domU.evtchn)
|
|
+ goto out;
|
|
+ kcons_info.write = kcons_write;
|
|
+ }
|
|
+
|
|
+ switch (xc_mode) {
|
|
+ case XC_XVC:
|
|
+ strcpy(kcons_info.name, "xvc");
|
|
+ if (xc_num == -1)
|
|
+ xc_num = 0;
|
|
+ break;
|
|
+
|
|
+ case XC_HVC:
|
|
+ strcpy(kcons_info.name, "hvc");
|
|
+ if (xc_num == -1)
|
|
+ xc_num = 0;
|
|
+ if (!is_initial_xendomain())
|
|
+ add_preferred_console(kcons_info.name, xc_num, NULL);
|
|
+ break;
|
|
+
|
|
+ case XC_SERIAL:
|
|
+ strcpy(kcons_info.name, "ttyS");
|
|
+ if (xc_num == -1)
|
|
+ xc_num = 0;
|
|
+ break;
|
|
+
|
|
+ case XC_TTY:
|
|
+ strcpy(kcons_info.name, "tty");
|
|
+ if (xc_num == -1)
|
|
+ xc_num = 1;
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ wbuf = alloc_bootmem(wbuf_size);
|
|
+
|
|
+ register_console(&kcons_info);
|
|
+
|
|
+ out:
|
|
+ return 0;
|
|
+}
|
|
+console_initcall(xen_console_init);
|
|
+
|
|
+/*** Useful function for console debugging -- goes straight to Xen. ***/
|
|
+asmlinkage int xprintk(const char *fmt, ...)
|
|
+{
|
|
+	va_list args;
|
|
+	int printk_len;
|
|
+	static char printk_buf[1024];	/* shared by all callers, no locking here */
|
|
+
|
|
+	/* vscnprintf() returns the length stored, never more than the buffer. */
|
|
+	va_start(args, fmt);
|
|
+	printk_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
|
|
+	va_end(args);
|
|
+
|
|
+	/* Send the processed output directly to Xen. */
|
|
+	kcons_write_dom0(NULL, printk_buf, printk_len);
|
|
+
|
|
+	return 0;	/* always 0, not the number of bytes written */
|
|
+}
|
|
+
|
|
+/*** Forcibly flush console data before dying. ***/
|
|
+void xencons_force_flush(void)
|
|
+{
|
|
+	int sz, sent;
|
|
+
|
|
+	/* Emergency console is synchronous, so there's nothing to flush. */
|
|
+	if (!is_running_on_xen() ||
|
|
+	    is_initial_xendomain() ||
|
|
+	    !xen_start_info->console.domU.evtchn)
|
|
+		return;
|
|
+
|
|
+	/* Spin until console data is flushed through to the daemon. */
|
|
+	while (wc != wp) {
|
|
+		sz = wp - wc;	/* non-zero, guaranteed by the loop condition */
|
|
+		if (sz > (wbuf_size - WBUF_MASK(wc)))	/* data wraps: stop at end of wbuf */
|
|
+			sz = wbuf_size - WBUF_MASK(wc);
|
|
+		sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
|
|
+		if (sent > 0)	/* nothing accepted: retry the same span */
|
|
+			wc += sent;
|
|
+	}
|
|
+}
|
|
+
|
|
+
|
|
+void __init dom0_init_screen_info(const struct dom0_vga_console_info *info, size_t size)
|
|
+{
|
|
+ /* This is drawn from a dump from vgacon:startup in
|
|
+ * standard Linux. */
|
|
+ screen_info.orig_video_mode = 3;
|
|
+ screen_info.orig_video_isVGA = 1;
|
|
+ screen_info.orig_video_lines = 25;
|
|
+ screen_info.orig_video_cols = 80;
|
|
+ screen_info.orig_video_ega_bx = 3;
|
|
+ screen_info.orig_video_points = 16;
|
|
+ screen_info.orig_y = screen_info.orig_video_lines - 1;
|
|
+
|
|
+ switch (info->video_type) {
|
|
+ case XEN_VGATYPE_TEXT_MODE_3:
|
|
+ if (size < offsetof(struct dom0_vga_console_info, u.text_mode_3)
|
|
+ + sizeof(info->u.text_mode_3))
|
|
+ break;
|
|
+ screen_info.orig_video_lines = info->u.text_mode_3.rows;
|
|
+ screen_info.orig_video_cols = info->u.text_mode_3.columns;
|
|
+ screen_info.orig_x = info->u.text_mode_3.cursor_x;
|
|
+ screen_info.orig_y = info->u.text_mode_3.cursor_y;
|
|
+ screen_info.orig_video_points =
|
|
+ info->u.text_mode_3.font_height;
|
|
+ break;
|
|
+
|
|
+ case XEN_VGATYPE_VESA_LFB:
|
|
+ if (size < offsetof(struct dom0_vga_console_info,
|
|
+ u.vesa_lfb.gbl_caps))
|
|
+ break;
|
|
+ screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;
|
|
+ screen_info.lfb_width = info->u.vesa_lfb.width;
|
|
+ screen_info.lfb_height = info->u.vesa_lfb.height;
|
|
+ screen_info.lfb_depth = info->u.vesa_lfb.bits_per_pixel;
|
|
+ screen_info.lfb_base = info->u.vesa_lfb.lfb_base;
|
|
+ screen_info.lfb_size = info->u.vesa_lfb.lfb_size;
|
|
+ screen_info.lfb_linelength = info->u.vesa_lfb.bytes_per_line;
|
|
+ screen_info.red_size = info->u.vesa_lfb.red_size;
|
|
+ screen_info.red_pos = info->u.vesa_lfb.red_pos;
|
|
+ screen_info.green_size = info->u.vesa_lfb.green_size;
|
|
+ screen_info.green_pos = info->u.vesa_lfb.green_pos;
|
|
+ screen_info.blue_size = info->u.vesa_lfb.blue_size;
|
|
+ screen_info.blue_pos = info->u.vesa_lfb.blue_pos;
|
|
+ screen_info.rsvd_size = info->u.vesa_lfb.rsvd_size;
|
|
+ screen_info.rsvd_pos = info->u.vesa_lfb.rsvd_pos;
|
|
+ if (size >= offsetof(struct dom0_vga_console_info,
|
|
+ u.vesa_lfb.gbl_caps)
|
|
+ + sizeof(info->u.vesa_lfb.gbl_caps))
|
|
+ screen_info.capabilities = info->u.vesa_lfb.gbl_caps;
|
|
+ if (size >= offsetof(struct dom0_vga_console_info,
|
|
+ u.vesa_lfb.mode_attrs)
|
|
+ + sizeof(info->u.vesa_lfb.mode_attrs))
|
|
+ screen_info.vesa_attributes = info->u.vesa_lfb.mode_attrs;
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/******************** User-space console driver (/dev/console) ************/
|
|
+
|
|
+#define DRV(_d) (_d)
|
|
+#define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \
|
|
+ ((_tty)->index != (xc_num - 1)))
|
|
+
|
|
+static struct termios *xencons_termios[MAX_NR_CONSOLES];
|
|
+static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
|
|
+static struct tty_struct *xencons_tty;
|
|
+static int xencons_priv_irq;
|
|
+static char x_char;
|
|
+
|
|
+void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
|
|
+{
|
|
+ int i;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ if (xencons_tty == NULL)
|
|
+ goto out;
|
|
+
|
|
+ for (i = 0; i < len; i++) {
|
|
+#ifdef CONFIG_MAGIC_SYSRQ
|
|
+ if (sysrq_enabled) {
|
|
+ if (buf[i] == '\x0f') { /* ^O */
|
|
+ if (!sysrq_requested) {
|
|
+ sysrq_requested = jiffies;
|
|
+ continue; /* don't print sysrq key */
|
|
+ }
|
|
+ sysrq_requested = 0;
|
|
+ } else if (sysrq_requested) {
|
|
+ unsigned long sysrq_timeout =
|
|
+ sysrq_requested + HZ*2;
|
|
+ sysrq_requested = 0;
|
|
+ if (time_before(jiffies, sysrq_timeout)) {
|
|
+ spin_unlock_irqrestore(
|
|
+ &xencons_lock, flags);
|
|
+ handle_sysrq(
|
|
+ buf[i], regs, xencons_tty);
|
|
+ spin_lock_irqsave(
|
|
+ &xencons_lock, flags);
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+ tty_insert_flip_char(xencons_tty, buf[i], 0);
|
|
+ }
|
|
+ tty_flip_buffer_push(xencons_tty);
|
|
+
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+static void __xencons_tx_flush(void)
|
|
+{ /* pushes x_char and then the contents of wbuf to the console; callers in this file hold xencons_lock */
|
|
+ int sent, sz, work_done = 0;
|
|
+
|
|
+ if (x_char) { /* a pending out-of-band character goes out before buffered data */
|
|
+ if (is_initial_xendomain())
|
|
+ kcons_write_dom0(NULL, &x_char, 1);
|
|
+ else
|
|
+ while (x_char) /* retry until the ring accepts the byte */
|
|
+ if (xencons_ring_send(&x_char, 1) == 1)
|
|
+ break;
|
|
+ x_char = 0;
|
|
+ work_done = 1;
|
|
+ }
|
|
+
|
|
+ while (wc != wp) {
|
|
+ sz = wp - wc;
|
|
+ if (sz > (wbuf_size - WBUF_MASK(wc))) /* clamp to the end of wbuf; the wrapped part goes next iteration */
|
|
+ sz = wbuf_size - WBUF_MASK(wc);
|
|
+ if (is_initial_xendomain()) {
|
|
+ kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
|
|
+ wc += sz; /* advanced by sz regardless of how much kcons_write_dom0() managed to write */
|
|
+ } else {
|
|
+ sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
|
|
+ if (sent == 0) /* ring accepted nothing: leave the rest for a later flush */
|
|
+ break;
|
|
+ wc += sent;
|
|
+ }
|
|
+ work_done = 1;
|
|
+ }
|
|
+
|
|
+ if (work_done && (xencons_tty != NULL)) { /* something was sent: wake writers on the open tty */
|
|
+ wake_up_interruptible(&xencons_tty->write_wait);
|
|
+ if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
|
|
+ (xencons_tty->ldisc.write_wakeup != NULL))
|
|
+ (xencons_tty->ldisc.write_wakeup)(xencons_tty);
|
|
+ }
|
|
+}
|
|
+
|
|
+void xencons_tx(void)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ __xencons_tx_flush();
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+/* Privileged receive callback and transmit kicker. */
|
|
+static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
|
|
+ struct pt_regs *regs)
|
|
+{
|
|
+ static char rbuf[16];
|
|
+ int l;
|
|
+
|
|
+ while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
|
|
+ xencons_rx(rbuf, l, regs);
|
|
+
|
|
+ xencons_tx();
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+static int xencons_write_room(struct tty_struct *tty)
|
|
+{ /* .write_room hook; tty is unused and wp/wc are read without xencons_lock */
|
|
+ return wbuf_size - (wp - wc); /* buffer size minus the bytes still queued */
|
|
+}
|
|
+
|
|
+static int xencons_chars_in_buffer(struct tty_struct *tty)
|
|
+{ /* .chars_in_buffer hook; tty is unused and wp/wc are read without xencons_lock */
|
|
+ return wp - wc; /* bytes queued in wbuf and not yet sent */
|
|
+}
|
|
+
|
|
+static void xencons_send_xchar(struct tty_struct *tty, char ch)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ x_char = ch;
|
|
+ __xencons_tx_flush();
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+static void xencons_throttle(struct tty_struct *tty)
|
|
+{
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ if (I_IXOFF(tty))
|
|
+ xencons_send_xchar(tty, STOP_CHAR(tty));
|
|
+}
|
|
+
|
|
+static void xencons_unthrottle(struct tty_struct *tty)
|
|
+{
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ if (I_IXOFF(tty)) {
|
|
+ if (x_char != 0)
|
|
+ x_char = 0;
|
|
+ else
|
|
+ xencons_send_xchar(tty, START_CHAR(tty));
|
|
+ }
|
|
+}
|
|
+
|
|
+static void xencons_flush_buffer(struct tty_struct *tty)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ wc = wp = 0;
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+static inline int __xencons_put_char(int ch)
|
|
+{ /* queues one byte in wbuf; returns 1 if stored, 0 if the buffer is full */
|
|
+ char _ch = (char)ch;
|
|
+ if ((wp - wc) == wbuf_size) /* every slot is in use */
|
|
+ return 0;
|
|
+ wbuf[WBUF_MASK(wp++)] = _ch;
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static int xencons_write(
|
|
+ struct tty_struct *tty,
|
|
+ const unsigned char *buf,
|
|
+ int count)
|
|
+{ /* .write hook: queues up to count bytes from buf and returns how many were taken */
|
|
+ int i;
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (DUMMY_TTY(tty)) /* dummy ttys discard the data but report it all as written */
|
|
+ return count;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+
|
|
+ for (i = 0; i < count; i++)
|
|
+ if (!__xencons_put_char(buf[i])) /* buffer full: stop with a short count */
|
|
+ break;
|
|
+
|
|
+ if (i != 0) /* flush only when something was queued */
|
|
+ __xencons_tx_flush();
|
|
+
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+
|
|
+ return i;
|
|
+}
|
|
+
|
|
+static void xencons_put_char(struct tty_struct *tty, u_char ch)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ (void)__xencons_put_char(ch);
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+static void xencons_flush_chars(struct tty_struct *tty)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ __xencons_tx_flush();
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
|
|
+{
|
|
+ unsigned long orig_jiffies = jiffies;
|
|
+
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ while (DRV(tty->driver)->chars_in_buffer(tty)) {
|
|
+ set_current_state(TASK_INTERRUPTIBLE);
|
|
+ schedule_timeout(1);
|
|
+ if (signal_pending(current))
|
|
+ break;
|
|
+ if (timeout && time_after(jiffies, orig_jiffies + timeout))
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ set_current_state(TASK_RUNNING);
|
|
+}
|
|
+
|
|
+static int xencons_open(struct tty_struct *tty, struct file *filp)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return 0;
|
|
+
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ tty->driver_data = NULL;
|
|
+ if (xencons_tty == NULL)
|
|
+ xencons_tty = tty;
|
|
+ __xencons_tx_flush();
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void xencons_close(struct tty_struct *tty, struct file *filp)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (DUMMY_TTY(tty))
|
|
+ return;
|
|
+
|
|
+ mutex_lock(&tty_mutex);
|
|
+
|
|
+ if (tty->count != 1) {
|
|
+ mutex_unlock(&tty_mutex);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Prevent other threads from re-opening this tty. */
|
|
+ set_bit(TTY_CLOSING, &tty->flags);
|
|
+ mutex_unlock(&tty_mutex);
|
|
+
|
|
+ tty->closing = 1;
|
|
+ tty_wait_until_sent(tty, 0);
|
|
+ if (DRV(tty->driver)->flush_buffer != NULL)
|
|
+ DRV(tty->driver)->flush_buffer(tty);
|
|
+ if (tty->ldisc.flush_buffer != NULL)
|
|
+ tty->ldisc.flush_buffer(tty);
|
|
+ tty->closing = 0;
|
|
+ spin_lock_irqsave(&xencons_lock, flags);
|
|
+ xencons_tty = NULL;
|
|
+ spin_unlock_irqrestore(&xencons_lock, flags);
|
|
+}
|
|
+
|
|
+static struct tty_operations xencons_ops = {
|
|
+ .open = xencons_open,
|
|
+ .close = xencons_close,
|
|
+ .write = xencons_write,
|
|
+ .write_room = xencons_write_room,
|
|
+ .put_char = xencons_put_char,
|
|
+ .flush_chars = xencons_flush_chars,
|
|
+ .chars_in_buffer = xencons_chars_in_buffer,
|
|
+ .send_xchar = xencons_send_xchar,
|
|
+ .flush_buffer = xencons_flush_buffer,
|
|
+ .throttle = xencons_throttle,
|
|
+ .unthrottle = xencons_unthrottle,
|
|
+ .wait_until_sent = xencons_wait_until_sent,
|
|
+};
|
|
+
|
|
+static int __init xencons_init(void)
|
|
+{
|
|
+ int rc;
|
|
+
|
|
+ if (!is_running_on_xen())
|
|
+ return -ENODEV;
|
|
+
|
|
+ if (xc_mode == XC_OFF)
|
|
+ return 0;
|
|
+
|
|
+ if (!is_initial_xendomain()) {
|
|
+ rc = xencons_ring_init();
|
|
+ if (rc)
|
|
+ return rc;
|
|
+ }
|
|
+
|
|
+ xencons_driver = alloc_tty_driver((xc_mode == XC_TTY) ?
|
|
+ MAX_NR_CONSOLES : 1);
|
|
+ if (xencons_driver == NULL)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ DRV(xencons_driver)->name = "xencons";
|
|
+ DRV(xencons_driver)->major = TTY_MAJOR;
|
|
+ DRV(xencons_driver)->type = TTY_DRIVER_TYPE_SERIAL;
|
|
+ DRV(xencons_driver)->subtype = SERIAL_TYPE_NORMAL;
|
|
+ DRV(xencons_driver)->init_termios = tty_std_termios;
|
|
+ DRV(xencons_driver)->flags =
|
|
+ TTY_DRIVER_REAL_RAW |
|
|
+ TTY_DRIVER_RESET_TERMIOS;
|
|
+ DRV(xencons_driver)->termios = xencons_termios;
|
|
+ DRV(xencons_driver)->termios_locked = xencons_termios_locked;
|
|
+
|
|
+ switch (xc_mode) {
|
|
+ case XC_XVC:
|
|
+ DRV(xencons_driver)->name = "xvc";
|
|
+ DRV(xencons_driver)->major = XEN_XVC_MAJOR;
|
|
+ DRV(xencons_driver)->minor_start = XEN_XVC_MINOR;
|
|
+ DRV(xencons_driver)->name_base = xc_num;
|
|
+ break;
|
|
+ case XC_HVC:
|
|
+ DRV(xencons_driver)->name = "hvc";
|
|
+ DRV(xencons_driver)->major = XEN_HVC_MAJOR;
|
|
+ DRV(xencons_driver)->minor_start = XEN_HVC_MINOR;
|
|
+ DRV(xencons_driver)->name_base = xc_num;
|
|
+ break;
|
|
+ case XC_SERIAL:
|
|
+ DRV(xencons_driver)->name = "ttyS";
|
|
+ DRV(xencons_driver)->minor_start = 64 + xc_num;
|
|
+ DRV(xencons_driver)->name_base = xc_num;
|
|
+ break;
|
|
+ default:
|
|
+ DRV(xencons_driver)->name = "tty";
|
|
+ DRV(xencons_driver)->minor_start = 1;
|
|
+ DRV(xencons_driver)->name_base = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ tty_set_operations(xencons_driver, &xencons_ops);
|
|
+
|
|
+ if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) {
|
|
+ printk("WARNING: Failed to register Xen virtual "
|
|
+ "console driver as '%s%d'\n",
|
|
+ DRV(xencons_driver)->name,
|
|
+ DRV(xencons_driver)->name_base);
|
|
+ put_tty_driver(xencons_driver);
|
|
+ xencons_driver = NULL;
|
|
+ return rc;
|
|
+ }
|
|
+
|
|
+ if (is_initial_xendomain()) {
|
|
+ xencons_priv_irq = bind_virq_to_irqhandler(
|
|
+ VIRQ_CONSOLE,
|
|
+ 0,
|
|
+ xencons_priv_interrupt,
|
|
+ 0,
|
|
+ "console",
|
|
+ NULL);
|
|
+ BUG_ON(xencons_priv_irq < 0);
|
|
+ }
|
|
+
|
|
+ printk("Xen virtual console successfully installed as %s%d\n",
|
|
+ DRV(xencons_driver)->name, xc_num);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+module_init(xencons_init);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/console/xencons_ring.c 2007-06-12 13:13:44.000000000 +0200
|
|
@@ -0,0 +1,143 @@
|
|
+/*
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/signal.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/tty.h>
|
|
+#include <linux/tty_flip.h>
|
|
+#include <linux/serial.h>
|
|
+#include <linux/major.h>
|
|
+#include <linux/ptrace.h>
|
|
+#include <linux/ioport.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/slab.h>
|
|
+
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/xencons.h>
|
|
+#include <linux/wait.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/err.h>
|
|
+#include <xen/interface/io/console.h>
|
|
+
|
|
+static int xencons_irq;
|
|
+
|
|
+static inline struct xencons_interface *xencons_interface(void)
|
|
+{
|
|
+ return mfn_to_virt(xen_start_info->console.domU.mfn);
|
|
+}
|
|
+
|
|
+static inline void notify_daemon(void)
|
|
+{
|
|
+ /* Use evtchn: this is called early, before irq is set up. */
|
|
+ notify_remote_via_evtchn(xen_start_info->console.domU.evtchn);
|
|
+}
|
|
+
|
|
+int xencons_ring_send(const char *data, unsigned len)
|
|
+{ /* copies up to len bytes into the shared output ring; returns the number copied, possibly 0 */
|
|
+ int sent = 0;
|
|
+ struct xencons_interface *intf = xencons_interface();
|
|
+ XENCONS_RING_IDX cons, prod;
|
|
+
|
|
+ cons = intf->out_cons;
|
|
+ prod = intf->out_prod;
|
|
+ mb(); /* indices are read before the ring contents are touched */
|
|
+ BUG_ON((prod - cons) > sizeof(intf->out)); /* more bytes outstanding than the ring can hold */
|
|
+
|
|
+ while ((sent < len) && ((prod - cons) < sizeof(intf->out))) /* stop when the input is exhausted or the ring is full */
|
|
+ intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++];
|
|
+
|
|
+ wmb(); /* data is written before the new producer index is stored */
|
|
+ intf->out_prod = prod;
|
|
+
|
|
+ notify_daemon(); /* sent even when nothing was copied */
|
|
+
|
|
+ return sent;
|
|
+}
|
|
+
|
|
+static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
|
|
+{ /* interrupt handler bound in xencons_ring_init(): drains the shared input ring, then kicks transmit */
|
|
+ struct xencons_interface *intf = xencons_interface();
|
|
+ XENCONS_RING_IDX cons, prod;
|
|
+
|
|
+ cons = intf->in_cons;
|
|
+ prod = intf->in_prod;
|
|
+ mb(); /* indices are read before the ring contents */
|
|
+ BUG_ON((prod - cons) > sizeof(intf->in)); /* more bytes outstanding than the ring can hold */
|
|
+
|
|
+ while (cons != prod) {
|
|
+ xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs); /* delivered one byte per call */
|
|
+ cons++;
|
|
+ }
|
|
+
|
|
+ mb(); /* ring reads complete before the consumer index is stored */
|
|
+ intf->in_cons = cons;
|
|
+
|
|
+ notify_daemon();
|
|
+
|
|
+ xencons_tx(); /* also flush any output still queued */
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+int xencons_ring_init(void)
|
|
+{ /* (re)binds the console event channel to handle_input(); returns 0, -ENODEV or the bind error */
|
|
+ int irq;
|
|
+
|
|
+ if (xencons_irq) /* drop an earlier binding first; this function is also called from xencons_resume() */
|
|
+ unbind_from_irqhandler(xencons_irq, NULL);
|
|
+ xencons_irq = 0;
|
|
+
|
|
+ if (!is_running_on_xen() ||
|
|
+ is_initial_xendomain() ||
|
|
+ !xen_start_info->console.domU.evtchn) /* no console event channel was provided */
|
|
+ return -ENODEV;
|
|
+
|
|
+ irq = bind_caller_port_to_irqhandler(
|
|
+ xen_start_info->console.domU.evtchn,
|
|
+ handle_input, 0, "xencons", NULL);
|
|
+ if (irq < 0) { /* a negative value is passed back to the caller as the error */
|
|
+ printk(KERN_ERR "XEN console request irq failed %i\n", irq);
|
|
+ return irq;
|
|
+ }
|
|
+
|
|
+ xencons_irq = irq;
|
|
+
|
|
+ /* In case we have in-flight data after save/restore... */
|
|
+ notify_daemon();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void xencons_resume(void)
|
|
+{
|
|
+ (void)xencons_ring_init();
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/Makefile 2008-07-21 11:00:33.000000000 +0200
|
|
@@ -0,0 +1,14 @@
|
|
+#
|
|
+# Makefile for the linux kernel.
|
|
+#
|
|
+
|
|
+obj-y := evtchn.o gnttab.o features.o reboot.o machine_reboot.o firmware.o
|
|
+
|
|
+obj-$(CONFIG_PCI) += pci.o
|
|
+obj-$(CONFIG_PROC_FS) += xen_proc.o
|
|
+obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor_sysfs.o
|
|
+obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
|
|
+obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
|
|
+obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
|
|
+obj-$(CONFIG_KEXEC) += machine_kexec.o
|
|
+obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/cpu_hotplug.c 2011-01-24 12:06:05.000000000 +0100
|
|
@@ -0,0 +1,179 @@
|
|
+#include <linux/init.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/kobject.h>
|
|
+#include <linux/notifier.h>
|
|
+#include <linux/cpu.h>
|
|
+#include <xen/cpu_hotplug.h>
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+/*
|
|
+ * Set of CPUs that remote admin software will allow us to bring online.
|
|
+ * Notified to us via xenbus.
|
|
+ */
|
|
+static cpumask_t xenbus_allowed_cpumask;
|
|
+
|
|
+/* Set of CPUs that local admin will allow us to bring online. */
|
|
+static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
|
|
+
|
|
+static int local_cpu_hotplug_request(void)
|
|
+{
|
|
+ /*
|
|
+ * We assume a CPU hotplug request comes from local admin if it is made
|
|
+ * via a userspace process (i.e., one with a real mm_struct).
|
|
+ */
|
|
+ return (current->mm != NULL);
|
|
+}
|
|
+
|
|
+static void vcpu_hotplug(unsigned int cpu, struct sys_device *dev)
|
|
+{
|
|
+	int err;
|
|
+	char dir[32], state[32];	/* state is filled by the bounded "%31s" scan below */
|
|
+
|
|
+	if ((cpu >= NR_CPUS) || !cpu_possible(cpu))
|
|
+		return;
|
|
+
|
|
+	sprintf(dir, "cpu/%u", cpu);
|
|
+	err = xenbus_scanf(XBT_NIL, dir, "availability", "%31s", state);
|
|
+	if (err != 1) {	/* node missing or unreadable: leave the CPU as it is */
|
|
+		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	if (strcmp(state, "online") == 0) {
|
|
+		cpu_set(cpu, xenbus_allowed_cpumask);
|
|
+		if (!cpu_up(cpu) && dev)	/* uevent only on success and with a sysdev */
|
|
+			kobject_uevent(&dev->kobj, KOBJ_ONLINE);
|
|
+	} else if (strcmp(state, "offline") == 0) {
|
|
+		cpu_clear(cpu, xenbus_allowed_cpumask);
|
|
+		if (!cpu_down(cpu) && dev)
|
|
+			kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
|
|
+	} else {
|
|
+		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%u\n",
|
|
+		       state, cpu);
|
|
+	}
|
|
+}
|
|
+
|
|
+static void handle_vcpu_hotplug_event(
|
|
+	struct xenbus_watch *watch, const char **vec, unsigned int len)
|
|
+{
|
|
+	unsigned int cpu;
|
|
+	char *cpustr;
|
|
+	const char *node = vec[XS_WATCH_PATH];	/* path of the node that changed */
|
|
+
|
|
+	cpustr = strstr(node, "cpu/");
|
|
+	/* Act only if a CPU number was parsed; otherwise cpu is uninitialized. */
|
|
+	if (cpustr != NULL && sscanf(cpustr, "cpu/%u", &cpu) == 1)
|
|
+		vcpu_hotplug(cpu, get_cpu_sysdev(cpu));
|
|
+}
|
|
+
|
|
+static int smpboot_cpu_notify(struct notifier_block *notifier,
|
|
+ unsigned long action, void *hcpu)
|
|
+{
|
|
+ unsigned int cpu = (long)hcpu;
|
|
+
|
|
+ /*
|
|
+ * We do this in a callback notifier rather than __cpu_disable()
|
|
+ * because local_cpu_hotplug_request() does not work in the latter
|
|
+ * as it's always executed from within a stopmachine kthread.
|
|
+ */
|
|
+ if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
|
|
+ cpu_clear(cpu, local_allowed_cpumask);
|
|
+
|
|
+ return NOTIFY_OK;
|
|
+}
|
|
+
|
|
+static int setup_cpu_watcher(struct notifier_block *notifier,
|
|
+ unsigned long event, void *data)
|
|
+{
|
|
+ unsigned int i;
|
|
+
|
|
+ static struct xenbus_watch cpu_watch = {
|
|
+ .node = "cpu",
|
|
+ .callback = handle_vcpu_hotplug_event,
|
|
+ .flags = XBWF_new_thread };
|
|
+ (void)register_xenbus_watch(&cpu_watch);
|
|
+
|
|
+ if (!is_initial_xendomain()) {
|
|
+ for_each_possible_cpu(i)
|
|
+ vcpu_hotplug(i, get_cpu_sysdev(i));
|
|
+ printk(KERN_INFO "Brought up %ld CPUs\n",
|
|
+ (long)num_online_cpus());
|
|
+ }
|
|
+
|
|
+ return NOTIFY_DONE;
|
|
+}
|
|
+
|
|
+static int __init setup_vcpu_hotplug_event(void)
|
|
+{
|
|
+ static struct notifier_block hotplug_cpu = {
|
|
+ .notifier_call = smpboot_cpu_notify };
|
|
+ static struct notifier_block xsn_cpu = {
|
|
+ .notifier_call = setup_cpu_watcher };
|
|
+
|
|
+ if (!is_running_on_xen())
|
|
+ return -ENODEV;
|
|
+
|
|
+ register_cpu_notifier(&hotplug_cpu);
|
|
+ register_xenstore_notifier(&xsn_cpu);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+arch_initcall(setup_vcpu_hotplug_event);
|
|
+
|
|
+int smp_suspend(void)
|
|
+{ /* takes every online CPU except CPU 0 down; returns 0 or the first cpu_down() error */
|
|
+ unsigned int cpu;
|
|
+ int err;
|
|
+
|
|
+ for_each_online_cpu(cpu) {
|
|
+ if (cpu == 0) /* CPU 0 is never taken down here */
|
|
+ continue;
|
|
+ err = cpu_down(cpu);
|
|
+ if (err) {
|
|
+ printk(KERN_CRIT "Failed to take all CPUs "
|
|
+ "down: %d.\n", err);
|
|
+ for_each_possible_cpu(cpu) /* reuses the outer loop variable; fine because we return right after */
|
|
+ vcpu_hotplug(cpu, NULL); /* re-apply each CPU's xenstore availability */
|
|
+ return err;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void smp_resume(void)
|
|
+{
|
|
+ unsigned int cpu;
|
|
+
|
|
+ for_each_possible_cpu(cpu) {
|
|
+ if (cpu == 0)
|
|
+ continue;
|
|
+ vcpu_hotplug(cpu, NULL);
|
|
+ }
|
|
+}
|
|
+
|
|
+int cpu_up_check(unsigned int cpu)
|
|
+{ /* returns 0 if cpu may be brought up, or -EBUSY if an allowed-mask forbids it */
|
|
+ int rc = 0;
|
|
+
|
|
+ if (local_cpu_hotplug_request()) { /* request made from a userspace process context */
|
|
+ cpu_set(cpu, local_allowed_cpumask); /* a local request re-allows the CPU locally */
|
|
+ if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { /* the xenbus mask can still veto it */
|
|
+ printk("%s: attempt to bring up CPU %u disallowed by "
|
|
+ "remote admin.\n", __FUNCTION__, cpu);
|
|
+ rc = -EBUSY;
|
|
+ }
|
|
+ } else if (!cpu_isset(cpu, local_allowed_cpumask) || /* non-local request: both masks must allow the CPU */
|
|
+ !cpu_isset(cpu, xenbus_allowed_cpumask)) {
|
|
+ rc = -EBUSY;
|
|
+ }
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+void init_xenbus_allowed_cpumask(void)
|
|
+{
|
|
+ xenbus_allowed_cpumask = cpu_present_map;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/evtchn.c 2010-11-25 09:36:37.000000000 +0100
|
|
@@ -0,0 +1,1204 @@
|
|
+/******************************************************************************
|
|
+ * evtchn.c
|
|
+ *
|
|
+ * Communication via Xen event channels.
|
|
+ *
|
|
+ * Copyright (c) 2002-2005, K A Fraser
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/irq.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/kernel_stat.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/version.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/system.h>
|
|
+#include <asm/ptrace.h>
|
|
+#include <asm/synch_bitops.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/interface/event_channel.h>
|
|
+#include <xen/interface/physdev.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <linux/mc146818rtc.h> /* RTC_IRQ */
|
|
+
|
|
+/*
|
|
+ * This lock protects updates to the following mapping and reference-count
|
|
+ * arrays. The lock does not need to be acquired to read the mapping tables.
|
|
+ */
|
|
+static DEFINE_SPINLOCK(irq_mapping_update_lock);
|
|
+
|
|
+/* IRQ <-> event-channel mappings. */
|
|
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
|
|
+ [0 ... NR_EVENT_CHANNELS-1] = -1 };
|
|
+
|
|
+/* Packed IRQ information: binding type, sub-type index, and event channel. */
|
|
+static u32 irq_info[NR_IRQS];
|
|
+
|
|
+/* Binding types. */
|
|
+enum {
|
|
+ IRQT_UNBOUND,
|
|
+ IRQT_PIRQ,
|
|
+ IRQT_VIRQ,
|
|
+ IRQT_IPI,
|
|
+ IRQT_LOCAL_PORT,
|
|
+ IRQT_CALLER_PORT,
|
|
+ _IRQT_COUNT
|
|
+};
|
|
+
|
|
+#define _IRQT_BITS 4
|
|
+#define _EVTCHN_BITS 12
|
|
+#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS)
|
|
+
|
|
+/* Constructor for packed IRQ information. */
|
|
+static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
|
|
+{
|
|
+ BUILD_BUG_ON(_IRQT_COUNT > (1U << _IRQT_BITS));
|
|
+
|
|
+ BUILD_BUG_ON(NR_PIRQS > (1U << _INDEX_BITS));
|
|
+ BUILD_BUG_ON(NR_VIRQS > (1U << _INDEX_BITS));
|
|
+ BUILD_BUG_ON(NR_IPIS > (1U << _INDEX_BITS));
|
|
+ BUG_ON(index >> _INDEX_BITS);
|
|
+
|
|
+ BUILD_BUG_ON(NR_EVENT_CHANNELS > (1U << _EVTCHN_BITS));
|
|
+
|
|
+ return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn);
|
|
+}
|
|
+
|
|
+/* Convenient shorthand for packed representation of an unbound IRQ. */
|
|
+#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
|
|
+
|
|
+/*
|
|
+ * Accessors for packed IRQ information.
|
|
+ */
|
|
+
|
|
+/* Return the event-channel field (the low _EVTCHN_BITS bits) of irq_info[irq]. */
+static inline unsigned int evtchn_from_irq(int irq)
+{
+	const u32 evtchn_field = (1U << _EVTCHN_BITS) - 1;
+
+	return irq_info[irq] & evtchn_field;
+}
|
|
+
|
|
+/* Return the sub-type index field (_INDEX_BITS wide) of irq_info[irq]. */
+static inline unsigned int index_from_irq(int irq)
+{
+	const u32 index_field = (1U << _INDEX_BITS) - 1;
+	u32 shifted = irq_info[irq] >> _EVTCHN_BITS;
+
+	return shifted & index_field;
+}
|
|
+
|
|
+/* Return the binding-type field (the top _IRQT_BITS bits) of irq_info[irq]. */
+static inline unsigned int type_from_irq(int irq)
+{
+	u32 info = irq_info[irq];
+
+	return info >> (32 - _IRQT_BITS);
+}
|
|
+
|
|
+/* IRQ <-> VIRQ mapping. */
|
|
+DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
|
|
+
|
|
+/* IRQ <-> IPI mapping. */
|
|
+#ifndef NR_IPIS
|
|
+#define NR_IPIS 1
|
|
+#endif
|
|
+DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
|
|
+
|
|
+/* Reference counts for bindings to IRQs. */
|
|
+static int irq_bindcount[NR_IRQS];
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+
|
|
+static u8 cpu_evtchn[NR_EVENT_CHANNELS];
|
|
+static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
|
|
+
|
|
+static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
|
|
+ unsigned int idx)
|
|
+{ /* SMP build: bits of word idx that are pending, assigned to cpu and not masked. */
|
|
+ return (sh->evtchn_pending[idx] &
|
|
+ cpu_evtchn_mask[cpu][idx] &
|
|
+ ~sh->evtchn_mask[idx]);
|
|
+}
|
|
+
|
|
+/*
+ * Record that event channel @chn is now handled by @cpu: update the affinity
+ * info of the IRQ mapped to the channel (if any), move the channel's bit from
+ * the previous CPU's cpu_evtchn_mask row to @cpu's, and store the new owner
+ * in cpu_evtchn[].  The channel must be masked when this is called.
+ */
+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+{
+	shared_info_t *shinfo = HYPERVISOR_shared_info;
+	unsigned int old_cpu;
+	int irq;
+
+	BUG_ON(!test_bit(chn, shinfo->evtchn_mask));
+
+	irq = evtchn_to_irq[chn];
+	if (irq != -1)
+		set_native_irq_info(irq, cpumask_of_cpu(cpu));
+
+	old_cpu = cpu_evtchn[chn];
+	clear_bit(chn, (unsigned long *)cpu_evtchn_mask[old_cpu]);
+	set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
+	cpu_evtchn[chn] = cpu;
+}
|
|
+
|
|
+/*
+ * Reset the channel-to-CPU bookkeeping: every IRQ's affinity info and every
+ * cpu_evtchn[] entry point at CPU 0, CPU 0's cpu_evtchn_mask row is all ones
+ * and every other possible CPU's row is all zeroes.
+ */
+static void init_evtchn_cpu_bindings(void)
+{
+	int irq, cpu;
+
+	/* By default all event channels notify CPU#0. */
+	for (irq = 0; irq < NR_IRQS; irq++)
+		set_native_irq_info(irq, cpumask_of_cpu(0));
+
+	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
+
+	for_each_possible_cpu(cpu) {
+		int fill = 0;
+
+		if (cpu == 0)
+			fill = ~0;
+		memset(cpu_evtchn_mask[cpu], fill,
+		       sizeof(cpu_evtchn_mask[cpu]));
+	}
+}
|
|
+
|
|
+static inline unsigned int cpu_from_evtchn(unsigned int evtchn) /* SMP build: CPU recorded for this channel in cpu_evtchn[]. */
|
|
+{
|
|
+ return cpu_evtchn[evtchn];
|
|
+}
|
|
+
|
|
+#else
|
|
+
|
|
+static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
|
|
+ unsigned int idx)
|
|
+{ /* Non-SMP build: pending and unmasked bits of word idx; cpu is not used. */
|
|
+ return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]);
|
|
+}
|
|
+
|
|
+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
|
|
+{ /* Non-SMP build: intentionally empty, no per-CPU channel bookkeeping is kept. */
|
|
+}
|
|
+
|
|
+static void init_evtchn_cpu_bindings(void)
|
|
+{ /* Non-SMP build: intentionally empty, there is nothing to initialise. */
|
|
+}
|
|
+
|
|
+static inline unsigned int cpu_from_evtchn(unsigned int evtchn) /* Non-SMP build: every channel reports CPU 0. */
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
+/* Upcall to generic IRQ layer. */
|
|
+#ifdef CONFIG_X86
|
|
+extern fastcall unsigned int do_IRQ(struct pt_regs *regs);
|
|
+void __init xen_init_IRQ(void);
|
|
+void __init init_IRQ(void) /* x86 entry point: calls irq_ctx_init(0), then xen_init_IRQ(). */
|
|
+{
|
|
+ irq_ctx_init(0);
|
|
+ xen_init_IRQ();
|
|
+}
|
|
+#if defined (__i386__)
|
|
+static inline void exit_idle(void) {}
|
|
+#define IRQ_REG orig_eax
|
|
+#elif defined (__x86_64__)
|
|
+#include <asm/idle.h>
|
|
+#define IRQ_REG orig_rax
|
|
+#endif
|
|
+#define do_IRQ(irq, regs) do { \
|
|
+ (regs)->IRQ_REG = ~(irq); \
|
|
+ do_IRQ((regs)); \
|
|
+} while (0)
|
|
+#endif
|
|
+
|
|
+/* Xen will never allocate port zero for any purpose. */
|
|
+#define VALID_EVTCHN(chn) ((chn) != 0)
|
|
+
|
|
+/*
|
|
+ * Force a proper event-channel callback from Xen after clearing the
|
|
+ * callback mask. We do this in a very simple manner, by making a call
|
|
+ * down into Xen. The pending flag will be checked by Xen on return.
|
|
+ */
|
|
+void force_evtchn_callback(void)
|
|
+{
|
|
+ VOID(HYPERVISOR_xen_version(0, NULL)); /* result discarded: the hypercall is made only to enter and leave Xen */
|
|
+}
|
|
+/* Not a GPL symbol: used in ubiquitous macros, so too restrictive. */
|
|
+EXPORT_SYMBOL(force_evtchn_callback);
|
|
+
|
|
+static DEFINE_PER_CPU(unsigned int, upcall_count);
|
|
+static DEFINE_PER_CPU(unsigned int, current_l1i);
|
|
+static DEFINE_PER_CPU(unsigned int, current_l2i);
|
|
+
|
|
+/* NB. Interrupts are disabled on entry. */
|
|
+asmlinkage void evtchn_do_upcall(struct pt_regs *regs) /* Dispatch every active event channel of the current CPU to do_IRQ() or evtchn_device_upcall(). */
|
|
+{
|
|
+ unsigned long l1, l2; /* l1: snapshot of the selector word; l2: active ports of word l1i */
|
|
+ unsigned long masked_l1, masked_l2; /* l1/l2 with the bits below the scan position removed */
|
|
+ unsigned int l1i, l2i, start_l1i, start_l2i, port, count, i;
|
|
+ int irq;
|
|
+ unsigned int cpu = smp_processor_id();
|
|
+ shared_info_t *s = HYPERVISOR_shared_info;
|
|
+ vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
|
|
+
|
|
+ exit_idle();
|
|
+ irq_enter();
|
|
+
|
|
+ do {
|
|
+ /* Avoid a callback storm when we reenable delivery. */
|
|
+ vcpu_info->evtchn_upcall_pending = 0;
|
|
+
|
|
+ /* Nested invocations bail immediately. */
|
|
+ if (unlikely(per_cpu(upcall_count, cpu)++))
|
|
+ break;
|
|
+
|
|
+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
|
|
+ /* Clear master flag /before/ clearing selector flag. */
|
|
+ wmb();
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * Handle timer interrupts before all others, so that all
|
|
+ * hardirq handlers see an up-to-date system time even if we
|
|
+ * have just woken from a long idle period.
|
|
+ */
|
|
+ if ((irq = __get_cpu_var(virq_to_irq)[VIRQ_TIMER]) != -1) {
|
|
+ port = evtchn_from_irq(irq);
|
|
+ l1i = port / BITS_PER_LONG;
|
|
+ l2i = port % BITS_PER_LONG;
|
|
+ if (active_evtchns(cpu, s, l1i) & (1ul<<l2i))
|
|
+ do_IRQ(irq, regs);
|
|
+ }
|
|
+
|
|
+ l1 = xchg(&vcpu_info->evtchn_pending_sel, 0); /* fetch the selector word and reset it to 0 in one step */
|
|
+
|
|
+ start_l1i = l1i = per_cpu(current_l1i, cpu); /* resume at the position saved by the previous scan */
|
|
+ start_l2i = per_cpu(current_l2i, cpu);
|
|
+
|
|
+ for (i = 0; l1 != 0; i++) {
|
|
+ masked_l1 = l1 & ((~0UL) << l1i);
|
|
+ /* If we masked out all events, wrap to beginning. */
|
|
+ if (masked_l1 == 0) {
|
|
+ l1i = l2i = 0;
|
|
+ continue;
|
|
+ }
|
|
+ l1i = __ffs(masked_l1);
|
|
+
|
|
+ l2 = active_evtchns(cpu, s, l1i);
|
|
+ l2i = 0; /* usually scan entire word from start */
|
|
+ if (l1i == start_l1i) {
|
|
+ /* We scan the starting word in two parts. */
|
|
+ if (i == 0)
|
|
+ /* 1st time: start in the middle */
|
|
+ l2i = start_l2i;
|
|
+ else
|
|
+ /* 2nd time: mask bits done already */
|
|
+ l2 &= (1ul << start_l2i) - 1;
|
|
+ }
|
|
+
|
|
+ do {
|
|
+ masked_l2 = l2 & ((~0UL) << l2i);
|
|
+ if (masked_l2 == 0)
|
|
+ break;
|
|
+ l2i = __ffs(masked_l2);
|
|
+
|
|
+ /* process port */
|
|
+ port = (l1i * BITS_PER_LONG) + l2i;
|
|
+ if ((irq = evtchn_to_irq[port]) != -1)
|
|
+ do_IRQ(irq, regs);
|
|
+ else
|
|
+ evtchn_device_upcall(port); /* no IRQ is mapped to this port */
|
|
+
|
|
+ l2i = (l2i + 1) % BITS_PER_LONG;
|
|
+
|
|
+ /* Next caller starts at last processed + 1 */
|
|
+ per_cpu(current_l1i, cpu) =
|
|
+ l2i ? l1i : (l1i + 1) % BITS_PER_LONG;
|
|
+ per_cpu(current_l2i, cpu) = l2i;
|
|
+
|
|
+ } while (l2i != 0); /* l2i wrapped to 0: the last bit of this word was just handled */
|
|
+
|
|
+ /* Scan start_l1i twice; all others once. */
|
|
+ if ((l1i != start_l1i) || (i != 0))
|
|
+ l1 &= ~(1UL << l1i);
|
|
+
|
|
+ l1i = (l1i + 1) % BITS_PER_LONG;
|
|
+ }
|
|
+
|
|
+ /* If there were nested callbacks then we have more to do. */
|
|
+ count = per_cpu(upcall_count, cpu);
|
|
+ per_cpu(upcall_count, cpu) = 0;
|
|
+ } while (unlikely(count != 1)); /* count > 1: a nested invocation incremented the counter and bailed, so rescan */
|
|
+
|
|
+ irq_exit();
|
|
+}
|
|
+
|
|
+/*
+ * Return the lowest IRQ in [DYNIRQ_BASE, DYNIRQ_BASE + NR_DYNIRQS) whose bind
+ * count is zero, or -ENOSPC if there is none.  The "no IRQ" warning is
+ * printed only the first time the search fails.
+ */
+static int find_unbound_irq(void)
+{
+	static int warned;
+	int irq = DYNIRQ_BASE;
+
+	while (irq < (DYNIRQ_BASE + NR_DYNIRQS)) {
+		if (irq_bindcount[irq] == 0)
+			return irq;
+		irq++;
+	}
+
+	if (warned)
+		return -ENOSPC;
+
+	warned = 1;
+	printk(KERN_WARNING "No available IRQ to bind to: "
+	       "increase NR_DYNIRQS.\n");
+	return -ENOSPC;
+}
|
|
+
|
|
+/*
+ * Bind @caller_port to an IRQ, reusing the IRQ already mapped to the port if
+ * there is one, and take a reference on it.  Returns the IRQ, or the negative
+ * value from find_unbound_irq() when no dynamic IRQ is free.
+ */
+static int bind_caller_port_to_irq(unsigned int caller_port)
+{
+	int irq;
+
+	spin_lock(&irq_mapping_update_lock);
+
+	irq = evtchn_to_irq[caller_port];
+	if (irq != -1)
+		goto bound;
+
+	irq = find_unbound_irq();
+	if (irq < 0)
+		goto out;
+
+	evtchn_to_irq[caller_port] = irq;
+	irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
+
+ bound:
+	irq_bindcount[irq]++;
+
+ out:
+	spin_unlock(&irq_mapping_update_lock);
+	return irq;
+}
|
|
+
|
|
+/*
+ * Bind the not yet mapped @local_port to a free dynamic IRQ and take the
+ * first reference on it.  If no IRQ is free the port is closed again and the
+ * negative value from find_unbound_irq() is returned.
+ */
+static int bind_local_port_to_irq(unsigned int local_port)
+{
+	int irq;
+
+	spin_lock(&irq_mapping_update_lock);
+
+	BUG_ON(evtchn_to_irq[local_port] != -1);
+
+	irq = find_unbound_irq();
+	if (irq >= 0) {
+		evtchn_to_irq[local_port] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
+		irq_bindcount[irq]++;
+	} else {
+		struct evtchn_close close = { .port = local_port };
+
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
+			BUG();
+	}
+
+	spin_unlock(&irq_mapping_update_lock);
+	return irq;
+}
|
|
+
|
|
+/*
+ * Allocate an unbound port in this domain for @remote_domain and bind it to
+ * an IRQ.  Returns the IRQ, or the hypercall's error, or the error from
+ * bind_local_port_to_irq().
+ */
+static int bind_listening_port_to_irq(unsigned int remote_domain)
+{
+	struct evtchn_alloc_unbound alloc_unbound;
+	int err;
+
+	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.remote_dom = remote_domain;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err)
+		return err;
+
+	return bind_local_port_to_irq(alloc_unbound.port);
+}
|
|
+
|
|
+/*
+ * Bind a local port to @remote_port of @remote_domain and map that local
+ * port to an IRQ.  Returns the IRQ, or the hypercall's error, or the error
+ * from bind_local_port_to_irq().
+ */
+static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+					  unsigned int remote_port)
+{
+	struct evtchn_bind_interdomain bind_interdomain;
+	int err;
+
+	bind_interdomain.remote_dom  = remote_domain;
+	bind_interdomain.remote_port = remote_port;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+					  &bind_interdomain);
+	if (err)
+		return err;
+
+	return bind_local_port_to_irq(bind_interdomain.local_port);
+}
|
|
+
|
|
+/*
+ * Return the IRQ bound to @virq on @cpu, creating the binding (new dynamic
+ * IRQ plus EVTCHNOP_bind_virq) if it does not exist yet, and take a
+ * reference on it.  Returns the negative value from find_unbound_irq() when
+ * no dynamic IRQ is free; a failing hypercall is fatal.
+ */
+static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_bind_virq bind_virq;
+	int evtchn, irq;
+
+	spin_lock(&irq_mapping_update_lock);
+
+	irq = per_cpu(virq_to_irq, cpu)[virq];
+	if (irq != -1)
+		goto bound;
+
+	irq = find_unbound_irq();
+	if (irq < 0)
+		goto out;
+
+	bind_virq.virq = virq;
+	bind_virq.vcpu = cpu;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq) != 0)
+		BUG();
+	evtchn = bind_virq.port;
+
+	evtchn_to_irq[evtchn] = irq;
+	irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+
+	per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+	bind_evtchn_to_cpu(evtchn, cpu);
+
+ bound:
+	irq_bindcount[irq]++;
+
+ out:
+	spin_unlock(&irq_mapping_update_lock);
+	return irq;
+}
|
|
+
|
|
+/*
+ * Return the IRQ bound to @ipi on @cpu, creating the binding (new dynamic
+ * IRQ plus EVTCHNOP_bind_ipi) if it does not exist yet, and take a reference
+ * on it.  Returns the negative value from find_unbound_irq() when no dynamic
+ * IRQ is free; a failing hypercall is fatal.
+ */
+static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+{
+	struct evtchn_bind_ipi bind_ipi;
+	int evtchn, irq;
+
+	spin_lock(&irq_mapping_update_lock);
+
+	irq = per_cpu(ipi_to_irq, cpu)[ipi];
+	if (irq != -1)
+		goto bound;
+
+	irq = find_unbound_irq();
+	if (irq < 0)
+		goto out;
+
+	bind_ipi.vcpu = cpu;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0)
+		BUG();
+	evtchn = bind_ipi.port;
+
+	evtchn_to_irq[evtchn] = irq;
+	irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+
+	per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+
+	bind_evtchn_to_cpu(evtchn, cpu);
+
+ bound:
+	irq_bindcount[irq]++;
+
+ out:
+	spin_unlock(&irq_mapping_update_lock);
+	return irq;
+}
|
|
+
|
|
+/*
+ * Drop one reference on @irq.  When the last reference goes away and the IRQ
+ * has a valid event channel, the channel is closed (except for caller-owned
+ * ports), the per-CPU VIRQ/IPI lookup entry is cleared, the channel is
+ * re-associated with CPU 0, both mapping tables are reset and the per-CPU
+ * interrupt statistics of the IRQ are zeroed.
+ */
+static void unbind_from_irq(unsigned int irq)
+{
+	struct evtchn_close close;
+	unsigned int cpu, type;
+	int evtchn = evtchn_from_irq(irq);
+
+	spin_lock(&irq_mapping_update_lock);
+
+	if (--irq_bindcount[irq] != 0 || !VALID_EVTCHN(evtchn))
+		goto out;
+
+	close.port = evtchn;
+	if (type_from_irq(irq) != IRQT_CALLER_PORT) {
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
+			BUG();
+	}
+
+	type = type_from_irq(irq);
+	if (type == IRQT_VIRQ)
+		per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
+			[index_from_irq(irq)] = -1;
+	else if (type == IRQT_IPI)
+		per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
+			[index_from_irq(irq)] = -1;
+
+	/* Closed ports are implicitly re-bound to VCPU0. */
+	bind_evtchn_to_cpu(evtchn, 0);
+
+	evtchn_to_irq[evtchn] = -1;
+	irq_info[irq] = IRQ_UNBOUND;
+
+	/* Zap stats across IRQ changes of use. */
+	for_each_possible_cpu(cpu)
+		kstat_cpu(cpu).irqs[irq] = 0;
+
+ out:
+	spin_unlock(&irq_mapping_update_lock);
+}
|
|
+
|
|
+/*
+ * Bind @caller_port to an IRQ and install @handler on it with request_irq().
+ * Returns the IRQ on success; otherwise the binding error, or the
+ * request_irq() error after undoing the binding.
+ */
+int bind_caller_port_to_irqhandler(
+	unsigned int caller_port,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id)
+{
+	int irq = bind_caller_port_to_irq(caller_port);
+
+	if (irq >= 0) {
+		int retval = request_irq(irq, handler, irqflags,
+					 devname, dev_id);
+
+		if (retval != 0) {
+			unbind_from_irq(irq);
+			irq = retval;
+		}
+	}
+
+	return irq;
+}
|
|
+EXPORT_SYMBOL_GPL(bind_caller_port_to_irqhandler);
|
|
+
|
|
+/*
+ * Allocate a listening port for @remote_domain, bind it to an IRQ and
+ * install @handler on it with request_irq().  Returns the IRQ on success;
+ * otherwise the binding error, or the request_irq() error after undoing the
+ * binding.
+ */
+int bind_listening_port_to_irqhandler(
+	unsigned int remote_domain,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id)
+{
+	int irq = bind_listening_port_to_irq(remote_domain);
+
+	if (irq >= 0) {
+		int retval = request_irq(irq, handler, irqflags,
+					 devname, dev_id);
+
+		if (retval != 0) {
+			unbind_from_irq(irq);
+			irq = retval;
+		}
+	}
+
+	return irq;
+}
|
|
+EXPORT_SYMBOL_GPL(bind_listening_port_to_irqhandler);
|
|
+
|
|
+/*
+ * Bind to @remote_port of @remote_domain, map the resulting local port to an
+ * IRQ and install @handler on it with request_irq().  Returns the IRQ on
+ * success; otherwise the binding error, or the request_irq() error after
+ * undoing the binding.
+ */
+int bind_interdomain_evtchn_to_irqhandler(
+	unsigned int remote_domain,
+	unsigned int remote_port,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id)
+{
+	int irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+
+	if (irq >= 0) {
+		int retval = request_irq(irq, handler, irqflags,
+					 devname, dev_id);
+
+		if (retval != 0) {
+			unbind_from_irq(irq);
+			irq = retval;
+		}
+	}
+
+	return irq;
+}
|
|
+EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);
|
|
+
|
|
+/*
+ * Bind @virq on @cpu to an IRQ and install @handler on it with
+ * request_irq().  Returns the IRQ on success; otherwise the binding error,
+ * or the request_irq() error after undoing the binding.
+ */
+int bind_virq_to_irqhandler(
+	unsigned int virq,
+	unsigned int cpu,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id)
+{
+	int irq = bind_virq_to_irq(virq, cpu);
+
+	if (irq >= 0) {
+		int retval = request_irq(irq, handler, irqflags,
+					 devname, dev_id);
+
+		if (retval != 0) {
+			unbind_from_irq(irq);
+			irq = retval;
+		}
+	}
+
+	return irq;
+}
|
|
+EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
|
|
+
|
|
+/*
+ * Bind @ipi on @cpu to an IRQ and install @handler on it with request_irq().
+ * Returns the IRQ on success; otherwise the binding error, or the
+ * request_irq() error after undoing the binding.
+ */
+int bind_ipi_to_irqhandler(
+	unsigned int ipi,
+	unsigned int cpu,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id)
+{
+	int irq = bind_ipi_to_irq(ipi, cpu);
+
+	if (irq >= 0) {
+		int retval = request_irq(irq, handler, irqflags,
+					 devname, dev_id);
+
+		if (retval != 0) {
+			unbind_from_irq(irq);
+			irq = retval;
+		}
+	}
+
+	return irq;
+}
|
|
+EXPORT_SYMBOL_GPL(bind_ipi_to_irqhandler);
|
|
+
|
|
+void unbind_from_irqhandler(unsigned int irq, void *dev_id) /* Remove the handler registered with dev_id, then drop one binding reference on irq. */
|
|
+{
|
|
+ free_irq(irq, dev_id);
|
|
+ unbind_from_irq(irq);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+/*
+ * Ask Xen to deliver @port to @cpu and, if that succeeds, update the local
+ * channel-to-CPU bookkeeping.  The port is masked for the duration and
+ * unmasked again only if it was not masked on entry.
+ */
+void rebind_evtchn_to_cpu(int port, unsigned int cpu)
+{
+	struct evtchn_bind_vcpu ebv = { .port = port, .vcpu = cpu };
+	int was_masked = test_and_set_evtchn_mask(port);
+
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &ebv) == 0)
+		bind_evtchn_to_cpu(port, cpu);
+
+	if (!was_masked)
+		unmask_evtchn(port);
+}
|
|
+
|
|
+/* Rebind the event channel behind @irq to @tcpu; no-op without a valid channel. */
+static void rebind_irq_to_cpu(unsigned int irq, unsigned int tcpu)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	rebind_evtchn_to_cpu(evtchn, tcpu);
+}
|
|
+
|
|
+/* set_affinity hook: rebind @irq to the first CPU set in @dest. */
+static void set_affinity_irq(unsigned int irq, cpumask_t dest)
+{
+	rebind_irq_to_cpu(irq, first_cpu(dest));
+}
|
|
+#endif
|
|
+
|
|
+/*
+ * Retrigger hook: set the pending bit of the event channel behind @irq.  The
+ * channel is masked while the bit is set and unmasked afterwards only if it
+ * was not masked on entry.  Always returns 1.
+ */
+int resend_irq_on_evtchn(unsigned int irq)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	int evtchn = evtchn_from_irq(irq);
+
+	if (VALID_EVTCHN(evtchn)) {
+		int was_masked = test_and_set_evtchn_mask(evtchn);
+
+		synch_set_bit(evtchn, s->evtchn_pending);
+		if (!was_masked)
+			unmask_evtchn(evtchn);
+	}
+
+	return 1;
+}
|
|
+
|
|
+/*
|
|
+ * Interface to generic handling in irq.c
|
|
+ */
|
|
+
|
|
+/* startup hook: unmask the event channel behind @irq, if any.  Returns 0. */
+static unsigned int startup_dynirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return 0;
+
+	unmask_evtchn(evtchn);
+	return 0;
+}
|
|
+
|
|
+/* shutdown hook: mask the event channel behind @irq, if any. */
+static void shutdown_dynirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	mask_evtchn(evtchn);
+}
|
|
+
|
|
+/* enable hook: unmask the event channel behind @irq, if any. */
+static void enable_dynirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	unmask_evtchn(evtchn);
+}
|
|
+
|
|
+/* disable hook: mask the event channel behind @irq, if any. */
+static void disable_dynirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	mask_evtchn(evtchn);
+}
|
|
+
|
|
+/*
+ * ack hook: call move_native_irq() for @irq, then mask and clear the event
+ * channel behind it, if any.
+ */
+static void ack_dynirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	move_native_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	mask_evtchn(evtchn);
+	clear_evtchn(evtchn);
+}
|
|
+
|
|
+/*
+ * end hook: unmask the event channel behind @irq unless there is no valid
+ * channel or the IRQ is marked IRQ_DISABLED.
+ */
+static void end_dynirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+	if (irq_desc[irq].status & IRQ_DISABLED)
+		return;
+
+	unmask_evtchn(evtchn);
+}
|
|
+
|
|
+static struct hw_interrupt_type dynirq_type = {
|
|
+ .typename = "Dynamic-irq",
|
|
+ .startup = startup_dynirq,
|
|
+ .shutdown = shutdown_dynirq,
|
|
+ .enable = enable_dynirq,
|
|
+ .disable = disable_dynirq,
|
|
+ .ack = ack_dynirq,
|
|
+ .end = end_dynirq,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_affinity_irq,
|
|
+#endif
|
|
+ .retrigger = resend_irq_on_evtchn,
|
|
+};
|
|
+
|
|
+/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
|
|
+static int pirq_eoi_does_unmask;
|
|
+static unsigned long *pirq_needs_eoi;
|
|
+
|
|
+static void pirq_unmask_and_notify(unsigned int evtchn, unsigned int irq) /* Unmask evtchn and/or send PHYSDEVOP_eoi for irq, as selected by pirq_eoi_does_unmask and the pirq_needs_eoi bitmap. */
|
|
+{
|
|
+ struct physdev_eoi eoi = { .irq = evtchn_get_xen_pirq(irq) };
|
|
+
|
|
+ if (pirq_eoi_does_unmask) { /* in this mode the bitmap is indexed by the Xen PIRQ number */
|
|
+ if (test_bit(eoi.irq, pirq_needs_eoi))
|
|
+ VOID(HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi)); /* no separate unmask here -- NOTE(review): presumably the EOI also unmasks, as the flag name suggests; confirm */
|
|
+ else
|
|
+ unmask_evtchn(evtchn);
|
|
+ } else if (test_bit(irq - PIRQ_BASE, pirq_needs_eoi)) { /* in this mode the bitmap is indexed by irq - PIRQ_BASE, matching pirq_query_unmask() */
|
|
+ if (smp_processor_id() != cpu_from_evtchn(evtchn)) { /* channel belongs to another CPU: issue unmask and EOI as one multicall */
|
|
+ struct evtchn_unmask unmask = { .port = evtchn };
|
|
+ struct multicall_entry mcl[2];
|
|
+
|
|
+ mcl[0].op = __HYPERVISOR_event_channel_op;
|
|
+ mcl[0].args[0] = EVTCHNOP_unmask;
|
|
+ mcl[0].args[1] = (unsigned long)&unmask;
|
|
+ mcl[1].op = __HYPERVISOR_physdev_op;
|
|
+ mcl[1].args[0] = PHYSDEVOP_eoi;
|
|
+ mcl[1].args[1] = (unsigned long)&eoi;
|
|
+
|
|
+ if (HYPERVISOR_multicall(mcl, 2))
|
|
+ BUG();
|
|
+ } else {
|
|
+ unmask_evtchn(evtchn);
|
|
+ VOID(HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi));
|
|
+ }
|
|
+ } else
|
|
+ unmask_evtchn(evtchn);
|
|
+}
|
|
+
|
|
+/*
+ * Refresh the pirq_needs_eoi bit of @irq from PHYSDEVOP_irq_status_query.
+ * Does nothing when pirq_eoi_does_unmask is set.  A failing query is treated
+ * as "no flags", i.e. the bit ends up clear.
+ */
+static inline void pirq_query_unmask(int irq)
+{
+	struct physdev_irq_status_query irq_status;
+	const int bit = irq - PIRQ_BASE;
+
+	if (pirq_eoi_does_unmask)
+		return;
+
+	irq_status.irq = evtchn_get_xen_pirq(irq);
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+		irq_status.flags = 0;
+
+	/* Clear first, then set again only if Xen asks for an EOI. */
+	clear_bit(bit, pirq_needs_eoi);
+	if (irq_status.flags & XENIRQSTAT_needs_eoi)
+		set_bit(bit, pirq_needs_eoi);
+}
|
|
+
|
|
+/*
|
|
+ * On startup, if there is no action associated with the IRQ then we are
|
|
+ * probing. In this case we should not share with others as it will confuse us.
|
|
+ */
|
|
+#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
|
|
+
|
|
+/*
+ * startup hook for physical IRQs.  If @irq has no event channel yet, bind
+ * its Xen PIRQ to a new channel (shared unless the IRQ is being probed),
+ * record the mapping and associate the channel with CPU 0.  In either case
+ * finish with pirq_unmask_and_notify().  A failed bind is logged (unless
+ * probing) and otherwise ignored.  Always returns 0.
+ */
+static unsigned int startup_pirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn)) {
+		struct evtchn_bind_pirq bind_pirq;
+
+		bind_pirq.pirq = evtchn_get_xen_pirq(irq);
+		/* NB. We are happy to share unless we are probing. */
+		bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq,
+						&bind_pirq) != 0) {
+			if (!probing_irq(irq))
+				printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
+				       irq);
+			return 0;
+		}
+		evtchn = bind_pirq.port;
+
+		pirq_query_unmask(irq);
+
+		evtchn_to_irq[evtchn] = irq;
+		bind_evtchn_to_cpu(evtchn, 0);
+		irq_info[irq] = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
+	}
+
+	pirq_unmask_and_notify(evtchn, irq);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * shutdown hook for physical IRQs: mask and close the event channel behind
+ * @irq, re-associate the channel with CPU 0, drop the channel-to-IRQ mapping
+ * and keep only the PIRQ type and index in irq_info[].  No-op when the IRQ
+ * has no valid channel.
+ */
+static void shutdown_pirq(unsigned int irq)
+{
+	struct evtchn_close close;
+	int evtchn = evtchn_from_irq(irq);
+
+	if (VALID_EVTCHN(evtchn)) {
+		mask_evtchn(evtchn);
+
+		close.port = evtchn;
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+			BUG();
+
+		bind_evtchn_to_cpu(evtchn, 0);
+		evtchn_to_irq[evtchn] = -1;
+		irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
+	}
+}
|
|
+
|
|
+static void enable_pirq(unsigned int irq) /* enable hook: delegates to startup_pirq() and ignores its return value. */
|
|
+{
|
|
+ startup_pirq(irq);
|
|
+}
|
|
+
|
|
+static void disable_pirq(unsigned int irq) /* disable hook: intentionally empty. */
|
|
+{
|
|
+}
|
|
+
|
|
+/*
+ * ack hook for physical IRQs: call move_native_irq() for @irq, then mask and
+ * clear the event channel behind it, if any.
+ */
+static void ack_pirq(unsigned int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	move_native_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	mask_evtchn(evtchn);
+	clear_evtchn(evtchn);
+}
|
|
+
|
|
+/*
+ * end hook for physical IRQs: shut the IRQ down when it is both disabled and
+ * pending; otherwise unmask/notify its event channel, if it has one.
+ */
+static void end_pirq(unsigned int irq)
+{
+	const unsigned int dead = IRQ_DISABLED|IRQ_PENDING;
+	int evtchn = evtchn_from_irq(irq);
+
+	if ((irq_desc[irq].status & dead) == dead) {
+		shutdown_pirq(irq);
+		return;
+	}
+
+	if (VALID_EVTCHN(evtchn))
+		pirq_unmask_and_notify(evtchn, irq);
+}
|
|
+
|
|
+static struct hw_interrupt_type pirq_type = {
|
|
+ .typename = "Phys-irq",
|
|
+ .startup = startup_pirq,
|
|
+ .shutdown = shutdown_pirq,
|
|
+ .enable = enable_pirq,
|
|
+ .disable = disable_pirq,
|
|
+ .ack = ack_pirq,
|
|
+ .end = end_pirq,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_affinity_irq,
|
|
+#endif
|
|
+ .retrigger = resend_irq_on_evtchn,
|
|
+};
|
|
+
|
|
+/*
+ * Return 1 if Xen reports @irq as shared (XENIRQSTAT_shared), 0 otherwise.
+ * Also returns 0 when not running on Xen or when the status query fails.
+ */
+int irq_ignore_unhandled(unsigned int irq)
+{
+	struct physdev_irq_status_query irq_status = { .irq = irq };
+
+	if (!is_running_on_xen() ||
+	    HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
+		return 0;
+
+	return (irq_status.flags & XENIRQSTAT_shared) ? 1 : 0;
+}
|
|
+
|
|
+/* Notify the remote end of the event channel behind @irq, if it has one. */
+void notify_remote_via_irq(int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (!VALID_EVTCHN(evtchn))
+		return;
+
+	notify_remote_via_evtchn(evtchn);
+}
|
|
+EXPORT_SYMBOL_GPL(notify_remote_via_irq);
|
|
+
|
|
+int irq_to_evtchn_port(int irq) /* Exported wrapper around evtchn_from_irq(); no validity check is made on the result. */
|
|
+{
|
|
+ return evtchn_from_irq(irq);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(irq_to_evtchn_port);
|
|
+
|
|
+void mask_evtchn(int port) /* Set the mask bit of port in the shared-info evtchn_mask bitmap. */
|
|
+{
|
|
+ shared_info_t *s = HYPERVISOR_shared_info;
|
|
+ synch_set_bit(port, s->evtchn_mask);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(mask_evtchn);
|
|
+
|
|
+void unmask_evtchn(int port) /* Unmask port; must be called with interrupts disabled (enforced by the BUG_ON below). */
|
|
+{
|
|
+ shared_info_t *s = HYPERVISOR_shared_info;
|
|
+ unsigned int cpu = smp_processor_id();
|
|
+ vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
|
|
+
|
|
+ BUG_ON(!irqs_disabled());
|
|
+
|
|
+ /* Slow path (hypercall) if this is a non-local port. */
|
|
+ if (unlikely(cpu != cpu_from_evtchn(port))) {
|
|
+ struct evtchn_unmask unmask = { .port = port };
|
|
+ VOID(HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask));
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ synch_clear_bit(port, s->evtchn_mask); /* fast path: clear the mask bit directly in shared info */
|
|
+
|
|
+ /* Did we miss an interrupt 'edge'? Re-fire if so. */
|
|
+ if (synch_test_bit(port, s->evtchn_pending) &&
|
|
+ !synch_test_and_set_bit(port / BITS_PER_LONG,
|
|
+ &vcpu_info->evtchn_pending_sel))
|
|
+ vcpu_info->evtchn_upcall_pending = 1; /* raised only when this call newly set the selector bit */
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(unmask_evtchn);
|
|
+
|
|
+/* Mask every event channel that cpu_from_evtchn() attributes to the current CPU. */
+void disable_all_local_evtchn(void)
+{
+	unsigned int self = smp_processor_id();
+	shared_info_t *s = HYPERVISOR_shared_info;
+	unsigned int port;
+
+	for (port = 0; port < NR_EVENT_CHANNELS; ++port) {
+		if (cpu_from_evtchn(port) != self)
+			continue;
+		synch_set_bit(port, &s->evtchn_mask[0]);
+	}
+}
|
|
+
|
|
+/*
+ * For every VIRQ that has an IRQ recorded for @cpu, obtain a fresh event
+ * channel from Xen, record the new IRQ <-> channel mapping, associate the
+ * channel with @cpu and unmask it.  The IRQ's irq_info[] entry must hold the
+ * VIRQ type and index with a zero channel field on entry.
+ */
+static void restore_cpu_virqs(unsigned int cpu)
+{
+	int virq;
+
+	for (virq = 0; virq < NR_VIRQS; virq++) {
+		struct evtchn_bind_virq bind_virq;
+		int evtchn;
+		int irq = per_cpu(virq_to_irq, cpu)[virq];
+
+		if (irq == -1)
+			continue;
+
+		BUG_ON(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0));
+
+		/* Get a new binding from Xen. */
+		bind_virq.virq = virq;
+		bind_virq.vcpu = cpu;
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq) != 0)
+			BUG();
+		evtchn = bind_virq.port;
+
+		/* Record the new mapping. */
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+		bind_evtchn_to_cpu(evtchn, cpu);
+
+		/* Ready for use. */
+		unmask_evtchn(evtchn);
+	}
+}
|
|
+
|
|
+/*
+ * For every IPI that has an IRQ recorded for @cpu, obtain a fresh event
+ * channel from Xen, record the new IRQ <-> channel mapping, associate the
+ * channel with @cpu and unmask it.  The IRQ's irq_info[] entry must hold the
+ * IPI type and index with a zero channel field on entry.
+ */
+static void restore_cpu_ipis(unsigned int cpu)
+{
+	int ipi;
+
+	for (ipi = 0; ipi < NR_IPIS; ipi++) {
+		struct evtchn_bind_ipi bind_ipi;
+		int evtchn;
+		int irq = per_cpu(ipi_to_irq, cpu)[ipi];
+
+		if (irq == -1)
+			continue;
+
+		BUG_ON(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0));
+
+		/* Get a new binding from Xen. */
+		bind_ipi.vcpu = cpu;
+		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+						&bind_ipi) != 0)
+			BUG();
+		evtchn = bind_ipi.port;
+
+		/* Record the new mapping. */
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+		bind_evtchn_to_cpu(evtchn, cpu);
+
+		/* Ready for use. */
+		unmask_evtchn(evtchn);
+	}
+}
|
|
+
|
|
+/*
+ * Rebuild the event-channel state: reset the channel-to-CPU bookkeeping,
+ * re-register the pirq_needs_eoi page with Xen when that mode is active,
+ * mask every channel, drop every IRQ <-> channel mapping, and finally
+ * re-create the VIRQ and IPI bindings of every possible CPU.
+ *
+ * NOTE(review): the PIRQ check below requires irq_info[] to equal
+ * IRQ_UNBOUND, yet shutdown_pirq() and evtchn_register_pirq() leave the
+ * entry typed IRQT_PIRQ; confirm that such IRQs cannot exist when this runs.
+ */
+void irq_resume(void)
+{
+	const u32 evtchn_field = (1U << _EVTCHN_BITS) - 1;
+	unsigned int cpu, irq, evtchn;
+
+	init_evtchn_cpu_bindings();
+
+	if (pirq_eoi_does_unmask) {
+		struct physdev_pirq_eoi_gmfn eoi_gmfn;
+
+		eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
+		if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn))
+			BUG();
+	}
+
+	/* New event-channel space is not 'live' yet. */
+	evtchn = 0;
+	while (evtchn < NR_EVENT_CHANNELS)
+		mask_evtchn(evtchn++);
+
+	/* Check that no PIRQs are still bound. */
+	for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++)
+		BUG_ON(irq_info[irq] != IRQ_UNBOUND);
+
+	/* No IRQ <-> event-channel mappings. */
+	for (irq = 0; irq < NR_IRQS; irq++)
+		irq_info[irq] &= ~evtchn_field;
+	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+		evtchn_to_irq[evtchn] = -1;
+
+	for_each_possible_cpu(cpu) {
+		restore_cpu_virqs(cpu);
+		restore_cpu_ipis(cpu);
+	}
+}
|
|
+
|
|
+#if defined(CONFIG_X86_IO_APIC)
|
|
+#define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE))
|
|
+#elif defined(CONFIG_X86)
|
|
+#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < 16)
|
|
+#else
|
|
+#define identity_mapped_irq(irq) (1)
|
|
+#endif
|
|
+
|
|
+/*
+ * Mark @irq as a physical IRQ (index == @irq, no event channel) and give it
+ * the pirq_type chip, unless it is identity mapped or already has a binding
+ * type.  @irq must lie inside the PIRQ range.
+ */
+void evtchn_register_pirq(int irq)
+{
+	BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS);
+
+	if (!identity_mapped_irq(irq) && type_from_irq(irq) == IRQT_UNBOUND) {
+		irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
+		irq_desc[irq].chip = &pirq_type;
+	}
+}
|
|
+
|
|
+/*
+ * Create, verify or remove the association between a guest IRQ and a Xen
+ * PIRQ.
+ *
+ *  - @irq < 0: allocate the highest-numbered IRQ of the PIRQ range that is
+ *    not identity mapped and whose index field is zero, and bind it to
+ *    @xen_pirq.
+ *  - @irq >= 0 and @xen_pirq == 0: remove the mapping of @irq.
+ *  - otherwise: check that @irq is already mapped to @xen_pirq.
+ *
+ * Returns the IRQ on success (0 for a removal), -ENOSPC when no IRQ is free
+ * and -EINVAL for an invalid PIRQ or a mismatching/absent mapping.
+ */
+int evtchn_map_pirq(int irq, int xen_pirq)
+{
+	if (irq < 0) {
+		static DEFINE_SPINLOCK(irq_alloc_lock);
+
+		/*
+		 * A zero index marks a free slot, so PIRQ 0 can never be
+		 * allocated: storing it would leave the slot typed IRQT_PIRQ
+		 * but still "free", and the next allocation would trip the
+		 * BUG_ON() below.  Values that do not fit the index field
+		 * would hit the BUG_ON() in mk_irq_info() with the lock held.
+		 * Reject both up front.
+		 */
+		if (xen_pirq <= 0 || ((unsigned int)xen_pirq >> _INDEX_BITS))
+			return -EINVAL;
+
+		irq = PIRQ_BASE + NR_PIRQS - 1;
+		spin_lock(&irq_alloc_lock);
+		do {
+			if (identity_mapped_irq(irq))
+				continue;
+			if (!index_from_irq(irq)) {
+				BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
+				irq_info[irq] = mk_irq_info(IRQT_PIRQ,
+							    xen_pirq, 0);
+				break;
+			}
+		} while (--irq >= PIRQ_BASE);
+		spin_unlock(&irq_alloc_lock);
+		if (irq < PIRQ_BASE)
+			return -ENOSPC;
+		irq_desc[irq].chip = &pirq_type;
+	} else if (!xen_pirq) {
+		if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
+			return -EINVAL;
+		irq_desc[irq].chip = &no_irq_type;
+		irq_info[irq] = IRQ_UNBOUND;
+		return 0;
+	} else if (type_from_irq(irq) != IRQT_PIRQ
+		   || index_from_irq(irq) != (unsigned int)xen_pirq) {
+		/* Specifiers match the argument types: two unsigned, one int. */
+		printk(KERN_ERR "IRQ#%d is already mapped to %u:%u - "
+				"cannot map to PIRQ#%d\n",
+		       irq, type_from_irq(irq), index_from_irq(irq), xen_pirq);
+		return -EINVAL;
+	}
+	return index_from_irq(irq) ? irq : -EINVAL;
+}
|
|
+
|
|
+/*
+ * Translate @irq to its Xen PIRQ number: @irq itself when identity mapped,
+ * otherwise the index stored in irq_info[], which must be of type IRQT_PIRQ.
+ */
+int evtchn_get_xen_pirq(int irq)
+{
+	if (!identity_mapped_irq(irq)) {
+		BUG_ON(type_from_irq(irq) != IRQT_PIRQ);
+		return index_from_irq(irq);
+	}
+
+	return irq;
+}
|
|
+
|
|
+/*
+ * Boot-time initialisation of the event-channel IRQ layer: reset the
+ * channel-to-CPU bookkeeping, allocate the pirq_needs_eoi bitmap and offer
+ * it to Xen (setting pirq_eoi_does_unmask if Xen accepts it), mask all
+ * channels, mark every IRQ unbound, and set up the irq_desc entries of the
+ * dynamic range and of the identity-mapped part of the physical range.
+ */
+void __init xen_init_IRQ(void)
+{
+	struct physdev_pirq_eoi_gmfn eoi_gmfn;
+	unsigned int i;
+
+	init_evtchn_cpu_bindings();
+
+	pirq_needs_eoi = alloc_bootmem_pages(sizeof(unsigned long)
+		* BITS_TO_LONGS(ALIGN(NR_PIRQS, PAGE_SIZE * 8)));
+	eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
+	if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
+		pirq_eoi_does_unmask = 1;
+
+	/* No event channels are 'live' right now. */
+	for (i = 0; i < NR_EVENT_CHANNELS; i++)
+		mask_evtchn(i);
+
+	/* No IRQ -> event-channel mappings. */
+	for (i = 0; i < NR_IRQS; i++)
+		irq_info[i] = IRQ_UNBOUND;
+
+	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
+	i = DYNIRQ_BASE;
+	while (i < (DYNIRQ_BASE + NR_DYNIRQS)) {
+		irq_bindcount[i] = 0;
+
+		irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
+		irq_desc[i].action = NULL;
+		irq_desc[i].depth = 1;
+		irq_desc[i].chip = &dynirq_type;
+		i++;
+	}
+
+	/* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
+	for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) {
+		irq_bindcount[i] = 1;
+
+		/* Only identity-mapped IRQs get their descriptor set up here. */
+		if (!identity_mapped_irq(i))
+			continue;
+
+#ifdef RTC_IRQ
+		/* If not domain 0, force our RTC driver to fail its probe. */
+		if (i - PIRQ_BASE == RTC_IRQ && !is_initial_xendomain())
+			continue;
+#endif
+
+		irq_desc[i].status = IRQ_DISABLED;
+		irq_desc[i].action = NULL;
+		irq_desc[i].depth = 1;
+		irq_desc[i].chip = &pirq_type;
+	}
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/firmware.c 2007-06-22 09:08:06.000000000 +0200
|
|
@@ -0,0 +1,74 @@
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/edd.h>
|
|
+#include <video/edid.h>
|
|
+#include <xen/interface/platform.h>
|
|
+#include <asm/hypervisor.h>
|
|
+
|
|
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
|
+/*
+ * Populate the global 'edd' structure with BIOS disk information obtained
+ * from the hypervisor through XENPF_firmware_info.  Does nothing unless
+ * running as the initial Xen domain.
+ */
+void __init copy_edd(void)
+{
+	int ret;
+	struct xen_platform_op op;
+
+	if (!is_initial_xendomain())
+		return;
+
+	op.cmd = XENPF_firmware_info;
+
+	/*
+	 * Pass 1: per-disk EDD records.  Stops at the first hypercall error
+	 * or once edd.edd_info[] is full.
+	 */
+	op.u.firmware_info.type = XEN_FW_DISK_INFO;
+	for (op.u.firmware_info.index = 0;
+	     edd.edd_info_nr < EDDMAXNR;
+	     op.u.firmware_info.index++) {
+		struct edd_info *info = edd.edd_info + edd.edd_info_nr;
+
+		/* NOTE(review): presumably read by Xen as the buffer capacity - confirm. */
+		info->params.length = sizeof(info->params);
+		set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
+				     &info->params);
+		ret = HYPERVISOR_platform_op(&op);
+		if (ret)
+			break;
+
+/* Copy one same-named field from the hypercall result into *info. */
+#define C(x) info->x = op.u.firmware_info.u.disk_info.x
+		C(device);
+		C(version);
+		C(interface_support);
+		C(legacy_max_cylinder);
+		C(legacy_max_head);
+		C(legacy_sectors_per_track);
+#undef C
+
+		edd.edd_info_nr++;
+	}
+
+	/*
+	 * Pass 2: MBR signatures, again until the first hypercall error or
+	 * until edd.mbr_signature[] is full.
+	 */
+	op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
+	for (op.u.firmware_info.index = 0;
+	     edd.mbr_signature_nr < EDD_MBR_SIG_MAX;
+	     op.u.firmware_info.index++) {
+		ret = HYPERVISOR_platform_op(&op);
+		if (ret)
+			break;
+		edd.mbr_signature[edd.mbr_signature_nr++] =
+			op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
+	}
+}
|
|
+#endif
|
|
+
|
|
+/*
+ * Ask the hypervisor (XEN_FW_VBEDDC_INFO) to fill edid_info.dummy.
+ *
+ * Compiles to an empty function unless both CONFIG_FIRMWARE_EDID and
+ * CONFIG_X86 are set, and does nothing outside the initial Xen domain.
+ */
+void __init copy_edid(void)
+{
+#if defined(CONFIG_FIRMWARE_EDID) && defined(CONFIG_X86)
+	struct xen_platform_op op;
+
+	if (!is_initial_xendomain())
+		return;
+
+	op.cmd = XENPF_firmware_info;
+	op.u.firmware_info.index = 0;
+	op.u.firmware_info.type = XEN_FW_VBEDDC_INFO;
+	set_xen_guest_handle(op.u.firmware_info.u.vbeddc_info.edid,
+			     edid_info.dummy);
+	/*
+	 * On failure the buffer is filled with 0x13 bytes.
+	 * NOTE(review): presumably the pattern EDID consumers treat as
+	 * "no data available" - confirm against the users of edid_info.
+	 */
+	if (HYPERVISOR_platform_op(&op) != 0)
+		memset(edid_info.dummy, 0x13, sizeof(edid_info.dummy));
+#endif
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/gnttab.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,773 @@
|
|
+/******************************************************************************
|
|
+ * gnttab.c
|
|
+ *
|
|
+ * Granting foreign access to our memory reservation.
|
|
+ *
|
|
+ * Copyright (c) 2005-2006, Christopher Clark
|
|
+ * Copyright (c) 2004-2005, K A Fraser
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/seqlock.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <asm/synch_bitops.h>
|
|
+#include <asm/io.h>
|
|
+#include <xen/interface/memory.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <asm/gnttab_dma.h>
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+/* External tools reserve first few grant table entries. */
+#define NR_RESERVED_ENTRIES 8
+/* Link value that terminates a chain of grant references. */
+#define GNTTAB_LIST_END 0xffffffff
+/* Number of grant entries that fit into one page of the shared table. */
+#define ENTRIES_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
+
+/* Array of pages holding the per-reference "next" links (see gnttab_entry()). */
+static grant_ref_t **gnttab_list;
+/* Number of grant table frames currently in use. */
+static unsigned int nr_grant_frames;
+/* Frame limit sampled from the hypervisor in gnttab_init(). */
+static unsigned int boot_max_nr_grant_frames;
+/*
+ * Length and first element of the global free list.  After initialisation
+ * both are only modified with gnttab_list_lock held.
+ */
+static int gnttab_free_count;
+static grant_ref_t gnttab_free_head;
+static DEFINE_SPINLOCK(gnttab_list_lock);
+
+/* The grant table itself, indexed by grant reference. */
+static struct grant_entry *shared;
+
+/* Singly linked list of callbacks waiting for free grant references. */
+static struct gnttab_free_callback *gnttab_free_callback_list;
+
+static int gnttab_expand(unsigned int req_entries);
+
+/* Number of free-list links stored per page of gnttab_list. */
+#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+/* The free-list link slot that belongs to grant reference 'entry'. */
+#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
+
+/* Pages of free-list links needed to cover 'grant_frames' table frames. */
+#define nr_freelist_frames(grant_frames) \
+	(((grant_frames) * ENTRIES_PER_GRANT_FRAME + RPP - 1) / RPP)
|
|
+
|
|
+/*
+ * Detach 'count' grant references from the head of the global free list.
+ *
+ * If fewer than 'count' references are free, the table is first grown via
+ * gnttab_expand(); its negative error code is returned on failure.
+ *
+ * On success the first reference of the detached chain is returned.  The
+ * chain is linked through gnttab_entry() and terminated with
+ * GNTTAB_LIST_END.
+ *
+ * NOTE(review): the code below detaches the list head even for count == 0
+ * (without adjusting gnttab_free_count), so callers are expected to pass
+ * count >= 1.
+ */
+static int get_free_entries(int count)
+{
+	unsigned long flags;
+	int ref, rc;
+	grant_ref_t head;
+
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+
+	if ((gnttab_free_count < count) &&
+	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
+		spin_unlock_irqrestore(&gnttab_list_lock, flags);
+		return rc;
+	}
+
+	ref = head = gnttab_free_head;
+	gnttab_free_count -= count;
+	/* Advance 'head' to the last reference of the chain being detached. */
+	while (count-- > 1)
+		head = gnttab_entry(head);
+	/* Cut the chain: the remainder stays free, the detached part is terminated. */
+	gnttab_free_head = gnttab_entry(head);
+	gnttab_entry(head) = GNTTAB_LIST_END;
+
+	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+
+	return ref;
+}
+
+/* Convenience form: detach a single grant reference. */
+#define get_free_entry() get_free_entries(1)
|
|
+
|
|
+/*
+ * Run every queued callback whose requested number of references is now
+ * available.
+ *
+ * The pending list is detached first and then walked: satisfied callbacks
+ * are dequeued (next/queued cleared) and invoked, the others are pushed
+ * back onto gnttab_free_callback_list.  All callers in this file reach
+ * this function with gnttab_list_lock held.
+ */
+static void do_free_callbacks(void)
+{
+	struct gnttab_free_callback *callback, *next;
+
+	callback = gnttab_free_callback_list;
+	gnttab_free_callback_list = NULL;
+
+	while (callback != NULL) {
+		/* Save the successor first: both branches overwrite ->next. */
+		next = callback->next;
+		if (gnttab_free_count >= callback->count) {
+			callback->next = NULL;
+			callback->queued = 0;
+			callback->fn(callback->arg);
+		} else {
+			callback->next = gnttab_free_callback_list;
+			gnttab_free_callback_list = callback;
+		}
+		callback = next;
+	}
+}
|
|
+
|
|
+/* Process the pending free callbacks, if any are queued at all. */
+static inline void check_free_callbacks(void)
+{
+	if (unlikely(gnttab_free_callback_list))
+		do_free_callbacks();
+}
|
|
+
|
|
+/*
+ * Return a single grant reference to the head of the global free list and
+ * give waiting callbacks a chance to run.
+ */
+static void put_free_entry(grant_ref_t ref)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+	gnttab_entry(ref) = gnttab_free_head;
+	gnttab_free_head = ref;
+	gnttab_free_count++;
+	check_free_callbacks();
+	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
|
|
+
|
|
+/*
|
|
+ * Public grant-issuing interface functions
|
|
+ */
|
|
+
|
|
+/*
+ * Allocate a grant reference and use it to grant 'domid' access to 'frame'.
+ *
+ * 'flags' is OR-ed into the entry next to GTF_permit_access and must not
+ * contain GTF_accept_transfer, GTF_reading or GTF_writing (BUG_ON).
+ *
+ * Returns the new grant reference, or -ENOSPC when no reference could be
+ * obtained (whatever error get_free_entry() reported).
+ */
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+				int flags)
+{
+	int ref;
+
+	if (unlikely((ref = get_free_entry()) < 0))
+		return -ENOSPC;
+
+	shared[ref].frame = frame;
+	shared[ref].domid = domid;
+	/* Order the frame/domid stores before the flags store below. */
+	wmb();
+	BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
+	shared[ref].flags = GTF_permit_access | flags;
+
+	return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
|
|
+
|
|
+/*
+ * Fill in the caller-supplied grant reference 'ref' so that 'domid' may
+ * access 'frame'.  Same entry layout and flag restrictions as
+ * gnttab_grant_foreign_access(), but no reference is allocated here.
+ */
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+				     unsigned long frame, int flags)
+{
+	shared[ref].frame = frame;
+	shared[ref].domid = domid;
+	/* Order the frame/domid stores before the flags store below. */
+	wmb();
+	BUG_ON(flags & (GTF_accept_transfer | GTF_reading | GTF_writing));
+	shared[ref].flags = GTF_permit_access | flags;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);
|
|
+
|
|
+
|
|
+/*
+ * Report whether grant entry 'ref' currently carries GTF_reading and/or
+ * GTF_writing.  Returns those flag bits (non-zero if either is set).
+ */
+int gnttab_query_foreign_access(grant_ref_t ref)
+{
+	const u16 entry_flags = shared[ref].flags;
+
+	return entry_flags & (GTF_reading | GTF_writing);
+}
+EXPORT_SYMBOL_GPL(gnttab_query_foreign_access);
|
|
+
|
|
+/*
+ * Try to revoke the access granted through 'ref' by atomically clearing
+ * the entry's flags.
+ *
+ * Returns 1 when the flags were cleared, 0 (after logging a warning) when
+ * the entry has GTF_reading or GTF_writing set.  The grant reference
+ * itself is not returned to the free list.
+ */
+int gnttab_end_foreign_access_ref(grant_ref_t ref)
+{
+	u16 flags, nflags;
+
+	nflags = shared[ref].flags;
+	do {
+		if ((flags = nflags) & (GTF_reading|GTF_writing)) {
+			printk(KERN_DEBUG "WARNING: g.e. still in use!\n");
+			return 0;
+		}
+		/* Retry with the freshly observed value if the flags changed under us. */
+	} while ((nflags = synch_cmpxchg_subword(&shared[ref].flags, flags, 0)) !=
+		 flags);
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);
|
|
+
|
|
+/*
+ * Revoke the access granted through 'ref'.  On success the reference goes
+ * back to the free list and 'page' (if non-zero) is freed.  If the entry
+ * is still in use, both the reference and the page are left alone and a
+ * warning is logged.
+ */
+void gnttab_end_foreign_access(grant_ref_t ref, unsigned long page)
+{
+	if (!gnttab_end_foreign_access_ref(ref)) {
+		/* XXX This needs to be fixed so that the ref and page are
+		   placed on a list to be freed up later. */
+		printk(KERN_DEBUG
+		       "WARNING: leaking g.e. and page still in use!\n");
+		return;
+	}
+
+	put_free_entry(ref);
+	if (page != 0)
+		free_page(page);
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);
|
|
+
|
|
+/*
+ * Allocate a grant reference and set it up to accept a page transfer from
+ * 'domid' for 'pfn'.  Returns the reference, or -ENOSPC if none could be
+ * obtained.
+ */
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn)
+{
+	int ref = get_free_entry();
+
+	if (unlikely(ref < 0))
+		return -ENOSPC;
+
+	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
+	return ref;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer);
|
|
+
|
|
+/*
+ * Fill in the caller-supplied grant reference 'ref' so that it accepts a
+ * transfer from 'domid'; 'pfn' is stored in the entry's frame field.
+ */
+void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+				       unsigned long pfn)
+{
+	shared[ref].frame = pfn;
+	shared[ref].domid = domid;
+	/* Order the frame/domid stores before the flags store below. */
+	wmb();
+	shared[ref].flags = GTF_accept_transfer;
+}
+EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref);
|
|
+
|
|
+/*
+ * Finish (or cancel) a transfer set up on grant reference 'ref'.
+ *
+ * Returns 0 if the transfer had not been committed and the entry could be
+ * reclaimed; otherwise busy-waits for GTF_transfer_completed and returns
+ * the frame number found in the entry (which must be non-zero, BUG_ON).
+ * The reference is not returned to the free list here.
+ */
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+{
+	unsigned long frame;
+	u16 flags;
+
+	/*
+	 * If a transfer is not even yet started, try to reclaim the grant
+	 * reference and return failure (== 0).
+	 */
+	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
+		if (synch_cmpxchg_subword(&shared[ref].flags, flags, 0) == flags)
+			return 0;
+		cpu_relax();
+	}
+
+	/* If a transfer is in progress then wait until it is completed. */
+	while (!(flags & GTF_transfer_completed)) {
+		flags = shared[ref].flags;
+		cpu_relax();
+	}
+
+	/* Read the frame number /after/ reading completion status. */
+	rmb();
+	frame = shared[ref].frame;
+	BUG_ON(frame == 0);
+
+	return frame;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref);
|
|
+
|
|
+/*
+ * Like gnttab_end_foreign_transfer_ref(), but additionally returns 'ref'
+ * to the free list.  The reference is freed regardless of whether a frame
+ * was received (non-zero return) or the transfer was cancelled (0).
+ */
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref)
+{
+	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
+	put_free_entry(ref);
+	return frame;
+}
+EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer);
|
|
+
|
|
+/* Exported wrapper: return one grant reference to the global free list. */
+void gnttab_free_grant_reference(grant_ref_t ref)
+{
+	put_free_entry(ref);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);
|
|
+
|
|
+/*
+ * Return a whole private chain of grant references, starting at 'head'
+ * and terminated by GNTTAB_LIST_END, to the global free list.  An empty
+ * chain (head == GNTTAB_LIST_END) is a no-op.
+ */
+void gnttab_free_grant_references(grant_ref_t head)
+{
+	grant_ref_t tail;
+	unsigned long flags;
+	int nr_refs;
+
+	if (head == GNTTAB_LIST_END)
+		return;
+
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+
+	/* Find the last element of the chain, counting its length on the way. */
+	for (tail = head, nr_refs = 1;
+	     gnttab_entry(tail) != GNTTAB_LIST_END;
+	     tail = gnttab_entry(tail))
+		nr_refs++;
+
+	/* Splice the complete chain in front of the global free list. */
+	gnttab_entry(tail) = gnttab_free_head;
+	gnttab_free_head = head;
+	gnttab_free_count += nr_refs;
+	check_free_callbacks();
+
+	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
|
|
+
|
|
+/*
+ * Reserve 'count' grant references as a private chain.
+ *
+ * On success 0 is returned and *head holds the first reference of the
+ * chain, to be consumed with gnttab_claim_grant_reference() and released
+ * with gnttab_free_grant_references().  Returns -ENOSPC if the references
+ * could not be obtained; *head is left untouched in that case.
+ *
+ * A request for zero references succeeds with an empty chain
+ * (*head == GNTTAB_LIST_END).
+ */
+int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
+{
+	int h;
+
+	/*
+	 * get_free_entries() always detaches the current free-list head, even
+	 * for count == 0.  That would hand out one reference that is not
+	 * accounted for in gnttab_free_count, or index past gnttab_list when
+	 * the free list is empty.  Short-circuit the empty request instead.
+	 */
+	if (count == 0) {
+		*head = GNTTAB_LIST_END;
+		return 0;
+	}
+
+	h = get_free_entries(count);
+	if (h < 0)
+		return -ENOSPC;
+
+	*head = h;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
|
|
+
|
|
+/* Return non-zero if the private chain at *private_head has no references left. */
+int gnttab_empty_grant_references(const grant_ref_t *private_head)
+{
+	return (*private_head == GNTTAB_LIST_END);
+}
+EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);
|
|
+
|
|
+/*
+ * Take the first reference off the private chain at *private_head and
+ * advance the head to its successor.  Returns the claimed reference, or
+ * -ENOSPC when the chain is empty.  No locking is performed here.
+ */
+int gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+	const grant_ref_t claimed = *private_head;
+
+	if (unlikely(claimed == GNTTAB_LIST_END))
+		return -ENOSPC;
+
+	*private_head = gnttab_entry(claimed);
+	return claimed;
+}
+EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);
|
|
+
|
|
+/*
+ * Push 'release' back onto the front of the private chain at
+ * *private_head (the inverse of gnttab_claim_grant_reference()).
+ * No locking is performed here.
+ */
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+				    grant_ref_t release)
+{
+	gnttab_entry(release) = *private_head;
+	*private_head = release;
+}
+EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);
|
|
+
|
|
+/*
+ * Queue 'callback' so that fn(arg) is invoked once at least 'count' grant
+ * references are free.  A callback that is already queued is left
+ * untouched.  The pending list is checked immediately, so fn may run
+ * before this function returns.
+ */
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+				  void (*fn)(void *), void *arg, u16 count)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+	if (!callback->queued) {
+		callback->fn = fn;
+		callback->arg = arg;
+		callback->count = count;
+		callback->queued = 1;
+		/* Push onto the front of the pending list. */
+		callback->next = gnttab_free_callback_list;
+		gnttab_free_callback_list = callback;
+		check_free_callbacks();
+	}
+	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_request_free_callback);
|
|
+
|
|
+/*
+ * Remove 'callback' from the pending list if it is queued there and clear
+ * its queued flag.  Does nothing when the callback is not on the list.
+ */
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+	struct gnttab_free_callback **link;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gnttab_list_lock, flags);
+	link = &gnttab_free_callback_list;
+	while (*link) {
+		if (*link == callback) {
+			/* Unlink by redirecting the pointer that led here. */
+			*link = callback->next;
+			callback->queued = 0;
+			break;
+		}
+		link = &(*link)->next;
+	}
+	spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
|
|
+
|
|
+/*
+ * Extend the free-list bookkeeping by 'more_frames' grant table frames.
+ *
+ * Allocates any additional link pages needed, chains the new references
+ * together, puts them in front of the free list and bumps
+ * nr_grant_frames.  Returns 0 on success or -ENOMEM, in which case the
+ * pages allocated by this call are freed again.
+ *
+ * Reached from gnttab_expand(), i.e. with gnttab_list_lock held, hence
+ * the GFP_ATOMIC allocations.
+ */
+static int grow_gnttab_list(unsigned int more_frames)
+{
+	unsigned int new_nr_grant_frames, extra_entries, i;
+	unsigned int nr_glist_frames, new_nr_glist_frames;
+
+	new_nr_grant_frames = nr_grant_frames + more_frames;
+	extra_entries = more_frames * ENTRIES_PER_GRANT_FRAME;
+
+	nr_glist_frames = nr_freelist_frames(nr_grant_frames);
+	new_nr_glist_frames = nr_freelist_frames(new_nr_grant_frames);
+	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
+		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
+		if (!gnttab_list[i])
+			goto grow_nomem;
+	}
+
+	/* Link each new reference to its successor ... */
+	for (i = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
+	     i < ENTRIES_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
+		gnttab_entry(i) = i + 1;
+
+	/* ... and the last new reference to the old free-list head. */
+	gnttab_entry(i) = gnttab_free_head;
+	gnttab_free_head = ENTRIES_PER_GRANT_FRAME * nr_grant_frames;
+	gnttab_free_count += extra_entries;
+
+	nr_grant_frames = new_nr_grant_frames;
+
+	check_free_callbacks();
+
+	return 0;
+
+grow_nomem:
+	/*
+	 * 'i' is the index whose allocation failed (a NULL entry); walk back
+	 * down to the first page allocated by this call.  'i' is unsigned, so
+	 * the loop relies on nr_glist_frames being at least 1.
+	 */
+	for ( ; i >= nr_glist_frames; i--)
+		free_page((unsigned long) gnttab_list[i]);
+	return -ENOMEM;
+}
|
|
+
|
|
+/*
+ * Ask the hypervisor (GNTTABOP_query_size) how many grant table frames
+ * this domain may use.  Falls back to 4 when the query fails.
+ */
+static unsigned int __max_nr_grant_frames(void)
+{
+	struct gnttab_query_size query;
+	int rc;
+
+	query.dom = DOMID_SELF;
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
+	if ((rc < 0) || (query.status != GNTST_okay))
+		return 4; /* Legacy max supported number of frames */
+
+	return query.max_nr_frames;
+}
|
|
+
|
|
+/*
+ * Current frame limit: the hypervisor's present answer, capped at the
+ * value recorded in boot_max_nr_grant_frames.
+ */
+static inline unsigned int max_nr_grant_frames(void)
+{
+	const unsigned int hyp_limit = __max_nr_grant_frames();
+
+	return (hyp_limit > boot_max_nr_grant_frames) ?
+		boot_max_nr_grant_frames : hyp_limit;
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+
|
|
+static DEFINE_SEQLOCK(gnttab_dma_lock);
|
|
+
|
|
+#ifdef CONFIG_X86
|
|
+/*
+ * apply_to_page_range() callback: map the next machine frame of the list
+ * into the PTE at 'addr'.
+ *
+ * 'data' points to an 'unsigned long *' cursor into the frame list; the
+ * cursor is advanced by one on every call, so the caller's pointer is
+ * modified.
+ */
+static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+		      unsigned long addr, void *data)
+{
+	unsigned long **frames = (unsigned long **)data;
+
+	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
+	(*frames)++;
+	return 0;
+}
|
|
+
|
|
+/* apply_to_page_range() callback: clear the PTE at 'addr'.  'data' is unused. */
+static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+			unsigned long addr, void *data)
+{
+
+	set_pte_at(&init_mm, addr, pte, __pte(0));
+	return 0;
+}
|
|
+
|
|
+/*
+ * Reserve a kernel virtual area large enough for the maximum number of
+ * grant table frames and return its start address.  Allocation failure is
+ * fatal (BUG_ON).  The 'frames' argument is not used by this
+ * implementation.
+ */
+void *arch_gnttab_alloc_shared(unsigned long *frames)
+{
+	struct vm_struct *area;
+	area = alloc_vm_area(PAGE_SIZE * max_nr_grant_frames());
+	BUG_ON(area == NULL);
+	return area->addr;
+}
|
|
+#endif /* CONFIG_X86 */
|
|
+
|
|
+/*
+ * Have the hypervisor set up grant table frames 0..end_idx and, on x86,
+ * map them at 'shared'.
+ *
+ * 'start_idx' is not used by this variant: the table is always set up
+ * from frame 0.  Returns 0 on success, -ENOMEM if the temporary frame list
+ * cannot be allocated, or -ENOSYS if the hypercall is unavailable.  Any
+ * other hypercall failure is fatal (BUG_ON).
+ */
+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+{
+	struct gnttab_setup_table setup;
+	unsigned long *frames;
+	unsigned int nr_gframes = end_idx + 1;
+	int rc;
+
+	frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC);
+	if (!frames)
+		return -ENOMEM;
+
+	setup.dom = DOMID_SELF;
+	setup.nr_frames = nr_gframes;
+	set_xen_guest_handle(setup.frame_list, frames);
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
+	if (rc == -ENOSYS) {
+		kfree(frames);
+		return -ENOSYS;
+	}
+
+	BUG_ON(rc || setup.status != GNTST_okay);
+
+	/* The virtual area is obtained once and reused on later calls. */
+	if (shared == NULL)
+		shared = arch_gnttab_alloc_shared(frames);
+
+#ifdef CONFIG_X86
+	/* map_pte_fn() advances 'frames' once per mapped page ... */
+	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+				 PAGE_SIZE * nr_gframes,
+				 map_pte_fn, &frames);
+	BUG_ON(rc);
+	/* ... so rewind it before handing it to kfree(). */
+	frames -= nr_gframes; /* adjust after map_pte_fn() */
+#endif /* CONFIG_X86 */
+
+	kfree(frames);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Destructor installed via SetPageForeign() by gnttab_copy_grant_page().
+ * Only order-0 pages are supported (BUG_ON).  Clears the foreign marker,
+ * resets the page's counters and drops a reference.
+ */
+static void gnttab_page_free(struct page *page, unsigned int order)
+{
+	BUG_ON(order);
+	ClearPageForeign(page);
+	gnttab_reset_grant_page(page);
+	put_page(page);
+}
|
|
+
|
|
+/*
+ * Must not be called with IRQs off.  This should only be used on the
+ * slow path.
+ *
+ * Copy a foreign granted page to local memory.
+ *
+ * The contents of *pagep are copied into a freshly allocated page, the
+ * grant mapping identified by 'ref' is replaced by that new page via
+ * GNTTABOP_unmap_and_replace, and *pagep is updated to point to the new
+ * page.
+ *
+ * Returns 0 on success, -ENOENT if no reference on the page could be
+ * taken, -ENOMEM if no new page could be allocated, or -EBUSY if
+ * page_mapped() reports the page as mapped (see the DMA check below).
+ */
+int gnttab_copy_grant_page(grant_ref_t ref, struct page **pagep)
+{
+	struct gnttab_unmap_and_replace unmap;
+	mmu_update_t mmu;
+	struct page *page;
+	struct page *new_page;
+	void *new_addr;
+	void *addr;
+	paddr_t pfn;
+	maddr_t mfn;
+	maddr_t new_mfn;
+	int err;
+
+	page = *pagep;
+	if (!get_page_unless_zero(page))
+		return -ENOENT;
+
+	err = -ENOMEM;
+	new_page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+	if (!new_page)
+		goto out;
+
+	new_addr = page_address(new_page);
+	addr = page_address(page);
+	memcpy(new_addr, addr, PAGE_SIZE);
+
+	pfn = page_to_pfn(page);
+	mfn = pfn_to_mfn(pfn);
+	new_mfn = virt_to_mfn(new_addr);
+
+	/* Pairs with the read_seqbegin()/read_seqretry() loop in __gnttab_dma_map_page(). */
+	write_seqlock(&gnttab_dma_lock);
+
+	/* Make seq visible before checking page_mapped. */
+	smp_mb();
+
+	/* Has the page been DMA-mapped? */
+	if (unlikely(page_mapped(page))) {
+		write_sequnlock(&gnttab_dma_lock);
+		put_page(new_page);
+		err = -EBUSY;
+		goto out;
+	}
+
+	/* The original pfn is now backed by the machine frame of the copy. */
+	if (!xen_feature(XENFEAT_auto_translated_physmap))
+		set_phys_to_machine(pfn, new_mfn);
+
+	gnttab_set_replace_op(&unmap, (unsigned long)addr,
+			      (unsigned long)new_addr, ref);
+
+	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+					&unmap, 1);
+	BUG_ON(err);
+	BUG_ON(unmap.status != GNTST_okay);
+
+	write_sequnlock(&gnttab_dma_lock);
+
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		/* The new page's own pfn no longer has a machine frame behind it. */
+		set_phys_to_machine(page_to_pfn(new_page), INVALID_P2M_ENTRY);
+
+		/* Update the machine-to-physical entry of new_mfn to the original pfn. */
+		mmu.ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+		mmu.val = pfn;
+		err = HYPERVISOR_mmu_update(&mmu, 1, NULL, DOMID_SELF);
+		BUG_ON(err);
+	}
+
+	/* The new page inherits the identity of the old one for the caller. */
+	new_page->mapping = page->mapping;
+	new_page->index = page->index;
+	set_bit(PG_foreign, &new_page->flags);
+	*pagep = new_page;
+
+	SetPageForeign(page, gnttab_page_free);
+	page->mapping = NULL;
+
+out:
+	/* Drop the reference taken by get_page_unless_zero() above. */
+	put_page(page);
+	return err;
+}
+EXPORT_SYMBOL_GPL(gnttab_copy_grant_page);
|
|
+
|
|
+/* Reinitialise the reference count and the map count of 'page'. */
+void gnttab_reset_grant_page(struct page *page)
+{
+	init_page_count(page);
+	reset_page_mapcount(page);
+}
+EXPORT_SYMBOL_GPL(gnttab_reset_grant_page);
|
|
+
|
|
+/*
+ * Keep track of foreign pages marked as PageForeign so that we don't
+ * return them to the remote domain prematurely.
+ *
+ * PageForeign pages are pinned down by increasing their mapcount.
+ *
+ * All other pages are simply returned as is.
+ *
+ * The read side of gnttab_dma_lock is used to retry the marking if
+ * gnttab_copy_grant_page() ran concurrently.
+ */
+void __gnttab_dma_map_page(struct page *page)
+{
+	unsigned int seq;
+
+	if (!is_running_on_xen() || !PageForeign(page))
+		return;
+
+	do {
+		seq = read_seqbegin(&gnttab_dma_lock);
+
+		/* NOTE(review): presumably true once the page is backed by local memory - confirm. */
+		if (gnttab_dma_local_pfn(page))
+			break;
+
+		atomic_set(&page->_mapcount, 0);
+
+		/* Make _mapcount visible before read_seqretry. */
+		smp_mb();
+	} while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
+}
|
|
+
|
|
+/*
+ * (Re-)establish the grant table mapping for all frames currently in use.
+ * Returns -ENOSYS if the frame limit is now lower than the number of
+ * frames in use, otherwise the result of gnttab_map().
+ */
+int gnttab_resume(void)
+{
+	if (nr_grant_frames > max_nr_grant_frames())
+		return -ENOSYS;
+
+	return gnttab_map(0, nr_grant_frames - 1);
+}
|
|
+
|
|
+/*
+ * On x86, clear the PTEs covering the grant table frames currently in
+ * use.  The result of apply_to_page_range() is not checked; always
+ * returns 0.
+ */
+int gnttab_suspend(void)
+{
+#ifdef CONFIG_X86
+	apply_to_page_range(&init_mm, (unsigned long)shared,
+			    PAGE_SIZE * nr_grant_frames,
+			    unmap_pte_fn, NULL);
+#endif
+	return 0;
+}
|
|
+
|
|
+#else /* !CONFIG_XEN */
|
|
+
|
|
+#include <platform-pci.h>
|
|
+
|
|
+static unsigned long resume_frames;
|
|
+
|
|
+/*
+ * !CONFIG_XEN variant: place grant table frames start_idx..end_idx into
+ * the physmap at the MMIO range starting at 'resume_frames', one
+ * XENMEM_add_to_physmap hypercall per frame.  Any hypercall failure is
+ * fatal (BUG).  Always returns 0.
+ */
+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+{
+	struct xen_add_to_physmap xatp;
+	unsigned int i = end_idx;
+
+	/* Loop backwards, so that the first hypercall has the largest index,
+	 * ensuring that the table will grow only once.
+	 */
+	do {
+		xatp.domid = DOMID_SELF;
+		xatp.idx = i;
+		xatp.space = XENMAPSPACE_grant_table;
+		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;
+		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+			BUG();
+	} while (i-- > start_idx);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * !CONFIG_XEN variant: make sure the MMIO window for the grant table is
+ * allocated and mapped, then (re-)insert all frames currently in use.
+ *
+ * Returns 0 on success, -ENOSYS if the frame limit is lower than the
+ * number of frames in use, or -1 if the window cannot be ioremap()ed.
+ */
+int gnttab_resume(void)
+{
+	unsigned int max_nr_gframes, nr_gframes;
+
+	nr_gframes = nr_grant_frames;
+	max_nr_gframes = max_nr_grant_frames();
+	if (max_nr_gframes < nr_gframes)
+		return -ENOSYS;
+
+	/* The MMIO window is allocated once and kept for later calls. */
+	if (!resume_frames)
+		resume_frames = alloc_xen_mmio(PAGE_SIZE * max_nr_gframes);
+
+	/*
+	 * Key the mapping on 'shared' rather than on 'resume_frames'.  If an
+	 * earlier ioremap() failed, resume_frames is already set; a later call
+	 * must retry the mapping instead of skipping it and reporting success
+	 * with shared still NULL.
+	 */
+	if (shared == NULL) {
+		shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes);
+		if (shared == NULL) {
+			printk("error to ioremap gnttab share frames\n");
+			return -1;
+		}
+	}
+
+	gnttab_map(0, nr_gframes - 1);
+
+	return 0;
+}
|
|
+
|
|
+#endif /* !CONFIG_XEN */
|
|
+
|
|
+/*
+ * Grow the grant table so that at least 'req_entries' more references
+ * become available.  Returns 0 on success, -ENOSPC if the frame limit
+ * would be exceeded, or the error reported by gnttab_map() or
+ * grow_gnttab_list().
+ */
+static int gnttab_expand(unsigned int req_entries)
+{
+	unsigned int first_new, nr_new;
+	int rc;
+
+	first_new = nr_grant_frames;
+	/* Round the request up to whole frames. */
+	nr_new = ((req_entries + (ENTRIES_PER_GRANT_FRAME-1)) /
+		  ENTRIES_PER_GRANT_FRAME);
+	if (first_new + nr_new > max_nr_grant_frames())
+		return -ENOSPC;
+
+	rc = gnttab_map(first_new, first_new + nr_new - 1);
+	if (rc != 0)
+		return rc;
+
+	return grow_gnttab_list(nr_new);
+}
|
|
+
|
|
+/*
+ * Initialise the grant table: size the free-list bookkeeping for the
+ * hypervisor's frame limit, map the first frame and build the initial
+ * free list, leaving out the first NR_RESERVED_ENTRIES references.
+ *
+ * Returns 0 on success, -ENODEV when not running on Xen or when the table
+ * cannot be mapped, and -ENOMEM on allocation failure.  Every failure
+ * path releases the memory allocated here.
+ */
+int __devinit gnttab_init(void)
+{
+	int i;
+	int rc;
+	unsigned int max_nr_glist_frames, nr_glist_frames;
+	unsigned int nr_init_grefs;
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	nr_grant_frames = 1;
+	boot_max_nr_grant_frames = __max_nr_grant_frames();
+
+	/* Determine the maximum number of frames required for the
+	 * grant reference free list on the current hypervisor.
+	 */
+	max_nr_glist_frames = nr_freelist_frames(boot_max_nr_grant_frames);
+
+	gnttab_list = kmalloc(max_nr_glist_frames * sizeof(grant_ref_t *),
+			      GFP_KERNEL);
+	if (gnttab_list == NULL)
+		return -ENOMEM;
+
+	rc = -ENOMEM;
+	nr_glist_frames = nr_freelist_frames(nr_grant_frames);
+	for (i = 0; i < nr_glist_frames; i++) {
+		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
+		if (gnttab_list[i] == NULL)
+			goto ini_free;
+	}
+
+	/*
+	 * Previously this path returned directly and leaked gnttab_list and
+	 * its pages.  Here i == nr_glist_frames, so the common unwind below
+	 * releases every page allocated above.
+	 */
+	if (gnttab_resume() < 0) {
+		rc = -ENODEV;
+		goto ini_free;
+	}
+
+	nr_init_grefs = nr_grant_frames * ENTRIES_PER_GRANT_FRAME;
+
+	/* Chain all non-reserved references into the initial free list. */
+	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+		gnttab_entry(i) = i + 1;
+
+	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
+	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
+	gnttab_free_head = NR_RESERVED_ENTRIES;
+
+	return 0;
+
+ ini_free:
+	/* Free the pages allocated so far (indices i-1 down to 0). */
+	for (i--; i >= 0; i--)
+		free_page((unsigned long)gnttab_list[i]);
+	kfree(gnttab_list);
+	gnttab_list = NULL;
+	return rc;
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+core_initcall(gnttab_init);
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/hypervisor_sysfs.c 2007-07-10 09:42:30.000000000 +0200
|
|
@@ -0,0 +1,57 @@
|
|
+/*
|
|
+ * copyright (c) 2006 IBM Corporation
|
|
+ * Authored by: Mike D. Day <ncmike@us.ibm.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License version 2 as
|
|
+ * published by the Free Software Foundation.
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kobject.h>
|
|
+#include <xen/hypervisor_sysfs.h>
|
|
+#include <asm/hypervisor.h>
|
|
+
|
|
+/*
+ * sysfs 'show' dispatcher: forward the read to the show() handler of the
+ * hyp_sysfs_attr that embeds 'attr'.  Returns 0 if the attribute has no
+ * show() handler.
+ */
+static ssize_t hyp_sysfs_show(struct kobject *kobj,
+			      struct attribute *attr,
+			      char *buffer)
+{
+	struct hyp_sysfs_attr *hyp_attr =
+		container_of(attr, struct hyp_sysfs_attr, attr);
+
+	if (!hyp_attr->show)
+		return 0;
+
+	return hyp_attr->show(hyp_attr, buffer);
+}
|
|
+
|
|
+/*
+ * sysfs 'store' dispatcher: forward the write to the store() handler of
+ * the hyp_sysfs_attr that embeds 'attr'.  Returns 0 if the attribute has
+ * no store() handler.
+ */
+static ssize_t hyp_sysfs_store(struct kobject *kobj,
+			       struct attribute *attr,
+			       const char *buffer,
+			       size_t len)
+{
+	struct hyp_sysfs_attr *hyp_attr =
+		container_of(attr, struct hyp_sysfs_attr, attr);
+
+	if (!hyp_attr->store)
+		return 0;
+
+	return hyp_attr->store(hyp_attr, buffer, len);
+}
|
|
+
|
|
+/* Show/store dispatchers shared by all hypervisor sysfs attributes. */
+static struct sysfs_ops hyp_sysfs_ops = {
+	.show = hyp_sysfs_show,
+	.store = hyp_sysfs_store,
+};
+
+/* kobject type installed on hypervisor_subsys by hypervisor_subsys_init(). */
+static struct kobj_type hyp_sysfs_kobj_type = {
+	.sysfs_ops = &hyp_sysfs_ops,
+};
|
|
+
|
|
+/*
+ * Install hyp_sysfs_kobj_type as the kobject type of hypervisor_subsys.
+ * Returns -ENODEV when not running on Xen, 0 otherwise.
+ */
+static int __init hypervisor_subsys_init(void)
+{
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
+	return 0;
+}
|
|
+
|
|
+device_initcall(hypervisor_subsys_init);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/machine_kexec.c 2009-07-13 14:25:35.000000000 +0200
|
|
@@ -0,0 +1,230 @@
|
|
+/*
|
|
+ * drivers/xen/core/machine_kexec.c
|
|
+ * handle transition of Linux booting another kernel
|
|
+ */
|
|
+
|
|
+#include <linux/kexec.h>
|
|
+#include <xen/interface/kexec.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/bootmem.h>
|
|
+
|
|
+extern void machine_kexec_setup_load_arg(xen_kexec_image_t *xki,
|
|
+ struct kimage *image);
|
|
+extern int machine_kexec_setup_resources(struct resource *hypervisor,
|
|
+ struct resource *phys_cpus,
|
|
+ int nr_phys_cpus);
|
|
+extern void machine_kexec_register_resources(struct resource *res);
|
|
+
|
|
+static int __initdata xen_max_nr_phys_cpus;
|
|
+static struct resource xen_hypervisor_res;
|
|
+static struct resource *xen_phys_cpus;
|
|
+
|
|
+size_t vmcoreinfo_size_xen;
|
|
+unsigned long paddr_vmcoreinfo_xen;
|
|
+
|
|
+/*
+ * Query the hypervisor for the machine address ranges relevant to kexec
+ * and record them as resources: one "Crash note" range per physical CPU,
+ * the hypervisor's own range, the crash kernel area and the location of
+ * Xen's vmcoreinfo.  Only does anything in the initial Xen domain.
+ *
+ * On failure xen_max_nr_phys_cpus is reset to 0 (see the 'err' label).
+ *
+ * NOTE(review): the vmcoreinfo failure path with rc != -EINVAL returns
+ * directly instead of going through 'err', so xen_max_nr_phys_cpus keeps
+ * its value although machine_kexec_setup_resources() was never called.
+ * Confirm whether that asymmetry is intended.
+ */
+void __init xen_machine_kexec_setup_resources(void)
+{
+	xen_kexec_range_t range;
+	struct resource *res;
+	int k = 0;
+	int rc;
+
+	if (!is_initial_xendomain())
+		return;
+
+	/* determine maximum number of physical cpus */
+
+	/* Probe CPU indices upwards until the hypervisor rejects one. */
+	while (1) {
+		memset(&range, 0, sizeof(range));
+		range.range = KEXEC_RANGE_MA_CPU;
+		range.nr = k;
+
+		if(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
+			break;
+
+		k++;
+	}
+
+	if (k == 0)
+		return;
+
+	xen_max_nr_phys_cpus = k;
+
+	/* allocate xen_phys_cpus */
+
+	xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource));
+	BUG_ON(xen_phys_cpus == NULL);
+
+	/* fill in xen_phys_cpus with per-cpu crash note information */
+
+	for (k = 0; k < xen_max_nr_phys_cpus; k++) {
+		memset(&range, 0, sizeof(range));
+		range.range = KEXEC_RANGE_MA_CPU;
+		range.nr = k;
+
+		if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
+			goto err;
+
+		res = xen_phys_cpus + k;
+
+		memset(res, 0, sizeof(*res));
+		res->name = "Crash note";
+		res->start = range.start;
+		res->end = range.start + range.size - 1;
+		res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+	}
+
+	/* fill in xen_hypervisor_res with hypervisor machine address range */
+
+	memset(&range, 0, sizeof(range));
+	range.range = KEXEC_RANGE_MA_XEN;
+
+	if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
+		goto err;
+
+	xen_hypervisor_res.name = "Hypervisor code and data";
+	xen_hypervisor_res.start = range.start;
+	xen_hypervisor_res.end = range.start + range.size - 1;
+	xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
+
+	/* fill in crashk_res if range is reserved by hypervisor */
+
+	memset(&range, 0, sizeof(range));
+	range.range = KEXEC_RANGE_MA_CRASH;
+
+	if (HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range))
+		goto err;
+
+	if (range.size) {
+		crashk_res.start = range.start;
+		crashk_res.end = range.start + range.size - 1;
+	}
+
+	/* get physical address of vmcoreinfo */
+	memset(&range, 0, sizeof(range));
+	range.range = KEXEC_RANGE_MA_VMCOREINFO;
+
+	rc = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range);
+
+	if (rc == 0) {
+		/* Hypercall succeeded */
+		vmcoreinfo_size_xen = range.size;
+		paddr_vmcoreinfo_xen = range.start;
+
+	} else {
+		/* Hypercall failed.
+		 * Indicate not to create sysfs file by resetting globals
+		 */
+		vmcoreinfo_size_xen = 0;
+		paddr_vmcoreinfo_xen = 0;
+
+		/* The KEXEC_CMD_kexec_get_range hypercall did not implement
+		 * KEXEC_RANGE_MA_VMCOREINFO until Xen 3.3.
+		 * Do not bail out if it fails for this reason.
+		 */
+		if (rc != -EINVAL)
+			return;
+	}
+
+	if (machine_kexec_setup_resources(&xen_hypervisor_res, xen_phys_cpus,
+					  xen_max_nr_phys_cpus))
+		goto err;
+
+	return;
+
+ err:
+	/*
+	 * It isn't possible to free xen_phys_cpus this early in the
+	 * boot. Failure at this stage is unexpected and the amount of
+	 * memory is small therefore we tolerate the potential leak.
+	 */
+	xen_max_nr_phys_cpus = 0;
+	return;
+}
|
|
+
|
|
+/*
+ * Register the hypervisor range and the per-CPU crash note ranges below
+ * 'res', then let the architecture code register its own resources.
+ * Crash note resources that already have a parent are skipped.
+ */
+void __init xen_machine_kexec_register_resources(struct resource *res)
+{
+	struct resource *cpu_res;
+	int cpu;
+
+	request_resource(res, &xen_hypervisor_res);
+
+	for (cpu = 0; cpu < xen_max_nr_phys_cpus; cpu++) {
+		cpu_res = &xen_phys_cpus[cpu];
+		if (cpu_res->parent != NULL)
+			continue;
+		/* out of xen_hypervisor_res range */
+		request_resource(res, cpu_res);
+	}
+
+	machine_kexec_register_resources(res);
+}
|
|
+
|
|
+/*
+ * Fill the Xen image descriptor 'xki' from 'image': architecture specific
+ * fields first, then the indirection page and the start address.
+ */
+static void setup_load_arg(xen_kexec_image_t *xki, struct kimage *image)
+{
+	machine_kexec_setup_load_arg(xki, image);
+
+	xki->indirection_page = image->head;
+	xki->start_address = image->start;
+}
|
|
+
|
|
+/*
+ * Load the image into xen so xen can kdump itself
+ * This might have been done in prepare, but prepare
+ * is currently called too early. It might make sense
+ * to move prepare, but for now, just add an extra hook.
+ *
+ * Returns the result of the KEXEC_CMD_kexec_load hypercall.
+ */
+int xen_machine_kexec_load(struct kimage *image)
+{
+	xen_kexec_load_t xkl;
+
+	memset(&xkl, 0, sizeof(xkl));
+	xkl.type = image->type;
+	setup_load_arg(&xkl.image, image);
+	return HYPERVISOR_kexec_op(KEXEC_CMD_kexec_load, &xkl);
+}
|
|
+
|
|
+/*
+ * Unload the image that was stored by machine_kexec_load()
+ * This might have been done in machine_kexec_cleanup() but it
+ * is called too late, and it's possible xen could try and kdump
+ * using resources that have been freed.
+ *
+ * A failing hypercall only triggers a warning (WARN_ON).
+ */
+void xen_machine_kexec_unload(struct kimage *image)
+{
+	xen_kexec_load_t xkl;
+
+	memset(&xkl, 0, sizeof(xkl));
+	xkl.type = image->type;
+	WARN_ON(HYPERVISOR_kexec_op(KEXEC_CMD_kexec_unload, &xkl));
+}
|
|
+
|
|
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ *
+ * This has the hypervisor move to the preferred reboot CPU,
+ * stop all CPUs and kexec. That is, it combines machine_shutdown()
+ * and machine_kexec() in Linux kexec terms.
+ *
+ * Panics if the hypercall returns.
+ */
+NORET_TYPE void machine_kexec(struct kimage *image)
+{
+	xen_kexec_exec_t xke;
+
+	memset(&xke, 0, sizeof(xke));
+	xke.type = image->type;
+	VOID(HYPERVISOR_kexec_op(KEXEC_CMD_kexec, &xke));
+	panic("KEXEC_CMD_kexec hypercall should not return\n");
+}
|
|
+
|
|
+/*
+ * Intentionally empty: per the comment on machine_kexec() in this file,
+ * the KEXEC_CMD_kexec hypercall covers the shutdown work as well.
+ */
+void machine_shutdown(void)
+{
+	/* do nothing */
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Local variables:
|
|
+ * c-file-style: "linux"
|
|
+ * indent-tabs-mode: t
|
|
+ * c-indent-level: 8
|
|
+ * c-basic-offset: 8
|
|
+ * tab-width: 8
|
|
+ * End:
|
|
+ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/machine_reboot.c 2011-01-24 12:06:05.000000000 +0100
|
|
@@ -0,0 +1,242 @@
|
|
+#include <linux/version.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/unistd.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/reboot.h>
|
|
+#include <linux/sysrq.h>
|
|
+#include <linux/stringify.h>
|
|
+#include <linux/stop_machine.h>
|
|
+#include <asm/irq.h>
|
|
+#include <asm/mmu_context.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <linux/cpu.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/xencons.h>
|
|
+#include <xen/cpu_hotplug.h>
|
|
+#include <xen/interface/vcpu.h>
|
|
+
|
|
+#if defined(__i386__) || defined(__x86_64__)
|
|
+
|
|
+/*
|
|
+ * Power off function, if any
|
|
+ */
|
|
+void (*pm_power_off)(void);
|
|
+EXPORT_SYMBOL(pm_power_off);
|
|
+
|
|
+void machine_emergency_restart(void)
+{
+	/* Drain pending console data before asking Xen for a reboot. */
+	xencons_force_flush();
+
+	HYPERVISOR_shutdown(SHUTDOWN_reboot);
+}
|
|
+
|
|
+void machine_restart(char * __unused)
+{
+	/* The argument is ignored; a restart is always the emergency one. */
+	machine_emergency_restart();
+}
|
|
+
|
|
+void machine_halt(void)
+{
+	/* Halting is handled exactly like powering off. */
+	machine_power_off();
+}
|
|
+
|
|
+void machine_power_off(void)
+{
+	/* Drain pending console data before the domain goes away. */
+	xencons_force_flush();
+
+	/* A registered power-off hook, if any, runs first. */
+	if (pm_power_off != NULL)
+		pm_power_off();
+
+	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+}
|
|
+
|
|
+static void pre_suspend(void)
+{
+	int rc;
+
+	/*
+	 * Point the shared-info pointer at the zero page, then clear the
+	 * fixmap mapping of the real shared-info frame.
+	 */
+	HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
+	rc = HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
+					  __pte_ma(0), 0);
+	WARN_ON(rc);
+
+	/* Convert the xenstore and console frames from MFN to PFN. */
+	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
+	xen_start_info->console.domU.mfn =
+		mfn_to_pfn(xen_start_info->console.domU.mfn);
+}
|
|
+
|
|
+static void post_suspend(int suspend_cancelled)
+{
+	int base, slot, frame, fpp;
+	unsigned long shinfo_mfn;
+	extern unsigned long max_pfn;
+	extern unsigned long *pfn_to_mfn_frame_list_list;
+	extern unsigned long *pfn_to_mfn_frame_list[];
+
+	if (!suspend_cancelled) {
+#ifdef CONFIG_SMP
+		cpu_initialized_map = cpu_online_map;
+#endif
+	} else {
+		/* Convert the xenstore and console frames back to MFNs. */
+		xen_start_info->store_mfn =
+			pfn_to_mfn(xen_start_info->store_mfn);
+		xen_start_info->console.domU.mfn =
+			pfn_to_mfn(xen_start_info->console.domU.mfn);
+	}
+
+	/* Map the shared-info frame at its fixmap slot again. */
+	shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
+	if (HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
+					 pfn_pte_ma(shinfo_mfn, PAGE_KERNEL),
+					 0))
+		BUG();
+	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+
+	memset(empty_zero_page, 0, PAGE_SIZE);
+
+	/*
+	 * Refill the two-level pfn-to-mfn frame list: one entry per page
+	 * of phys_to_machine_mapping, fpp entries per list page.
+	 */
+	fpp = PAGE_SIZE/sizeof(unsigned long);
+	for (base = 0, slot = 0, frame = -1; base < max_pfn;
+	     base += fpp, slot++) {
+		if ((slot % fpp) == 0) {
+			frame++;
+			pfn_to_mfn_frame_list_list[frame] =
+				virt_to_mfn(pfn_to_mfn_frame_list[frame]);
+			slot = 0;
+		}
+		pfn_to_mfn_frame_list[frame][slot] =
+			virt_to_mfn(&phys_to_machine_mapping[base]);
+	}
+
+	/* Publish the rebuilt list through the shared-info page. */
+	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		virt_to_mfn(pfn_to_mfn_frame_list_list);
+}
|
|
+
|
|
+#else /* !(defined(__i386__) || defined(__x86_64__)) */
|
|
+
|
|
+#ifndef HAVE_XEN_PRE_SUSPEND
|
|
+#define xen_pre_suspend() ((void)0)
|
|
+#endif
|
|
+
|
|
+#ifndef HAVE_XEN_POST_SUSPEND
|
|
+#define xen_post_suspend(x) ((void)0)
|
|
+#endif
|
|
+
|
|
+#define switch_idle_mm() ((void)0)
|
|
+#define mm_pin_all() ((void)0)
|
|
+#define pre_suspend() xen_pre_suspend()
|
|
+#define post_suspend(x) xen_post_suspend(x)
|
|
+
|
|
+#endif
|
|
+
|
|
+/* Arguments handed from __xen_suspend() to take_machine_down(). */
+struct suspend {
+	/* Non-zero selects the stop_machine based path. */
+	int fast_suspend;
+	/* Called with the suspend hypercall's result right after it returns. */
+	void (*resume_notifier)(int);
+};
|
|
+
|
|
+static int take_machine_down(void *_suspend)
+{
+	struct suspend *req = _suspend;
+	int cancelled, err;
+	extern void time_resume(void);
+
+	if (!req->fast_suspend) {
+		BUG_ON(irqs_disabled());
+
+		/*
+		 * Keep retrying until exactly one CPU is left online with
+		 * preemption disabled.
+		 */
+		while (1) {
+			err = smp_suspend();
+			if (err)
+				return err;
+
+			xenbus_suspend();
+			preempt_disable();
+
+			if (num_online_cpus() == 1)
+				break;
+
+			preempt_enable();
+			xenbus_suspend_cancel();
+		}
+
+		local_irq_disable();
+	} else {
+		/* The fast path expects to be entered with IRQs off. */
+		BUG_ON(!irqs_disabled());
+	}
+
+	mm_pin_all();
+	gnttab_suspend();
+	pre_suspend();
+
+	/*
+	 * The hypercall yields 1 when the suspend was cancelled or the
+	 * domain was merely checkpointed, and 0 when we are resuming in a
+	 * new domain.
+	 */
+	cancelled = HYPERVISOR_suspend(virt_to_mfn(xen_start_info));
+
+	req->resume_notifier(cancelled);
+	post_suspend(cancelled);
+	gnttab_resume();
+	if (!cancelled) {
+		irq_resume();
+#ifdef __x86_64__
+		/*
+		 * Older Xen versions do not save/restore the user %cr3, so
+		 * reload it just in case.  Fast-suspend mode implies a Xen
+		 * recent enough not to need this.
+		 */
+		if (!req->fast_suspend)
+			xen_new_user_pt(__pa(__user_pgd(
+				current->active_mm->pgd)));
+#endif
+	}
+	time_resume();
+
+	if (!req->fast_suspend)
+		local_irq_enable();
+
+	return cancelled;
+}
|
|
+
|
|
+int __xen_suspend(int fast_suspend, void (*resume_notifier)(int))
+{
+	struct suspend req;
+	int err;
+
+	BUG_ON(smp_processor_id() != 0);
+	BUG_ON(in_interrupt());
+
+#if defined(__i386__) || defined(__x86_64__)
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		printk(KERN_WARNING "Cannot suspend in "
+		       "auto_translated_physmap mode.\n");
+		return -EOPNOTSUPP;
+	}
+#endif
+
+	/* On a definitely-UP system the 'slow' path is the faster one. */
+	if (num_possible_cpus() == 1)
+		fast_suspend = 0;
+
+	req.fast_suspend = fast_suspend;
+	req.resume_notifier = resume_notifier;
+
+	if (!fast_suspend) {
+		err = take_machine_down(&req);
+	} else {
+		xenbus_suspend();
+		err = stop_machine_run(take_machine_down, &req, 0);
+		if (err < 0)
+			xenbus_suspend_cancel();
+	}
+
+	if (err < 0)
+		return err;
+
+	/* From here on err is the suspend hypercall's "cancelled" flag. */
+	if (err) {
+		xenbus_suspend_cancel();
+	} else {
+		xencons_resume();
+		xenbus_resume();
+	}
+
+	if (!fast_suspend)
+		smp_resume();
+
+	return 0;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/pci.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,83 @@
|
|
+/*
|
|
+ * vim:shiftwidth=8:noexpandtab
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/pci.h>
|
|
+#include <xen/interface/physdev.h>
|
|
+#include "../../pci/pci.h"
|
|
+
|
|
+static int (*pci_bus_probe)(struct device *dev);
|
|
+static int (*pci_bus_remove)(struct device *dev);
|
|
+
|
|
+static int pci_bus_probe_wrapper(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct physdev_manage_pci_ext add_ext;
+	struct physdev_manage_pci add;
+	int rc;
+
+	/* Report the device to Xen; the request shape depends on its kind. */
+#ifdef CONFIG_PCI_IOV
+	if (pdev->is_virtfn) {
+		/* Virtual function: also name its physical function. */
+		memset(&add_ext, 0, sizeof(add_ext));
+		add_ext.bus = pdev->bus->number;
+		add_ext.devfn = pdev->devfn;
+		add_ext.is_virtfn = 1;
+		add_ext.physfn.bus = pdev->physfn->bus->number;
+		add_ext.physfn.devfn = pdev->physfn->devfn;
+		rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
+					   &add_ext);
+	} else
+#endif
+	if (pci_ari_enabled(pdev->bus) && PCI_SLOT(pdev->devfn)) {
+		/* Non-zero slot on an ARI-enabled bus: extended function. */
+		memset(&add_ext, 0, sizeof(add_ext));
+		add_ext.bus = pdev->bus->number;
+		add_ext.devfn = pdev->devfn;
+		add_ext.is_extfn = 1;
+		rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
+					   &add_ext);
+	} else {
+		add.bus = pdev->bus->number;
+		add.devfn = pdev->devfn;
+		rc = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add,
+					   &add);
+	}
+
+	/* -ENOSYS from the hypercall is tolerated; other errors are not. */
+	if (rc != 0 && rc != -ENOSYS)
+		return rc;
+
+	/* Hand over to the probe routine saved by hook_pci_bus(). */
+	return pci_bus_probe(dev);
+}
|
|
+
|
|
+static int pci_bus_remove_wrapper(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct physdev_manage_pci gone;
+	int rc;
+
+	/* Capture bus/devfn first: they are unusable after the remove. */
+	gone.bus = pdev->bus->number;
+	gone.devfn = pdev->devfn;
+
+	rc = pci_bus_remove(dev);
+	/* dev and pdev are no longer valid from here on!! */
+
+	WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+				      &gone));
+	return rc;
+}
|
|
+
|
|
+static int __init hook_pci_bus(void)
+{
+	/* The bus hooks are only interposed in the initial Xen domain. */
+	if (is_running_on_xen() && is_initial_xendomain()) {
+		/* Save each original callback, then install the wrapper. */
+		pci_bus_probe = pci_bus_type.probe;
+		pci_bus_type.probe = pci_bus_probe_wrapper;
+
+		pci_bus_remove = pci_bus_type.remove;
+		pci_bus_type.remove = pci_bus_remove_wrapper;
+	}
+
+	return 0;
+}
|
|
+
|
|
+core_initcall(hook_pci_bus);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/reboot.c 2010-11-25 09:36:37.000000000 +0100
|
|
@@ -0,0 +1,338 @@
|
|
+#define __KERNEL_SYSCALLS__
|
|
+#include <linux/version.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/unistd.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/reboot.h>
|
|
+#include <linux/sysrq.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <linux/kmod.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/workqueue.h>
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
+
|
|
+#define SHUTDOWN_INVALID -1
|
|
+#define SHUTDOWN_POWEROFF 0
|
|
+#define SHUTDOWN_SUSPEND 2
|
|
+#define SHUTDOWN_RESUMING 3
|
|
+#define SHUTDOWN_HALT 4
|
|
+
|
|
+/* Ignore multiple shutdown requests. */
|
|
+static int shutting_down = SHUTDOWN_INVALID;
|
|
+
|
|
+/* Was last suspend request cancelled? */
|
|
+static int suspend_cancelled;
|
|
+
|
|
+/* Can we leave APs online when we suspend? */
|
|
+static int fast_suspend;
|
|
+
|
|
+static void __shutdown_handler(void *unused);
|
|
+static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
|
|
+
|
|
+static int setup_suspend_evtchn(void);
|
|
+
|
|
+int __xen_suspend(int fast_suspend, void (*resume_notifier)(int));
|
|
+
|
|
+static int shutdown_process(void *__unused)
+{
+	static char *envp[] = { "HOME=/", "TERM=linux",
+				"PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+	static char *poweroff_argv[] = { "/sbin/poweroff", NULL };
+
+	extern asmlinkage long sys_reboot(int magic1, int magic2,
+					  unsigned int cmd, void *arg);
+
+	const int power_down = (shutting_down == SHUTDOWN_POWEROFF) ||
+			       (shutting_down == SHUTDOWN_HALT);
+
+	/*
+	 * Poweroff and halt both run /sbin/poweroff; if the helper cannot
+	 * be run, fall back to a direct sys_reboot() power-off.
+	 */
+	if (power_down &&
+	    call_usermodehelper("/sbin/poweroff", poweroff_argv,
+				envp, 0) < 0) {
+#ifdef CONFIG_XEN
+		sys_reboot(LINUX_REBOOT_MAGIC1,
+			   LINUX_REBOOT_MAGIC2,
+			   LINUX_REBOOT_CMD_POWER_OFF,
+			   NULL);
+#endif /* CONFIG_XEN */
+	}
+
+	/* Reset the state so that a later request could try again. */
+	shutting_down = SHUTDOWN_INVALID;
+
+	return 0;
+}
|
|
+
|
|
+static void xen_resume_notifier(int _suspend_cancelled)
+{
+	int prev;
+
+	/* SUSPEND -> RESUMING is the only transition allowed here. */
+	prev = xchg(&shutting_down, SHUTDOWN_RESUMING);
+	BUG_ON(prev != SHUTDOWN_SUSPEND);
+
+	/* Record the outcome for xen_suspend() to examine. */
+	suspend_cancelled = _suspend_cancelled;
+}
|
|
+
|
|
+static int xen_suspend(void *__unused)
+{
+	int err, prev;
+
+	daemonize("suspend");
+	err = set_cpus_allowed(current, cpumask_of_cpu(0));
+	if (err) {
+		printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err);
+		goto fail;
+	}
+
+	/*
+	 * Suspend, then try to move RESUMING -> INVALID.  Finding SUSPEND
+	 * instead means another suspend was requested meanwhile: go again.
+	 */
+	for (;;) {
+		err = __xen_suspend(fast_suspend, xen_resume_notifier);
+		if (err) {
+			printk(KERN_ERR "Xen suspend failed (%d)\n", err);
+			goto fail;
+		}
+		if (!suspend_cancelled)
+			setup_suspend_evtchn();
+		prev = cmpxchg(
+			&shutting_down, SHUTDOWN_RESUMING, SHUTDOWN_INVALID);
+		if (prev != SHUTDOWN_SUSPEND)
+			break;
+	}
+
+	if (prev == SHUTDOWN_INVALID || prev == SHUTDOWN_SUSPEND) {
+		BUG();
+	} else if (prev != SHUTDOWN_RESUMING) {
+		/* Some other request arrived while resuming: run it now. */
+		schedule_work(&shutdown_work);
+	}
+
+	return 0;
+
+ fail:
+	prev = xchg(&shutting_down, SHUTDOWN_INVALID);
+	BUG_ON(prev != SHUTDOWN_SUSPEND);
+	return 0;
+}
|
|
+
|
|
+static void switch_shutdown_state(int new_state)
+{
+	int expected, seen = SHUTDOWN_INVALID;
+
+	/* Only transitions into an active state are driven from here. */
+	if (new_state == SHUTDOWN_INVALID)
+		return;
+
+	for (;;) {
+		/* Drop the request if an active state is already set. */
+		if ((seen != SHUTDOWN_INVALID) &&
+		    (seen != SHUTDOWN_RESUMING))
+			return;
+
+		/* Try to swap in the new state. */
+		expected = seen;
+		seen = cmpxchg(&shutting_down, expected, new_state);
+		if (seen == expected)
+			break;
+	}
+
+	/* From INVALID we start the work; otherwise xen_suspend() does. */
+	if (seen == SHUTDOWN_INVALID)
+		schedule_work(&shutdown_work);
+	else
+		BUG_ON(seen != SHUTDOWN_RESUMING);
+}
|
|
+
|
|
+static void __shutdown_handler(void *unused)
+{
+	int (*thread_fn)(void *);
+	int pid;
+
+	/* A suspend request gets its own thread function. */
+	if (shutting_down == SHUTDOWN_SUSPEND)
+		thread_fn = xen_suspend;
+	else
+		thread_fn = shutdown_process;
+
+	pid = kernel_thread(thread_fn, NULL, CLONE_FS | CLONE_FILES);
+	if (pid >= 0)
+		return;
+
+	/* Thread creation failed: queue this work again after HZ/2. */
+	printk(KERN_WARNING "Error creating shutdown process (%d): "
+	       "retrying...\n", -pid);
+	schedule_delayed_work(&shutdown_work, HZ/2);
+}
|
|
+
|
|
+static void shutdown_handler(struct xenbus_watch *watch,
+			     const char **vec, unsigned int len)
+{
+	extern void ctrl_alt_del(void);
+	char *req;
+	struct xenbus_transaction xbt;
+	int err, new_state = SHUTDOWN_INVALID;
+
+	/* Nothing to do while another request is already being handled. */
+	if ((shutting_down != SHUTDOWN_INVALID) &&
+	    (shutting_down != SHUTDOWN_RESUMING))
+		return;
+
+	/* Read and clear control/shutdown, repeating on -EAGAIN. */
+	for (;;) {
+		err = xenbus_transaction_start(&xbt);
+		if (err)
+			return;
+
+		req = (char *)xenbus_read(xbt, "control", "shutdown", NULL);
+		/* Read errors and empty reads are ignored. */
+		if (XENBUS_IS_ERR_READ(req)) {
+			xenbus_transaction_end(xbt, 1);
+			return;
+		}
+
+		xenbus_write(xbt, "control", "shutdown", "");
+
+		err = xenbus_transaction_end(xbt, 0);
+		if (err != -EAGAIN)
+			break;
+		kfree(req);
+	}
+
+	/* "reboot" is acted on directly; the rest pick a new state. */
+	if (strcmp(req, "poweroff") == 0)
+		new_state = SHUTDOWN_POWEROFF;
+	else if (strcmp(req, "reboot") == 0)
+		ctrl_alt_del();
+	else if (strcmp(req, "suspend") == 0)
+		new_state = SHUTDOWN_SUSPEND;
+	else if (strcmp(req, "halt") == 0)
+		new_state = SHUTDOWN_HALT;
+	else
+		printk("Ignoring shutdown request: %s\n", req);
+
+	switch_shutdown_state(new_state);
+
+	kfree(req);
+}
|
|
+
|
|
+/*
+ * Xenbus watch callback for control/sysrq: read the requested key, clear
+ * the node inside the same transaction and, with CONFIG_MAGIC_SYSRQ, pass
+ * the key to handle_sysrq().  The watch arguments are not used.
+ */
+static void sysrq_handler(struct xenbus_watch *watch, const char **vec,
+			  unsigned int len)
+{
+	char sysrq_key = '\0';
+	struct xenbus_transaction xbt;
+	int err;
+
+ again:
+	err = xenbus_transaction_start(&xbt);
+	if (err)
+		return;
+
+	/*
+	 * Check the result as a signed value: the previous "!scanf" test
+	 * only matched 0, so a negative errno skipped the error path.
+	 */
+	err = xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key);
+	if (err <= 0) {
+		/*
+		 * NOTE(review): upstream Linux treats -ENOENT (node absent
+		 * when the watch first fires) and -ERANGE (empty value) as
+		 * routine and stays quiet for them; confirm xenbus_scanf()
+		 * in this tree returns the same codes.
+		 */
+		if (err != -ENOENT && err != -ERANGE)
+			printk(KERN_ERR "Unable to read sysrq code in "
+			       "control/sysrq\n");
+		xenbus_transaction_end(xbt, 1);
+		return;
+	}
+
+	/* Clear the node so the same key is not seen again. */
+	if (sysrq_key != '\0')
+		xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+
+#ifdef CONFIG_MAGIC_SYSRQ
+	if (sysrq_key != '\0')
+		handle_sysrq(sysrq_key, NULL, NULL);
+#endif
+}
|
|
+
|
|
+/* Watch on control/shutdown, handled by shutdown_handler(). */
+static struct xenbus_watch shutdown_watch = {
+	.node     = "control/shutdown",
+	.callback = shutdown_handler,
+};
|
|
+
|
|
+/* Watch on control/sysrq, handled by sysrq_handler(). */
+static struct xenbus_watch sysrq_watch = {
+	.node     = "control/sysrq",
+	.callback = sysrq_handler,
+};
|
|
+
|
|
+static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
+{
+	/* An event on the suspend channel is a request to suspend. */
+	switch_shutdown_state(SHUTDOWN_SUSPEND);
+
+	return IRQ_HANDLED;
+}
|
|
+
|
|
+static int setup_suspend_evtchn(void)
+{
+	static int suspend_irq;
+	char portstr[16];
+	int port;
+
+	/* Release the binding left over from an earlier call, if any. */
+	if (suspend_irq > 0)
+		unbind_from_irqhandler(suspend_irq, NULL);
+
+	suspend_irq = bind_listening_port_to_irqhandler(0, suspend_int, 0,
+							"suspend", NULL);
+	if (suspend_irq <= 0)
+		return -1;
+
+	/* Advertise the port number through xenstore. */
+	port = irq_to_evtchn_port(suspend_irq);
+	printk(KERN_INFO "suspend: event channel %d\n", port);
+	sprintf(portstr, "%d", port);
+	xenbus_write(XBT_NIL, "device/suspend", "event-channel", portstr);
+
+	return 0;
+}
|
|
+
|
|
+static int setup_shutdown_watcher(void)
+{
+	int err;
+
+	err = register_xenbus_watch(&sysrq_watch);
+	if (err) {
+		printk(KERN_ERR "Failed to set sysrq watcher\n");
+		return err;
+	}
+
+	/* The initial domain gets the sysrq watch only. */
+	if (is_initial_xendomain())
+		return 0;
+
+	/* Best effort: fast_suspend keeps its value if the read fails. */
+	xenbus_scanf(XBT_NIL, "control",
+		     "platform-feature-multiprocessor-suspend",
+		     "%d", &fast_suspend);
+
+	err = register_xenbus_watch(&shutdown_watch);
+	if (err) {
+		printk(KERN_ERR "Failed to set shutdown watcher\n");
+		return err;
+	}
+
+	/* Finally set up the suspend event channel. */
+	err = setup_suspend_evtchn();
+	if (err)
+		printk(KERN_ERR "Failed to register suspend event channel\n");
+
+	return err;
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+
|
|
+static int shutdown_event(struct notifier_block *notifier,
+			  unsigned long event,
+			  void *data)
+{
+	/* The watcher's own result is not propagated to the notifier. */
+	setup_shutdown_watcher();
+
+	return NOTIFY_DONE;
+}
|
|
+
|
|
+static int __init setup_shutdown_event(void)
+{
+	/* Registers shutdown_event() as a xenstore notifier. */
+	static struct notifier_block xenstore_notifier = {
+		.notifier_call = shutdown_event,
+	};
+
+	register_xenstore_notifier(&xenstore_notifier);
+
+	return 0;
+}
|
|
+
|
|
+subsys_initcall(setup_shutdown_event);
|
|
+
|
|
+#else /* !defined(CONFIG_XEN) */
|
|
+
|
|
+int xen_reboot_init(void)
+{
+	/* Without CONFIG_XEN the watchers are set up directly. */
+	return setup_shutdown_watcher();
+}
|
|
+
|
|
+#endif /* !defined(CONFIG_XEN) */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/smpboot.c 2010-11-08 17:27:03.000000000 +0100
|
|
@@ -0,0 +1,456 @@
|
|
+/*
|
|
+ * Xen SMP booting functions
|
|
+ *
|
|
+ * See arch/i386/kernel/smpboot.c for copyright and credits for derived
|
|
+ * portions of this file.
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/kernel_stat.h>
|
|
+#include <linux/smp_lock.h>
|
|
+#include <linux/irq.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/notifier.h>
|
|
+#include <linux/cpu.h>
|
|
+#include <linux/percpu.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/arch_hooks.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/interface/vcpu.h>
|
|
+#include <xen/cpu_hotplug.h>
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
|
|
+extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
|
|
+
|
|
+extern int local_setup_timer(unsigned int cpu);
|
|
+extern void local_teardown_timer(unsigned int cpu);
|
|
+
|
|
+extern void hypervisor_callback(void);
|
|
+extern void failsafe_callback(void);
|
|
+extern void system_call(void);
|
|
+extern void smp_trap_init(trap_info_t *);
|
|
+
|
|
+/* Number of siblings per CPU package */
|
|
+int smp_num_siblings = 1;
|
|
+
|
|
+cpumask_t cpu_online_map;
|
|
+EXPORT_SYMBOL(cpu_online_map);
|
|
+cpumask_t cpu_possible_map;
|
|
+EXPORT_SYMBOL(cpu_possible_map);
|
|
+cpumask_t cpu_initialized_map;
|
|
+
|
|
+struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
|
|
+EXPORT_SYMBOL(cpu_data);
|
|
+
|
|
+static DEFINE_PER_CPU(int, resched_irq);
|
|
+static DEFINE_PER_CPU(int, callfunc_irq);
|
|
+static char resched_name[NR_CPUS][15];
|
|
+static char callfunc_name[NR_CPUS][15];
|
|
+
|
|
+cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
|
|
+cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
|
|
+EXPORT_SYMBOL(cpu_core_map);
|
|
+
|
|
+#if defined(__i386__)
|
|
+u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
|
|
+EXPORT_SYMBOL(x86_cpu_to_apicid);
|
|
+#elif !defined(CONFIG_X86_IO_APIC)
|
|
+unsigned int maxcpus = NR_CPUS;
|
|
+#endif
|
|
+
|
|
+void __init prefill_possible_map(void)
+{
+	int cpu;
+
+	/* Leave the map alone if it names any CPU other than this one. */
+	for_each_possible_cpu(cpu) {
+		if (cpu != smp_processor_id())
+			return;
+	}
+
+	/* Every VCPU for which VCPUOP_is_up does not fail is possible. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL) >= 0)
+			cpu_set(cpu, cpu_possible_map);
+	}
+}
|
|
+
|
|
+void __init smp_alloc_memory(void)
+{
+	/* Intentionally empty. */
+}
|
|
+
|
|
+static inline void
+set_cpu_sibling_map(unsigned int cpu)
+{
+	struct cpuinfo_x86 *info = &cpu_data[cpu];
+
+	/* Each CPU is its own package, with a single booted core. */
+	info->phys_proc_id = cpu;
+	info->cpu_core_id = 0;
+	info->booted_cores = 1;
+
+	/* The sibling and core masks contain only this CPU. */
+	cpu_sibling_map[cpu] = cpumask_of_cpu(cpu);
+	cpu_core_map[cpu] = cpumask_of_cpu(cpu);
+}
|
|
+
|
|
+static void
+remove_siblinginfo(unsigned int cpu)
+{
+	struct cpuinfo_x86 *info = &cpu_data[cpu];
+
+	/* Invalidate the topology ids and the booted-core count. */
+	info->phys_proc_id = BAD_APICID;
+	info->cpu_core_id = BAD_APICID;
+	info->booted_cores = 0;
+
+	/* Empty both masks for this CPU. */
+	cpus_clear(cpu_sibling_map[cpu]);
+	cpus_clear(cpu_core_map[cpu]);
+}
|
|
+
|
|
+static int __cpuinit xen_smp_intr_init(unsigned int cpu)
+{
+	int rc;
+
+	/* Mark both IRQs unbound so the failure path knows what to undo. */
+	per_cpu(callfunc_irq, cpu) = -1;
+	per_cpu(resched_irq, cpu) = -1;
+
+	/* Reschedule IPI */
+	sprintf(resched_name[cpu], "resched%u", cpu);
+	rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, cpu,
+				    smp_reschedule_interrupt, SA_INTERRUPT,
+				    resched_name[cpu], NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(resched_irq, cpu) = rc;
+
+	/* Call-function IPI */
+	sprintf(callfunc_name[cpu], "callfunc%u", cpu);
+	rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, cpu,
+				    smp_call_function_interrupt, SA_INTERRUPT,
+				    callfunc_name[cpu], NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(callfunc_irq, cpu) = rc;
+
+	/* The local timer is set up here for every CPU except CPU 0. */
+	if (cpu != 0) {
+		rc = local_setup_timer(cpu);
+		if (rc != 0)
+			goto fail;
+	}
+
+	return 0;
+
+ fail:
+	if (per_cpu(resched_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
+	if (per_cpu(callfunc_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+	return rc;
+}
|
|
+
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
+static void xen_smp_intr_exit(unsigned int cpu)
+{
+	/* Mirror of xen_smp_intr_init(): CPU 0 has no timer to tear down. */
+	if (cpu != 0)
+		local_teardown_timer(cpu);
+
+	unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
+	unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+}
|
|
+#endif
|
|
+
|
|
+void __cpuinit cpu_bringup(void)
+{
+	cpu_init();
+	identify_cpu(&cpu_data[smp_processor_id()]);
+	touch_softlockup_watchdog();
+
+	/* Returns with preemption disabled and interrupts enabled. */
+	preempt_disable();
+	local_irq_enable();
+}
|
|
+
|
|
+static void __cpuinit cpu_bringup_and_idle(void)
+{
+	/* Used as the initial eip for a VCPU in cpu_initialize_context(). */
+	cpu_bringup();
+	cpu_idle();
+}
|
|
+
|
|
+static void __cpuinit cpu_initialize_context(unsigned int cpu)
+{
+	/*
+	 * vcpu_guest_context_t is too large for the stack, so one static
+	 * instance is used, protected by ctxt_lock.
+	 */
+	static vcpu_guest_context_t ctxt;
+	static DEFINE_SPINLOCK(ctxt_lock);
+
+	struct task_struct *idle = idle_task(cpu);
+#ifdef __x86_64__
+	struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
+#else
+	struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+
+	/* Nothing to do if this CPU is already in cpu_initialized_map. */
+	if (cpu_test_and_set(cpu, cpu_initialized_map))
+		return;
+
+	spin_lock(&ctxt_lock);
+
+	memset(&ctxt, 0, sizeof(ctxt));
+
+	/* Settings shared by i386 and x86_64 */
+	ctxt.flags = VGCF_IN_KERNEL;
+	ctxt.user_regs.cs = __KERNEL_CS;
+	ctxt.user_regs.ds = __USER_DS;
+	ctxt.user_regs.es = __USER_DS;
+	ctxt.user_regs.fs = 0;
+	ctxt.user_regs.gs = 0;
+	ctxt.user_regs.ss = __KERNEL_DS;
+	ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
+	ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
+
+	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+	smp_trap_init(ctxt.trap_ctxt);
+
+	ctxt.ldt_ents = 0;
+
+	ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
+	ctxt.gdt_ents = gdt_descr->size / 8;
+
+	ctxt.kernel_ss = __KERNEL_DS;
+
+	ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
+	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+	/* Architecture-specific stack, callback and page-table settings */
+#ifdef __i386__
+	ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
+	ctxt.kernel_sp = idle->thread.esp0;
+
+	ctxt.event_callback_cs = __KERNEL_CS;
+	ctxt.failsafe_callback_cs = __KERNEL_CS;
+
+	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
+#else /* __x86_64__ */
+	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
+	ctxt.kernel_sp = idle->thread.rsp0;
+
+	ctxt.syscall_callback_eip = (unsigned long)system_call;
+
+	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
+
+	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
+		BUG();
+
+	spin_unlock(&ctxt_lock);
+}
|
|
+
|
|
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int cpu;
+	struct task_struct *idle_tsk;
+	int apic_id, acpi_id;
+	struct vcpu_get_physid cpu_id;
+#ifdef __x86_64__
+	struct desc_ptr *gdt;
+#else
+	struct Xgt_desc_struct *gdt;
+#endif
+
+	/* Boot CPU: APIC id defaults to 0 if Xen cannot supply one. */
+	apic_id = 0;
+	if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) {
+		apic_id = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
+		acpi_id = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
+#ifdef CONFIG_ACPI
+		if (acpi_id != 0xff)
+			x86_acpiid_to_apicid[acpi_id] = apic_id;
+#endif
+	}
+	boot_cpu_data.apicid = apic_id;
+	cpu_data[0] = boot_cpu_data;
+
+	x86_cpu_to_apicid[0] = apic_id;
+
+	current_thread_info()->cpu = 0;
+
+	/* Start with empty topology masks for every CPU slot. */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		cpus_clear(cpu_sibling_map[cpu]);
+		cpus_clear(cpu_core_map[cpu]);
+	}
+
+	set_cpu_sibling_map(0);
+
+	if (xen_smp_intr_init(0))
+		BUG();
+
+	cpu_initialized_map = cpumask_of_cpu(0);
+
+	/* Trim the possible map, highest CPU first, down to max_cpus. */
+	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
+		cpu = NR_CPUS - 1;
+		while (!cpu_isset(cpu, cpu_possible_map))
+			cpu--;
+		cpu_clear(cpu, cpu_possible_map);
+	}
+
+	/* Prepare every possible secondary CPU. */
+	for_each_possible_cpu (cpu) {
+		if (cpu == 0)
+			continue;
+
+		/* Give the CPU a read-only copy of the GDT. */
+#ifdef __x86_64__
+		gdt = &cpu_gdt_descr[cpu];
+#else
+		gdt = &per_cpu(cpu_gdt_descr, cpu);
+#endif
+		gdt->address = get_zeroed_page(GFP_KERNEL);
+		if (unlikely(!gdt->address)) {
+			printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
+			       cpu);
+			continue;
+		}
+		gdt->size = GDT_SIZE;
+		memcpy((void *)gdt->address, cpu_gdt_table, GDT_SIZE);
+		make_page_readonly(
+			(void *)gdt->address,
+			XENFEAT_writable_descriptor_tables);
+
+		/* APIC id defaults to the CPU number. */
+		apic_id = cpu;
+		if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) {
+			apic_id = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
+			acpi_id = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id);
+#ifdef CONFIG_ACPI
+			if (acpi_id != 0xff)
+				x86_acpiid_to_apicid[acpi_id] = apic_id;
+#endif
+		}
+		cpu_data[cpu] = boot_cpu_data;
+		cpu_data[cpu].apicid = apic_id;
+
+		x86_cpu_to_apicid[cpu] = apic_id;
+
+		idle_tsk = fork_idle(cpu);
+		if (IS_ERR(idle_tsk))
+			panic("failed fork for CPU %d", cpu);
+
+#ifdef __x86_64__
+		cpu_pda(cpu)->pcurrent = idle_tsk;
+		cpu_pda(cpu)->cpunumber = cpu;
+		clear_tsk_thread_flag(idle_tsk, TIF_FORK);
+#endif
+
+		irq_ctx_init(cpu);
+
+		/* With CPU hotplug, only the initial domain marks it present. */
+#ifdef CONFIG_HOTPLUG_CPU
+		if (is_initial_xendomain())
+			cpu_set(cpu, cpu_present_map);
+#else
+		cpu_set(cpu, cpu_present_map);
+#endif
+	}
+
+	init_xenbus_allowed_cpumask();
+
+#ifdef CONFIG_X86_IO_APIC
+	/*
+	 * At this point an IO-APIC is known to exist in the system, so
+	 * set it up.
+	 */
+	if (!skip_ioapic_setup && nr_ioapics)
+		setup_IO_APIC();
+#endif
+}
|
|
+
|
|
+void __devinit smp_prepare_boot_cpu(void)
+{
+	/* Only the possible map needs filling in for the boot CPU. */
+	prefill_possible_map();
+}
|
|
+
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
+
|
|
+/*
|
|
+ * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
|
|
+ * But do it early enough to catch critical for_each_present_cpu() loops
|
|
+ * in i386-specific code.
|
|
+ */
|
|
+static int __init initialize_cpu_present_map(void)
+{
+	/* Every possible CPU is marked present. */
+	cpu_present_map = cpu_possible_map;
+
+	return 0;
+}
|
|
+core_initcall(initialize_cpu_present_map);
|
|
+
|
|
+int __cpu_disable(void)
+{
+	cpumask_t remaining = cpu_online_map;
+	unsigned int cpu = smp_processor_id();
+
+	/* CPU 0 cannot be taken offline. */
+	if (cpu == 0)
+		return -EBUSY;
+
+	remove_siblinginfo(cpu);
+
+	/* Fix up IRQs for the remaining CPUs before going offline. */
+	cpu_clear(cpu, remaining);
+	fixup_irqs(remaining);
+	cpu_clear(cpu, cpu_online_map);
+
+	return 0;
+}
|
|
+
|
|
+void __cpu_die(unsigned int cpu)
+{
+	/* Sleep in HZ/10 steps until Xen no longer reports the VCPU up. */
+	while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ/10);
+	}
+
+	xen_smp_intr_exit(cpu);
+
+	/* With one CPU left online, switch alternatives to the UP form. */
+	if (num_online_cpus() == 1)
+		alternatives_smp_switch(0);
+}
|
|
+
|
|
+#endif /* CONFIG_HOTPLUG_CPU */
|
|
+
|
|
+int __cpuinit __cpu_up(unsigned int cpu)
+{
+	int err;
+
+	err = cpu_up_check(cpu);
+	if (err)
+		return err;
+
+	cpu_initialize_context(cpu);
+
+	/* Going from one online CPU to two: switch to the SMP form. */
+	if (num_online_cpus() == 1)
+		alternatives_smp_switch(1);
+
+	/* The sibling map must be in place before cpu_online_map is set. */
+	set_cpu_sibling_map(cpu);
+	wmb();
+
+	err = xen_smp_intr_init(cpu);
+	if (err) {
+		remove_siblinginfo(cpu);
+		return err;
+	}
+
+	cpu_set(cpu, cpu_online_map);
+
+	/* Ask Xen to start the VCPU; failure here is treated as a bug. */
+	err = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
+	BUG_ON(err);
+
+	return 0;
+}
|
|
+
|
|
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+	/* Intentionally empty. */
+}
|
|
+
|
|
+#ifndef CONFIG_X86_LOCAL_APIC
|
|
+int setup_profiling_timer(unsigned int multiplier)
+{
+	/* Always rejected; the multiplier is ignored. */
+	return -EINVAL;
+}
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/xen_proc.c 2007-06-12 13:13:44.000000000 +0200
|
|
@@ -0,0 +1,23 @@
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/proc_fs.h>
|
|
+#include <xen/xen_proc.h>
|
|
+
|
|
+static struct proc_dir_entry *xen_base;
|
|
+
|
|
+struct proc_dir_entry *create_xen_proc_entry(const char *name, mode_t mode)
+{
+	/* Create the /proc/xen directory on first use. */
+	if (xen_base == NULL) {
+		xen_base = proc_mkdir("xen", &proc_root);
+		if (xen_base == NULL)
+			panic("Couldn't create /proc/xen");
+	}
+
+	return create_proc_entry(name, mode, xen_base);
+}
|
|
+
|
|
+EXPORT_SYMBOL_GPL(create_xen_proc_entry);
|
|
+
|
|
+void remove_xen_proc_entry(const char *name)
+{
+	/* Entries are looked up under the /proc/xen directory. */
+	remove_proc_entry(name, xen_base);
+}
|
|
+
|
|
+EXPORT_SYMBOL_GPL(remove_xen_proc_entry);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/core/xen_sysfs.c 2009-05-29 10:25:53.000000000 +0200
|
|
@@ -0,0 +1,427 @@
|
|
+/*
|
|
+ * copyright (c) 2006 IBM Corporation
|
|
+ * Authored by: Mike D. Day <ncmike@us.ibm.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License version 2 as
|
|
+ * published by the Free Software Foundation.
|
|
+ */
|
|
+
|
|
+#include <linux/err.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/features.h>
|
|
+#include <xen/hypervisor_sysfs.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/kexec.h>
|
|
+#include "../xenbus/xenbus_comms.h"
|
|
+
|
|
+MODULE_LICENSE("GPL");
|
|
+MODULE_AUTHOR("Mike D. Day <ncmike@us.ibm.com>");
|
|
+
|
|
+static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ return sprintf(buffer, "xen\n");
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(type);
|
|
+
|
|
+static int __init xen_sysfs_type_init(void)
|
|
+{
|
|
+ return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
|
|
+}
|
|
+
|
|
+static void xen_sysfs_type_destroy(void)
|
|
+{
|
|
+ sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
|
|
+}
|
|
+
|
|
+/* xen version attributes */
|
|
+/*
|
|
+ * sysfs show handler for version/major: prints the upper half (bits 16 and
|
|
+ * up) of the XENVER_version result in decimal. Returns the number of
|
|
+ * characters written, or -ENODEV when the hypercall returns 0.
|
|
+ */
|
|
+static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+	int xen_ver = HYPERVISOR_xen_version(XENVER_version, NULL);
|
|
+
|
|
+	if (!xen_ver)
|
|
+		return -ENODEV;
|
|
+
|
|
+	return sprintf(buffer, "%d\n", xen_ver >> 16);
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(major);
|
|
+
|
|
+/*
|
|
+ * sysfs show handler for version/minor: prints the minor part of the
|
|
+ * XENVER_version result in decimal. Returns the number of characters
|
|
+ * written, or -ENODEV when the hypercall returns 0.
|
|
+ */
|
|
+static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+	int version = HYPERVISOR_xen_version(XENVER_version, NULL);
|
|
+
|
|
+	/*
|
|
+	 * major_show() takes everything from bit 16 upwards, so the minor
|
|
+	 * number is the full low 16 bits; masking with 0xff would truncate
|
|
+	 * minor versions above 255.
|
|
+	 */
|
|
+	if (version)
|
|
+		return sprintf(buffer, "%d\n", version & 0xffff);
|
|
+	return -ENODEV;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(minor);
|
|
+
|
|
+/*
|
|
+ * sysfs show handler for version/extra: prints the string filled in by the
|
|
+ * XENVER_extraversion hypercall. Returns the number of characters written,
|
|
+ * -ENOMEM when the scratch buffer cannot be allocated, or the non-zero
|
|
+ * hypercall result.
|
|
+ */
|
|
+static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+	char *extraver = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
|
|
+	int len;
|
|
+
|
|
+	if (!extraver)
|
|
+		return -ENOMEM;
|
|
+
|
|
+	len = HYPERVISOR_xen_version(XENVER_extraversion, extraver);
|
|
+	if (len == 0)
|
|
+		len = sprintf(buffer, "%s\n", extraver);
|
|
+
|
|
+	kfree(extraver);
|
|
+	return len;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(extra);
|
|
+
|
|
+static struct attribute *version_attrs[] = {
|
|
+ &major_attr.attr,
|
|
+ &minor_attr.attr,
|
|
+ &extra_attr.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static struct attribute_group version_group = {
|
|
+ .name = "version",
|
|
+ .attrs = version_attrs,
|
|
+};
|
|
+
|
|
+static int __init xen_sysfs_version_init(void)
|
|
+{
|
|
+ return sysfs_create_group(&hypervisor_subsys.kset.kobj,
|
|
+ &version_group);
|
|
+}
|
|
+
|
|
+static void xen_sysfs_version_destroy(void)
|
|
+{
|
|
+ sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group);
|
|
+}
|
|
+
|
|
+/* UUID */
|
|
+
|
|
+/*
|
|
+ * sysfs show handler for uuid: reads the "vm" key from xenstore, then the
|
|
+ * "uuid" key below the path it names, and prints that value.
|
|
+ *
|
|
+ * Returns the number of characters written, -EBUSY while xenstored is not
|
|
+ * ready, or the error encoded in a failed xenbus_read() result.
|
|
+ */
|
|
+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+	char *vm_path;
|
|
+	char *uuid;
|
|
+	int len;
|
|
+
|
|
+	if (!is_xenstored_ready())
|
|
+		return -EBUSY;
|
|
+
|
|
+	vm_path = xenbus_read(XBT_NIL, "vm", "", NULL);
|
|
+	if (IS_ERR(vm_path))
|
|
+		return PTR_ERR(vm_path);
|
|
+
|
|
+	uuid = xenbus_read(XBT_NIL, vm_path, "uuid", NULL);
|
|
+	/* The path is only needed for the lookup above. */
|
|
+	kfree(vm_path);
|
|
+	if (IS_ERR(uuid))
|
|
+		return PTR_ERR(uuid);
|
|
+
|
|
+	len = sprintf(buffer, "%s\n", uuid);
|
|
+	kfree(uuid);
|
|
+
|
|
+	return len;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(uuid);
|
|
+
|
|
+static int __init xen_sysfs_uuid_init(void)
|
|
+{
|
|
+ return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
|
|
+}
|
|
+
|
|
+static void xen_sysfs_uuid_destroy(void)
|
|
+{
|
|
+ sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
|
|
+}
|
|
+
|
|
+/* xen compilation attributes */
|
|
+
|
|
+static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ int ret = -ENOMEM;
|
|
+ struct xen_compile_info *info;
|
|
+
|
|
+ info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
|
|
+ if (info) {
|
|
+ ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
|
|
+ if (!ret)
|
|
+ ret = sprintf(buffer, "%s\n", info->compiler);
|
|
+ kfree(info);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(compiler);
|
|
+
|
|
+static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ int ret = -ENOMEM;
|
|
+ struct xen_compile_info *info;
|
|
+
|
|
+ info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
|
|
+ if (info) {
|
|
+ ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
|
|
+ if (!ret)
|
|
+ ret = sprintf(buffer, "%s\n", info->compile_by);
|
|
+ kfree(info);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(compiled_by);
|
|
+
|
|
+static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ int ret = -ENOMEM;
|
|
+ struct xen_compile_info *info;
|
|
+
|
|
+ info = kmalloc(sizeof(struct xen_compile_info), GFP_KERNEL);
|
|
+ if (info) {
|
|
+ ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
|
|
+ if (!ret)
|
|
+ ret = sprintf(buffer, "%s\n", info->compile_date);
|
|
+ kfree(info);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(compile_date);
|
|
+
|
|
+static struct attribute *xen_compile_attrs[] = {
|
|
+ &compiler_attr.attr,
|
|
+ &compiled_by_attr.attr,
|
|
+ &compile_date_attr.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static struct attribute_group xen_compilation_group = {
|
|
+ .name = "compilation",
|
|
+ .attrs = xen_compile_attrs,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Register the "compilation" attribute group on the hypervisor subsystem
|
|
+ * kobject. Returns the result of sysfs_create_group().
|
|
+ */
|
|
+static int __init xen_compilation_init(void)
|
|
+{
|
|
+	return sysfs_create_group(&hypervisor_subsys.kset.kobj,
|
|
+				  &xen_compilation_group);
|
|
+}
|
|
+
|
|
+static void xen_compilation_destroy(void)
|
|
+{
|
|
+ sysfs_remove_group(&hypervisor_subsys.kset.kobj,
|
|
+ &xen_compilation_group);
|
|
+}
|
|
+
|
|
+/* xen properties info */
|
|
+
|
|
+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ int ret = -ENOMEM;
|
|
+ char *caps;
|
|
+
|
|
+ caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
|
|
+ if (caps) {
|
|
+ ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
|
|
+ if (!ret)
|
|
+ ret = sprintf(buffer, "%s\n", caps);
|
|
+ kfree(caps);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(capabilities);
|
|
+
|
|
+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ int ret = -ENOMEM;
|
|
+ char *cset;
|
|
+
|
|
+ cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
|
|
+ if (cset) {
|
|
+ ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
|
|
+ if (!ret)
|
|
+ ret = sprintf(buffer, "%s\n", cset);
|
|
+ kfree(cset);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(changeset);
|
|
+
|
|
+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ int ret = -ENOMEM;
|
|
+ struct xen_platform_parameters *parms;
|
|
+
|
|
+ parms = kmalloc(sizeof(struct xen_platform_parameters), GFP_KERNEL);
|
|
+ if (parms) {
|
|
+ ret = HYPERVISOR_xen_version(XENVER_platform_parameters,
|
|
+ parms);
|
|
+ if (!ret)
|
|
+ ret = sprintf(buffer, "%lx\n", parms->virt_start);
|
|
+ kfree(parms);
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(virtual_start);
|
|
+
|
|
+/*
|
|
+ * sysfs show handler for properties/pagesize: prints the positive result
|
|
+ * of the XENVER_pagesize hypercall in hexadecimal. A zero or negative
|
|
+ * result is returned unchanged and nothing is written.
|
|
+ */
|
|
+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+	int pagesz = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
|
|
+
|
|
+	if (pagesz <= 0)
|
|
+		return pagesz;
|
|
+
|
|
+	return sprintf(buffer, "%x\n", pagesz);
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(pagesize);
|
|
+
|
|
+/* eventually there will be several more features to export */
|
|
+/*
|
|
+ * Print feature submap number @index as reported by the
|
|
+ * XENVER_get_features hypercall.
|
|
+ *
|
|
+ * Returns the number of characters written, -ENOMEM when the request
|
|
+ * structure cannot be allocated, or the non-zero hypercall result.
|
|
+ */
|
|
+static ssize_t xen_feature_show(int index, char *buffer)
|
|
+{
|
|
+	int ret = -ENOMEM;
|
|
+	struct xen_feature_info *info;
|
|
+
|
|
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
|
|
+	if (info) {
|
|
+		info->submap_idx = index;
|
|
+		ret = HYPERVISOR_xen_version(XENVER_get_features, info);
|
|
+		/*
|
|
+		 * The submap is an unsigned bitmask, so print it unsigned:
|
|
+		 * "%d" would show a mask with bit 31 set as a negative number.
|
|
+		 */
|
|
+		if (!ret)
|
|
+			ret = sprintf(buffer, "%u\n", info->submap);
|
|
+		kfree(info);
|
|
+	}
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+static ssize_t writable_pt_show(struct hyp_sysfs_attr *attr, char *buffer)
|
|
+{
|
|
+ return xen_feature_show(XENFEAT_writable_page_tables, buffer);
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(writable_pt);
|
|
+
|
|
+static struct attribute *xen_properties_attrs[] = {
|
|
+ &capabilities_attr.attr,
|
|
+ &changeset_attr.attr,
|
|
+ &virtual_start_attr.attr,
|
|
+ &pagesize_attr.attr,
|
|
+ &writable_pt_attr.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static struct attribute_group xen_properties_group = {
|
|
+ .name = "properties",
|
|
+ .attrs = xen_properties_attrs,
|
|
+};
|
|
+
|
|
+static int __init xen_properties_init(void)
|
|
+{
|
|
+ return sysfs_create_group(&hypervisor_subsys.kset.kobj,
|
|
+ &xen_properties_group);
|
|
+}
|
|
+
|
|
+static void xen_properties_destroy(void)
|
|
+{
|
|
+ sysfs_remove_group(&hypervisor_subsys.kset.kobj,
|
|
+ &xen_properties_group);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_KEXEC
|
|
+
|
|
+extern size_t vmcoreinfo_size_xen;
|
|
+extern unsigned long paddr_vmcoreinfo_xen;
|
|
+
|
|
+static ssize_t vmcoreinfo_show(struct hyp_sysfs_attr *attr, char *page)
|
|
+{
|
|
+ return sprintf(page, "%lx %zx\n",
|
|
+ paddr_vmcoreinfo_xen, vmcoreinfo_size_xen);
|
|
+}
|
|
+
|
|
+HYPERVISOR_ATTR_RO(vmcoreinfo);
|
|
+
|
|
+static int __init xen_sysfs_vmcoreinfo_init(void)
|
|
+{
|
|
+ return sysfs_create_file(&hypervisor_subsys.kset.kobj,
|
|
+ &vmcoreinfo_attr.attr);
|
|
+}
|
|
+
|
|
+static void xen_sysfs_vmcoreinfo_destroy(void)
|
|
+{
|
|
+ sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr);
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Module init: when running on Xen, register the type, version,
|
|
+ * compilation, uuid and properties sysfs entries, plus vmcoreinfo under
|
|
+ * CONFIG_KEXEC when vmcoreinfo_size_xen is non-zero.
|
|
+ *
|
|
+ * Returns 0 on success, -ENODEV when not running on Xen, or the error of
|
|
+ * the first registration that failed, after removing the entries that
|
|
+ * were registered before it.
|
|
+ */
|
|
+static int __init hyper_sysfs_init(void)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ if (!is_running_on_xen())
|
|
+ return -ENODEV;
|
|
+
|
|
+ ret = xen_sysfs_type_init();
|
|
+ if (ret)
|
|
+ goto out;
|
|
+ ret = xen_sysfs_version_init();
|
|
+ if (ret)
|
|
+ goto version_out;
|
|
+ ret = xen_compilation_init();
|
|
+ if (ret)
|
|
+ goto comp_out;
|
|
+ ret = xen_sysfs_uuid_init();
|
|
+ if (ret)
|
|
+ goto uuid_out;
|
|
+ ret = xen_properties_init();
|
|
+ if (ret)
|
|
+ goto prop_out;
|
|
+#ifdef CONFIG_KEXEC
|
|
+ if (vmcoreinfo_size_xen != 0) {
|
|
+ ret = xen_sysfs_vmcoreinfo_init();
|
|
+ if (ret)
|
|
+ goto vmcoreinfo_out;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ /* Everything registered; ret is 0 here, so skip the unwind labels. */
|
|
+ goto out;
|
|
+
|
|
+ /*
|
|
+ * Error unwind: a failing step jumps to the label that removes
|
|
+ * everything registered before it, in reverse order.
|
|
+ */
|
|
+#ifdef CONFIG_KEXEC
|
|
+vmcoreinfo_out:
|
|
+#endif
|
|
+ xen_properties_destroy();
|
|
+prop_out:
|
|
+ xen_sysfs_uuid_destroy();
|
|
+uuid_out:
|
|
+ xen_compilation_destroy();
|
|
+comp_out:
|
|
+ xen_sysfs_version_destroy();
|
|
+version_out:
|
|
+ xen_sysfs_type_destroy();
|
|
+out:
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static void __exit hyper_sysfs_exit(void)
|
|
+{
|
|
+#ifdef CONFIG_KEXEC
|
|
+ if (vmcoreinfo_size_xen != 0)
|
|
+ xen_sysfs_vmcoreinfo_destroy();
|
|
+#endif
|
|
+ xen_properties_destroy();
|
|
+ xen_compilation_destroy();
|
|
+ xen_sysfs_uuid_destroy();
|
|
+ xen_sysfs_version_destroy();
|
|
+ xen_sysfs_type_destroy();
|
|
+
|
|
+}
|
|
+
|
|
+module_init(hyper_sysfs_init);
|
|
+module_exit(hyper_sysfs_exit);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/fbfront/Makefile 2007-06-12 13:13:45.000000000 +0200
|
|
@@ -0,0 +1,2 @@
|
|
+obj-$(CONFIG_XEN_FRAMEBUFFER) := xenfb.o
|
|
+obj-$(CONFIG_XEN_KEYBOARD) += xenkbd.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/fbfront/xenfb.c 2011-03-02 12:00:16.000000000 +0100
|
|
@@ -0,0 +1,890 @@
|
|
+/*
|
|
+ * linux/drivers/video/xenfb.c -- Xen para-virtual frame buffer device
|
|
+ *
|
|
+ * Copyright (C) 2005-2006 Anthony Liguori <aliguori@us.ibm.com>
|
|
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
|
|
+ *
|
|
+ * Based on linux/drivers/video/q40fb.c
|
|
+ *
|
|
+ * This file is subject to the terms and conditions of the GNU General Public
|
|
+ * License. See the file COPYING in the main directory of this archive for
|
|
+ * more details.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * TODO:
|
|
+ *
|
|
+ * Switch to grant tables when they become capable of dealing with the
|
|
+ * frame buffer.
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/fb.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/interface/io/fbif.h>
|
|
+#include <xen/interface/io/protocols.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <linux/kthread.h>
|
|
+
|
|
+struct xenfb_mapping
|
|
+{
|
|
+ struct list_head link;
|
|
+ struct vm_area_struct *vma;
|
|
+ atomic_t map_refs;
|
|
+ int faults;
|
|
+ struct xenfb_info *info;
|
|
+};
|
|
+
|
|
+struct xenfb_info
|
|
+{
|
|
+ struct task_struct *kthread;
|
|
+ wait_queue_head_t wq;
|
|
+
|
|
+ unsigned char *fb;
|
|
+ struct fb_info *fb_info;
|
|
+ struct timer_list refresh;
|
|
+ int dirty;
|
|
+ int x1, y1, x2, y2; /* dirty rectangle,
|
|
+ protected by dirty_lock */
|
|
+ spinlock_t dirty_lock;
|
|
+ struct mutex mm_lock;
|
|
+ int nr_pages;
|
|
+ struct page **pages;
|
|
+ struct list_head mappings; /* protected by mm_lock */
|
|
+
|
|
+ int irq;
|
|
+ struct xenfb_page *page;
|
|
+ unsigned long *mfns;
|
|
+ int feature_resize; /* Backend has resize feature */
|
|
+ struct xenfb_resize resize;
|
|
+ int resize_dpy;
|
|
+ spinlock_t resize_lock;
|
|
+
|
|
+ struct xenbus_device *xbdev;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * There are three locks:
|
|
+ * spinlock resize_lock protecting resize_dpy and resize
|
|
+ * spinlock dirty_lock protecting the dirty rectangle
|
|
+ * mutex mm_lock protecting mappings.
|
|
+ *
|
|
+ * How the dirty and mapping locks work together
|
|
+ *
|
|
+ * The problem is that dirty rectangle and mappings aren't
|
|
+ * independent: the dirty rectangle must cover all faulted pages in
|
|
+ * mappings. We need to prove that our locking maintains this
|
|
+ * invariant.
|
|
+ *
|
|
+ * There are several kinds of critical regions:
|
|
+ *
|
|
+ * 1. Holding only dirty_lock: xenfb_refresh(). May run in
|
|
+ * interrupts. Extends the dirty rectangle. Trivially preserves
|
|
+ * invariant.
|
|
+ *
|
|
+ * 2. Holding only mm_lock: xenfb_mmap() and xenfb_vm_close(). Touch
|
|
+ * only mappings. The former creates unfaulted pages. Preserves
|
|
+ * invariant. The latter removes pages. Preserves invariant.
|
|
+ *
|
|
+ * 3. Holding both locks: xenfb_vm_nopage(). Extends the dirty
|
|
+ * rectangle and updates mappings consistently. Preserves
|
|
+ * invariant.
|
|
+ *
|
|
+ * 4. The ugliest one: xenfb_update_screen(). Clear the dirty
|
|
+ * rectangle and update mappings consistently.
|
|
+ *
|
|
+ * We can't simply hold both locks, because zap_page_range() cannot
|
|
+ * be called with a spinlock held.
|
|
+ *
|
|
+ * Therefore, we first clear the dirty rectangle with both locks
|
|
+ * held. Then we unlock dirty_lock and update the mappings.
|
|
+ * Critical regions that hold only dirty_lock may interfere with
|
|
+ * that. This can only be region 1: xenfb_refresh(). But that
|
|
+ * just extends the dirty rectangle, which can't harm the
|
|
+ * invariant.
|
|
+ *
|
|
+ * But FIXME: the invariant is too weak. It misses that the fault
|
|
+ * record in mappings must be consistent with the mapping of pages in
|
|
+ * the associated address space! do_no_page() updates the PTE after
|
|
+ * xenfb_vm_nopage() returns, i.e. outside the critical region. This
|
|
+ * allows the following race:
|
|
+ *
|
|
+ * X writes to some address in the Xen frame buffer
|
|
+ * Fault - call do_no_page()
|
|
+ * call xenfb_vm_nopage()
|
|
+ * grab mm_lock
|
|
+ * map->faults++;
|
|
+ * release mm_lock
|
|
+ * return back to do_no_page()
|
|
+ * (preempted, or SMP)
|
|
+ * Xen worker thread runs.
|
|
+ * grab mm_lock
|
|
+ * look at mappings
|
|
+ * find this mapping, zaps its pages (but page not in pte yet)
|
|
+ * clear map->faults
|
|
+ * releases mm_lock
|
|
+ * (back to X process)
|
|
+ * put page in X's pte
|
|
+ *
|
|
+ * Oh well, we won't be updating the writes to this page anytime soon.
|
|
+ */
|
|
+#define MB_ (1024*1024)
|
|
+#define XENFB_DEFAULT_FB_LEN (XENFB_WIDTH * XENFB_HEIGHT * XENFB_DEPTH / 8)
|
|
+
|
|
+enum {KPARAM_MEM, KPARAM_WIDTH, KPARAM_HEIGHT, KPARAM_CNT};
|
|
+static int video[KPARAM_CNT] = {2, XENFB_WIDTH, XENFB_HEIGHT};
|
|
+module_param_array(video, int, NULL, 0);
|
|
+MODULE_PARM_DESC(video,
|
|
+ "Size of video memory in MB and width,height in pixels, default = (2,800,600)");
|
|
+
|
|
+static int xenfb_fps = 20;
|
|
+
|
|
+static int xenfb_remove(struct xenbus_device *);
|
|
+static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
|
|
+static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
|
|
+static void xenfb_disconnect_backend(struct xenfb_info *);
|
|
+
|
|
+/*
|
|
+ * Copy *@event into the out ring of the shared page at the current
|
|
+ * producer index, advance out_prod by one and notify the other end
|
|
+ * through info->irq.
|
|
+ *
|
|
+ * The ring is not checked for free space here; the caller must have
|
|
+ * verified !xenfb_queue_full() beforehand.
|
|
+ */
|
|
+static void xenfb_send_event(struct xenfb_info *info,
|
|
+ union xenfb_out_event *event)
|
|
+{
|
|
+ __u32 prod;
|
|
+
|
|
+ prod = info->page->out_prod;
|
|
+ /* caller ensures !xenfb_queue_full() */
|
|
+ mb(); /* ensure ring space available */
|
|
+ XENFB_OUT_RING_REF(info->page, prod) = *event;
|
|
+ wmb(); /* ensure ring contents visible */
|
|
+ /* Publish the slot only after its contents have been written. */
|
|
+ info->page->out_prod = prod + 1;
|
|
+
|
|
+ notify_remote_via_irq(info->irq);
|
|
+}
|
|
+
|
|
+static void xenfb_do_update(struct xenfb_info *info,
|
|
+ int x, int y, int w, int h)
|
|
+{
|
|
+ union xenfb_out_event event;
|
|
+
|
|
+ memset(&event, 0, sizeof(event));
|
|
+ event.type = XENFB_TYPE_UPDATE;
|
|
+ event.update.x = x;
|
|
+ event.update.y = y;
|
|
+ event.update.width = w;
|
|
+ event.update.height = h;
|
|
+
|
|
+ /* caller ensures !xenfb_queue_full() */
|
|
+ xenfb_send_event(info, &event);
|
|
+}
|
|
+
|
|
+static void xenfb_do_resize(struct xenfb_info *info)
|
|
+{
|
|
+ union xenfb_out_event event;
|
|
+
|
|
+ memset(&event, 0, sizeof(event));
|
|
+ event.resize = info->resize;
|
|
+
|
|
+ /* caller ensures !xenfb_queue_full() */
|
|
+ xenfb_send_event(info, &event);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Report whether the out ring of the shared page is full, i.e. whether
|
|
+ * the producer is exactly XENFB_OUT_RING_LEN entries ahead of the
|
|
+ * consumer. Returns non-zero when full, 0 otherwise.
|
|
+ */
|
|
+static int xenfb_queue_full(struct xenfb_info *info)
|
|
+{
|
|
+	__u32 produced = info->page->out_prod;
|
|
+	__u32 consumed = info->page->out_cons;
|
|
+
|
|
+	return (produced - consumed) == XENFB_OUT_RING_LEN;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Send the accumulated dirty rectangle as one update event.
|
|
+ *
|
|
+ * Does nothing when the out ring is full or nothing is dirty. Otherwise
|
|
+ * the rectangle is snapshotted and reset with both mm_lock and dirty_lock
|
|
+ * held, the pages of every mapping that recorded faults are zapped with
|
|
+ * only mm_lock held, and the update event is sent with no lock held.
|
|
+ */
|
|
+static void xenfb_update_screen(struct xenfb_info *info)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int y1, y2, x1, x2;
|
|
+ struct xenfb_mapping *map;
|
|
+
|
|
+ /* The dirty state is left untouched, so a later call can retry. */
|
|
+ if (xenfb_queue_full(info))
|
|
+ return;
|
|
+
|
|
+ mutex_lock(&info->mm_lock);
|
|
+
|
|
+ spin_lock_irqsave(&info->dirty_lock, flags);
|
|
+ if (info->dirty){
|
|
+ info->dirty = 0;
|
|
+ y1 = info->y1;
|
|
+ y2 = info->y2;
|
|
+ x1 = info->x1;
|
|
+ x2 = info->x2;
|
|
+ /* Reset to the empty rectangle that __xenfb_refresh() grows from. */
|
|
+ info->x1 = info->y1 = INT_MAX;
|
|
+ info->x2 = info->y2 = 0;
|
|
+ } else {
|
|
+ spin_unlock_irqrestore(&info->dirty_lock, flags);
|
|
+ mutex_unlock(&info->mm_lock);
|
|
+ return;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&info->dirty_lock, flags);
|
|
+
|
|
+ /* dirty_lock is dropped first: zap_page_range() runs under mm_lock only. */
|
|
+ list_for_each_entry(map, &info->mappings, link) {
|
|
+ if (!map->faults)
|
|
+ continue;
|
|
+ zap_page_range(map->vma, map->vma->vm_start,
|
|
+ map->vma->vm_end - map->vma->vm_start, NULL);
|
|
+ map->faults = 0;
|
|
+ }
|
|
+
|
|
+ mutex_unlock(&info->mm_lock);
|
|
+
|
|
+ /* An inverted rectangle is only reported; the update is still sent. */
|
|
+ if (x2 < x1 || y2 < y1) {
|
|
+ printk("xenfb_update_screen bogus rect %d %d %d %d\n",
|
|
+ x1, x2, y1, y2);
|
|
+ WARN_ON(1);
|
|
+ }
|
|
+ xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Send a pending resize request, if there is one and the out ring has
|
|
+ * room for it. A request that cannot be sent stays pending. Runs with
|
|
+ * resize_lock held and interrupts disabled.
|
|
+ */
|
|
+static void xenfb_handle_resize_dpy(struct xenfb_info *info)
|
|
+{
|
|
+	unsigned long irq_flags;
|
|
+
|
|
+	spin_lock_irqsave(&info->resize_lock, irq_flags);
|
|
+	if (info->resize_dpy && !xenfb_queue_full(info)) {
|
|
+		info->resize_dpy = 0;
|
|
+		xenfb_do_resize(info);
|
|
+	}
|
|
+	spin_unlock_irqrestore(&info->resize_lock, irq_flags);
|
|
+}
|
|
+
|
|
+static int xenfb_thread(void *data)
|
|
+{
|
|
+ struct xenfb_info *info = data;
|
|
+
|
|
+ while (!kthread_should_stop()) {
|
|
+ xenfb_handle_resize_dpy(info);
|
|
+ xenfb_update_screen(info);
|
|
+ wait_event_interruptible(info->wq,
|
|
+ kthread_should_stop() || info->dirty);
|
|
+ try_to_freeze();
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * fb_setcolreg hook: store one colour in the 32-bit pseudo palette.
|
|
+ *
|
|
+ * @regno: palette index; must be below info->cmap.len
|
|
+ * @red, @green, @blue: 16-bit colour components, scaled down to the
|
|
+ *	bitfield lengths in info->var and packed at their offsets
|
|
+ * @transp: ignored
|
|
+ *
|
|
+ * Returns 0 on success and 1 when @regno is out of range. The palette is
|
|
+ * only written for 16, 24 and 32 bits per pixel.
|
|
+ */
|
|
+static int xenfb_setcolreg(unsigned regno, unsigned red, unsigned green,
|
|
+			   unsigned blue, unsigned transp,
|
|
+			   struct fb_info *info)
|
|
+{
|
|
+	u32 v;
|
|
+
|
|
+	/*
|
|
+	 * Valid indices are 0 .. cmap.len - 1. Accepting regno == cmap.len
|
|
+	 * would write one element past the end of the pseudo palette.
|
|
+	 */
|
|
+	if (regno >= info->cmap.len)
|
|
+		return 1;
|
|
+
|
|
+	red >>= (16 - info->var.red.length);
|
|
+	green >>= (16 - info->var.green.length);
|
|
+	blue >>= (16 - info->var.blue.length);
|
|
+
|
|
+	v = (red << info->var.red.offset) |
|
|
+	    (green << info->var.green.offset) |
|
|
+	    (blue << info->var.blue.offset);
|
|
+
|
|
+	/* FIXME is this sane? check against xxxfb_setcolreg()! */
|
|
+	switch (info->var.bits_per_pixel) {
|
|
+	case 16:
|
|
+	case 24:
|
|
+	case 32:
|
|
+		((u32 *)info->pseudo_palette)[regno] = v;
|
|
+		break;
|
|
+	}
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+static void xenfb_timer(unsigned long data)
|
|
+{
|
|
+ struct xenfb_info *info = (struct xenfb_info *)data;
|
|
+ wake_up(&info->wq);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Grow the dirty rectangle so that it also covers the area starting at
|
|
+ * (@x1, @y1) with width @w and height @h, mark the frame buffer dirty and
|
|
+ * arm the refresh timer unless it is already pending.
|
|
+ *
|
|
+ * Both callers in this file hold dirty_lock around the call.
|
|
+ */
|
|
+static void __xenfb_refresh(struct xenfb_info *info,
|
|
+			    int x1, int y1, int w, int h)
|
|
+{
|
|
+	const int bottom = y1 + h;
|
|
+	const int right = x1 + w;
|
|
+
|
|
+	if (y1 < info->y1)
|
|
+		info->y1 = y1;
|
|
+	if (bottom > info->y2)
|
|
+		info->y2 = bottom;
|
|
+	if (x1 < info->x1)
|
|
+		info->x1 = x1;
|
|
+	if (right > info->x2)
|
|
+		info->x2 = right;
|
|
+	info->dirty = 1;
|
|
+
|
|
+	/* A timer that is already armed keeps its deadline. */
|
|
+	if (!timer_pending(&info->refresh))
|
|
+		mod_timer(&info->refresh, jiffies + HZ/xenfb_fps);
|
|
+}
|
|
+
|
|
+static void xenfb_refresh(struct xenfb_info *info,
|
|
+ int x1, int y1, int w, int h)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&info->dirty_lock, flags);
|
|
+ __xenfb_refresh(info, x1, y1, w, h);
|
|
+ spin_unlock_irqrestore(&info->dirty_lock, flags);
|
|
+}
|
|
+
|
|
+static void xenfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
|
|
+{
|
|
+ struct xenfb_info *info = p->par;
|
|
+
|
|
+ cfb_fillrect(p, rect);
|
|
+ xenfb_refresh(info, rect->dx, rect->dy, rect->width, rect->height);
|
|
+}
|
|
+
|
|
+static void xenfb_imageblit(struct fb_info *p, const struct fb_image *image)
|
|
+{
|
|
+ struct xenfb_info *info = p->par;
|
|
+
|
|
+ cfb_imageblit(p, image);
|
|
+ xenfb_refresh(info, image->dx, image->dy, image->width, image->height);
|
|
+}
|
|
+
|
|
+static void xenfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
|
|
+{
|
|
+ struct xenfb_info *info = p->par;
|
|
+
|
|
+ cfb_copyarea(p, area);
|
|
+ xenfb_refresh(info, area->dx, area->dy, area->width, area->height);
|
|
+}
|
|
+
|
|
+static void xenfb_vm_open(struct vm_area_struct *vma)
|
|
+{
|
|
+ struct xenfb_mapping *map = vma->vm_private_data;
|
|
+ atomic_inc(&map->map_refs);
|
|
+}
|
|
+
|
|
+static void xenfb_vm_close(struct vm_area_struct *vma)
|
|
+{
|
|
+ struct xenfb_mapping *map = vma->vm_private_data;
|
|
+ struct xenfb_info *info = map->info;
|
|
+
|
|
+ mutex_lock(&info->mm_lock);
|
|
+ if (atomic_dec_and_test(&map->map_refs)) {
|
|
+ list_del(&map->link);
|
|
+ kfree(map);
|
|
+ }
|
|
+ mutex_unlock(&info->mm_lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * nopage handler for frame buffer mappings.
|
|
+ *
|
|
+ * Looks up the frame buffer page backing @vaddr, takes a reference on it,
|
|
+ * counts the fault in the mapping and extends the dirty rectangle to the
|
|
+ * scan lines that page covers, all with mm_lock and dirty_lock held.
|
|
+ *
|
|
+ * Returns the page, or NOPAGE_SIGBUS when @vaddr lies beyond the last
|
|
+ * frame buffer page. *@type is set to VM_FAULT_MINOR when @type is
|
|
+ * non-NULL.
|
|
+ */
|
|
+static struct page *xenfb_vm_nopage(struct vm_area_struct *vma,
|
|
+ unsigned long vaddr, int *type)
|
|
+{
|
|
+ struct xenfb_mapping *map = vma->vm_private_data;
|
|
+ struct xenfb_info *info = map->info;
|
|
+ int pgnr = (vaddr - vma->vm_start) >> PAGE_SHIFT;
|
|
+ unsigned long flags;
|
|
+ struct page *page;
|
|
+ int y1, y2;
|
|
+
|
|
+ if (pgnr >= info->nr_pages)
|
|
+ return NOPAGE_SIGBUS;
|
|
+
|
|
+ mutex_lock(&info->mm_lock);
|
|
+ spin_lock_irqsave(&info->dirty_lock, flags);
|
|
+ page = info->pages[pgnr];
|
|
+ get_page(page);
|
|
+ map->faults++;
|
|
+
|
|
+ /* First and last scan line touched by this page, clamped to yres. */
|
|
+ y1 = pgnr * PAGE_SIZE / info->fb_info->fix.line_length;
|
|
+ y2 = (pgnr * PAGE_SIZE + PAGE_SIZE - 1) / info->fb_info->fix.line_length;
|
|
+ if (y2 > info->fb_info->var.yres)
|
|
+ y2 = info->fb_info->var.yres;
|
|
+ /* The whole width of those lines is marked dirty. */
|
|
+ __xenfb_refresh(info, 0, y1, info->fb_info->var.xres, y2 - y1);
|
|
+ spin_unlock_irqrestore(&info->dirty_lock, flags);
|
|
+ mutex_unlock(&info->mm_lock);
|
|
+
|
|
+ if (type)
|
|
+ *type = VM_FAULT_MINOR;
|
|
+
|
|
+ return page;
|
|
+}
|
|
+
|
|
+static struct vm_operations_struct xenfb_vm_ops = {
|
|
+ .open = xenfb_vm_open,
|
|
+ .close = xenfb_vm_close,
|
|
+ .nopage = xenfb_vm_nopage,
|
|
+};
|
|
+
|
|
+static int xenfb_mmap(struct fb_info *fb_info, struct vm_area_struct *vma)
|
|
+{
|
|
+ struct xenfb_info *info = fb_info->par;
|
|
+ struct xenfb_mapping *map;
|
|
+ int map_pages;
|
|
+
|
|
+ if (!(vma->vm_flags & VM_WRITE))
|
|
+ return -EINVAL;
|
|
+ if (!(vma->vm_flags & VM_SHARED))
|
|
+ return -EINVAL;
|
|
+ if (vma->vm_pgoff != 0)
|
|
+ return -EINVAL;
|
|
+
|
|
+ map_pages = (vma->vm_end - vma->vm_start + PAGE_SIZE-1) >> PAGE_SHIFT;
|
|
+ if (map_pages > info->nr_pages)
|
|
+ return -EINVAL;
|
|
+
|
|
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
|
|
+ if (map == NULL)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ map->vma = vma;
|
|
+ map->faults = 0;
|
|
+ map->info = info;
|
|
+ atomic_set(&map->map_refs, 1);
|
|
+
|
|
+ mutex_lock(&info->mm_lock);
|
|
+ list_add(&map->link, &info->mappings);
|
|
+ mutex_unlock(&info->mm_lock);
|
|
+
|
|
+ vma->vm_ops = &xenfb_vm_ops;
|
|
+ vma->vm_flags |= (VM_DONTEXPAND | VM_RESERVED);
|
|
+ vma->vm_private_data = map;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+xenfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
|
|
+{
|
|
+ struct xenfb_info *xenfb_info;
|
|
+ int required_mem_len;
|
|
+
|
|
+ xenfb_info = info->par;
|
|
+
|
|
+ if (!xenfb_info->feature_resize) {
|
|
+ if (var->xres == video[KPARAM_WIDTH] &&
|
|
+ var->yres == video[KPARAM_HEIGHT] &&
|
|
+ var->bits_per_pixel == xenfb_info->page->depth) {
|
|
+ return 0;
|
|
+ }
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /* Can't resize past initial width and height */
|
|
+ if (var->xres > video[KPARAM_WIDTH] || var->yres > video[KPARAM_HEIGHT])
|
|
+ return -EINVAL;
|
|
+
|
|
+ required_mem_len = var->xres * var->yres * (xenfb_info->page->depth / 8);
|
|
+ if (var->bits_per_pixel == xenfb_info->page->depth &&
|
|
+ var->xres <= info->fix.line_length / (XENFB_DEPTH / 8) &&
|
|
+ required_mem_len <= info->fix.smem_len) {
|
|
+ var->xres_virtual = var->xres;
|
|
+ var->yres_virtual = var->yres;
|
|
+ return 0;
|
|
+ }
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+static int xenfb_set_par(struct fb_info *info)
|
|
+{
|
|
+ struct xenfb_info *xenfb_info;
|
|
+ unsigned long flags;
|
|
+
|
|
+ xenfb_info = info->par;
|
|
+
|
|
+ spin_lock_irqsave(&xenfb_info->resize_lock, flags);
|
|
+ xenfb_info->resize.type = XENFB_TYPE_RESIZE;
|
|
+ xenfb_info->resize.width = info->var.xres;
|
|
+ xenfb_info->resize.height = info->var.yres;
|
|
+ xenfb_info->resize.stride = info->fix.line_length;
|
|
+ xenfb_info->resize.depth = info->var.bits_per_pixel;
|
|
+ xenfb_info->resize.offset = 0;
|
|
+ xenfb_info->resize_dpy = 1;
|
|
+ spin_unlock_irqrestore(&xenfb_info->resize_lock, flags);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct fb_ops xenfb_fb_ops = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .fb_setcolreg = xenfb_setcolreg,
|
|
+ .fb_fillrect = xenfb_fillrect,
|
|
+ .fb_copyarea = xenfb_copyarea,
|
|
+ .fb_imageblit = xenfb_imageblit,
|
|
+ .fb_mmap = xenfb_mmap,
|
|
+ .fb_check_var = xenfb_check_var,
|
|
+ .fb_set_par = xenfb_set_par,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Interrupt handler for the frame buffer event channel. Discards every
|
|
+ * pending in event by moving in_cons up to in_prod and notifies the other
|
|
+ * end if anything was discarded. Always returns IRQ_HANDLED.
|
|
+ */
|
|
+static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
|
|
+ struct pt_regs *regs)
|
|
+{
|
|
+ /*
|
|
+ * No in events recognized, simply ignore them all.
|
|
+ * If you need to recognize some, see xenkbd's input_handler()
|
|
+ * for how to do that.
|
|
+ */
|
|
+ struct xenfb_info *info = dev_id;
|
|
+ struct xenfb_page *page = info->page;
|
|
+
|
|
+ if (page->in_cons != page->in_prod) {
|
|
+ info->page->in_cons = info->page->in_prod;
|
|
+ notify_remote_via_irq(info->irq);
|
|
+ }
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+static unsigned long vmalloc_to_mfn(void *address)
|
|
+{
|
|
+ return pfn_to_mfn(vmalloc_to_pfn(address));
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Probe callback for the frame buffer frontend.
|
|
+ *
|
|
+ * Allocates the per-device state, the frame buffer memory, the page and
|
|
+ * MFN tables and the shared page, sets up and registers the fb_info, and
|
|
+ * connects to the backend.
|
|
+ *
|
|
+ * Returns 0 on success or a negative errno. Apart from a failed
|
|
+ * allocation of the info structure itself, every failure is unwound
|
|
+ * through xenfb_remove(), which copes with partially initialised state.
|
|
+ */
|
|
+static int __devinit xenfb_probe(struct xenbus_device *dev,
|
|
+				 const struct xenbus_device_id *id)
|
|
+{
|
|
+	struct xenfb_info *info;
|
|
+	struct fb_info *fb_info;
|
|
+	int fb_size;
|
|
+	int val;
|
|
+	int ret;
|
|
+
|
|
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
|
|
+	if (info == NULL) {
|
|
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
|
|
+		return -ENOMEM;
|
|
+	}
|
|
+
|
|
+	/* Limit kernel param videoram amount to what is in xenstore */
|
|
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "videoram", "%d", &val) == 1) {
|
|
+		if (val < video[KPARAM_MEM])
|
|
+			video[KPARAM_MEM] = val;
|
|
+	}
|
|
+
|
|
+	/* If requested res does not fit in available memory, use default */
|
|
+	fb_size = video[KPARAM_MEM] * MB_;
|
|
+	if (video[KPARAM_WIDTH] * video[KPARAM_HEIGHT] * XENFB_DEPTH/8 > fb_size) {
|
|
+		video[KPARAM_WIDTH] = XENFB_WIDTH;
|
|
+		video[KPARAM_HEIGHT] = XENFB_HEIGHT;
|
|
+		fb_size = XENFB_DEFAULT_FB_LEN;
|
|
+	}
|
|
+
|
|
+	dev->dev.driver_data = info;
|
|
+	info->xbdev = dev;
|
|
+	info->irq = -1;
|
|
+	/* Empty dirty rectangle; x2/y2 are already 0 from kzalloc(). */
|
|
+	info->x1 = info->y1 = INT_MAX;
|
|
+	spin_lock_init(&info->dirty_lock);
|
|
+	spin_lock_init(&info->resize_lock);
|
|
+	mutex_init(&info->mm_lock);
|
|
+	init_waitqueue_head(&info->wq);
|
|
+	init_timer(&info->refresh);
|
|
+	info->refresh.function = xenfb_timer;
|
|
+	info->refresh.data = (unsigned long)info;
|
|
+	INIT_LIST_HEAD(&info->mappings);
|
|
+
|
|
+	info->fb = vmalloc(fb_size);
|
|
+	if (info->fb == NULL)
|
|
+		goto error_nomem;
|
|
+	memset(info->fb, 0, fb_size);
|
|
+
|
|
+	info->nr_pages = (fb_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+
|
|
+	info->pages = kmalloc(sizeof(struct page *) * info->nr_pages,
|
|
+			      GFP_KERNEL);
|
|
+	if (info->pages == NULL)
|
|
+		goto error_nomem;
|
|
+
|
|
+	info->mfns = vmalloc(sizeof(unsigned long) * info->nr_pages);
|
|
+	if (!info->mfns)
|
|
+		goto error_nomem;
|
|
+
|
|
+	/* set up shared page */
|
|
+	info->page = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
|
|
+	if (!info->page)
|
|
+		goto error_nomem;
|
|
+
|
|
+	fb_info = framebuffer_alloc(sizeof(u32) * 256, NULL);
|
|
+	/* see fishy hackery below */
|
|
+	if (fb_info == NULL)
|
|
+		goto error_nomem;
|
|
+
|
|
+	/* FIXME fishy hackery */
|
|
+	fb_info->pseudo_palette = fb_info->par;
|
|
+	fb_info->par = info;
|
|
+	/* /FIXME */
|
|
+	fb_info->screen_base = info->fb;
|
|
+
|
|
+	fb_info->fbops = &xenfb_fb_ops;
|
|
+	fb_info->var.xres_virtual = fb_info->var.xres = video[KPARAM_WIDTH];
|
|
+	fb_info->var.yres_virtual = fb_info->var.yres = video[KPARAM_HEIGHT];
|
|
+	fb_info->var.bits_per_pixel = XENFB_DEPTH;
|
|
+
|
|
+	fb_info->var.red = (struct fb_bitfield){16, 8, 0};
|
|
+	fb_info->var.green = (struct fb_bitfield){8, 8, 0};
|
|
+	fb_info->var.blue = (struct fb_bitfield){0, 8, 0};
|
|
+
|
|
+	fb_info->var.activate = FB_ACTIVATE_NOW;
|
|
+	fb_info->var.height = -1;
|
|
+	fb_info->var.width = -1;
|
|
+	fb_info->var.vmode = FB_VMODE_NONINTERLACED;
|
|
+
|
|
+	fb_info->fix.visual = FB_VISUAL_TRUECOLOR;
|
|
+	fb_info->fix.line_length = fb_info->var.xres * (XENFB_DEPTH / 8);
|
|
+	fb_info->fix.smem_start = 0;
|
|
+	fb_info->fix.smem_len = fb_size;
|
|
+	strcpy(fb_info->fix.id, "xen");
|
|
+	fb_info->fix.type = FB_TYPE_PACKED_PIXELS;
|
|
+	fb_info->fix.accel = FB_ACCEL_NONE;
|
|
+
|
|
+	fb_info->flags = FBINFO_FLAG_DEFAULT;
|
|
+
|
|
+	ret = fb_alloc_cmap(&fb_info->cmap, 256, 0);
|
|
+	if (ret < 0) {
|
|
+		framebuffer_release(fb_info);
|
|
+		xenbus_dev_fatal(dev, ret, "fb_alloc_cmap");
|
|
+		goto error;
|
|
+	}
|
|
+
|
|
+	xenfb_init_shared_page(info, fb_info);
|
|
+
|
|
+	ret = register_framebuffer(fb_info);
|
|
+	if (ret) {
|
|
+		/*
|
|
+		 * info->fb_info is only assigned after a successful
|
|
+		 * registration and is still NULL here, so the colour map
|
|
+		 * must be released through the local fb_info pointer.
|
|
+		 */
|
|
+		fb_dealloc_cmap(&fb_info->cmap);
|
|
+		framebuffer_release(fb_info);
|
|
+		xenbus_dev_fatal(dev, ret, "register_framebuffer");
|
|
+		goto error;
|
|
+	}
|
|
+	info->fb_info = fb_info;
|
|
+
|
|
+	ret = xenfb_connect_backend(dev, info);
|
|
+	if (ret < 0)
|
|
+		goto error;
|
|
+
|
|
+	return 0;
|
|
+
|
|
+ error_nomem:
|
|
+	ret = -ENOMEM;
|
|
+	xenbus_dev_fatal(dev, ret, "allocating device memory");
|
|
+ error:
|
|
+	xenfb_remove(dev);
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/*
+ * xenbus resume callback for the virtual framebuffer.
+ *
+ * Tears down the existing backend connection, re-populates the shared page
+ * from the registered fb_info, and connects to the backend again.
+ *
+ * Returns the result of xenfb_connect_backend().
+ */
+static int xenfb_resume(struct xenbus_device *dev)
+{
+	struct xenfb_info *xfb = dev->dev.driver_data;
+	int rc;
+
+	xenfb_disconnect_backend(xfb);
+	xenfb_init_shared_page(xfb, xfb->fb_info);
+
+	rc = xenfb_connect_backend(dev, xfb);
+	return rc;
+}
|
|
+
|
|
+/*
+ * Releases everything owned by the device's xenfb_info.
+ *
+ * Besides being the driver's remove callback, this is also what the probe
+ * error path calls, so the framebuffer is only unregistered when
+ * info->fb_info has been set.
+ *
+ * Always returns 0.
+ */
+static int xenfb_remove(struct xenbus_device *dev)
+{
+	struct xenfb_info *xfb = dev->dev.driver_data;
+	struct fb_info *fbi;
+
+	/* Stop the refresh timer and the update thread (if one was started). */
+	del_timer(&xfb->refresh);
+	if (xfb->kthread != NULL)
+		kthread_stop(xfb->kthread);
+
+	xenfb_disconnect_backend(xfb);
+
+	fbi = xfb->fb_info;
+	if (fbi != NULL) {
+		unregister_framebuffer(fbi);
+		fb_dealloc_cmap(&fbi->cmap);
+		framebuffer_release(fbi);
+	}
+
+	/* Shared page, MFN list, page list, framebuffer memory, then info. */
+	free_page((unsigned long)xfb->page);
+	vfree(xfb->mfns);
+	kfree(xfb->pages);
+	vfree(xfb->fb);
+	kfree(xfb);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * (Re)initialises the page whose MFN is advertised to the backend.
+ *
+ * Records the struct page and MFN of every framebuffer page, stores in
+ * page->pd[] the MFN of each page of the MFN list itself, copies the
+ * geometry from @fb_info and resets both ring index pairs to zero.
+ */
+static void xenfb_init_shared_page(struct xenfb_info *info,
+				   struct fb_info * fb_info)
+{
+	/* Number of MFN entries that fit in one page of info->mfns. */
+	int epd = PAGE_SIZE / sizeof(info->mfns[0]);
+	int n;
+
+	for (n = 0; n < info->nr_pages; n++) {
+		info->pages[n] = vmalloc_to_page(info->fb + n * PAGE_SIZE);
+		info->mfns[n] = vmalloc_to_mfn(info->fb + n * PAGE_SIZE);
+	}
+
+	/* One directory entry per page worth of MFN entries. */
+	n = 0;
+	while (n * epd < info->nr_pages) {
+		info->page->pd[n] = vmalloc_to_mfn(&info->mfns[n * epd]);
+		n++;
+	}
+
+	info->page->width = fb_info->var.xres;
+	info->page->height = fb_info->var.yres;
+	info->page->depth = fb_info->var.bits_per_pixel;
+	info->page->line_length = fb_info->fix.line_length;
+	info->page->mem_length = fb_info->fix.smem_len;
+
+	info->page->in_prod = 0;
+	info->page->in_cons = 0;
+	info->page->out_prod = 0;
+	info->page->out_cons = 0;
+}
|
|
+
|
|
+/*
+ * Connects the frontend to its backend.
+ *
+ * Binds an interrupt handler to a listening event channel, then publishes
+ * "page-ref", "event-channel", "protocol" and "feature-update" under the
+ * device's xenstore node in one transaction, restarting the transaction
+ * whenever it ends with -EAGAIN.  On success the irq is stored in
+ * info->irq and the device is switched to XenbusStateInitialised.
+ *
+ * Returns 0 on success.  On failure the error is reported through
+ * xenbus_dev_fatal(), the irq is unbound again and the error is returned.
+ */
+static int xenfb_connect_backend(struct xenbus_device *dev,
+				 struct xenfb_info *info)
+{
+	struct xenbus_transaction xbt;
+	int evt_irq;
+	int err;
+
+	evt_irq = bind_listening_port_to_irqhandler(
+		dev->otherend_id, xenfb_event_handler, 0, "xenfb", info);
+	if (evt_irq < 0) {
+		xenbus_dev_fatal(dev, evt_irq,
+				 "bind_listening_port_to_irqhandler");
+		return evt_irq;
+	}
+
+	do {
+		err = xenbus_transaction_start(&xbt);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "starting transaction");
+			goto unbind_irq;
+		}
+
+		/* Stop at the first write that fails. */
+		err = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
+				    virt_to_mfn(info->page));
+		if (!err)
+			err = xenbus_printf(xbt, dev->nodename,
+					    "event-channel", "%u",
+					    irq_to_evtchn_port(evt_irq));
+		if (!err)
+			err = xenbus_printf(xbt, dev->nodename,
+					    "protocol", "%s",
+					    XEN_IO_PROTO_ABI_NATIVE);
+		if (!err)
+			err = xenbus_printf(xbt, dev->nodename,
+					    "feature-update", "1");
+		if (err)
+			goto error_xenbus;
+
+		err = xenbus_transaction_end(xbt, 0);
+	} while (err == -EAGAIN);
+
+	if (err) {
+		xenbus_dev_fatal(dev, err, "completing transaction");
+		goto unbind_irq;
+	}
+
+	info->irq = evt_irq;
+	xenbus_switch_state(dev, XenbusStateInitialised);
+	return 0;
+
+error_xenbus:
+	/* Abort the open transaction before reporting the failure. */
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(dev, err, "writing xenstore");
+unbind_irq:
+	unbind_from_irqhandler(evt_irq, info);
+	return err;
+}
|
|
+
|
|
+/*
+ * Unbinds the event-channel irq handler if one is bound (info->irq >= 0)
+ * and marks the irq as unbound by storing -1.
+ */
+static void xenfb_disconnect_backend(struct xenfb_info *info)
+{
+	const int bound_irq = info->irq;
+
+	if (!(bound_irq < 0))
+		unbind_from_irqhandler(bound_irq, info);
+
+	info->irq = -1;
+}
|
|
+
|
|
+/*
+ * xenbus callback invoked when the backend changes state.
+ *
+ * InitWait:   switch the frontend to Connected.
+ * Connected:  if the frontend is not yet Connected, behave as for InitWait;
+ *             otherwise read the backend's "feature-resize" and
+ *             "request-update" keys and start the update thread when
+ *             updates are requested and no thread exists yet.
+ * Closing:    close the frontend.
+ * All other backend states are ignored.
+ */
+static void xenfb_backend_changed(struct xenbus_device *dev,
+				  enum xenbus_state backend_state)
+{
+	struct xenfb_info *info = dev->dev.driver_data;
+	int val;
+
+	switch (backend_state) {
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
+	case XenbusStateUnknown:
+	case XenbusStateClosed:
+		break;
+
+	case XenbusStateInitWait:
+InitWait:
+		xenbus_switch_state(dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateConnected:
+		/*
+		 * Work around xenbus race condition: If backend goes
+		 * through InitWait to Connected fast enough, we can
+		 * get Connected twice here.
+		 */
+		if (dev->state != XenbusStateConnected)
+			goto InitWait; /* no InitWait seen yet, fudge it */
+
+		/* A missing or unreadable key counts as "feature absent". */
+		if (xenbus_scanf(XBT_NIL, dev->otherend,
+				 "feature-resize", "%d", &val) < 0)
+			val = 0;
+		info->feature_resize = val;
+
+		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				 "request-update", "%d", &val) < 0)
+			val = 0;
+
+		if (val && !info->kthread) {
+			info->kthread = kthread_run(xenfb_thread, info,
+						    "xenfb thread");
+			if (IS_ERR(info->kthread)) {
+				/*
+				 * Extract the error code before clearing the
+				 * pointer: PTR_ERR() of the already-cleared
+				 * pointer would report 0 instead of the
+				 * real failure.
+				 */
+				int err = PTR_ERR(info->kthread);
+
+				info->kthread = NULL;
+				xenbus_dev_fatal(dev, err, "xenfb_thread");
+			}
+		}
+		break;
+
+	case XenbusStateClosing:
+		/* FIXME is this safe in any dev->state? */
+		xenbus_frontend_closed(dev);
+		break;
+	}
+}
|
|
+
|
|
+static const struct xenbus_device_id xenfb_ids[] = { /* device types claimed by xenfb_driver (referenced from its .ids) */
|
|
+ { "vfb" }, /* virtual framebuffer */
|
|
+ { "" } /* presumably the empty name terminates the table -- confirm against xenbus */
|
|
+};
|
|
+MODULE_ALIAS("xen:vfb");
|
|
+
|
|
+static struct xenbus_driver xenfb_driver = { /* frontend driver description; registered by xenfb_init() */
|
|
+ .name = "vfb",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = xenfb_ids,
|
|
+ .probe = xenfb_probe,
|
|
+ .remove = xenfb_remove, /* also used by the probe error path */
|
|
+ .resume = xenfb_resume, /* reconnects to the backend */
|
|
+ .otherend_changed = xenfb_backend_changed, /* reacts to backend state changes */
|
|
+};
|
|
+
|
|
+/*
+ * Module entry point: registers the "vfb" frontend driver.
+ *
+ * Returns -ENODEV when not running on Xen or when running as the initial
+ * domain (dom0 has nothing to do here); otherwise returns the result of
+ * xenbus_register_frontend().
+ */
+static int __init xenfb_init(void)
+{
+	if (!is_running_on_xen() || is_initial_xendomain())
+		return -ENODEV;
+
+	return xenbus_register_frontend(&xenfb_driver);
+}
|
|
+
|
|
+/*
+ * Module exit point: unregisters the "vfb" frontend driver.
+ *
+ * A function returning void must not "return" an expression in ISO C, so
+ * the unregister call is a plain statement.
+ */
+static void __exit xenfb_cleanup(void)
+{
+	xenbus_unregister_driver(&xenfb_driver);
+}
+
+module_init(xenfb_init);
+module_exit(xenfb_cleanup);
+
+MODULE_LICENSE("GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/fbfront/xenkbd.c 2008-04-02 12:34:02.000000000 +0200
|
|
@@ -0,0 +1,354 @@
|
|
+/*
|
|
+ * drivers/xen/fbfront/xenkbd.c -- Xen para-virtual input device
|
|
+ *
|
|
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
|
|
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
|
|
+ *
|
|
+ * Based on linux/drivers/input/mouse/sermouse.c
|
|
+ *
|
|
+ * This file is subject to the terms and conditions of the GNU General Public
|
|
+ * License. See the file COPYING in the main directory of this archive for
|
|
+ * more details.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * TODO:
|
|
+ *
|
|
+ * Switch to grant tables together with xenfb.c.
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/input.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/interface/io/fbif.h>
|
|
+#include <xen/interface/io/kbdif.h>
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+struct xenkbd_info /* per-device state of the Xen virtual keyboard/pointer frontend */
|
|
+{
|
|
+ struct input_dev *kbd; /* "Xen Virtual Keyboard"; set once registered in xenkbd_probe() */
|
|
+ struct input_dev *ptr; /* "Xen Virtual Pointer"; set once registered in xenkbd_probe() */
|
|
+ struct xenkbd_page *page; /* ring page whose MFN is published to the backend as "page-ref" */
|
|
+ int irq; /* irq bound to the event channel; -1 after xenkbd_disconnect_backend() */
|
|
+ struct xenbus_device *xbdev; /* owning xenbus device */
|
|
+ char phys[32]; /* "xenbus/<nodename>", used as the phys string of both input devices */
|
|
+};
|
|
+
|
|
+static int xenkbd_remove(struct xenbus_device *);
|
|
+static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
|
|
+static void xenkbd_disconnect_backend(struct xenkbd_info *);
|
|
+
|
|
+/*
|
|
+ * Note: if you need to send out events, see xenfb_do_update() for how
|
|
+ * to do that.
|
|
+ */
|
|
+
|
|
+/*
+ * Interrupt handler for the input event channel.
+ *
+ * Consumes every event between in_cons and in_prod of the shared page and
+ * forwards it to the input layer: motion and position events go to the
+ * pointer device, key events go to whichever device advertises the keycode
+ * (the pointer device wins if both do).  Afterwards the consumer index is
+ * published and the other end is notified.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
+{
+	struct xenkbd_info *info = dev_id;
+	struct xenkbd_page *ring = info->page;
+	__u32 idx, end;
+
+	end = ring->in_prod;
+	if (end == ring->in_cons)
+		return IRQ_HANDLED;
+	rmb();			/* ensure we see ring contents up to prod */
+
+	idx = ring->in_cons;
+	while (idx != end) {
+		union xenkbd_in_event *ev = &XENKBD_IN_RING_REF(ring, idx);
+		struct input_dev *target = info->ptr;
+
+		switch (ev->type) {
+		case XENKBD_TYPE_MOTION:
+			if (ev->motion.rel_z)
+				input_report_rel(target, REL_WHEEL,
+						 -ev->motion.rel_z);
+			input_report_rel(target, REL_X, ev->motion.rel_x);
+			input_report_rel(target, REL_Y, ev->motion.rel_y);
+			break;
+		case XENKBD_TYPE_KEY:
+			/* Route the key to a device that declares it. */
+			target = NULL;
+			if (test_bit(ev->key.keycode, info->kbd->keybit))
+				target = info->kbd;
+			if (test_bit(ev->key.keycode, info->ptr->keybit))
+				target = info->ptr;
+			if (target == NULL) {
+				printk("xenkbd: unhandled keycode 0x%x\n",
+				       ev->key.keycode);
+				break;
+			}
+			input_report_key(target, ev->key.keycode,
+					 ev->key.pressed);
+			break;
+		case XENKBD_TYPE_POS:
+			if (ev->pos.rel_z)
+				input_report_rel(target, REL_WHEEL,
+						 -ev->pos.rel_z);
+			input_report_abs(target, ABS_X, ev->pos.abs_x);
+			input_report_abs(target, ABS_Y, ev->pos.abs_y);
+			break;
+		}
+
+		if (target != NULL)
+			input_sync(target);
+		idx++;
+	}
+
+	mb();			/* ensure we got ring contents */
+	ring->in_cons = idx;
+	notify_remote_via_irq(info->irq);
+
+	return IRQ_HANDLED;
+}
|
|
+
|
|
+/*
+ * Probe callback for a "vkbd" device.
+ *
+ * Allocates the per-device state and the shared ring page, registers the
+ * virtual keyboard and the virtual pointer with the input layer, and
+ * connects to the backend.
+ *
+ * Returns 0 on success or a negative errno.  Every failure after the info
+ * structure has been allocated is cleaned up through xenkbd_remove().
+ */
+int __devinit xenkbd_probe(struct xenbus_device *dev,
+			   const struct xenbus_device_id *id)
+{
+	int ret, i;
+	struct xenkbd_info *info;
+	struct input_dev *kbd, *ptr;
+
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+		return -ENOMEM;
+	}
+	dev->dev.driver_data = info;
+	info->xbdev = dev;
+	/*
+	 * kzalloc() leaves irq at 0, which xenkbd_disconnect_backend()
+	 * treats as a bound irq.  Mark it unbound so that an early failure
+	 * below does not unbind irq 0, which this driver never bound.
+	 */
+	info->irq = -1;
+	snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
+
+	info->page = (void *)__get_free_page(GFP_KERNEL);
+	if (!info->page)
+		goto error_nomem;
+	info->page->in_cons = info->page->in_prod = 0;
+	info->page->out_cons = info->page->out_prod = 0;
+
+	/* keyboard */
+	kbd = input_allocate_device();
+	if (!kbd)
+		goto error_nomem;
+	kbd->name = "Xen Virtual Keyboard";
+	kbd->phys = info->phys;
+	kbd->id.bustype = BUS_PCI;
+	kbd->id.vendor = 0x5853;
+	kbd->id.product = 0xffff;
+	kbd->evbit[0] = BIT(EV_KEY);
+	for (i = KEY_ESC; i < KEY_UNKNOWN; i++)
+		set_bit(i, kbd->keybit);
+	for (i = KEY_OK; i < KEY_MAX; i++)
+		set_bit(i, kbd->keybit);
+
+	ret = input_register_device(kbd);
+	if (ret) {
+		/* Never registered, so free it here; info->kbd stays NULL. */
+		input_free_device(kbd);
+		xenbus_dev_fatal(dev, ret, "input_register_device(kbd)");
+		goto error;
+	}
+	info->kbd = kbd;
+
+	/* pointing device */
+	ptr = input_allocate_device();
+	if (!ptr)
+		goto error_nomem;
+	ptr->name = "Xen Virtual Pointer";
+	ptr->phys = info->phys;
+	ptr->id.bustype = BUS_PCI;
+	ptr->id.vendor = 0x5853;
+	ptr->id.product = 0xfffe;
+	ptr->evbit[0] = BIT(EV_KEY) | BIT(EV_REL) | BIT(EV_ABS);
+	for (i = BTN_LEFT; i <= BTN_TASK; i++)
+		set_bit(i, ptr->keybit);
+	ptr->relbit[0] = BIT(REL_X) | BIT(REL_Y) | BIT(REL_WHEEL);
+	input_set_abs_params(ptr, ABS_X, 0, XENFB_WIDTH, 0, 0);
+	input_set_abs_params(ptr, ABS_Y, 0, XENFB_HEIGHT, 0, 0);
+
+	ret = input_register_device(ptr);
+	if (ret) {
+		/* Never registered, so free it here; info->ptr stays NULL. */
+		input_free_device(ptr);
+		xenbus_dev_fatal(dev, ret, "input_register_device(ptr)");
+		goto error;
+	}
+	info->ptr = ptr;
+
+	ret = xenkbd_connect_backend(dev, info);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+
+error_nomem:
+	ret = -ENOMEM;
+	xenbus_dev_fatal(dev, ret, "allocating device memory");
+error:
+	xenkbd_remove(dev);
+	return ret;
+}
|
|
+
|
|
+/*
+ * xenbus resume callback for the virtual input device.
+ *
+ * Drops the old backend connection, resets all four ring indices of the
+ * shared page to zero and connects to the backend again.
+ *
+ * Returns the result of xenkbd_connect_backend().
+ */
+static int xenkbd_resume(struct xenbus_device *dev)
+{
+	struct xenkbd_info *kbdinfo = dev->dev.driver_data;
+	struct xenkbd_page *ring;
+
+	xenkbd_disconnect_backend(kbdinfo);
+
+	ring = kbdinfo->page;
+	ring->in_prod = 0;
+	ring->in_cons = 0;
+	ring->out_prod = 0;
+	ring->out_cons = 0;
+
+	return xenkbd_connect_backend(dev, kbdinfo);
+}
|
|
+
|
|
+/*
+ * Releases everything owned by the device's xenkbd_info.
+ *
+ * This is the driver's remove callback and is also called from the
+ * xenkbd_probe() error path, where info->kbd and/or info->ptr may still be
+ * NULL because the corresponding device was never registered.  Only
+ * devices that were actually registered are unregistered.
+ *
+ * Always returns 0.
+ */
+static int xenkbd_remove(struct xenbus_device *dev)
+{
+	struct xenkbd_info *info = dev->dev.driver_data;
+
+	xenkbd_disconnect_backend(info);
+	if (info->kbd)
+		input_unregister_device(info->kbd);
+	if (info->ptr)
+		input_unregister_device(info->ptr);
+	/* info->page may be NULL (0) here; free_page() is called as before. */
+	free_page((unsigned long)info->page);
+	kfree(info);
+	return 0;
+}
|
|
+
|
|
+/*
+ * Connects the input frontend to its backend.
+ *
+ * Binds input_handler() to a listening event channel and stores the irq in
+ * info->irq, then publishes "page-ref" and "event-channel" under the
+ * device's xenstore node in one transaction, restarting the transaction
+ * whenever it ends with -EAGAIN.  On success the device is switched to
+ * XenbusStateInitialised.
+ *
+ * Returns 0 on success or a negative errno.  On failure after the bind,
+ * info->irq is left set; this function does not unbind it itself.
+ */
+static int xenkbd_connect_backend(struct xenbus_device *dev,
+				  struct xenkbd_info *info)
+{
+	struct xenbus_transaction xbt;
+	int err;
+
+	err = bind_listening_port_to_irqhandler(
+		dev->otherend_id, input_handler, 0, "xenkbd", info);
+	if (err < 0) {
+		xenbus_dev_fatal(dev, err,
+				 "bind_listening_port_to_irqhandler");
+		return err;
+	}
+	/* A non-negative return value is the bound irq. */
+	info->irq = err;
+
+	do {
+		err = xenbus_transaction_start(&xbt);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "starting transaction");
+			return err;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "page-ref", "%lu",
+				    virt_to_mfn(info->page));
+		if (!err)
+			err = xenbus_printf(xbt, dev->nodename,
+					    "event-channel", "%u",
+					    irq_to_evtchn_port(info->irq));
+		if (err)
+			goto error_xenbus;
+
+		err = xenbus_transaction_end(xbt, 0);
+	} while (err == -EAGAIN);
+
+	if (err) {
+		xenbus_dev_fatal(dev, err, "completing transaction");
+		return err;
+	}
+
+	xenbus_switch_state(dev, XenbusStateInitialised);
+	return 0;
+
+error_xenbus:
+	/* Abort the open transaction before reporting the failure. */
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(dev, err, "writing xenstore");
+	return err;
+}
|
|
+
|
|
+/*
+ * Unbinds the event-channel irq handler if one is bound (info->irq >= 0)
+ * and marks the irq as unbound by storing -1.
+ */
+static void xenkbd_disconnect_backend(struct xenkbd_info *info)
+{
+	const int bound_irq = info->irq;
+
+	if (!(bound_irq < 0))
+		unbind_from_irqhandler(bound_irq, info);
+
+	info->irq = -1;
+}
|
|
+
|
|
+/*
+ * xenbus callback invoked when the backend changes state.
+ *
+ * InitWait:   if the backend advertises "feature-abs-pointer", request
+ *             absolute pointer events by writing "request-abs-pointer",
+ *             then switch the frontend to Connected.
+ * Connected:  if the frontend is not yet Connected, behave as for InitWait;
+ *             otherwise adopt the backend's "width"/"height" as the range
+ *             of the pointer's absolute axes.
+ * Closing:    close the frontend.
+ * All other backend states are ignored.
+ */
+static void xenkbd_backend_changed(struct xenbus_device *dev,
+				   enum xenbus_state backend_state)
+{
+	struct xenkbd_info *info = dev->dev.driver_data;
+	int ret, val;
+
+	switch (backend_state) {
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
+	case XenbusStateUnknown:
+	case XenbusStateClosed:
+		break;
+
+	case XenbusStateInitWait:
+InitWait:
+		/* A missing or unreadable key counts as "feature absent". */
+		ret = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				   "feature-abs-pointer", "%d", &val);
+		if (ret < 0)
+			val = 0;
+		if (val) {
+			ret = xenbus_printf(XBT_NIL, info->xbdev->nodename,
+					    "request-abs-pointer", "1");
+			/*
+			 * Not fatal: the connection still proceeds without
+			 * the request, but leave a trace instead of
+			 * dropping the error silently.
+			 */
+			if (ret)
+				printk(KERN_WARNING
+				       "xenkbd: can't request abs-pointer (%d)\n",
+				       ret);
+		}
+		xenbus_switch_state(dev, XenbusStateConnected);
+		break;
+
+	case XenbusStateConnected:
+		/*
+		 * Work around xenbus race condition: If backend goes
+		 * through InitWait to Connected fast enough, we can
+		 * get Connected twice here.
+		 */
+		if (dev->state != XenbusStateConnected)
+			goto InitWait; /* no InitWait seen yet, fudge it */
+
+		/* Set input abs params to match backend screen res */
+		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				 "width", "%d", &val) > 0)
+			input_set_abs_params(info->ptr, ABS_X, 0, val, 0, 0);
+
+		if (xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				 "height", "%d", &val) > 0)
+			input_set_abs_params(info->ptr, ABS_Y, 0, val, 0, 0);
+
+		break;
+
+	case XenbusStateClosing:
+		xenbus_frontend_closed(dev);
+		break;
+	}
+}
|
|
+
|
|
+static const struct xenbus_device_id xenkbd_ids[] = { /* device types claimed by xenkbd_driver (referenced from its .ids) */
|
|
+ { "vkbd" }, /* virtual keyboard/pointer */
|
|
+ { "" } /* presumably the empty name terminates the table -- confirm against xenbus */
|
|
+};
|
|
+MODULE_ALIAS("xen:vkbd");
|
|
+
|
|
+static struct xenbus_driver xenkbd_driver = { /* frontend driver description; registered by xenkbd_init() */
|
|
+ .name = "vkbd",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = xenkbd_ids,
|
|
+ .probe = xenkbd_probe,
|
|
+ .remove = xenkbd_remove, /* also used by the probe error path */
|
|
+ .resume = xenkbd_resume, /* reconnects to the backend */
|
|
+ .otherend_changed = xenkbd_backend_changed, /* reacts to backend state changes */
|
|
+};
|
|
+
|
|
+/*
+ * Module entry point: registers the "vkbd" frontend driver.
+ *
+ * Returns -ENODEV when not running on Xen or when running as the initial
+ * domain (dom0 has nothing to do here); otherwise returns the result of
+ * xenbus_register_frontend().
+ */
+static int __init xenkbd_init(void)
+{
+	if (!is_running_on_xen() || is_initial_xendomain())
+		return -ENODEV;
+
+	return xenbus_register_frontend(&xenkbd_driver);
+}
|
|
+
|
|
+/*
+ * Module exit point: unregisters the "vkbd" frontend driver.
+ *
+ * A function returning void must not "return" an expression in ISO C, so
+ * the unregister call is a plain statement.
+ */
+static void __exit xenkbd_cleanup(void)
+{
+	xenbus_unregister_driver(&xenkbd_driver);
+}
+
+module_init(xenkbd_init);
+module_exit(xenkbd_cleanup);
+
+MODULE_LICENSE("GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/gntdev/Makefile 2008-01-07 13:19:18.000000000 +0100
|
|
@@ -0,0 +1 @@
|
|
+obj-$(CONFIG_XEN_GRANT_DEV) := gntdev.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/gntdev/gntdev.c 2011-01-03 12:43:21.000000000 +0100
|
|
@@ -0,0 +1,1038 @@
|
|
+/******************************************************************************
|
|
+ * gntdev.c
|
|
+ *
|
|
+ * Device for accessing (in user-space) pages that have been granted by other
|
|
+ * domains.
|
|
+ *
|
|
+ * Copyright (c) 2006-2007, D G Murray.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
+ */
|
|
+
|
|
+#include <asm/atomic.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/device.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/mman.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <asm/io.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/balloon.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/driver_util.h>
|
|
+
|
|
+#include <linux/types.h>
|
|
+#include <xen/public/gntdev.h>
|
|
+
|
|
+
|
|
+#define DRIVER_AUTHOR "Derek G. Murray <Derek.Murray@cl.cam.ac.uk>"
|
|
+#define DRIVER_DESC "User-space granted page access driver"
|
|
+
|
|
+MODULE_LICENSE("GPL");
|
|
+MODULE_AUTHOR(DRIVER_AUTHOR);
|
|
+MODULE_DESCRIPTION(DRIVER_DESC);
|
|
+
|
|
+#define MAX_GRANTS_LIMIT 1024
|
|
+#define DEFAULT_MAX_GRANTS 128
|
|
+
|
|
+/* A slot can be in one of three states:
|
|
+ *
|
|
+ * 0. GNTDEV_SLOT_INVALID:
|
|
+ * This slot is not associated with a grant reference, and is therefore free
|
|
+ * to be overwritten by a new grant reference.
|
|
+ *
|
|
+ * 1. GNTDEV_SLOT_NOT_YET_MAPPED:
|
|
+ * This slot is associated with a grant reference (via the
|
|
+ * IOCTL_GNTDEV_MAP_GRANT_REF ioctl), but it has not yet been mmap()-ed.
|
|
+ *
|
|
+ * 2. GNTDEV_SLOT_MAPPED:
|
|
+ * This slot is associated with a grant reference, and has been mmap()-ed.
|
|
+ */
|
|
+typedef enum gntdev_slot_state { /* lifecycle state of one entry of the grants array */
|
|
+ GNTDEV_SLOT_INVALID = 0, /* free; every slot starts here in init_private_data() */
|
|
+ GNTDEV_SLOT_NOT_YET_MAPPED, /* set by add_grant_reference() / add_grant_references() */
|
|
+ GNTDEV_SLOT_MAPPED /* grant reference has been mmap()-ed */
|
|
+} gntdev_slot_state_t;
|
|
+
|
|
+#define GNTDEV_INVALID_HANDLE -1
|
|
+#define GNTDEV_FREE_LIST_INVALID -1
|
|
+/* Each opened instance of gntdev is associated with a list of grants,
|
|
+ * represented by an array of elements of the following type,
|
|
+ * gntdev_grant_info_t.
|
|
+ */
|
|
+typedef struct gntdev_grant_info { /* one slot of a file's grants array */
|
|
+ gntdev_slot_state_t state; /* selects which member of u is meaningful */
|
|
+ union {
|
|
+ uint32_t free_list_index; /* while the slot is free: its position in free_list */
|
|
+ struct {
|
|
+ domid_t domid; /* copied from the ioctl's grant reference */
|
|
+ grant_ref_t ref; /* copied from the ioctl's grant reference */
|
|
+ grant_handle_t kernel_handle; /* NOTE(review): presumably set when the grant is mapped -- not written by the add_grant_* helpers */
|
|
+ grant_handle_t user_handle; /* NOTE(review): as above -- confirm in gntdev_mmap() */
|
|
+ uint64_t dev_bus_addr; /* NOTE(review): as above -- confirm in gntdev_mmap() */
|
|
+ } valid;
|
|
+ } u;
|
|
+} gntdev_grant_info_t;
|
|
+
|
|
+/* Private data structure, which is stored in the file pointer for files
|
|
+ * associated with this device.
|
|
+ */
|
|
+typedef struct gntdev_file_private_data { /* allocated in gntdev_open(), freed in gntdev_release() */
|
|
+
|
|
+ /* Array of grant information. */
|
|
+ gntdev_grant_info_t *grants; /* NULL until init_private_data() has run */
|
|
+ uint32_t grants_size; /* number of entries in grants; set by init_private_data() */
|
|
+
|
|
+ /* Read/write semaphore used to protect the grants array. */
|
|
+ struct rw_semaphore grants_sem;
|
|
+
|
|
+ /* An array of indices of free slots in the grants array.
|
|
+ * N.B. An entry in this list may temporarily have the value
|
|
+ * GNTDEV_FREE_LIST_INVALID if the corresponding slot has been removed
|
|
+ * from the list by the contiguous allocator, but the list has not yet
|
|
+ * been compressed. However, this is not visible across invocations of
|
|
+ * the device.
|
|
+ */
|
|
+ int32_t *free_list; /* NULL until init_private_data() has run */
|
|
+
|
|
+ /* The number of free slots in the grants array. */
|
|
+ uint32_t free_list_size;
|
|
+
|
|
+ /* Read/write semaphore used to protect the free list. */
|
|
+ struct rw_semaphore free_list_sem;
|
|
+
|
|
+ /* Index of the next slot after the most recent contiguous allocation,
|
|
+ * for use in a next-fit allocator.
|
|
+ */
|
|
+ uint32_t next_fit_index; /* starting point of find_contiguous_free_range() */
|
|
+
|
|
+ /* Used to map grants into the kernel, before mapping them into user
|
|
+ * space.
|
|
+ */
|
|
+ struct page **foreign_pages; /* NULL until init_private_data() has run */
|
|
+
|
|
+} gntdev_file_private_data_t;
|
|
+
|
|
+/* Module lifecycle operations. */
|
|
+static int __init gntdev_init(void);
|
|
+static void __exit gntdev_exit(void);
|
|
+
|
|
+module_init(gntdev_init);
|
|
+module_exit(gntdev_exit);
|
|
+
|
|
+/* File operations. */
|
|
+static int gntdev_open(struct inode *inode, struct file *flip);
|
|
+static int gntdev_release(struct inode *inode, struct file *flip);
|
|
+static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma);
|
|
+static long gntdev_ioctl(struct file *flip,
|
|
+ unsigned int cmd, unsigned long arg);
|
|
+
|
|
+static const struct file_operations gntdev_fops = { /* passed to register_chrdev() in gntdev_init() */
|
|
+ .owner = THIS_MODULE,
|
|
+ .open = gntdev_open, /* allocates the per-file private data */
|
|
+ .release = gntdev_release, /* frees the per-file private data */
|
|
+ .mmap = gntdev_mmap,
|
|
+ .unlocked_ioctl = gntdev_ioctl
|
|
+};
|
|
+
|
|
+/* VM operations. */
|
|
+static void gntdev_vma_close(struct vm_area_struct *vma);
|
|
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
|
|
+ pte_t *ptep, int is_fullmm);
|
|
+
|
|
+static struct vm_operations_struct gntdev_vmops = { /* NOTE(review): presumably installed on VMAs by gntdev_mmap() -- confirm */
|
|
+ .close = gntdev_vma_close,
|
|
+ .zap_pte = gntdev_clear_pte /* NOTE(review): zap_pte looks like a Xen-tree extension of vm_operations_struct -- confirm */
|
|
+};
|
|
+
|
|
+/* Global variables. */
|
|
+
|
|
+/* The driver major number, for use when unregistering the driver. */
|
|
+static int gntdev_major;
|
|
+
|
|
+#define GNTDEV_NAME "gntdev"
|
|
+
|
|
+/* Memory mapping functions
|
|
+ * ------------------------
|
|
+ *
|
|
+ * Every granted page is mapped into both kernel and user space, and the two
|
|
+ * following functions return the respective virtual addresses of these pages.
|
|
+ *
|
|
+ * When shadow paging is disabled, the granted page is mapped directly into
|
|
+ * user space; when it is enabled, it is mapped into the kernel and remapped
|
|
+ * into user space using vm_insert_page() (see gntdev_mmap(), below).
|
|
+ */
|
|
+
|
|
+/* Computes the user-space virtual address of page number @page_index inside
+ * @vma, i.e. the start of the VM area plus @page_index pages.
+ */
+static inline unsigned long get_user_vaddr (struct vm_area_struct *vma,
+					    int page_index)
+{
+	unsigned long area_base = (unsigned long) vma->vm_start;
+
+	return area_base + (page_index << PAGE_SHIFT);
+}
|
|
+
|
|
+/* Computes the kernel virtual address of the page that backs slot
+ * @slot_index, taken from the foreign_pages array of @priv.
+ */
+static inline unsigned long get_kernel_vaddr (gntdev_file_private_data_t *priv,
+					      int slot_index)
+{
+	struct page *backing_page = priv->foreign_pages[slot_index];
+
+	return (unsigned long) pfn_to_kaddr(page_to_pfn(backing_page));
+}
|
|
+
|
|
+/* Helper functions. */
|
|
+
|
|
+/* Records the grant reference described by @op in a free slot of
+ * @private_data.  The slot is taken from the end of the free list, whose
+ * vacated entry is overwritten with GNTDEV_FREE_LIST_INVALID.
+ *
+ * On return *@offset holds the offset to mmap() in order to map the grant:
+ * the chosen slot index shifted left by PAGE_SHIFT.
+ *
+ * Always returns 0.  The function does not itself check that the free list
+ * is non-empty.
+ */
+static int add_grant_reference(gntdev_file_private_data_t *private_data,
+			       struct ioctl_gntdev_grant_ref *op,
+			       uint64_t *offset)
+{
+	uint32_t last = private_data->free_list_size - 1;
+	uint32_t slot_index = private_data->free_list[last];
+	gntdev_grant_info_t *slot = &private_data->grants[slot_index];
+
+	/* Pop the last free-list entry. */
+	private_data->free_list_size = last;
+	private_data->free_list[last] = GNTDEV_FREE_LIST_INVALID;
+
+	/* Fill in the slot from the caller's request. */
+	slot->state = GNTDEV_SLOT_NOT_YET_MAPPED;
+	slot->u.valid.domid = op->domid;
+	slot->u.valid.ref = op->ref;
+
+	/* Slot index expressed as an offset in the "file address space". */
+	*offset = slot_index << PAGE_SHIFT;
+
+	return 0;
+}
|
|
+
|
|
+/* Records @count grant references from @ops in the consecutive slots
+ * starting at @first_slot.  For each slot, its free-list entry is
+ * overwritten with GNTDEV_FREE_LIST_INVALID (the free list is not
+ * compacted here) and the slot is filled in from the matching @ops entry.
+ *
+ * @first_slot is expected to come from find_contiguous_free_range() during
+ * the same invocation of the driver.
+ *
+ * Always returns 0.
+ */
+static int add_grant_references(gntdev_file_private_data_t *private_data,
+				uint32_t count,
+				struct ioctl_gntdev_grant_ref *ops,
+				uint32_t first_slot)
+{
+	uint32_t n;
+
+	for (n = 0; n < count; ++n) {
+		gntdev_grant_info_t *slot =
+			&private_data->grants[first_slot + n];
+
+		/* Read the free-list position before the union is reused. */
+		private_data->free_list[slot->u.free_list_index] =
+			GNTDEV_FREE_LIST_INVALID;
+
+		slot->state = GNTDEV_SLOT_NOT_YET_MAPPED;
+		slot->u.valid.domid = ops[n].domid;
+		slot->u.valid.ref = ops[n].ref;
+	}
+
+	return 0;
+}
|
|
+
|
|
+/* Compacts the free list of @private_data: entries equal to
+ * GNTDEV_FREE_LIST_INVALID are dropped, the remaining entries are moved to
+ * the front in their existing order, and each moved slot's free_list_index
+ * is updated to its new position.  Afterwards free_list_size equals the
+ * number of valid entries.
+ */
+static void compress_free_list(gntdev_file_private_data_t *private_data)
+{
+	const uint32_t scan_end = private_data->free_list_size;
+	uint32_t src, dst = 0;
+
+	for (src = 0; src < scan_end; ++src) {
+		int32_t slot_index = private_data->free_list[src];
+
+		if (slot_index == GNTDEV_FREE_LIST_INVALID)
+			continue;
+
+		if (src != dst) {
+			/* Move the entry down and leave a hole behind. */
+			private_data->free_list[dst] = slot_index;
+			private_data->grants[slot_index].u.free_list_index =
+				dst;
+			private_data->free_list[src] =
+				GNTDEV_FREE_LIST_INVALID;
+		}
+		++dst;
+	}
+
+	private_data->free_list_size = dst;
+}
|
|
+
|
|
+/* Scans slots [@from, @to) of @private_data for @num_slots consecutive slots
+ * in the GNTDEV_SLOT_INVALID state.  Returns the index of the first slot of
+ * the run, or -ENOMEM if there is none.
+ */
+static int scan_for_free_run(gntdev_file_private_data_t *private_data,
+			     uint32_t from, uint32_t to, uint32_t num_slots)
+{
+	uint32_t i, run_start = from, run_length = 0;
+
+	for (i = from; i < to; ++i) {
+		if (private_data->grants[i].state != GNTDEV_SLOT_INVALID) {
+			/* An in-use slot breaks the run: start over. */
+			run_length = 0;
+			continue;
+		}
+		if (run_length == 0)
+			run_start = i;
+		if (++run_length == num_slots)
+			return run_start;
+	}
+
+	return -ENOMEM;
+}
+
+/* Searches the grant array of @private_data for a range of @num_slots
+ * contiguous slots in the GNTDEV_SLOT_INVALID state, starting at
+ * next_fit_index and wrapping around to the start of the array.
+ *
+ * A run is only reported when every slot in it is free; meeting an in-use
+ * slot resets the run.  The wrap-around pass extends far enough past
+ * next_fit_index to find runs that begin before it and end after it.
+ *
+ * Returns the index of the first slot if a range is found, otherwise -ENOMEM.
+ */
+static int find_contiguous_free_range(gntdev_file_private_data_t *private_data,
+				      uint32_t num_slots)
+{
+	uint32_t start_index = private_data->next_fit_index;
+	uint32_t size = private_data->grants_size;
+	uint32_t wrap_end;
+	int found;
+
+	/* First search from the start_index to the end of the array. */
+	found = scan_for_free_run(private_data, start_index, size, num_slots);
+	if (found >= 0)
+		return found;
+
+	/* Now search from the start of the array.  The last run worth
+	 * considering begins at start_index - 1, so it ends just before
+	 * start_index + num_slots - 1; clamp that to the array size (this
+	 * also covers arithmetic wrap-around).
+	 */
+	wrap_end = start_index + num_slots - 1;
+	if (wrap_end > size || wrap_end < start_index)
+		wrap_end = size;
+
+	return scan_for_free_run(private_data, 0, wrap_end, num_slots);
+}
|
|
+
|
|
+/* Allocates and initialises the per-file state for @max_grants slots: the
+ * kernel-mapping pages, the grants array and the free list.  Every slot
+ * starts in the GNTDEV_SLOT_INVALID state and the free list contains all
+ * slots in descending order, so the last entry is slot 0.
+ *
+ * Returns 0 on success or -ENOMEM.  On failure everything allocated here is
+ * freed and the corresponding pointers in @priv are reset to NULL, so that
+ * gntdev_release(), which frees any non-NULL pointer, cannot free them a
+ * second time.
+ */
+static int init_private_data(gntdev_file_private_data_t *priv,
+			     uint32_t max_grants)
+{
+	uint32_t i;
+
+	/* Allocate space for the kernel-mapping of granted pages. */
+	priv->foreign_pages =
+		alloc_empty_pages_and_pagevec(max_grants);
+	if (!priv->foreign_pages)
+		goto nomem_out;
+
+	/* Allocate the grant list and free-list. */
+	priv->grants = kmalloc(max_grants * sizeof(gntdev_grant_info_t),
+			       GFP_KERNEL);
+	if (!priv->grants)
+		goto nomem_out2;
+	priv->free_list = kmalloc(max_grants * sizeof(int32_t), GFP_KERNEL);
+	if (!priv->free_list)
+		goto nomem_out3;
+
+	/* Initialise the free-list, which contains all slots at first. */
+	for (i = 0; i < max_grants; ++i) {
+		priv->free_list[max_grants - i - 1] = i;
+		priv->grants[i].state = GNTDEV_SLOT_INVALID;
+		priv->grants[i].u.free_list_index = max_grants - i - 1;
+	}
+	priv->grants_size = max_grants;
+	priv->free_list_size = max_grants;
+	priv->next_fit_index = 0;
+
+	return 0;
+
+nomem_out3:
+	kfree(priv->grants);
+	priv->grants = NULL;
+nomem_out2:
+	free_empty_pages_and_pagevec(priv->foreign_pages, max_grants);
+	priv->foreign_pages = NULL;
+nomem_out:
+	return -ENOMEM;
+
+}
|
|
+
|
|
+/* Interface functions. */
|
|
+
|
|
+/* Initialises the driver. Called when the module is loaded.
+ *
+ * Registers the character device with a dynamically assigned major number
+ * and then tries to create the device node in the Xen class.
+ *
+ * Returns -ENODEV when not running on Xen, or the error reported by
+ * register_chrdev() when registration fails.  Failure of the class/device
+ * creation is not treated as an error: the major number is logged so the
+ * node can be created by hand.
+ */
+static int __init gntdev_init(void)
+{
+	struct class *class;
+	struct class_device *device;
+
+	if (!is_running_on_xen()) {
+		printk(KERN_ERR "You must be running Xen to use gntdev\n");
+		return -ENODEV;
+	}
+
+	gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops);
+	if (gntdev_major < 0)
+	{
+		printk(KERN_ERR "Could not register gntdev device\n");
+		/* Propagate the real error instead of a blanket -ENOMEM. */
+		return gntdev_major;
+	}
+
+	/* Note that if the sysfs code fails, we will still initialise the
+	 * device, and output the major number so that the device can be
+	 * created manually using mknod.
+	 */
+	if ((class = get_xen_class()) == NULL) {
+		printk(KERN_ERR "Error setting up xen_class\n");
+		printk(KERN_ERR "gntdev created with major number = %d\n",
+		       gntdev_major);
+		return 0;
+	}
+
+	device = class_device_create(class, NULL, MKDEV(gntdev_major, 0),
+				     NULL, GNTDEV_NAME);
+	if (IS_ERR(device)) {
+		printk(KERN_ERR "Error creating gntdev device in xen_class\n");
+		printk(KERN_ERR "gntdev created with major number = %d\n",
+		       gntdev_major);
+		return 0;
+	}
+
+	return 0;
+}
|
|
+
|
|
+/* Module exit: removes the device node from the Xen class, when that class
+ * is available, and unregisters the character device.
+ */
+static void __exit gntdev_exit(void)
+{
+	struct class *xen_class = get_xen_class();
+
+	if (xen_class != NULL)
+		class_device_destroy(xen_class, MKDEV(gntdev_major, 0));
+
+	unregister_chrdev(gntdev_major, GNTDEV_NAME);
+}
|
|
+
|
|
+/* Called when the device is opened.
+ *
+ * Takes a module reference (dropped again in gntdev_release()) and
+ * allocates the per-file private data.  The grants array, free list and
+ * foreign pages are left NULL; they are set up later by init_private_data().
+ *
+ * Returns 0 on success, -ENODEV if the module reference cannot be taken,
+ * or -ENOMEM if the private data cannot be allocated.  No module reference
+ * is held when an error is returned.
+ */
+static int gntdev_open(struct inode *inode, struct file *flip)
+{
+	gntdev_file_private_data_t *private_data;
+
+	if (!try_module_get(THIS_MODULE))
+		return -ENODEV;
+
+	/* Allocate space for the per-instance private data. */
+	private_data = kmalloc(sizeof(*private_data), GFP_KERNEL);
+	if (!private_data)
+		goto nomem_out;
+
+	/* These will be lazily initialised by init_private_data. */
+	private_data->grants = NULL;
+	private_data->free_list = NULL;
+	private_data->foreign_pages = NULL;
+
+	init_rwsem(&private_data->grants_sem);
+	init_rwsem(&private_data->free_list_sem);
+
+	flip->private_data = private_data;
+
+	return 0;
+
+nomem_out:
+	/* A failed open is never released, so drop the reference here. */
+	module_put(THIS_MODULE);
+	return -ENOMEM;
+}
|
|
+
|
|
+/* Called when the last reference to the open file goes away.
+ *
+ * Frees everything hanging off the file's private data and drops the module
+ * reference taken in gntdev_open(). Always returns 0.
+ */
+static int gntdev_release(struct inode *inode, struct file *flip)
+{
+	gntdev_file_private_data_t *priv = flip->private_data;
+
+	if (priv != NULL) {
+		/* The page vector only exists once the private data has been
+		 * initialised.
+		 */
+		if (priv->foreign_pages != NULL)
+			free_empty_pages_and_pagevec(priv->foreign_pages,
+						     priv->grants_size);
+
+		/* kfree() accepts NULL, so no guards are needed here. */
+		kfree(priv->grants);
+		kfree(priv->free_list);
+		kfree(priv);
+	}
+
+	module_put(THIS_MODULE);
+	return 0;
+}
|
|
+
|
|
+/* Called when an attempt is made to mmap() the device. The private data from
+ * @flip contains the list of grant references that can be mapped. The vm_pgoff
+ * field of @vma contains the index into that list that refers to the grant
+ * reference that will be mapped. Only mappings that are a multiple of
+ * PAGE_SIZE are handled.
+ *
+ * Every slot covered by the mapping must be in the NOT_YET_MAPPED state. Each
+ * grant is first mapped into kernel space and then either mapped straight
+ * into the user PTE (no auto-translated physmap) or inserted into the VM area
+ * with vm_insert_page() (auto-translated physmap).
+ *
+ * Returns 0 on success. Returns -EINVAL for a missing private data pointer,
+ * an mmap() before the grant array exists, a private writable mapping or a
+ * slot in the wrong state; -ENXIO for an out-of-range size/offset; -EAGAIN
+ * if the hypervisor reports GNTST_eagain; the vm_insert_page() error if page
+ * insertion fails; and -ENOMEM for allocation or any other mapping failure.
+ */
+static int gntdev_mmap (struct file *flip, struct vm_area_struct *vma)
+{
+	struct gnttab_map_grant_ref op;
+	unsigned long slot_index = vma->vm_pgoff;
+	unsigned long kernel_vaddr, user_vaddr;
+	uint32_t size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	uint64_t ptep;
+	int ret, exit_ret;
+	int flags;
+	int i;
+	struct page *page;
+	gntdev_file_private_data_t *private_data = flip->private_data;
+
+	if (unlikely(!private_data)) {
+		printk(KERN_ERR "File's private data is NULL.\n");
+		return -EINVAL;
+	}
+
+	/* Test to make sure that the grants array has been initialised. */
+	down_read(&private_data->grants_sem);
+	if (unlikely(!private_data->grants)) {
+		up_read(&private_data->grants_sem);
+		printk(KERN_ERR "Attempted to mmap before ioctl.\n");
+		return -EINVAL;
+	}
+	up_read(&private_data->grants_sem);
+
+	if (unlikely((size <= 0) ||
+		     (size + slot_index) > private_data->grants_size)) {
+		printk(KERN_ERR "Invalid number of pages or offset"
+		       "(num_pages = %d, first_slot = %ld).\n",
+		       size, slot_index);
+		return -ENXIO;
+	}
+
+	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) {
+		printk(KERN_ERR "Writable mappings must be shared.\n");
+		return -EINVAL;
+	}
+
+	/* Slots must be in the NOT_YET_MAPPED state. */
+	down_write(&private_data->grants_sem);
+	for (i = 0; i < size; ++i) {
+		if (private_data->grants[slot_index + i].state !=
+		    GNTDEV_SLOT_NOT_YET_MAPPED) {
+			printk(KERN_ERR "Slot (index = %ld) is in the wrong "
+			       "state (%d).\n", slot_index + i,
+			       private_data->grants[slot_index + i].state);
+			up_write(&private_data->grants_sem);
+			return -EINVAL;
+		}
+	}
+
+	/* Install the hook for unmapping. */
+	vma->vm_ops = &gntdev_vmops;
+
+	/* The VM area contains pages from another VM. */
+	vma->vm_flags |= VM_FOREIGN;
+	vma->vm_private_data = kzalloc(size * sizeof(struct page *),
+				       GFP_KERNEL);
+	if (vma->vm_private_data == NULL) {
+		printk(KERN_ERR "Couldn't allocate mapping structure for VM "
+		       "area.\n");
+		/* grants_sem is still held for writing at this point; it must
+		 * be released or every later user of the file blocks forever.
+		 */
+		up_write(&private_data->grants_sem);
+		return -ENOMEM;
+	}
+
+	/* This flag prevents Bad PTE errors when the memory is unmapped. */
+	vma->vm_flags |= VM_RESERVED;
+
+	/* This flag prevents this VM area being copied on a fork(). A better
+	 * behaviour might be to explicitly carry out the appropriate mappings
+	 * on fork(), but I don't know if there's a hook for this.
+	 */
+	vma->vm_flags |= VM_DONTCOPY;
+
+#ifdef CONFIG_X86
+	/* This flag ensures that the page tables are not unpinned before the
+	 * VM area is unmapped. Therefore Xen still recognises the PTE as
+	 * belonging to an L1 pagetable, and the grant unmap operation will
+	 * succeed, even if the process does not exit cleanly.
+	 */
+	vma->vm_mm->context.has_foreign_mappings = 1;
+#endif
+
+	/* Default error code for the failure paths below that do not set a
+	 * more specific one.
+	 */
+	exit_ret = -ENOMEM;
+	for (i = 0; i < size; ++i) {
+
+		flags = GNTMAP_host_map;
+		if (!(vma->vm_flags & VM_WRITE))
+			flags |= GNTMAP_readonly;
+
+		kernel_vaddr = get_kernel_vaddr(private_data, slot_index + i);
+		user_vaddr = get_user_vaddr(vma, i);
+		page = private_data->foreign_pages[slot_index + i];
+
+		gnttab_set_map_op(&op, kernel_vaddr, flags,
+				  private_data->grants[slot_index+i]
+				  .u.valid.ref,
+				  private_data->grants[slot_index+i]
+				  .u.valid.domid);
+
+		/* Carry out the mapping of the grant reference. */
+		ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+						&op, 1);
+		BUG_ON(ret);
+		if (op.status != GNTST_okay) {
+			if (op.status != GNTST_eagain) {
+				printk(KERN_ERR "Error mapping the grant reference "
+				       "into the kernel (%d). domid = %d; ref = %d\n",
+				       op.status,
+				       private_data->grants[slot_index+i]
+				       .u.valid.domid,
+				       private_data->grants[slot_index+i]
+				       .u.valid.ref);
+			} else {
+				/* Propagate eagain instead of trying to fix it up */
+				exit_ret = -EAGAIN;
+			}
+			goto undo_map_out;
+		}
+
+		/* Store a reference to the page that will be mapped into user
+		 * space.
+		 */
+		((struct page **) vma->vm_private_data)[i] = page;
+
+		/* Mark mapped page as reserved. */
+		SetPageReserved(page);
+
+		/* Record the grant handle, for use in the unmap operation. */
+		private_data->grants[slot_index+i].u.valid.kernel_handle =
+			op.handle;
+		private_data->grants[slot_index+i].u.valid.dev_bus_addr =
+			op.dev_bus_addr;
+
+		private_data->grants[slot_index+i].state = GNTDEV_SLOT_MAPPED;
+		private_data->grants[slot_index+i].u.valid.user_handle =
+			GNTDEV_INVALID_HANDLE;
+
+		/* Now perform the mapping to user space. */
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+
+			/* NOT USING SHADOW PAGE TABLES. */
+			/* In this case, we map the grant(s) straight into user
+			 * space.
+			 */
+
+			/* Get the machine address of the PTE for the user
+			 * page.
+			 */
+			if ((ret = create_lookup_pte_addr(vma->vm_mm,
+							  vma->vm_start
+							  + (i << PAGE_SHIFT),
+							  &ptep)))
+			{
+				printk(KERN_ERR "Error obtaining PTE pointer "
+				       "(%d).\n", ret);
+				goto undo_map_out;
+			}
+
+			/* Configure the map operation. */
+
+			/* The reference is to be used by host CPUs. */
+			flags = GNTMAP_host_map;
+
+			/* Specifies a user space mapping. */
+			flags |= GNTMAP_application_map;
+
+			/* The map request contains the machine address of the
+			 * PTE to update.
+			 */
+			flags |= GNTMAP_contains_pte;
+
+			if (!(vma->vm_flags & VM_WRITE))
+				flags |= GNTMAP_readonly;
+
+			gnttab_set_map_op(&op, ptep, flags,
+					  private_data->grants[slot_index+i]
+					  .u.valid.ref,
+					  private_data->grants[slot_index+i]
+					  .u.valid.domid);
+
+			/* Carry out the mapping of the grant reference. */
+			ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+							&op, 1);
+			BUG_ON(ret);
+			if (op.status != GNTST_okay) {
+				printk(KERN_ERR "Error mapping the grant "
+				       "reference into user space (%d). domid "
+				       "= %d; ref = %d\n", op.status,
+				       private_data->grants[slot_index+i].u
+				       .valid.domid,
+				       private_data->grants[slot_index+i].u
+				       .valid.ref);
+				/* This should never happen after we've mapped into
+				 * the kernel space. */
+				BUG_ON(op.status == GNTST_eagain);
+				goto undo_map_out;
+			}
+
+			/* Record the grant handle, for use in the unmap
+			 * operation.
+			 */
+			private_data->grants[slot_index+i].u.
+				valid.user_handle = op.handle;
+
+			/* Update p2m structure with the new mapping. */
+			set_phys_to_machine(__pa(kernel_vaddr) >> PAGE_SHIFT,
+					    FOREIGN_FRAME(private_data->
+							  grants[slot_index+i]
+							  .u.valid.dev_bus_addr
+							  >> PAGE_SHIFT));
+		} else {
+			/* USING SHADOW PAGE TABLES. */
+			/* In this case, we simply insert the page into the VM
+			 * area. A failure here must not be reported as a
+			 * successful mmap(), so propagate it to the caller.
+			 */
+			ret = vm_insert_page(vma, user_vaddr, page);
+			if (ret) {
+				printk(KERN_ERR "Error inserting page into "
+				       "VM area (%d).\n", ret);
+				exit_ret = ret;
+				goto undo_map_out;
+			}
+		}
+
+	}
+	exit_ret = 0;
+
+	up_write(&private_data->grants_sem);
+	return exit_ret;
+
+undo_map_out:
+	/* If we have a mapping failure, the unmapping will be taken care of
+	 * by do_mmap_pgoff(), which will eventually call gntdev_clear_pte().
+	 * All we need to do here is free the vma_private_data.
+	 */
+	kfree(vma->vm_private_data);
+
+	/* THIS IS VERY UNPLEASANT: do_mmap_pgoff() will set the vma->vm_file
+	 * to NULL on failure. However, we need this in gntdev_clear_pte() to
+	 * unmap the grants. Therefore, we smuggle a reference to the file's
+	 * private data in the VM area's private data pointer.
+	 */
+	vma->vm_private_data = private_data;
+
+	up_write(&private_data->grants_sem);
+
+	return exit_ret;
+}
|
|
+
|
|
+/* PTE teardown hook for gntdev VM areas.
+ *
+ * Clears the PTE at @addr/@ptep and, if the corresponding grant slot is in
+ * the MAPPED state, also unmaps the grant from user and kernel space, puts
+ * the slot back into the NOT_YET_MAPPED state and invalidates the p2m entry
+ * of its foreign page. Returns the PTE value as it was on entry.
+ */
+static pte_t gntdev_clear_pte(struct vm_area_struct *vma, unsigned long addr,
+			      pte_t *ptep, int is_fullmm)
+{
+	gntdev_file_private_data_t *priv;
+	struct gnttab_unmap_grant_ref unmap;
+	pte_t old_pte;
+	int slot, rc;
+
+	/* THIS IS VERY UNPLEASANT: do_mmap_pgoff() clears vma->vm_file when
+	 * mmap() fails, but the file's private data is still needed here to
+	 * unmap the grants. gntdev_mmap() therefore stashes it in
+	 * vma->vm_private_data on its failure path, and that copy is used
+	 * when vm_file is gone.
+	 */
+	if (vma->vm_file) {
+		priv = (gntdev_file_private_data_t *)
+			vma->vm_file->private_data;
+	} else if (vma->vm_private_data) {
+		priv = (gntdev_file_private_data_t *)
+			vma->vm_private_data;
+	} else {
+		priv = NULL; /* gcc warning */
+		BUG();
+	}
+
+	/* Remember the current PTE contents; this is the return value. */
+	old_pte = *ptep;
+
+	/* Work out which grant slot backs this PTE. */
+	slot = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
+
+	/* A slot that was never mapped (e.g. when called from a failing
+	 * mmap()) has no grant to unmap: just clear the PTE.
+	 */
+	if (priv->grants[slot].state != GNTDEV_SLOT_MAPPED) {
+		pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+		return old_pte;
+	}
+
+	/* Step 1: drop the user space mapping, if one was made. */
+	if (priv->grants[slot].u.valid.user_handle != GNTDEV_INVALID_HANDLE &&
+	    !xen_feature(XENFEAT_auto_translated_physmap)) {
+		/* NOT USING SHADOW PAGE TABLES. */
+		gnttab_set_unmap_op(&unmap, ptep_to_machine(ptep),
+				    GNTMAP_contains_pte,
+				    priv->grants[slot].u.valid.user_handle);
+		rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+					       &unmap, 1);
+		BUG_ON(rc);
+		if (unmap.status != GNTST_okay)
+			printk("User unmap grant status = %d\n",
+			       unmap.status);
+	} else {
+		/* USING SHADOW PAGE TABLES. */
+		pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
+	}
+
+	/* Step 2: drop the kernel space mapping of the grant. */
+	gnttab_set_unmap_op(&unmap, get_kernel_vaddr(priv, slot),
+			    GNTMAP_host_map,
+			    priv->grants[slot].u.valid.kernel_handle);
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
+	BUG_ON(rc);
+	if (unmap.status != GNTST_okay)
+		printk("Kernel unmap grant status = %d\n", unmap.status);
+
+	/* Step 3: the slot may now be mapped again, or removed by a
+	 * subsequent ioctl.
+	 */
+	priv->grants[slot].state = GNTDEV_SLOT_NOT_YET_MAPPED;
+
+	/* Step 4: invalidate the physical to machine mapping for the page. */
+	set_phys_to_machine(page_to_pfn(priv->foreign_pages[slot]),
+			    INVALID_P2M_ENTRY);
+
+	return old_pte;
+}
|
|
+
|
|
+/* "Destructor" for a VM area.
|
|
+ */
|
|
+static void gntdev_vma_close(struct vm_area_struct *vma)
+{
+	/* Release whatever is attached to the VM area's private data pointer.
+	 * kfree() ignores a NULL pointer, so no explicit guard is needed.
+	 */
+	kfree(vma->vm_private_data);
+}
|
|
+
|
|
+/* Called when an ioctl is made on the device.
+ *
+ * Supported commands:
+ *   IOCTL_GNTDEV_MAP_GRANT_REF         - reserve slot(s) for one or more
+ *                                        grant references and return the
+ *                                        device offset to mmap() them at.
+ *   IOCTL_GNTDEV_UNMAP_GRANT_REF       - return slot(s) that are not
+ *                                        currently mmap()-ed to the free list.
+ *   IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR  - look up offset/count of the gntdev
+ *                                        VM area starting at a user address.
+ *   IOCTL_GNTDEV_SET_MAX_GRANTS        - size the grant array; only allowed
+ *                                        before it has been initialised.
+ *
+ * For every command other than SET_MAX_GRANTS the grant array is lazily
+ * initialised with DEFAULT_MAX_GRANTS on first use.
+ *
+ * Returns 0 on success or a negative errno; -ENOIOCTLCMD for unknown
+ * commands.
+ */
+static long gntdev_ioctl(struct file *flip,
+			 unsigned int cmd, unsigned long arg)
+{
+	int rc = 0;
+	gntdev_file_private_data_t *private_data =
+		(gntdev_file_private_data_t *) flip->private_data;
+
+	/* On the first invocation, we will lazily initialise the grant array
+	 * and free-list.
+	 */
+	if (unlikely(!private_data->grants)
+	    && likely(cmd != IOCTL_GNTDEV_SET_MAX_GRANTS)) {
+		down_write(&private_data->grants_sem);
+
+		/* Re-check under the lock: another caller may have done the
+		 * initialisation in the meantime.
+		 */
+		if (unlikely(private_data->grants)) {
+			up_write(&private_data->grants_sem);
+			goto private_data_initialised;
+		}
+
+		/* Just use the default. Setting to a non-default is handled
+		 * in the ioctl switch.
+		 */
+		rc = init_private_data(private_data, DEFAULT_MAX_GRANTS);
+
+		up_write(&private_data->grants_sem);
+
+		if (rc) {
+			printk (KERN_ERR "Initialising gntdev private data "
+				"failed.\n");
+			return rc;
+		}
+	}
+
+private_data_initialised:
+	switch (cmd) {
+	case IOCTL_GNTDEV_MAP_GRANT_REF:
+	{
+		struct ioctl_gntdev_map_grant_ref op;
+		struct ioctl_gntdev_grant_ref *refs = NULL;
+
+		if (copy_from_user(&op, (void __user *)arg, sizeof(op)))
+			return -EFAULT;
+		if (unlikely(op.count <= 0))
+			return -EINVAL;
+
+		/* For more than one reference, fetch the whole array from
+		 * user space. Counts larger than the grant array are left
+		 * with refs == NULL and rejected by the free-list check below.
+		 */
+		if (op.count > 1 && op.count <= private_data->grants_size) {
+			struct ioctl_gntdev_grant_ref *u;
+
+			refs = kmalloc(op.count * sizeof(*refs), GFP_KERNEL);
+			if (!refs)
+				return -ENOMEM;
+			u = ((struct ioctl_gntdev_map_grant_ref *)arg)->refs;
+			if (copy_from_user(refs, (void __user *)u,
+					   sizeof(*refs) * op.count)) {
+				kfree(refs);
+				return -EFAULT;
+			}
+		}
+
+		down_write(&private_data->grants_sem);
+		down_write(&private_data->free_list_sem);
+
+		if (unlikely(op.count > private_data->free_list_size)) {
+			rc = -ENOMEM;
+			goto map_out;
+		}
+
+		if (op.count == 1) {
+			if ((rc = add_grant_reference(private_data, op.refs,
+						      &op.index)) < 0) {
+				printk(KERN_ERR "Adding grant reference "
+				       "failed (%d).\n", rc);
+				goto map_out;
+			}
+		} else {
+			if ((rc = find_contiguous_free_range(private_data,
+							     op.count)) < 0) {
+				printk(KERN_ERR "Finding contiguous range "
+				       "failed (%d).\n", rc);
+				goto map_out;
+			}
+			/* rc is the first slot of the range; report it to the
+			 * caller as a byte offset into the device.
+			 */
+			op.index = rc << PAGE_SHIFT;
+			if ((rc = add_grant_references(private_data, op.count,
+						       refs, rc))) {
+				printk(KERN_ERR "Adding grant references "
+				       "failed (%d).\n", rc);
+				goto map_out;
+			}
+			compress_free_list(private_data);
+		}
+
+	map_out:
+		up_write(&private_data->free_list_sem);
+		up_write(&private_data->grants_sem);
+
+		kfree(refs);
+
+		if (!rc && copy_to_user((void __user *)arg, &op, sizeof(op)))
+			rc = -EFAULT;
+		return rc;
+	}
+	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
+	{
+		struct ioctl_gntdev_unmap_grant_ref op;
+		uint32_t i, start_index;
+
+		if (copy_from_user(&op, (void __user *)arg, sizeof(op)))
+			return -EFAULT;
+
+		start_index = op.index >> PAGE_SHIFT;
+
+		/* Reject offsets that do not fit in start_index; otherwise a
+		 * huge offset could silently alias a low slot number.
+		 */
+		if ((op.index >> PAGE_SHIFT) != start_index)
+			return -EINVAL;
+
+		/* Range check written so that it cannot wrap around: a plain
+		 * "start_index + op.count" can overflow for user-supplied
+		 * values and let out-of-range slots through.
+		 */
+		if (op.count > private_data->grants_size ||
+		    start_index > private_data->grants_size - op.count)
+			return -EINVAL;
+
+		down_write(&private_data->grants_sem);
+
+		/* First, check that all pages are in the NOT_YET_MAPPED
+		 * state.
+		 */
+		for (i = 0; i < op.count; ++i) {
+			if (unlikely
+			    (private_data->grants[start_index + i].state
+			     != GNTDEV_SLOT_NOT_YET_MAPPED)) {
+				if (private_data->grants[start_index + i].state
+				    == GNTDEV_SLOT_INVALID) {
+					printk(KERN_ERR
+					       "Tried to remove an invalid "
+					       "grant at offset 0x%x.\n",
+					       (start_index + i)
+					       << PAGE_SHIFT);
+					rc = -EINVAL;
+				} else {
+					printk(KERN_ERR
+					       "Tried to remove a grant which "
+					       "is currently mmap()-ed at "
+					       "offset 0x%x.\n",
+					       (start_index + i)
+					       << PAGE_SHIFT);
+					rc = -EBUSY;
+				}
+				goto unmap_out;
+			}
+		}
+
+		down_write(&private_data->free_list_sem);
+
+		/* Unmap pages and add them to the free list.
+		 */
+		for (i = 0; i < op.count; ++i) {
+			private_data->grants[start_index+i].state =
+				GNTDEV_SLOT_INVALID;
+			private_data->grants[start_index+i].u.free_list_index =
+				private_data->free_list_size;
+			private_data->free_list[private_data->free_list_size] =
+				start_index + i;
+			++private_data->free_list_size;
+		}
+
+		up_write(&private_data->free_list_sem);
+	unmap_out:
+		up_write(&private_data->grants_sem);
+		return rc;
+	}
+	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
+	{
+		struct ioctl_gntdev_get_offset_for_vaddr op;
+		struct vm_area_struct *vma;
+		unsigned long vaddr;
+
+		if (copy_from_user(&op, (void __user *)arg, sizeof(op)))
+			return -EFAULT;
+
+		vaddr = (unsigned long)op.vaddr;
+
+		/* Hold mmap_sem for reading while the VM area is looked up
+		 * and inspected.
+		 */
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, vaddr);
+		if (!vma || vma->vm_ops != &gntdev_vmops) {
+			rc = -EFAULT;
+			goto get_offset_out;
+		}
+		if (vma->vm_start != vaddr) {
+			printk(KERN_ERR "The vaddr specified in an "
+			       "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at "
+			       "the start of the VM area. vma->vm_start = "
+			       "%#lx; vaddr = %#lx\n",
+			       vma->vm_start, vaddr);
+			rc = -EFAULT;
+			goto get_offset_out;
+		}
+		op.offset = vma->vm_pgoff << PAGE_SHIFT;
+		op.count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	get_offset_out:
+		up_read(&current->mm->mmap_sem);
+		if (!rc && copy_to_user((void __user *)arg, &op, sizeof(op)))
+			rc = -EFAULT;
+		return rc;
+	}
+	case IOCTL_GNTDEV_SET_MAX_GRANTS:
+	{
+		struct ioctl_gntdev_set_max_grants op;
+
+		if (copy_from_user(&op, (void __user *)arg, sizeof(op)))
+			return -EFAULT;
+		if (op.count > MAX_GRANTS_LIMIT)
+			return -EINVAL;
+
+		/* The size can only be chosen while the grant array does not
+		 * exist yet.
+		 */
+		down_write(&private_data->grants_sem);
+		if (unlikely(private_data->grants))
+			rc = -EBUSY;
+		else
+			rc = init_private_data(private_data, op.count);
+		up_write(&private_data->grants_sem);
+		return rc;
+	}
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return 0;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netback/Makefile 2007-07-12 08:54:23.000000000 +0200
|
|
@@ -0,0 +1,5 @@
|
|
+obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
|
|
+obj-$(CONFIG_XEN_NETDEV_LOOPBACK) += netloop.o
|
|
+
|
|
+netbk-y := netback.o xenbus.o interface.o accel.o
|
|
+netloop-y := loopback.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netback/accel.c 2008-01-07 13:19:18.000000000 +0100
|
|
@@ -0,0 +1,269 @@
|
|
+/******************************************************************************
|
|
+ * drivers/xen/netback/accel.c
|
|
+ *
|
|
+ * Interface between backend virtual network device and accelerated plugin.
|
|
+ *
|
|
+ * Copyright (C) 2007 Solarflare Communications, Inc
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/list.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <linux/mutex.h>
|
|
+
|
|
+#include "common.h"
|
|
+
|
|
+#if 0
|
|
+#undef DPRINTK
|
|
+#define DPRINTK(fmt, args...) \
|
|
+ printk("netback/accel (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * A list of available netback accelerator plugin modules (each list
|
|
+ * entry is of type struct netback_accelerator)
|
|
+ */
|
|
+static struct list_head accelerators_list;
|
|
+/* Lock used to protect access to accelerators_list */
|
|
+DEFINE_MUTEX(accelerators_mutex);
|
|
+
|
|
+/*
+ * Compare a backend to an accelerator, and decide if they are
+ * compatible (i.e. if the accelerator should be used by the
+ * backend).
+ *
+ * The backend's "accel" xenbus node is read and compared against the
+ * accelerator's eth_name. Returns 1 on a match and 0 otherwise,
+ * including when the node cannot be read. @be is currently unused.
+ */
+static int match_accelerator(struct xenbus_device *xendev,
+			     struct backend_info *be,
+			     struct netback_accelerator *accelerator)
+{
+	int rc;
+	char *eth_name = xenbus_read(XBT_NIL, xendev->nodename, "accel", NULL);
+
+	if (IS_ERR(eth_name)) {
+		/* Probably means not present. PTR_ERR() yields a long, so it
+		 * has to be printed with %ld.
+		 */
+		DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
+			__FUNCTION__, PTR_ERR(eth_name));
+		return 0;
+	}
+
+	rc = !strcmp(eth_name, accelerator->eth_name);
+
+	/* The string handed back by xenbus_read() is ours to free. */
+	kfree(eth_name);
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Attach @accelerator to backend @be and invoke the plugin's probe hook
+ * for @xendev. If the hook reports failure, the attachment is undone:
+ * the use count is dropped again, the module reference on the plugin is
+ * released and the backend is left without an accelerator.
+ */
+static void do_probe(struct backend_info *be,
+		     struct netback_accelerator *accelerator,
+		     struct xenbus_device *xendev)
+{
+	be->accelerator = accelerator;
+	atomic_inc(&be->accelerator->use_count);
+
+	if (be->accelerator->hooks->probe(xendev) == 0)
+		return;
+
+	/* The plugin rejected the device: roll everything back. */
+	atomic_dec(&be->accelerator->use_count);
+	module_put(be->accelerator->hooks->owner);
+	be->accelerator = NULL;
+}
|
|
+
|
|
+
|
|
+/*
+ * Per-device callback used when a new accelerator has been connected.
+ * For a "vif" device whose backend matches the accelerator passed in
+ * @arg, a reference on the plugin module is taken and the plugin's
+ * probe hook is run via do_probe(). Always returns 0.
+ */
+static int netback_accelerator_probe_backend(struct device *dev, void *arg)
+{
+	struct netback_accelerator *accelerator = arg;
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct backend_info *be;
+
+	/* Only network backends are of interest. */
+	if (strcmp("vif", xendev->devicetype) != 0)
+		return 0;
+
+	be = xendev->dev.driver_data;
+
+	if (!match_accelerator(xendev, be, accelerator))
+		return 0;
+
+	if (!try_module_get(accelerator->hooks->owner))
+		return 0;
+
+	do_probe(be, accelerator, xendev);
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Per-device callback used when an accelerator is going away.
+ * For a "vif" device whose backend currently uses the accelerator passed
+ * in @arg, the plugin's remove hook is run and the backend is detached
+ * from the accelerator. Always returns 0.
+ */
+static int netback_accelerator_remove_backend(struct device *dev, void *arg)
+{
+	struct netback_accelerator *accelerator = arg;
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct backend_info *be;
+
+	/* Only network backends are of interest. */
+	if (strcmp("vif", xendev->devicetype) != 0)
+		return 0;
+
+	be = xendev->dev.driver_data;
+
+	/* Leave backends bound to a different (or no) accelerator alone. */
+	if (be->accelerator != accelerator)
+		return 0;
+
+	be->accelerator->hooks->remove(xendev);
+	atomic_dec(&be->accelerator->use_count);
+	module_put(be->accelerator->hooks->owner);
+	be->accelerator = NULL;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+
|
|
+/*
+ * Entry point for a netback accelerator plugin module. Called to
+ * advertise its presence, and connect to any suitable backends.
+ *
+ * Returns 0 on success. If @version is newer than ours, our own version
+ * number is returned so that the caller can decide whether to retry with
+ * it; if @version is older, -EPROTO is returned. -ENOMEM is returned when
+ * the bookkeeping structures cannot be allocated.
+ */
+int netback_connect_accelerator(unsigned version, int id, const char *eth_name,
+				struct netback_accel_hooks *hooks)
+{
+	struct netback_accelerator *new_accelerator;
+	unsigned eth_name_len;
+
+	/* Caller has a higher version number: leave it up to them to decide
+	 * whether to continue. They can call again with a lower number if
+	 * they're happy to be compatible with us.
+	 */
+	if (version > NETBACK_ACCEL_VERSION)
+		return NETBACK_ACCEL_VERSION;
+
+	/* We have a more recent version than the caller. Currently rejected,
+	 * but may in future be handled in a backward compatible way.
+	 */
+	if (version < NETBACK_ACCEL_VERSION)
+		return -EPROTO;
+
+	new_accelerator = kmalloc(sizeof(*new_accelerator), GFP_KERNEL);
+	if (new_accelerator == NULL) {
+		DPRINTK("%s: failed to allocate memory for accelerator\n",
+			__FUNCTION__);
+		return -ENOMEM;
+	}
+
+	/* Keep a private copy of the interface name. */
+	eth_name_len = strlen(eth_name) + 1;
+	new_accelerator->eth_name = kmalloc(eth_name_len, GFP_KERNEL);
+	if (new_accelerator->eth_name == NULL) {
+		DPRINTK("%s: failed to allocate memory for eth_name string\n",
+			__FUNCTION__);
+		kfree(new_accelerator);
+		return -ENOMEM;
+	}
+	strlcpy(new_accelerator->eth_name, eth_name, eth_name_len);
+
+	new_accelerator->id = id;
+	new_accelerator->hooks = hooks;
+	atomic_set(&new_accelerator->use_count, 0);
+
+	mutex_lock(&accelerators_mutex);
+
+	list_add(&new_accelerator->link, &accelerators_list);
+
+	/* tell existing backends about new plugin */
+	xenbus_for_each_backend(new_accelerator,
+				netback_accelerator_probe_backend);
+
+	mutex_unlock(&accelerators_mutex);
+
+	return 0;
+}
|
|
+EXPORT_SYMBOL_GPL(netback_connect_accelerator);
|
|
+
|
|
+
|
|
+/*
+ * Disconnect an accelerator plugin module that has previously been
+ * connected.
+ *
+ * The first registered accelerator whose eth_name equals @eth_name is
+ * detached from every backend using it, unlinked from the list and freed.
+ * @id is not consulted for the lookup.
+ */
+void netback_disconnect_accelerator(int id, const char *eth_name)
+{
+	struct netback_accelerator *cursor;
+	struct netback_accelerator *victim = NULL;
+
+	mutex_lock(&accelerators_mutex);
+
+	/* Locate the accelerator by name; only the first hit is handled. */
+	list_for_each_entry(cursor, &accelerators_list, link) {
+		if (strcmp(eth_name, cursor->eth_name) == 0) {
+			victim = cursor;
+			break;
+		}
+	}
+
+	if (victim != NULL) {
+		xenbus_for_each_backend(victim,
+					netback_accelerator_remove_backend);
+
+		/* Every user must have let go of it by now. */
+		BUG_ON(atomic_read(&victim->use_count) != 0);
+
+		list_del(&victim->link);
+		kfree(victim->eth_name);
+		kfree(victim);
+	}
+
+	mutex_unlock(&accelerators_mutex);
+}
|
|
+EXPORT_SYMBOL_GPL(netback_disconnect_accelerator);
|
|
+
|
|
+
|
|
+/*
+ * Walk the list of registered accelerators and hand backend @be (device
+ * @dev) to the first one that matches and whose module can be pinned.
+ */
+void netback_probe_accelerators(struct backend_info *be,
+				struct xenbus_device *dev)
+{
+	struct netback_accelerator *candidate;
+
+	mutex_lock(&accelerators_mutex);
+
+	list_for_each_entry(candidate, &accelerators_list, link) {
+		if (!match_accelerator(dev, be, candidate))
+			continue;
+
+		/* A plugin that cannot be pinned is skipped. */
+		if (!try_module_get(candidate->hooks->owner))
+			continue;
+
+		do_probe(be, candidate, dev);
+		break;
+	}
+
+	mutex_unlock(&accelerators_mutex);
+}
|
|
+
|
|
+
|
|
+/*
+ * Detach backend @be (device @dev) from its accelerator, if it has one:
+ * the plugin's remove hook is run, the use count and the module reference
+ * are dropped and the backend's accelerator pointer is cleared.
+ */
+void netback_remove_accelerators(struct backend_info *be,
+				 struct xenbus_device *dev)
+{
+	mutex_lock(&accelerators_mutex);
+
+	/* Nothing to notify when no accelerator is attached. */
+	if (be->accelerator == NULL)
+		goto out_unlock;
+
+	be->accelerator->hooks->remove(dev);
+	atomic_dec(&be->accelerator->use_count);
+	module_put(be->accelerator->hooks->owner);
+	be->accelerator = NULL;
+
+out_unlock:
+	mutex_unlock(&accelerators_mutex);
+}
|
|
+
|
|
+
|
|
+/* Initialise accelerators_list as an empty list head. */
|
|
+void netif_accel_init(void)
|
|
+{
|
|
+ INIT_LIST_HEAD(&accelerators_list);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netback/common.h 2011-02-17 09:58:10.000000000 +0100
|
|
@@ -0,0 +1,226 @@
|
|
+/******************************************************************************
|
|
+ * arch/xen/drivers/netif/backend/common.h
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __NETIF__BACKEND__COMMON_H__
|
|
+#define __NETIF__BACKEND__COMMON_H__
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/ip.h>
|
|
+#include <linux/in.h>
|
|
+#include <linux/netdevice.h>
|
|
+#include <linux/etherdevice.h>
|
|
+#include <linux/wait.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/interface/io/netif.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+#define DPRINTK(_f, _a...) \
|
|
+ pr_debug("(file=%s, line=%d) " _f, \
|
|
+ __FILE__ , __LINE__ , ## _a )
|
|
+#define IPRINTK(fmt, args...) \
|
|
+ printk(KERN_INFO "xen_net: " fmt, ##args)
|
|
+#define WPRINTK(fmt, args...) \
|
|
+ printk(KERN_WARNING "xen_net: " fmt, ##args)
|
|
+
|
|
+/* Per-interface state of one backend virtual network interface. */
+typedef struct netif_st {
+	/* Identity: frontend domain plus per-domain interface handle. */
+	domid_t          domid;
+	unsigned int     handle;
+
+	u8               fe_dev_addr[6];
+
+	/* Grant handles/references of the two shared rings, and the irq. */
+	grant_handle_t   tx_shmem_handle;
+	grant_ref_t      tx_shmem_ref;
+	grant_handle_t   rx_shmem_handle;
+	grant_ref_t      rx_shmem_ref;
+	unsigned int     irq;
+
+	/* Back-ring views of the shared rings and the VM areas backing them. */
+	netif_tx_back_ring_t tx;
+	netif_rx_back_ring_t rx;
+	struct vm_struct *tx_comms_area;
+	struct vm_struct *rx_comms_area;
+
+	/* Feature bits that must stay clear in dev->features. */
+	int              features_disabled;
+
+	/* Capabilities reported by the frontend. */
+	u8 can_sg:1;
+	u8 gso:1;
+	u8 csum:1;
+
+	/* Backend-internal switches. */
+	u8 can_queue:1;		/* can queue packets for receiver? */
+	u8 copying_receiver:1;	/* copy packets to receiver? */
+
+	/* Lets netif_be_start_xmit() peek ahead in the rx request ring. */
+	RING_IDX rx_req_cons_peek;
+
+	/* Transmit shaping: 'credit_bytes' are allowed every 'credit_usec'. */
+	unsigned long    credit_bytes;
+	unsigned long    credit_usec;
+	unsigned long    remaining_credit;
+	struct timer_list credit_timeout;
+
+	/* Timer used to force the transmit queue to drain. */
+	struct timer_list tx_queue_timeout;
+
+	/* Statistics */
+	int nr_copied_skbs;
+
+	/* Bookkeeping. */
+	struct list_head list;	/* scheduling list */
+	atomic_t         refcnt;
+	struct net_device *dev;
+	struct net_device_stats stats;
+
+	/* Driver-private carrier flag (see netback_carrier_*()). */
+	unsigned int carrier;
+
+	/* Woken by netif_put() when refcnt drops to zero. */
+	wait_queue_head_t waiting_to_free;
+} netif_t;
|
|
+
|
|
+/*
|
|
+ * Implement our own carrier flag: the network stack's version causes delays
|
|
+ * when the carrier is re-enabled (in particular, dev_activate() may not
|
|
+ * immediately be called, which can cause packet loss; also the etherbridge
|
|
+ * can be rather lazy in activating its port).
|
|
+ */
|
|
+#define netback_carrier_on(netif) ((netif)->carrier = 1)
|
|
+#define netback_carrier_off(netif) ((netif)->carrier = 0)
|
|
+#define netback_carrier_ok(netif) ((netif)->carrier)
|
|
+
|
|
+enum {
|
|
+ NETBK_DONT_COPY_SKB,
|
|
+ NETBK_DELAYED_COPY_SKB,
|
|
+ NETBK_ALWAYS_COPY_SKB,
|
|
+};
|
|
+
|
|
+extern int netbk_copy_skb_mode;
|
|
+
|
|
+/*
+ * Entry points supplied by a netback accelerator plugin module.
+ * 'owner' is the module that gets pinned while the plugin is in use.
+ */
+struct netback_accel_hooks {
+	struct module *owner;
+	int (*probe)(struct xenbus_device *dev);
+	int (*remove)(struct xenbus_device *dev);
+};
|
|
+
|
|
+/* Book-keeping for one registered netback accelerator plugin. */
+struct netback_accelerator {
+	struct list_head link;		/* entry in the accelerators list */
+	int id;
+	char *eth_name;			/* heap string, kfree()d on disconnect */
+	atomic_t use_count;		/* must be 0 when the plugin is removed */
+	struct netback_accel_hooks *hooks;
+};
|
|
+
|
|
+/* Per-xenbus-device backend state. */
+struct backend_info {
+	struct xenbus_device *dev;
+	netif_t *netif;
+	enum xenbus_state frontend_state;
+
+	/* Opaque state owned by the netback accelerator */
+	void *netback_accel_priv;
+	/* Accelerator currently in use by this backend, or NULL */
+	struct netback_accelerator *accelerator;
+};
|
|
+
|
|
+#define NETBACK_ACCEL_VERSION 0x00010001
|
|
+
|
|
+/*
|
|
+ * Connect an accelerator plugin module to netback. Returns zero on
|
|
+ * success, < 0 on error, > 0 (with highest version number supported)
|
|
+ * if version mismatch.
|
|
+ */
|
|
+extern int netback_connect_accelerator(unsigned version,
|
|
+ int id, const char *eth_name,
|
|
+ struct netback_accel_hooks *hooks);
|
|
+/* Disconnect a previously connected accelerator plugin module */
|
|
+extern void netback_disconnect_accelerator(int id, const char *eth_name);
|
|
+
|
|
+
|
|
+extern
|
|
+void netback_probe_accelerators(struct backend_info *be,
|
|
+ struct xenbus_device *dev);
|
|
+extern
|
|
+void netback_remove_accelerators(struct backend_info *be,
|
|
+ struct xenbus_device *dev);
|
|
+extern
|
|
+void netif_accel_init(void);
|
|
+
|
|
+
|
|
+#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
|
|
+#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
|
|
+
|
|
+void netif_disconnect(netif_t *netif);
|
|
+
|
|
+void netif_set_features(netif_t *netif);
|
|
+netif_t *netif_alloc(struct device *parent, domid_t domid, unsigned int handle);
|
|
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
|
|
+ unsigned long rx_ring_ref, unsigned int evtchn);
|
|
+
|
|
+#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
|
|
+#define netif_put(_b) \
|
|
+ do { \
|
|
+ if ( atomic_dec_and_test(&(_b)->refcnt) ) \
|
|
+ wake_up(&(_b)->waiting_to_free); \
|
|
+ } while (0)
|
|
+
|
|
+void netif_xenbus_init(void);
|
|
+
|
|
+#define netif_schedulable(netif) \
|
|
+ (netif_running((netif)->dev) && netback_carrier_ok(netif))
|
|
+
|
|
+void netif_schedule_work(netif_t *netif);
|
|
+void netif_deschedule_work(netif_t *netif);
|
|
+
|
|
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
|
|
+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
|
|
+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+
|
|
+/* Return the can_queue flag of the interface behind @dev. */
+static inline int netbk_can_queue(struct net_device *dev)
+{
+	return ((netif_t *)netdev_priv(dev))->can_queue;
+}
|
|
+
|
|
+/* Return the can_sg flag of the interface behind @dev. */
+static inline int netbk_can_sg(struct net_device *dev)
+{
+	return ((netif_t *)netdev_priv(dev))->can_sg;
+}
|
|
+
|
|
+#endif /* __NETIF__BACKEND__COMMON_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netback/interface.c 2011-02-17 09:58:10.000000000 +0100
|
|
@@ -0,0 +1,434 @@
|
|
+/******************************************************************************
|
|
+ * arch/xen/drivers/netif/backend/interface.c
|
|
+ *
|
|
+ * Network-device interface management.
|
|
+ *
|
|
+ * Copyright (c) 2004-2005, Keir Fraser
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+#include <linux/ethtool.h>
|
|
+#include <linux/rtnetlink.h>
|
|
+#include <linux/delay.h>
|
|
+
|
|
+/*
|
|
+ * Module parameter 'queue_length':
|
|
+ *
|
|
+ * Enables queuing in the network stack when a client has run out of receive
|
|
+ * descriptors. Although this feature can improve receive bandwidth by avoiding
|
|
+ * packet loss, it can also result in packets sitting in the 'tx_queue' for
|
|
+ * unbounded time. This is bad if those packets hold onto foreign resources.
|
|
+ * For example, consider a packet that holds onto resources belonging to the
|
|
+ * guest for which it is queued (e.g., packet received on vif1.0, destined for
|
|
+ * vif1.1 which is not activated in the guest): in this situation the guest
|
|
+ * will never be destroyed, unless vif1.1 is taken down. To avoid this, we
|
|
+ * run a timer (tx_queue_timeout) to drain the queue when the interface is
|
|
+ * blocked.
|
|
+ */
|
|
+static unsigned long netbk_queue_length = 32;
|
|
+module_param_named(queue_length, netbk_queue_length, ulong, 0644);
|
|
+
|
|
+/* Bring the interface into service: unmask its irq, then schedule work. */
+static void __netif_up(netif_t *netif)
+{
+	enable_irq(netif->irq);
+	netif_schedule_work(netif);
+}
|
|
+
|
|
+/* Take the interface out of service: mask its irq, then deschedule work. */
+static void __netif_down(netif_t *netif)
+{
+	disable_irq(netif->irq);
+	netif_deschedule_work(netif);
+}
|
|
+
|
|
+/*
+ * net_device open hook.  Nothing is started unless the private carrier
+ * flag is already on.  Always returns 0.
+ */
+static int net_open(struct net_device *dev)
+{
+	netif_t *vif = netdev_priv(dev);
+
+	if (!netback_carrier_ok(vif))
+		return 0;
+
+	__netif_up(vif);
+	netif_start_queue(dev);
+	return 0;
+}
|
|
+
|
|
+/*
+ * net_device stop hook.  The interface is only taken down when the
+ * private carrier flag is on; the queue is stopped unconditionally.
+ * Always returns 0.
+ */
+static int net_close(struct net_device *dev)
+{
+	netif_t *vif = netdev_priv(dev);
+
+	if (netback_carrier_ok(vif)) {
+		__netif_down(vif);
+	}
+	netif_stop_queue(dev);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * change_mtu hook.  The upper bound is 65535 - ETH_HLEN when the
+ * interface can do scatter-gather and ETH_DATA_LEN otherwise.
+ * Returns -EINVAL when @mtu exceeds the bound, 0 on success.
+ */
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+	int limit;
+
+	if (netbk_can_sg(dev))
+		limit = 65535 - ETH_HLEN;
+	else
+		limit = ETH_DATA_LEN;
+
+	if (mtu > limit)
+		return -EINVAL;
+
+	dev->mtu = mtu;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Recompute dev->features from the frontend capabilities (can_sg, gso,
+ * csum) and the mask of administratively disabled features.
+ *
+ * The three bits managed here (SG, TSO, IP_CSUM) are rebuilt from scratch
+ * on every call.  Starting from the current dev->features and only OR-ing
+ * bits in would leave a feature advertised for ever once it had been set,
+ * e.g. NETIF_F_SG enabled at allocation time (can_sg defaults to 1) would
+ * survive a frontend that later turns out not to support scatter-gather.
+ * All other feature bits are left untouched.
+ *
+ * If scatter-gather ends up off, the MTU is clamped to ETH_DATA_LEN.
+ */
+void netif_set_features(netif_t *netif)
+{
+	struct net_device *dev = netif->dev;
+	const int managed = NETIF_F_SG | NETIF_F_TSO | NETIF_F_IP_CSUM;
+	int features = dev->features & ~managed;
+
+	if (netif->can_sg)
+		features |= NETIF_F_SG;
+	if (netif->gso)
+		features |= NETIF_F_TSO;
+	if (netif->csum)
+		features |= NETIF_F_IP_CSUM;
+
+	features &= ~(netif->features_disabled);
+
+	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN)
+		dev->mtu = ETH_DATA_LEN;
+
+	dev->features = features;
+}
|
|
+
|
|
+/*
+ * ethtool set_tx_csum hook.  Enabling is refused with -ENOSYS when the
+ * frontend has no checksum support; otherwise the IP_CSUM bit of the
+ * disabled mask is updated and the device features are recomputed.
+ */
+static int netbk_set_tx_csum(struct net_device *dev, u32 data)
+{
+	netif_t *vif = netdev_priv(dev);
+
+	if (data && !vif->csum)
+		return -ENOSYS;
+
+	if (data)
+		vif->features_disabled &= ~NETIF_F_IP_CSUM;
+	else
+		vif->features_disabled |= NETIF_F_IP_CSUM;
+
+	netif_set_features(vif);
+	return 0;
+}
|
|
+
|
|
+/*
+ * ethtool set_sg hook.  Enabling is refused with -ENOSYS when the
+ * frontend cannot do scatter-gather; otherwise the SG bit of the
+ * disabled mask is updated and the device features are recomputed.
+ */
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+	netif_t *vif = netdev_priv(dev);
+
+	if (data && !vif->can_sg)
+		return -ENOSYS;
+
+	if (data)
+		vif->features_disabled &= ~NETIF_F_SG;
+	else
+		vif->features_disabled |= NETIF_F_SG;
+
+	netif_set_features(vif);
+	return 0;
+}
|
|
+
|
|
+/*
+ * ethtool set_tso hook.  Enabling is refused with -ENOSYS when the
+ * frontend has no GSO support; otherwise the TSO bit of the disabled
+ * mask is updated and the device features are recomputed.
+ */
+static int netbk_set_tso(struct net_device *dev, u32 data)
+{
+	netif_t *vif = netdev_priv(dev);
+
+	if (data && !vif->gso)
+		return -ENOSYS;
+
+	if (data)
+		vif->features_disabled &= ~NETIF_F_TSO;
+	else
+		vif->features_disabled |= NETIF_F_TSO;
+
+	netif_set_features(vif);
+	return 0;
+}
|
|
+
|
|
+/*
+ * ethtool get_drvinfo hook: report the driver name and the bus id of the
+ * parent device.  Copies are bounded by the size of the destination
+ * fields so an over-long source can never overrun struct ethtool_drvinfo.
+ */
+static void netbk_get_drvinfo(struct net_device *dev,
+			      struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, "netbk", sizeof(info->driver));
+	strlcpy(info->bus_info, dev->class_dev.dev->bus_id,
+		sizeof(info->bus_info));
+}
|
|
+
|
|
+static const struct netif_stat {
|
|
+ char name[ETH_GSTRING_LEN];
|
|
+ u16 offset;
|
|
+} netbk_stats[] = {
|
|
+ { "copied_skbs", offsetof(netif_t, nr_copied_skbs) },
|
|
+};
|
|
+
|
|
+/* ethtool get_stats_count hook: number of entries in netbk_stats[]. */
+static int netbk_get_stats_count(struct net_device *dev)
+{
+	return ARRAY_SIZE(netbk_stats);
+}
|
|
+
|
|
+/*
+ * ethtool get_ethtool_stats hook: copy each counter described by
+ * netbk_stats[] out of the interface's private data into @data.
+ * Every counter is read as an int located at the recorded offset.
+ */
+static void netbk_get_ethtool_stats(struct net_device *dev,
+				   struct ethtool_stats *stats, u64 * data)
+{
+	const char *base = netdev_priv(dev);
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(netbk_stats); idx++) {
+		const int *counter =
+			(const int *)(base + netbk_stats[idx].offset);
+
+		data[idx] = *counter;
+	}
+}
|
|
+
|
|
+/*
+ * ethtool get_strings hook.  Only the ETH_SS_STATS string set is
+ * provided: one ETH_GSTRING_LEN-sized name per netbk_stats[] entry.
+ * Any other string set leaves @data untouched.
+ */
+static void netbk_get_strings(struct net_device *dev, u32 stringset, u8 * data)
+{
+	int idx;
+
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	for (idx = 0; idx < ARRAY_SIZE(netbk_stats); idx++) {
+		u8 *slot = data + idx * ETH_GSTRING_LEN;
+
+		memcpy(slot, netbk_stats[idx].name, ETH_GSTRING_LEN);
+	}
+}
|
|
+
|
|
+static struct ethtool_ops network_ethtool_ops =
|
|
+{
|
|
+ .get_drvinfo = netbk_get_drvinfo,
|
|
+
|
|
+ .get_tx_csum = ethtool_op_get_tx_csum,
|
|
+ .set_tx_csum = netbk_set_tx_csum,
|
|
+ .get_sg = ethtool_op_get_sg,
|
|
+ .set_sg = netbk_set_sg,
|
|
+ .get_tso = ethtool_op_get_tso,
|
|
+ .set_tso = netbk_set_tso,
|
|
+ .get_link = ethtool_op_get_link,
|
|
+
|
|
+ .get_stats_count = netbk_get_stats_count,
|
|
+ .get_ethtool_stats = netbk_get_ethtool_stats,
|
|
+ .get_strings = netbk_get_strings,
|
|
+};
|
|
+
|
|
+/*
+ * Allocate, initialise and register the net_device "vif<domid>.<handle>"
+ * and return its private netif_t.
+ *
+ * The new interface starts with the private carrier off, scatter-gather
+ * and checksum capabilities on, unlimited transmit credit and a
+ * reference count of 1.
+ *
+ * Returns the netif_t on success, ERR_PTR(-ENOMEM) if the net_device
+ * cannot be allocated, or ERR_PTR(err) if registration fails (the
+ * device is freed in that case).
+ */
+netif_t *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
+{
+	char ifname[IFNAMSIZ] = {};
+	struct net_device *ndev;
+	netif_t *vif;
+	int rc = 0;
+
+	snprintf(ifname, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+	ndev = alloc_netdev(sizeof(netif_t), ifname, ether_setup);
+	if (!ndev) {
+		DPRINTK("Could not create netif: out of memory\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	SET_NETDEV_DEV(ndev, parent);
+
+	vif = netdev_priv(ndev);
+	memset(vif, 0, sizeof(*vif));
+	vif->domid = domid;
+	vif->handle = handle;
+	vif->can_sg = 1;
+	vif->csum = 1;
+	atomic_set(&vif->refcnt, 1);
+	init_waitqueue_head(&vif->waiting_to_free);
+	vif->dev = ndev;
+
+	netback_carrier_off(vif);
+
+	/* No shaping by default: the credit is as large as it can get. */
+	vif->credit_bytes = vif->remaining_credit = ~0UL;
+	vif->credit_usec = 0UL;
+	init_timer(&vif->credit_timeout);
+	/* 'expires' doubles as the start of the credit window: set it now. */
+	vif->credit_timeout.expires = jiffies;
+
+	init_timer(&vif->tx_queue_timeout);
+
+	ndev->hard_start_xmit = netif_be_start_xmit;
+	ndev->get_stats = netif_be_get_stats;
+	ndev->open = net_open;
+	ndev->stop = net_close;
+	ndev->change_mtu = netbk_change_mtu;
+
+	netif_set_features(vif);
+
+	SET_ETHTOOL_OPS(ndev, &network_ethtool_ops);
+
+	ndev->tx_queue_len = netbk_queue_length;
+
+	/*
+	 * Dummy MAC address FE:FF:FF:FF:FF:FF, i.e. the numerically largest
+	 * non-broadcast address, so that an Ethernet bridge does not steal
+	 * it for STP purposes.
+	 */
+	memset(ndev->dev_addr, 0xFF, ETH_ALEN);
+	ndev->dev_addr[0] &= ~0x01;
+
+	rtnl_lock();
+	rc = register_netdevice(ndev);
+	rtnl_unlock();
+	if (rc) {
+		DPRINTK("Could not register new net device %s: err=%d\n",
+			ndev->name, rc);
+		free_netdev(ndev);
+		return ERR_PTR(rc);
+	}
+
+	DPRINTK("Successfully created netif\n");
+	return vif;
+}
|
|
+
|
|
+/*
+ * Map the frontend's tx and rx ring grants into the two pre-allocated
+ * comms areas and record the references and resulting handles in @netif.
+ *
+ * Returns 0 on success or -EINVAL when either mapping fails; when the rx
+ * mapping fails the already established tx mapping is undone first.
+ */
+static int map_frontend_pages(
+	netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+{
+	struct gnttab_map_grant_ref map;
+
+	/* Transmit ring. */
+	gnttab_set_map_op(&map, (unsigned long)netif->tx_comms_area->addr,
+			  GNTMAP_host_map, tx_ring_ref, netif->domid);
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map);
+	if (map.status != GNTST_okay) {
+		DPRINTK(" Gnttab failure mapping tx_ring_ref %d!\n", (int)map.status);
+		return -EINVAL;
+	}
+	netif->tx_shmem_ref    = tx_ring_ref;
+	netif->tx_shmem_handle = map.handle;
+
+	/* Receive ring. */
+	gnttab_set_map_op(&map, (unsigned long)netif->rx_comms_area->addr,
+			  GNTMAP_host_map, rx_ring_ref, netif->domid);
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map);
+	if (map.status != GNTST_okay) {
+		struct gnttab_unmap_grant_ref undo;
+
+		/* Roll back the tx mapping; its result is deliberately ignored. */
+		gnttab_set_unmap_op(&undo,
+				    (unsigned long)netif->tx_comms_area->addr,
+				    GNTMAP_host_map, netif->tx_shmem_handle);
+		VOID(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+					       &undo, 1));
+		DPRINTK(" Gnttab failure mapping rx_ring_ref %d!\n", (int)map.status);
+		return -EINVAL;
+	}
+	netif->rx_shmem_ref    = rx_ring_ref;
+	netif->rx_shmem_handle = map.handle;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Unmap the tx ring grant and then the rx ring grant.  A failing
+ * hypercall is treated as fatal (BUG).
+ */
+static void unmap_frontend_pages(netif_t *netif)
+{
+	struct gnttab_unmap_grant_ref unmap;
+	int rc;
+
+	gnttab_set_unmap_op(&unmap, (unsigned long)netif->tx_comms_area->addr,
+			    GNTMAP_host_map, netif->tx_shmem_handle);
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
+	if (rc)
+		BUG();
+
+	gnttab_set_unmap_op(&unmap, (unsigned long)netif->rx_comms_area->addr,
+			    GNTMAP_host_map, netif->rx_shmem_handle);
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap, 1);
+	if (rc)
+		BUG();
+}
|
|
+
|
|
+/*
+ * Connect @netif to its frontend: allocate the two comms areas, map the
+ * shared rings, bind the event channel to netif_be_int() and initialise
+ * the back rings.  The irq is left disabled until the interface is
+ * brought up; that happens right here if the net_device is already
+ * running.
+ *
+ * An extra reference on @netif is taken on success (it is dropped again
+ * in netif_disconnect()).  Calling this on an interface that already has
+ * an irq is a no-op.
+ *
+ * Returns 0 on success or a negative errno, with everything acquired so
+ * far released.
+ */
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+	      unsigned long rx_ring_ref, unsigned int evtchn)
+{
+	netif_tx_sring_t *tx_sring;
+	netif_rx_sring_t *rx_sring;
+	int rc;
+
+	/* Already connected through? */
+	if (netif->irq)
+		return 0;
+
+	netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+	if (netif->tx_comms_area == NULL)
+		return -ENOMEM;
+
+	netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+	if (netif->rx_comms_area == NULL) {
+		rc = -ENOMEM;
+		goto free_tx_area;
+	}
+
+	rc = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+	if (rc)
+		goto free_rx_area;
+
+	/* A non-negative return value is the irq number. */
+	rc = bind_interdomain_evtchn_to_irqhandler(
+		netif->domid, evtchn, netif_be_int, 0,
+		netif->dev->name, netif);
+	if (rc < 0)
+		goto unmap_rings;
+	netif->irq = rc;
+	disable_irq(netif->irq);
+
+	tx_sring = (netif_tx_sring_t *)netif->tx_comms_area->addr;
+	BACK_RING_INIT(&netif->tx, tx_sring, PAGE_SIZE);
+
+	rx_sring = (netif_rx_sring_t *)netif->rx_comms_area->addr;
+	BACK_RING_INIT(&netif->rx, rx_sring, PAGE_SIZE);
+
+	netif->rx_req_cons_peek = 0;
+
+	netif_get(netif);
+
+	rtnl_lock();
+	netback_carrier_on(netif);
+	if (netif_running(netif->dev))
+		__netif_up(netif);
+	rtnl_unlock();
+
+	return 0;
+
+unmap_rings:
+	unmap_frontend_pages(netif);
+free_rx_area:
+	free_vm_area(netif->rx_comms_area);
+free_tx_area:
+	free_vm_area(netif->tx_comms_area);
+	return rc;
+}
|
|
+
|
|
+/*
+ * Tear down @netif completely.
+ *
+ * If the interface is connected (private carrier on) it is quiesced and
+ * the reference taken by netif_map() is dropped.  The initial reference
+ * is then dropped and the caller sleeps until no references remain.
+ * After that the timers, the irq binding, the net_device registration
+ * and the ring mappings are released and the net_device (which embeds
+ * @netif) is freed.
+ */
+void netif_disconnect(netif_t *netif)
+{
+	struct net_device *ndev = netif->dev;
+
+	if (netback_carrier_ok(netif)) {
+		rtnl_lock();
+		netback_carrier_off(netif);
+		netif_carrier_off(ndev); /* discard queued packets */
+		if (netif_running(ndev))
+			__netif_down(netif);
+		rtnl_unlock();
+		netif_put(netif);
+	}
+
+	/* Drop the initial reference and wait for all other holders. */
+	atomic_dec(&netif->refcnt);
+	wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+
+	del_timer_sync(&netif->credit_timeout);
+	del_timer_sync(&netif->tx_queue_timeout);
+
+	if (netif->irq)
+		unbind_from_irqhandler(netif->irq, netif);
+
+	unregister_netdev(ndev);
+
+	/* tx.sring is only set once the rings have been mapped. */
+	if (netif->tx.sring) {
+		unmap_frontend_pages(netif);
+		free_vm_area(netif->tx_comms_area);
+		free_vm_area(netif->rx_comms_area);
+	}
+
+	free_netdev(ndev);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netback/loopback.c 2011-01-03 12:43:21.000000000 +0100
|
|
@@ -0,0 +1,309 @@
|
|
+/******************************************************************************
|
|
+ * netback/loopback.c
|
|
+ *
|
|
+ * A two-interface loopback device to emulate a local netfront-netback
|
|
+ * connection. This ensures that local packet delivery looks identical
|
|
+ * to inter-domain delivery. Most importantly, packets delivered locally
|
|
+ * originating from other domains will get *copied* when they traverse this
|
|
+ * driver. This prevents unbounded delays in socket-buffer queues from
|
|
+ * causing the netback driver to "seize up".
|
|
+ *
|
|
+ * This driver creates a symmetric pair of loopback interfaces with names
|
|
+ * vif0.0 and veth0. The intention is that 'vif0.0' is bound to an Ethernet
|
|
+ * bridge, just like a proper netback interface, while a local IP interface
|
|
+ * is configured on 'veth0'.
|
|
+ *
|
|
+ * As with a real netback interface, vif0.0 is configured with a suitable
|
|
+ * dummy MAC address. No default is provided for veth0: a reasonable strategy
|
|
+ * is to transfer eth0's MAC address to veth0, and give eth0 a dummy address
|
|
+ * (to avoid confusing the Etherbridge).
|
|
+ *
|
|
+ * Copyright (c) 2005 K A Fraser
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/netdevice.h>
|
|
+#include <linux/inetdevice.h>
|
|
+#include <linux/etherdevice.h>
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/ethtool.h>
|
|
+#include <net/dst.h>
|
|
+#include <net/xfrm.h> /* secpath_reset() */
|
|
+#include <asm/hypervisor.h> /* is_initial_xendomain() */
|
|
+
|
|
+static int nloopbacks = -1;
|
|
+module_param(nloopbacks, int, 0);
|
|
+MODULE_PARM_DESC(nloopbacks, "Number of netback-loopback devices to create");
|
|
+
|
|
+/* Private data of one end of a loopback pair. */
+struct net_private {
+	struct net_device *loopback_dev;	/* the other end of the pair */
+	struct net_device_stats stats;
+	int loop_idx;				/* index of the pair */
+};
|
|
+
|
|
+/* open hook: reset the statistics and start the queue.  Returns 0. */
+static int loopback_open(struct net_device *dev)
+{
+	struct net_private *priv = netdev_priv(dev);
+
+	memset(&priv->stats, 0, sizeof(priv->stats));
+	netif_start_queue(dev);
+
+	return 0;
+}
|
|
+
|
|
+/* stop hook: stop the transmit queue.  Returns 0. */
+static int loopback_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+
+	return 0;
+}
|
|
+
|
|
+#ifdef CONFIG_X86
+/*
+ * Non-zero when @pfn must be treated as a foreign frame.  In
+ * auto-translated mode every frame is conservatively reported foreign;
+ * otherwise the FOREIGN_FRAME_BIT of the p2m entry decides.
+ */
+static int is_foreign(unsigned long pfn)
+{
+	/* NB. Play it safe for auto-translation mode. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return 1;
+
+	return (phys_to_machine_mapping[pfn] & FOREIGN_FRAME_BIT) != 0;
+}
+#else
+/* No way to detect a foreign mapping here: treat every frame as foreign. */
+#define is_foreign(pfn)	(1)
+#endif
|
|
+
|
|
+/*
+ * Replace every fragment page of @skb that is_foreign() reports as
+ * foreign with a freshly allocated page holding a copy of the
+ * fragment's data (at the same page offset).
+ *
+ * A cloned skb first gets a private header via pskb_expand_head().
+ * skbs with a frag_list are not supported (BUG).
+ *
+ * Returns 1 on success, 0 if an allocation failed; in the latter case
+ * fragments processed so far stay replaced.
+ */
+static int skb_remove_foreign_references(struct sk_buff *skb)
+{
+	struct skb_shared_info *shinfo;
+	int idx;
+
+	BUG_ON(skb_shinfo(skb)->frag_list);
+
+	if (skb_cloned(skb) &&
+	    unlikely(pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
+		return 0;
+
+	/* Looked up only now: the call above may have replaced the head. */
+	shinfo = skb_shinfo(skb);
+
+	for (idx = 0; idx < shinfo->nr_frags; idx++) {
+		skb_frag_t *frag = &shinfo->frags[idx];
+		struct page *copy;
+		char *src;
+		int offset;
+
+		if (!is_foreign(page_to_pfn(frag->page)))
+			continue;
+
+		copy = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+		if (unlikely(!copy))
+			return 0;
+
+		src = kmap_skb_frag(frag);
+		offset = frag->page_offset;
+		memcpy(page_address(copy) + offset, src + offset, frag->size);
+		kunmap_skb_frag(src);
+
+		/* Swap the foreign page for the local copy. */
+		put_page(frag->page);
+		frag->page = copy;
+	}
+
+	return 1;
+}
|
|
+
|
|
+/*
+ * hard_start_xmit hook: hand @skb, sent on @dev, to the peer device of
+ * the loopback pair as a received packet.
+ *
+ * Foreign page references are removed first; if that fails the packet
+ * is dropped and counted in tx_dropped.  Always returns 0.
+ */
+static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_private *tx_priv = netdev_priv(dev);
+	struct net_private *rx_priv;
+	struct net_device *peer;
+
+	if (!skb_remove_foreign_references(skb)) {
+		tx_priv->stats.tx_dropped++;
+		dev_kfree_skb(skb);
+		return 0;
+	}
+
+	dst_release(skb->dst);
+	skb->dst = NULL;
+
+	skb_orphan(skb);
+
+	/* Account on the sending side ... */
+	tx_priv->stats.tx_bytes += skb->len;
+	tx_priv->stats.tx_packets++;
+
+	/* ... then switch to the receiving side of the pair. */
+	peer = tx_priv->loopback_dev;
+	rx_priv = netdev_priv(peer);
+
+	rx_priv->stats.rx_bytes += skb->len;
+	rx_priv->stats.rx_packets++;
+
+	if (skb->ip_summed == CHECKSUM_HW) {
+		/* Defer checksum calculation. */
+		skb->proto_csum_blank = 1;
+		/* Must be a local packet: assert its integrity. */
+		skb->proto_data_valid = 1;
+	}
+
+	if (skb->proto_data_valid)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb->ip_summed = CHECKSUM_NONE;
+
+	skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
+	skb->protocol = eth_type_trans(skb, peer);
+	skb->dev = peer;
+	peer->last_rx = jiffies;
+
+	/* Flush netfilter context: rx'ed skbuffs not expected to have any. */
+	nf_reset(skb);
+	secpath_reset(skb);
+
+	netif_rx(skb);
+
+	return 0;
+}
|
|
+
|
|
+/* get_stats hook: return the statistics kept in the private data. */
+static struct net_device_stats *loopback_get_stats(struct net_device *dev)
+{
+	return &((struct net_private *)netdev_priv(dev))->stats;
+}
|
|
+
|
|
+/*
+ * ethtool get_drvinfo hook: driver name "netloop", bus info
+ * "vif-0-<loop_idx>".
+ */
+static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+{
+	struct net_private *priv = netdev_priv(dev);
+
+	strcpy(info->driver, "netloop");
+	snprintf(info->bus_info, ETHTOOL_BUSINFO_LEN, "vif-0-%d",
+		 priv->loop_idx);
+}
|
|
+
|
|
+static struct ethtool_ops network_ethtool_ops =
|
|
+{
|
|
+ .get_drvinfo = get_drvinfo,
|
|
+
|
|
+ .get_tx_csum = ethtool_op_get_tx_csum,
|
|
+ .set_tx_csum = ethtool_op_set_tx_csum,
|
|
+ .get_sg = ethtool_op_get_sg,
|
|
+ .set_sg = ethtool_op_set_sg,
|
|
+ .get_tso = ethtool_op_get_tso,
|
|
+ .set_tso = ethtool_op_set_tso,
|
|
+ .get_link = ethtool_op_get_link,
|
|
+};
|
|
+
|
|
+/*
+ * Intentionally empty: the virtual interface is point-to-point and the
+ * physical interface is probably promiscuous anyway.
+ */
+static void loopback_set_multicast_list(struct net_device *dev)
+{
+}
|
|
+
|
|
+/*
+ * Initialise @dev as one end of loopback pair @loop_idx whose other end
+ * is @lo: record the peer, install the device hooks and ethtool ops and
+ * set the feature flags.  No transmit queue is used (tx_queue_len 0).
+ */
+static void loopback_construct(struct net_device *dev, struct net_device *lo,
+			       int loop_idx)
+{
+	struct net_private *priv = netdev_priv(dev);
+
+	priv->loopback_dev = lo;
+	priv->loop_idx     = loop_idx;
+
+	dev->open               = loopback_open;
+	dev->stop               = loopback_close;
+	dev->hard_start_xmit    = loopback_start_xmit;
+	dev->get_stats          = loopback_get_stats;
+	dev->set_multicast_list = loopback_set_multicast_list;
+	dev->change_mtu         = NULL; /* allow arbitrary mtu */
+
+	dev->tx_queue_len = 0;
+
+	dev->features = NETIF_F_HIGHDMA | NETIF_F_LLTX | NETIF_F_TSO |
+			NETIF_F_SG | NETIF_F_IP_CSUM;
+
+	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+	/*
+	 * The MTU is deliberately left at its default rather than set to a
+	 * jumbo value: otherwise the network stack would send large packets
+	 * that the Ethernet bridge drops (unless the physical interface is
+	 * configured for jumbo frames).  An administrator who wants a larger
+	 * MTU can set it with 'ifconfig'.
+	 */
+	/*dev->mtu = 16*1024;*/
+}
|
|
+
|
|
+/*
+ * Create and register loopback pair @i: "vif0.<i>" (the dummy backend
+ * side) and "veth<i>".
+ *
+ * Interface names are formatted with snprintf() bounded by the buffer
+ * size, so the IFNAMSIZ buffer cannot be overrun whatever value @i has.
+ *
+ * Returns 0 on success or a negative errno; on failure neither device
+ * is left allocated or registered.
+ */
+static int __init make_loopback(int i)
+{
+	struct net_device *dev1, *dev2;
+	char dev_name[IFNAMSIZ];
+	int err = -ENOMEM;
+
+	snprintf(dev_name, sizeof(dev_name), "vif0.%d", i);
+	dev1 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
+	if (!dev1)
+		return err;
+
+	snprintf(dev_name, sizeof(dev_name), "veth%d", i);
+	dev2 = alloc_netdev(sizeof(struct net_private), dev_name, ether_setup);
+	if (!dev2)
+		goto fail_netdev2;
+
+	loopback_construct(dev1, dev2, i);
+	loopback_construct(dev2, dev1, i);
+
+	/*
+	 * Initialise a dummy MAC address for the 'dummy backend' interface. We
+	 * choose the numerically largest non-broadcast address to prevent the
+	 * address getting stolen by an Ethernet bridge for STP purposes.
+	 */
+	memset(dev1->dev_addr, 0xFF, ETH_ALEN);
+	dev1->dev_addr[0] &= ~0x01;
+
+	err = register_netdev(dev1);
+	if (err != 0)
+		goto fail;
+
+	err = register_netdev(dev2);
+	if (err != 0) {
+		/* dev1 is already visible: take it back before freeing. */
+		unregister_netdev(dev1);
+		goto fail;
+	}
+
+	return 0;
+
+ fail:
+	free_netdev(dev2);
+ fail_netdev2:
+	free_netdev(dev1);
+	return err;
+}
|
|
+
|
|
+/*
+ * Module init: create 'nloopbacks' loopback pairs.  When the parameter
+ * was left at -1 the default is 4 in the initial Xen domain and 0
+ * elsewhere.
+ *
+ * Returns 0 on success or the error of the first pair that could not be
+ * created.
+ *
+ * NOTE(review): pairs created by earlier iterations are not torn down
+ * when a later make_loopback() call fails.
+ */
+static int __init loopback_init(void)
+{
+	int idx;
+
+	if (nloopbacks == -1) {
+		if (is_initial_xendomain())
+			nloopbacks = 4;
+		else
+			nloopbacks = 0;
+	}
+
+	for (idx = 0; idx < nloopbacks; idx++) {
+		int rc = make_loopback(idx);
+
+		if (rc != 0)
+			return rc;
+	}
+
+	return 0;
+}
|
|
+
|
|
+module_init(loopback_init);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netback/netback.c 2011-02-17 09:58:10.000000000 +0100
|
|
@@ -0,0 +1,1700 @@
|
|
+/******************************************************************************
|
|
+ * drivers/xen/netback/netback.c
|
|
+ *
|
|
+ * Back-end of the driver for virtual network devices. This portion of the
|
|
+ * driver exports a 'unified' network-device interface that can be accessed
|
|
+ * by any operating system that implements a compatible front end. A
|
|
+ * reference front-end implementation can be found in:
|
|
+ * drivers/xen/netfront/netfront.c
|
|
+ *
|
|
+ * Copyright (c) 2002-2005, K A Fraser
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+#include <linux/if_vlan.h>
|
|
+#include <linux/tcp.h>
|
|
+#include <xen/balloon.h>
|
|
+#include <xen/interface/memory.h>
|
|
+
|
|
+/*define NETBE_DEBUG_INTERRUPT*/
|
|
+/* Per-slot bookkeeping recorded while receive-ring operations are queued. */
|
|
+struct netbk_rx_meta {
|
|
+ skb_frag_t frag; /* copy of the skb fragment; for a head entry page_offset/size carry gso_type/gso_size */
|
|
+ int id; /* request id returned by netbk_gop_frag() for this slot */
|
|
+ u8 copy:1; /* 1: slot filled by grant copy, 0: by grant transfer */
|
|
+};
|
|
+/* One in-use pending TX slot; pending_inuse[] is indexed like pending_tx_info[]. */
|
|
+struct netbk_tx_pending_inuse {
|
|
+ struct list_head list; /* node on pending_inuse_head, or on the local completion list in net_tx_action_dealloc() */
|
|
+ unsigned long alloc_time; /* time value compared against jiffies (HZ/2 threshold) in net_tx_action_dealloc() */
|
|
+};
|
|
+
|
|
+static void netif_idx_release(u16 pending_idx);
|
|
+static void make_tx_response(netif_t *netif,
|
|
+ netif_tx_request_t *txp,
|
|
+ s8 st);
|
|
+static netif_rx_response_t *make_rx_response(netif_t *netif,
|
|
+ u16 id,
|
|
+ s8 st,
|
|
+ u16 offset,
|
|
+ u16 size,
|
|
+ u16 flags);
|
|
+
|
|
+static void net_tx_action(unsigned long unused);
|
|
+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
|
|
+
|
|
+static void net_rx_action(unsigned long unused);
|
|
+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
|
|
+
|
|
+static struct timer_list net_timer;
|
|
+static struct timer_list netbk_tx_pending_timer;
|
|
+
|
|
+#define MAX_PENDING_REQS 256
|
|
+
|
|
+static struct sk_buff_head rx_queue;
|
|
+
|
|
+static struct page **mmap_pages;
|
|
+/* Page frame number of the page stored at mmap_pages[@idx]. */
|
|
+static inline unsigned long idx_to_pfn(unsigned int idx)
|
|
+{
|
|
+	struct page *pg = mmap_pages[idx];
|
|
+
|
|
+	return page_to_pfn(pg);
|
|
+}
|
|
+
|
|
+/* Kernel virtual address, as an integer, of the page at mmap_pages[@idx]. */
|
|
+static inline unsigned long idx_to_kaddr(unsigned int idx)
|
|
+{
|
|
+	unsigned long pfn = idx_to_pfn(idx);
|
|
+
|
|
+	return (unsigned long)pfn_to_kaddr(pfn);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Record @index in @pg, reusing the page's ->mapping field as storage
|
|
+ * (extra field used in struct page); netif_page_index() reads it back.
|
|
+ */
|
|
+static inline void netif_set_page_index(struct page *pg, unsigned int index)
|
|
+{
|
|
+	unsigned long *slot = (unsigned long *)&pg->mapping;
|
|
+
|
|
+	*slot = index;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Look up the index stored in @pg's ->mapping field.
|
|
+ *
|
|
+ * Returns the index, or -1 if the page is not flagged foreign, the stored
|
|
+ * value is not below MAX_PENDING_REQS, or mmap_pages[] does not hold @pg at
|
|
+ * that index.
|
|
+ */
|
|
+static inline int netif_page_index(struct page *pg)
|
|
+{
|
|
+	unsigned long slot;
|
|
+
|
|
+	if (!PageForeign(pg))
|
|
+		return -1;
|
|
+
|
|
+	slot = (unsigned long)pg->mapping;
|
|
+	if ((slot < MAX_PENDING_REQS) && (mmap_pages[slot] == pg))
|
|
+		return slot;
|
|
+
|
|
+	return -1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This is the amount of packet we copy rather than map, so that the
|
|
+ * guest can't fiddle with the contents of the headers while we do
|
|
+ * packet processing on them (netfilter, routing, etc).
|
|
+ */
|
|
+#define PKT_PROT_LEN (ETH_HLEN + VLAN_HLEN + \
|
|
+ sizeof(struct iphdr) + MAX_IPOPTLEN + \
|
|
+ sizeof(struct tcphdr) + 40 /* MAX_TCP_OPTION_SPACE */)
|
|
+/* Per-slot TX state, indexed by pending index, followed by the ring of pending indices. */
|
|
+static struct pending_tx_info {
|
|
+ netif_tx_request_t req; /* request that is answered through make_tx_response() */
|
|
+ netif_t *netif; /* interface the response is sent on */
|
|
+} pending_tx_info[MAX_PENDING_REQS];
|
|
+static u16 pending_ring[MAX_PENDING_REQS]; /* pending indices: taken at pending_cons, returned at pending_prod */
|
|
+typedef unsigned int PEND_RING_IDX;
|
|
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) /* wraps an index; needs MAX_PENDING_REQS to be a power of two */
|
|
+static PEND_RING_IDX pending_prod, pending_cons;
|
|
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) /* pending indices currently handed out */
|
|
+
|
|
+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
|
|
+static u16 dealloc_ring[MAX_PENDING_REQS];
|
|
+static PEND_RING_IDX dealloc_prod, dealloc_cons;
|
|
+
|
|
+/* Doubly-linked list of in-use pending entries. */
|
|
+static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
|
|
+static LIST_HEAD(pending_inuse_head);
|
|
+
|
|
+static struct sk_buff_head tx_queue;
|
|
+
|
|
+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
|
|
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
|
|
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
|
|
+
|
|
+static struct list_head net_schedule_list;
|
|
+static spinlock_t net_schedule_list_lock;
|
|
+
|
|
+#define MAX_MFN_ALLOC 64
|
|
+static unsigned long mfn_list[MAX_MFN_ALLOC];
|
|
+static unsigned int alloc_index = 0;
|
|
+
|
|
+/* Setting this allows the safe use of this driver without netloop. */
|
|
+static int MODPARM_copy_skb = 1;
|
|
+module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
|
|
+MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
|
|
+static int MODPARM_permute_returns = 0;
|
|
+module_param_named(permute_returns, MODPARM_permute_returns, bool, S_IRUSR|S_IWUSR);
|
|
+MODULE_PARM_DESC(permute_returns, "Randomly permute the order in which TX responses are sent to the frontend");
|
|
+
|
|
+int netbk_copy_skb_mode;
|
|
+
|
|
+/* Pop the most recently cached entry off mfn_list[]; BUG_ON() an empty list. */
|
|
+static inline unsigned long alloc_mfn(void)
|
|
+{
|
|
+	BUG_ON(alloc_index == 0);
|
|
+
|
|
+	alloc_index--;
|
|
+	return mfn_list[alloc_index];
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Make sure at least @nr entries are cached in mfn_list[].
|
|
+ *
|
|
+ * If fewer are available, issues XENMEM_increase_reservation to fill the
|
|
+ * unused tail of the list and accounts for however many extents were
|
|
+ * granted. Returns 0 once @nr entries are available, -ENOMEM otherwise.
|
|
+ */
|
|
+static int check_mfn(int nr)
|
|
+{
|
|
+	struct xen_memory_reservation refill = {
|
|
+		.extent_order = 0,
|
|
+		.domid = DOMID_SELF
|
|
+	};
|
|
+	int granted;
|
|
+
|
|
+	if (likely(alloc_index >= nr))
|
|
+		return 0;
|
|
+
|
|
+	set_xen_guest_handle(refill.extent_start, mfn_list + alloc_index);
|
|
+	refill.nr_extents = MAX_MFN_ALLOC - alloc_index;
|
|
+
|
|
+	granted = HYPERVISOR_memory_op(XENMEM_increase_reservation, &refill);
|
|
+	if (likely(granted > 0))
|
|
+		alloc_index += granted;
|
|
+
|
|
+	if (alloc_index >= nr)
|
|
+		return 0;
|
|
+
|
|
+	return -ENOMEM;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Schedule net_tx_tasklet when fewer than half of the pending slots are in
|
|
+ * use and net_schedule_list is not empty.
|
|
+ */
|
|
+static inline void maybe_schedule_tx_action(void)
|
|
+{
|
|
+	/* Full barrier before the ring indices and the list are sampled. */
|
|
+	smp_mb();
|
|
+
|
|
+	if (NR_PENDING_REQS >= (MAX_PENDING_REQS/2))
|
|
+		return;
|
|
+
|
|
+	if (list_empty(&net_schedule_list))
|
|
+		return;
|
|
+
|
|
+	tasklet_schedule(&net_tx_tasklet);
|
|
+}
|
|
+/*
|
|
+ * netbk_copy_skb - build a private copy of @skb.
|
|
+ *
|
|
+ * As much of the linear area as fits is copied into the head of a newly
|
|
+ * allocated skb; the rest of the packet is copied into freshly allocated
|
|
+ * pages that are attached as fragments. gso_size/gso_type are carried over
|
|
+ * and the h/nh/mac header pointers are rebased onto the new buffer.
|
|
+ *
|
|
+ * Returns the new skb, or NULL if an allocation fails or the data would need
|
|
+ * more than MAX_SKB_FRAGS pages. @skb itself is neither modified nor freed
|
|
+ * here. An skb with a frag_list trips BUG_ON().
|
|
+ */
|
|
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
|
|
+{
|
|
+ struct skb_shared_info *ninfo;
|
|
+ struct sk_buff *nskb;
|
|
+ unsigned long offset;
|
|
+ int ret;
|
|
+ int len;
|
|
+ int headlen;
|
|
+
|
|
+ BUG_ON(skb_shinfo(skb)->frag_list != NULL);
|
|
+
|
|
+ nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
|
|
+ if (unlikely(!nskb))
|
|
+ goto err;
|
|
+
|
|
+ skb_reserve(nskb, 16 + NET_IP_ALIGN);
|
|
+ headlen = nskb->end - nskb->data; /* room left in the new head */
|
|
+ if (headlen > skb_headlen(skb))
|
|
+ headlen = skb_headlen(skb);
|
|
+ ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
|
|
+ BUG_ON(ret);
|
|
+
|
|
+ ninfo = skb_shinfo(nskb);
|
|
+ ninfo->gso_size = skb_shinfo(skb)->gso_size;
|
|
+ ninfo->gso_type = skb_shinfo(skb)->gso_type;
|
|
+
|
|
+ offset = headlen;
|
|
+ len = skb->len - headlen; /* bytes that still have to go into page fragments */
|
|
+
|
|
+ nskb->len = skb->len;
|
|
+ nskb->data_len = len;
|
|
+ nskb->truesize += len;
|
|
+
|
|
+ while (len) {
|
|
+ struct page *page;
|
|
+ int copy;
|
|
+ int zero;
|
|
+
|
|
+ if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
|
|
+ dump_stack();
|
|
+ goto err_free;
|
|
+ }
|
|
+
|
|
+ copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
|
|
+ zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO; /* only a partially filled (final) page is requested zeroed */
|
|
+
|
|
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
|
|
+ if (unlikely(!page))
|
|
+ goto err_free;
|
|
+
|
|
+ ret = skb_copy_bits(skb, offset, page_address(page), copy);
|
|
+ BUG_ON(ret);
|
|
+
|
|
+ ninfo->frags[ninfo->nr_frags].page = page;
|
|
+ ninfo->frags[ninfo->nr_frags].page_offset = 0;
|
|
+ ninfo->frags[ninfo->nr_frags].size = copy;
|
|
+ ninfo->nr_frags++;
|
|
+
|
|
+ offset += copy;
|
|
+ len -= copy;
|
|
+ }
|
|
+
|
|
+ offset = nskb->data - skb->data; /* byte delta used to rebase the header pointers below */
|
|
+
|
|
+ nskb->h.raw = skb->h.raw + offset;
|
|
+ nskb->nh.raw = skb->nh.raw + offset;
|
|
+ nskb->mac.raw = skb->mac.raw + offset;
|
|
+
|
|
+ return nskb;
|
|
+
|
|
+ err_free:
|
|
+ kfree_skb(nskb);
|
|
+ err:
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Worst-case number of RX ring slots one packet may need on @netif: a
|
|
+ * single slot when neither netif->can_sg nor netif->gso is set, otherwise
|
|
+ * MAX_SKB_FRAGS + 2.
|
|
+ */
|
|
+static inline int netbk_max_required_rx_slots(netif_t *netif)
|
|
+{
|
|
+	if (!netif->can_sg && !netif->gso)
|
|
+		return 1; /* all in one */
|
|
+
|
|
+	/* header + extra_info + frags */
|
|
+	return MAX_SKB_FRAGS + 2;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Non-zero when the RX ring cannot take a worst-case packet, measured from
|
|
+ * rx_req_cons_peek: either too few requests have been posted, or too few
|
|
+ * slots are left before rsp_prod_pvt + NET_RX_RING_SIZE.
|
|
+ */
|
|
+static inline int netbk_queue_full(netif_t *netif)
|
|
+{
|
|
+	RING_IDX peek = netif->rx_req_cons_peek;
|
|
+	RING_IDX needed = netbk_max_required_rx_slots(netif);
|
|
+
|
|
+	/* Not enough posted requests beyond the ones already earmarked. */
|
|
+	if ((netif->rx.sring->req_prod - peek) < needed)
|
|
+		return 1;
|
|
+
|
|
+	/* Same test against the response side of the ring. */
|
|
+	return (netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Timer callback installed by netif_be_start_xmit(): @data is the netif_t
|
|
+ * pointer; wake the device queue if the interface is schedulable.
|
|
+ */
|
|
+static void tx_queue_callback(unsigned long data)
|
|
+{
|
|
+	netif_t *netif = (netif_t *)data;
|
|
+
|
|
+	if (!netif_schedulable(netif))
|
|
+		return;
|
|
+
|
|
+	netif_wake_queue(netif->dev);
|
|
+}
|
|
+/*
|
|
+ * netif_be_start_xmit - transmit entry point: queue @skb for the frontend.
|
|
+ *
|
|
+ * The packet is dropped (counted in stats.tx_dropped) if the interface is
|
|
+ * not schedulable or the receive ring is full. Otherwise it is copied first
|
|
+ * when required (see below), the ring slots it will need are added to
|
|
+ * rx_req_cons_peek, a reference on the netif is taken, and the skb is
|
|
+ * appended to rx_queue for net_rx_tasklet.
|
|
+ *
|
|
+ * Always returns 0; the skb is consumed on every path.
|
|
+ */
|
|
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
+{
|
|
+ netif_t *netif = netdev_priv(dev);
|
|
+
|
|
+ BUG_ON(skb->dev != dev);
|
|
+
|
|
+ /* Drop the packet if the target domain has no receive buffers. */
|
|
+ if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
|
|
+ goto drop;
|
|
+
|
|
+ /*
|
|
+ * Copy the packet here if it's destined for a flipping interface
|
|
+ * but isn't flippable (e.g. extra references to data).
|
|
+ * XXX For now we also copy skbuffs whose head crosses a page
|
|
+ * boundary, because netbk_gop_skb can't handle them.
|
|
+ */
|
|
+ if (!netif->copying_receiver ||
|
|
+ ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
|
|
+ struct sk_buff *nskb = netbk_copy_skb(skb);
|
|
+ if ( unlikely(nskb == NULL) )
|
|
+ goto drop;
|
|
+ /* Copy only the header fields we use in this driver. */
|
|
+ nskb->dev = skb->dev;
|
|
+ nskb->ip_summed = skb->ip_summed;
|
|
+ nskb->proto_data_valid = skb->proto_data_valid;
|
|
+ dev_kfree_skb(skb);
|
|
+ skb = nskb;
|
|
+ }
|
|
+
|
|
+ netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
|
|
+ !!skb_shinfo(skb)->gso_size; /* fragments + head + optional GSO extra slot */
|
|
+ netif_get(netif); /* dropped by net_rx_action() once the skb has been delivered */
|
|
+
|
|
+ if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
|
|
+ netif->rx.sring->req_event = netif->rx_req_cons_peek +
|
|
+ netbk_max_required_rx_slots(netif);
|
|
+ mb(); /* request notification /then/ check & stop the queue */
|
|
+ if (netbk_queue_full(netif)) {
|
|
+ netif_stop_queue(dev);
|
|
+ /*
|
|
+ * Schedule 500ms timeout to restart the queue, thus
|
|
+ * ensuring that an inactive queue will be drained.
|
|
+ * Packets will immediately be dropped until more
|
|
+ * receive buffers become available (see
|
|
+ * netbk_queue_full() check above).
|
|
+ */
|
|
+ netif->tx_queue_timeout.data = (unsigned long)netif;
|
|
+ netif->tx_queue_timeout.function = tx_queue_callback;
|
|
+ __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ skb_queue_tail(&rx_queue, skb);
|
|
+ tasklet_schedule(&net_rx_tasklet);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+ drop:
|
|
+ netif->stats.tx_dropped++;
|
|
+ dev_kfree_skb(skb);
|
|
+ return 0;
|
|
+}
|
|
+/* Compiled out (#if 0): NAPI-style completion hooks; not part of the build. */
|
|
+#if 0
|
|
+static void xen_network_done_notify(void)
|
|
+{
|
|
+ static struct net_device *eth0_dev = NULL;
|
|
+ if (unlikely(eth0_dev == NULL))
|
|
+ eth0_dev = __dev_get_by_name("eth0"); /* looked up once, then cached in the static */
|
|
+ netif_rx_schedule(eth0_dev);
|
|
+}
|
|
+/*
|
|
+ * Add following to poll() function in NAPI driver (Tigon3 is example):
|
|
+ * if ( xen_network_done() )
|
|
+ * tg3_enable_ints(tp);
|
|
+ */
|
|
+int xen_network_done(void)
|
|
+{
|
|
+ return skb_queue_empty(&rx_queue); /* non-zero when rx_queue holds no skbs */
|
|
+}
|
|
+#endif
|
|
+/*
|
|
+ * Batch of operations assembled by net_rx_action(). The *_prod counters
|
|
+ * count entries queued by netbk_gop_frag()/netbk_gop_skb(); the *_cons
|
|
+ * counters count entries already checked after the multicall.
|
|
+ */
|
|
+struct netrx_pending_operations {
|
|
+ unsigned trans_prod, trans_cons; /* positions in trans[] */
|
|
+ unsigned mmu_prod, mmu_mcl; /* entries in mmu[]; mmu_mcl = index in mcl[] of the mmu_update call */
|
|
+ unsigned mcl_prod, mcl_cons; /* positions in mcl[] */
|
|
+ unsigned copy_prod, copy_cons; /* positions in copy[] */
|
|
+ unsigned meta_prod, meta_cons; /* positions in meta[] */
|
|
+ mmu_update_t *mmu; /* MMU_MACHPHYS_UPDATE requests */
|
|
+ gnttab_transfer_t *trans; /* grant transfer operations */
|
|
+ gnttab_copy_t *copy; /* grant copy operations */
|
|
+ multicall_entry_t *mcl; /* multicall entries */
|
|
+ struct netbk_rx_meta *meta; /* one entry for the head and each fragment of every queued skb */
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Set up the grant operations for this fragment. If it's a flipping
|
|
+ * interface, we also set up the unmap request from here.
|
|
+ *
|
|
+ * @i is the offset, from netif->rx.req_cons, of the ring request to use.
|
|
+ * For a copying receiver one grant-copy operation is queued: the source is
|
|
+ * the originating grant reference when @page is a foreign page tracked in
|
|
+ * mmap_pages[], otherwise the local frame. For a flipping receiver a
|
|
+ * replacement frame is taken with alloc_mfn() (unless the domain is
|
|
+ * auto-translated), the remap and MMU_MACHPHYS_UPDATE are queued, and a
|
|
+ * grant-transfer operation is queued for the old frame.
|
|
+ *
|
|
+ * Sets @meta->copy accordingly and returns the id of the ring request used.
|
|
+ */
|
|
+static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
|
|
+ int i, struct netrx_pending_operations *npo,
|
|
+ struct page *page, unsigned long size,
|
|
+ unsigned long offset)
|
|
+{
|
|
+ mmu_update_t *mmu;
|
|
+ gnttab_transfer_t *gop;
|
|
+ gnttab_copy_t *copy_gop;
|
|
+ multicall_entry_t *mcl;
|
|
+ netif_rx_request_t *req;
|
|
+ unsigned long old_mfn, new_mfn;
|
|
+ int idx = netif_page_index(page); /* -1 unless @page is one of mmap_pages[] */
|
|
+
|
|
+ old_mfn = virt_to_mfn(page_address(page));
|
|
+
|
|
+ req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
|
|
+ if (netif->copying_receiver) {
|
|
+ /* The fragment needs to be copied rather than
|
|
+ flipped. */
|
|
+ meta->copy = 1;
|
|
+ copy_gop = npo->copy + npo->copy_prod++;
|
|
+ copy_gop->flags = GNTCOPY_dest_gref;
|
|
+ if (idx > -1) {
|
|
+ struct pending_tx_info *src_pend = &pending_tx_info[idx];
|
|
+ copy_gop->source.domid = src_pend->netif->domid;
|
|
+ copy_gop->source.u.ref = src_pend->req.gref;
|
|
+ copy_gop->flags |= GNTCOPY_source_gref;
|
|
+ } else {
|
|
+ copy_gop->source.domid = DOMID_SELF;
|
|
+ copy_gop->source.u.gmfn = old_mfn;
|
|
+ }
|
|
+ copy_gop->source.offset = offset;
|
|
+ copy_gop->dest.domid = netif->domid;
|
|
+ copy_gop->dest.offset = 0;
|
|
+ copy_gop->dest.u.ref = req->gref;
|
|
+ copy_gop->len = size;
|
|
+ } else {
|
|
+ meta->copy = 0;
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ new_mfn = alloc_mfn();
|
|
+
|
|
+ /*
|
|
+ * Set the new P2M table entry before
|
|
+ * reassigning the old data page. Heed the
|
|
+ * comment in pgtable-2level.h:pte_page(). :-)
|
|
+ */
|
|
+ set_phys_to_machine(page_to_pfn(page), new_mfn);
|
|
+
|
|
+ mcl = npo->mcl + npo->mcl_prod++;
|
|
+ MULTI_update_va_mapping(mcl,
|
|
+ (unsigned long)page_address(page),
|
|
+ pfn_pte_ma(new_mfn, PAGE_KERNEL),
|
|
+ 0);
|
|
+
|
|
+ mmu = npo->mmu + npo->mmu_prod++;
|
|
+ mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
|
|
+ MMU_MACHPHYS_UPDATE;
|
|
+ mmu->val = page_to_pfn(page);
|
|
+ }
|
|
+
|
|
+ gop = npo->trans + npo->trans_prod++;
|
|
+ gop->mfn = old_mfn;
|
|
+ gop->domid = netif->domid;
|
|
+ gop->ref = req->gref;
|
|
+ }
|
|
+ return req->id;
|
|
+}
|
|
+/*
|
|
+ * netbk_gop_skb - queue the ring operations for one skb into @npo.
|
|
+ *
|
|
+ * Reserves a meta entry for the head (stashing gso_type/gso_size in its
|
|
+ * frag fields), queues one operation per page fragment, then queues the
|
|
+ * head itself, and finally advances netif->rx.req_cons past every request
|
|
+ * used (head, optional extra-info slot, fragments).
|
|
+ */
|
|
+static void netbk_gop_skb(struct sk_buff *skb,
|
|
+ struct netrx_pending_operations *npo)
|
|
+{
|
|
+ netif_t *netif = netdev_priv(skb->dev);
|
|
+ int nr_frags = skb_shinfo(skb)->nr_frags;
|
|
+ int i;
|
|
+ int extra;
|
|
+ struct netbk_rx_meta *head_meta, *meta;
|
|
+
|
|
+ head_meta = npo->meta + npo->meta_prod++;
|
|
+ head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
|
|
+ head_meta->frag.size = skb_shinfo(skb)->gso_size;
|
|
+ extra = !!head_meta->frag.size + 1; /* 1 for the head, 2 when a GSO extra-info slot follows it */
|
|
+
|
|
+ for (i = 0; i < nr_frags; i++) {
|
|
+ meta = npo->meta + npo->meta_prod++;
|
|
+ meta->frag = skb_shinfo(skb)->frags[i];
|
|
+ meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
|
|
+ meta->frag.page,
|
|
+ meta->frag.size,
|
|
+ meta->frag.page_offset);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * This must occur at the end to ensure that we don't trash skb_shinfo
|
|
+ * until we're done. We know that the head doesn't cross a page
|
|
+ * boundary because such packets get copied in netif_be_start_xmit.
|
|
+ */
|
|
+ head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
|
|
+ virt_to_page(skb->data),
|
|
+ skb_headlen(skb),
|
|
+ offset_in_page(skb->data));
|
|
+
|
|
+ netif->rx.req_cons += nr_frags + extra;
|
|
+}
|
|
+
|
|
+/* Drop one page reference for each of the first @nr_frags entries of @meta. */
|
|
+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
|
|
+{
|
|
+	int left = nr_frags;
|
|
+
|
|
+	while (left-- > 0) {
|
|
+		put_page(meta->frag.page);
|
|
+		meta++;
|
|
+	}
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
|
|
+ * used to set up the operations on the top of
|
|
+ * netrx_pending_operations, which have since been done. Check that
|
|
+ * they didn't give any errors and advance over them.
|
|
+ *
|
|
+ * Walks nr_frags + 1 meta entries (head plus fragments) starting at
|
|
+ * npo->meta_cons and consumes the matching copy, multicall and transfer
|
|
+ * results; meta_cons itself is not advanced here. Returns NETIF_RSP_OKAY,
|
|
+ * or NETIF_RSP_ERROR if any operation reported a failure.
|
|
+ */
|
|
+static int netbk_check_gop(int nr_frags, domid_t domid, struct netrx_pending_operations *npo)
|
|
+{
|
|
+ multicall_entry_t *mcl;
|
|
+ gnttab_transfer_t *gop;
|
|
+ gnttab_copy_t *copy_op;
|
|
+ int status = NETIF_RSP_OKAY;
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i <= nr_frags; i++) {
|
|
+ if (npo->meta[npo->meta_cons + i].copy) {
|
|
+ copy_op = npo->copy + npo->copy_cons++;
|
|
+ if (unlikely(copy_op->status == GNTST_eagain))
|
|
+ gnttab_check_GNTST_eagain_while(GNTTABOP_copy, copy_op);
|
|
+ if (unlikely(copy_op->status != GNTST_okay)) {
|
|
+ DPRINTK("Bad status %d from copy to DOM%d.\n",
|
|
+ copy_op->status, domid);
|
|
+ status = NETIF_RSP_ERROR;
|
|
+ }
|
|
+ } else {
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ mcl = npo->mcl + npo->mcl_cons++;
|
|
+ /* The update_va_mapping() must not fail. */
|
|
+ BUG_ON(mcl->result != 0);
|
|
+ }
|
|
+
|
|
+ gop = npo->trans + npo->trans_cons++;
|
|
+ /* Check the reassignment error code. */
|
|
+ if (unlikely(gop->status != GNTST_okay)) {
|
|
+ DPRINTK("Bad status %d from grant transfer to DOM%u\n",
|
|
+ gop->status, domid);
|
|
+ /*
|
|
+ * Page no longer belongs to us unless
|
|
+ * GNTST_bad_page, but that should be
|
|
+ * a fatal error anyway.
|
|
+ */
|
|
+ BUG_ON(gop->status == GNTST_bad_page);
|
|
+ status = NETIF_RSP_ERROR;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return status;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Emit one RX response per fragment described by the first @nr_frags
|
|
+ * entries of @meta. Every response except the last carries
|
|
+ * NETRXF_more_data; the offset is 0 for copied fragments and the
|
|
+ * fragment's page_offset otherwise.
|
|
+ */
|
|
+static void netbk_add_frag_responses(netif_t *netif, int status,
|
|
+				     struct netbk_rx_meta *meta, int nr_frags)
|
|
+{
|
|
+	int i;
|
|
+
|
|
+	for (i = 0; i < nr_frags; i++) {
|
|
+		struct netbk_rx_meta *m = &meta[i];
|
|
+		unsigned long offset = m->frag.page_offset;
|
|
+		int flags = NETRXF_more_data;
|
|
+		int id = m->id;
|
|
+
|
|
+		if (i == nr_frags - 1)
|
|
+			flags = 0;
|
|
+
|
|
+		if (m->copy)
|
|
+			offset = 0;
|
|
+
|
|
+		make_rx_response(netif, id, status, offset,
|
|
+				 m->frag.size, flags);
|
|
+	}
|
|
+}
|
|
+/*
|
|
+ * net_rx_action - body of net_rx_tasklet: deliver queued skbs to frontends.
|
|
+ *
|
|
+ * Phase 1 drains rx_queue, queueing grant copy/transfer (and, for flipping
|
|
+ * receivers, remap) operations for each skb until the batch is nearly full
|
|
+ * or check_mfn() cannot provide enough frames. Phase 2 issues everything in
|
|
+ * one multicall. Phase 3 checks the results per skb, writes the ring
|
|
+ * responses (plus a GSO extra-info slot when needed), updates the stats,
|
|
+ * wakes a stopped queue if there is room again, drops the netif reference
|
|
+ * and frees the skb. Finally each irq that needs it is notified once, and
|
|
+ * the tasklet reschedules itself if rx_queue is not empty and net_timer is
|
|
+ * not pending.
|
|
+ */
|
|
+static void net_rx_action(unsigned long unused)
|
|
+{
|
|
+ netif_t *netif = NULL;
|
|
+ s8 status;
|
|
+ u16 id, irq, flags;
|
|
+ netif_rx_response_t *resp;
|
|
+ multicall_entry_t *mcl;
|
|
+ struct sk_buff_head rxq;
|
|
+ struct sk_buff *skb;
|
|
+ int notify_nr = 0;
|
|
+ int ret;
|
|
+ int nr_frags;
|
|
+ int count;
|
|
+ unsigned long offset;
|
|
+
|
|
+ /*
|
|
+ * Putting hundreds of bytes on the stack is considered rude.
|
|
+ * Static works because a tasklet can only be on one CPU at any time.
|
|
+ */
|
|
+ static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
|
|
+ static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
|
|
+ static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
|
|
+ static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
|
|
+ static unsigned char rx_notify[NR_IRQS];
|
|
+ static u16 notify_list[NET_RX_RING_SIZE];
|
|
+ static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
|
|
+
|
|
+ struct netrx_pending_operations npo = {
|
|
+ mmu: rx_mmu,
|
|
+ trans: grant_trans_op,
|
|
+ copy: grant_copy_op,
|
|
+ mcl: rx_mcl,
|
|
+ meta: meta};
|
|
+
|
|
+ skb_queue_head_init(&rxq);
|
|
+
|
|
+ count = 0;
|
|
+
|
|
+ while ((skb = skb_dequeue(&rx_queue)) != NULL) {
|
|
+ nr_frags = skb_shinfo(skb)->nr_frags;
|
|
+ *(int *)skb->cb = nr_frags; /* saved in the control block for the completion loop below */
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap) &&
|
|
+ !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
|
|
+ check_mfn(nr_frags + 1)) {
|
|
+ /* Memory squeeze? Back off for an arbitrary while. */
|
|
+ if ( net_ratelimit() )
|
|
+ WPRINTK("Memory squeeze in netback "
|
|
+ "driver.\n");
|
|
+ mod_timer(&net_timer, jiffies + HZ);
|
|
+ skb_queue_head(&rx_queue, skb); /* put it back at the front for the retry */
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ netbk_gop_skb(skb, &npo);
|
|
+
|
|
+ count += nr_frags + 1;
|
|
+
|
|
+ __skb_queue_tail(&rxq, skb);
|
|
+
|
|
+ /* Filled the batch queue? */
|
|
+ if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
|
|
+
|
|
+ npo.mmu_mcl = npo.mcl_prod; /* index the mmu_update entry gets if one is appended below */
|
|
+ if (npo.mcl_prod) {
|
|
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
|
+ BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
|
|
+ mcl = npo.mcl + npo.mcl_prod++;
|
|
+
|
|
+ BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
|
|
+ mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; /* only the last remap of the batch carries the flush flags */
|
|
+
|
|
+ mcl->op = __HYPERVISOR_mmu_update;
|
|
+ mcl->args[0] = (unsigned long)rx_mmu;
|
|
+ mcl->args[1] = npo.mmu_prod;
|
|
+ mcl->args[2] = 0;
|
|
+ mcl->args[3] = DOMID_SELF;
|
|
+ }
|
|
+
|
|
+ if (npo.trans_prod) {
|
|
+ BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
|
|
+ mcl = npo.mcl + npo.mcl_prod++;
|
|
+ mcl->op = __HYPERVISOR_grant_table_op;
|
|
+ mcl->args[0] = GNTTABOP_transfer;
|
|
+ mcl->args[1] = (unsigned long)grant_trans_op;
|
|
+ mcl->args[2] = npo.trans_prod;
|
|
+ }
|
|
+
|
|
+ if (npo.copy_prod) {
|
|
+ BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
|
|
+ mcl = npo.mcl + npo.mcl_prod++;
|
|
+ mcl->op = __HYPERVISOR_grant_table_op;
|
|
+ mcl->args[0] = GNTTABOP_copy;
|
|
+ mcl->args[1] = (unsigned long)grant_copy_op;
|
|
+ mcl->args[2] = npo.copy_prod;
|
|
+ }
|
|
+
|
|
+ /* Nothing to do? */
|
|
+ if (!npo.mcl_prod)
|
|
+ return;
|
|
+
|
|
+ BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
|
|
+
|
|
+ ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
|
|
+ BUG_ON(ret != 0);
|
|
+ /* The mmu_machphys_update() must not fail. */
|
|
+ BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
|
|
+
|
|
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
|
|
+ nr_frags = *(int *)skb->cb;
|
|
+
|
|
+ netif = netdev_priv(skb->dev);
|
|
+
|
|
+ status = netbk_check_gop(nr_frags, netif->domid, &npo);
|
|
+
|
|
+ /* We can't rely on skb_release_data to release the
|
|
+ pages used by fragments for us, since it tries to
|
|
+ touch the pages in the fraglist. If we're in
|
|
+ flipping mode, that doesn't work. In copying mode,
|
|
+ we still have access to all of the pages, and so
|
|
+ it's safe to let release_data deal with it. */
|
|
+ /* (Freeing the fragments is safe since we copy
|
|
+ non-linear skbs destined for flipping interfaces) */
|
|
+ if (!netif->copying_receiver) {
|
|
+ atomic_set(&(skb_shinfo(skb)->dataref), 1);
|
|
+ skb_shinfo(skb)->frag_list = NULL;
|
|
+ skb_shinfo(skb)->nr_frags = 0;
|
|
+ netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
|
|
+ }
|
|
+
|
|
+ netif->stats.tx_bytes += skb->len;
|
|
+ netif->stats.tx_packets++;
|
|
+
|
|
+ id = meta[npo.meta_cons].id;
|
|
+ flags = nr_frags ? NETRXF_more_data : 0;
|
|
+
|
|
+ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
|
|
+ flags |= NETRXF_csum_blank | NETRXF_data_validated;
|
|
+ else if (skb->proto_data_valid) /* remote but checksummed? */
|
|
+ flags |= NETRXF_data_validated;
|
|
+
|
|
+ if (meta[npo.meta_cons].copy)
|
|
+ offset = 0;
|
|
+ else
|
|
+ offset = offset_in_page(skb->data);
|
|
+ resp = make_rx_response(netif, id, status, offset,
|
|
+ skb_headlen(skb), flags);
|
|
+
|
|
+ if (meta[npo.meta_cons].frag.size) { /* head meta: frag.size holds gso_size */
|
|
+ struct netif_extra_info *gso =
|
|
+ (struct netif_extra_info *)
|
|
+ RING_GET_RESPONSE(&netif->rx,
|
|
+ netif->rx.rsp_prod_pvt++);
|
|
+
|
|
+ resp->flags |= NETRXF_extra_info;
|
|
+
|
|
+ gso->u.gso.size = meta[npo.meta_cons].frag.size;
|
|
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
|
|
+ gso->u.gso.pad = 0;
|
|
+ gso->u.gso.features = 0;
|
|
+
|
|
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
|
|
+ gso->flags = 0;
|
|
+ }
|
|
+
|
|
+ netbk_add_frag_responses(netif, status,
|
|
+ meta + npo.meta_cons + 1,
|
|
+ nr_frags);
|
|
+
|
|
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
|
|
+ irq = netif->irq;
|
|
+ if (ret && !rx_notify[irq]) { /* remember each irq at most once per batch */
|
|
+ rx_notify[irq] = 1;
|
|
+ notify_list[notify_nr++] = irq;
|
|
+ }
|
|
+
|
|
+ if (netif_queue_stopped(netif->dev) &&
|
|
+ netif_schedulable(netif) &&
|
|
+ !netbk_queue_full(netif))
|
|
+ netif_wake_queue(netif->dev);
|
|
+
|
|
+ netif_put(netif); /* pairs with netif_get() in netif_be_start_xmit() */
|
|
+ dev_kfree_skb(skb);
|
|
+
|
|
+ npo.meta_cons += nr_frags + 1;
|
|
+ }
|
|
+
|
|
+ while (notify_nr != 0) {
|
|
+ irq = notify_list[--notify_nr];
|
|
+ rx_notify[irq] = 0;
|
|
+ notify_remote_via_irq(irq);
|
|
+ }
|
|
+
|
|
+ /* More work to do? */
|
|
+ if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
|
|
+ tasklet_schedule(&net_rx_tasklet);
|
|
+#if 0
|
|
+ else
|
|
+ xen_network_done_notify();
|
|
+#endif
|
|
+}
|
|
+/* Timer-style callback (presumably for net_timer; its setup is not shown here): schedules net_rx_tasklet. */
|
|
+static void net_alarm(unsigned long unused)
|
|
+{
|
|
+ tasklet_schedule(&net_rx_tasklet);
|
|
+}
|
|
+/* Timer-style callback (presumably for netbk_tx_pending_timer; setup not shown here): schedules net_tx_tasklet. */
|
|
+static void netbk_tx_pending_timeout(unsigned long unused)
|
|
+{
|
|
+ tasklet_schedule(&net_tx_tasklet);
|
|
+}
|
|
+
|
|
+/* Return the statistics block embedded in the netif_t behind @dev. */
|
|
+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
|
|
+{
|
|
+	return &((netif_t *)netdev_priv(dev))->stats;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Non-zero while @netif is linked on net_schedule_list; list.next is reset
|
|
+ * to NULL by remove_from_net_schedule_list() when it is taken off.
|
|
+ */
|
|
+static int __on_net_schedule_list(netif_t *netif)
|
|
+{
|
|
+	if (netif->list.next == NULL)
|
|
+		return 0;
|
|
+
|
|
+	return 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Unlink @netif from net_schedule_list if it is on it, mark it as unlinked
|
|
+ * and drop the reference taken when it was added.
|
|
+ * Must be called with net_schedule_list_lock held.
|
|
+ */
|
|
+static void remove_from_net_schedule_list(netif_t *netif)
|
|
+{
|
|
+	if (unlikely(!__on_net_schedule_list(netif)))
|
|
+		return;
|
|
+
|
|
+	list_del(&netif->list);
|
|
+	netif->list.next = NULL;
|
|
+	netif_put(netif);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Dequeue the first interface on net_schedule_list.
|
|
+ *
|
|
+ * Returns it with an extra reference held on behalf of the caller, or NULL
|
|
+ * if the list is empty.
|
|
+ */
|
|
+static netif_t *poll_net_schedule_list(void)
|
|
+{
|
|
+	netif_t *first = NULL;
|
|
+
|
|
+	spin_lock_irq(&net_schedule_list_lock);
|
|
+
|
|
+	if (list_empty(&net_schedule_list))
|
|
+		goto out;
|
|
+
|
|
+	first = list_first_entry(&net_schedule_list, netif_t, list);
|
|
+	/* Take the caller's reference before the list's own one is dropped. */
|
|
+	netif_get(first);
|
|
+	remove_from_net_schedule_list(first);
|
|
+
|
|
+ out:
|
|
+	spin_unlock_irq(&net_schedule_list_lock);
|
|
+	return first;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Append @netif to net_schedule_list unless it is already queued or is not
|
|
+ * schedulable. The list holds its own reference on every queued netif.
|
|
+ */
|
|
+static void add_to_net_schedule_list_tail(netif_t *netif)
|
|
+{
|
|
+	unsigned long flags;
|
|
+
|
|
+	/* Unlocked fast path; the test is repeated under the lock below. */
|
|
+	if (__on_net_schedule_list(netif))
|
|
+		return;
|
|
+
|
|
+	spin_lock_irqsave(&net_schedule_list_lock, flags);
|
|
+	if (!__on_net_schedule_list(netif)) {
|
|
+		if (likely(netif_schedulable(netif))) {
|
|
+			list_add_tail(&netif->list, &net_schedule_list);
|
|
+			netif_get(netif);
|
|
+		}
|
|
+	}
|
|
+	spin_unlock_irqrestore(&net_schedule_list_lock, flags);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
|
|
+ * If this driver is pipelining transmit requests then we can be very
|
|
+ * aggressive in avoiding new-packet notifications -- frontend only needs to
|
|
+ * send a notification if there are no outstanding unreceived responses.
|
|
+ * If we may be buffering transmit buffers for any reason then we must be rather
|
|
+ * more conservative and treat this as the final check for pending work.
|
|
+ */
|
|
+void netif_schedule_work(netif_t *netif)
|
|
+{
|
|
+	int more_to_do;
|
|
+
|
|
+	/* Pipelined: a plain look at the ring; otherwise do the final check. */
|
|
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
|
|
+	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
|
|
+#else
|
|
+	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
|
|
+#endif
|
|
+
|
|
+	if (!more_to_do)
|
|
+		return;
|
|
+
|
|
+	/* Requests are waiting: queue the interface and kick the TX path. */
|
|
+	add_to_net_schedule_list_tail(netif);
|
|
+	maybe_schedule_tx_action();
|
|
+}
|
|
+/* Take @netif off net_schedule_list (if it is queued) under net_schedule_list_lock. */
|
|
+void netif_deschedule_work(netif_t *netif)
|
|
+{
|
|
+ spin_lock_irq(&net_schedule_list_lock);
|
|
+ remove_from_net_schedule_list(netif);
|
|
+ spin_unlock_irq(&net_schedule_list_lock);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Replenish @netif's transmit credit.
|
|
+ *
|
|
+ * The new remaining_credit is the old value plus credit_bytes (saturating
|
|
+ * at ULONG_MAX), capped at a burst limit. The burst limit is the size of
|
|
+ * the request at tx.req_cons, clamped to at most 131072 bytes and to at
|
|
+ * least credit_bytes.
|
|
+ */
|
|
+static void tx_add_credit(netif_t *netif)
|
|
+{
|
|
+	unsigned long burst_cap, topped_up;
|
|
+
|
|
+	/*
|
|
+	 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
|
|
+	 * Otherwise the interface can seize up due to insufficient credit.
|
|
+	 */
|
|
+	burst_cap = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
|
|
+	if (burst_cap > 131072UL)
|
|
+		burst_cap = 131072UL;
|
|
+	if (burst_cap < netif->credit_bytes)
|
|
+		burst_cap = netif->credit_bytes;
|
|
+
|
|
+	/* Take care that adding a new chunk of credit doesn't wrap to zero. */
|
|
+	topped_up = netif->remaining_credit + netif->credit_bytes;
|
|
+	if (topped_up < netif->remaining_credit)
|
|
+		topped_up = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
|
|
+
|
|
+	if (topped_up < burst_cap)
|
|
+		netif->remaining_credit = topped_up;
|
|
+	else
|
|
+		netif->remaining_credit = burst_cap;
|
|
+}
|
|
+/* Timer-style callback; @data is a netif_t pointer: replenish its credit, then look for pending TX work. */
|
|
+static void tx_credit_callback(unsigned long data)
|
|
+{
|
|
+ netif_t *netif = (netif_t *)data;
|
|
+ tx_add_credit(netif);
|
|
+ netif_schedule_work(netif);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Hand pending slot @pending_idx to gnttab_copy_grant_page(), passing its
|
|
+ * grant handle and the address of its mmap_pages[] entry; returns that
|
|
+ * call's result unchanged.
|
|
+ */
|
|
+static inline int copy_pending_req(PEND_RING_IDX pending_idx)
|
|
+{
|
|
+	struct page **slot = &mmap_pages[pending_idx];
|
|
+
|
|
+	return gnttab_copy_grant_page(grant_tx_handle[pending_idx], slot);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Shuffle the dealloc_ring entries in [@dc, @dp): each position in turn is
|
|
+ * swapped with a position at or after it, chosen from a multiplicative
|
|
+ * pseudo-random sequence whose state persists across calls.
|
|
+ */
|
|
+static void permute_dealloc_ring(PEND_RING_IDX dc, PEND_RING_IDX dp)
|
|
+{
|
|
+	static unsigned random_src = 0x12345678;
|
|
+
|
|
+	for (; dc != dp; dc++, random_src *= 68389) {
|
|
+		unsigned step = (random_src / 256) % (dp - dc);
|
|
+		PEND_RING_IDX pick = dc + step;
|
|
+		u16 *here = &dealloc_ring[MASK_PEND_IDX(dc)];
|
|
+		u16 *there = &dealloc_ring[MASK_PEND_IDX(pick)];
|
|
+		u16 saved = *there;
|
|
+
|
|
+		*there = *here;
|
|
+		*here = saved;
|
|
+	}
|
|
+}
|
|
+/*
|
|
+ * net_tx_action_dealloc - complete TX slots released through dealloc_ring.
|
|
+ *
|
|
+ * 1. Drain dealloc_ring (optionally permuting it first), moving each slot
|
|
+ * onto a local list and queueing a grant unmap for pages still mapped.
|
|
+ * 2. Issue the unmaps in one GNTTABOP_unmap_grant_ref call.
|
|
+ * 3. In NETBK_DELAYED_COPY_SKB mode, try copy_pending_req() on slots that
|
|
+ * have been in use for at least HZ/2 jiffies.
|
|
+ * 4. For every slot on the local list: send an OKAY response, reset the
|
|
+ * grant page, return the index to pending_ring and drop the netif ref.
|
|
+ */
|
|
+inline static void net_tx_action_dealloc(void)
|
|
+{
|
|
+ struct netbk_tx_pending_inuse *inuse, *n;
|
|
+ gnttab_unmap_grant_ref_t *gop;
|
|
+ u16 pending_idx;
|
|
+ PEND_RING_IDX dc, dp;
|
|
+ netif_t *netif;
|
|
+ LIST_HEAD(list);
|
|
+
|
|
+ dc = dealloc_cons;
|
|
+ gop = tx_unmap_ops;
|
|
+
|
|
+ /*
|
|
+ * Free up any grants we have finished using
|
|
+ */
|
|
+ do {
|
|
+ dp = dealloc_prod;
|
|
+
|
|
+ /* Ensure we see all indices enqueued by netif_idx_release(). */
|
|
+ smp_rmb();
|
|
+
|
|
+ if (MODPARM_permute_returns)
|
|
+ permute_dealloc_ring(dc, dp);
|
|
+
|
|
+ while (dc != dp) {
|
|
+ unsigned long pfn;
|
|
+
|
|
+ pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
|
|
+ list_move_tail(&pending_inuse[pending_idx].list, &list);
|
|
+
|
|
+ pfn = idx_to_pfn(pending_idx);
|
|
+ /* Already unmapped? */
|
|
+ if (!phys_to_machine_mapping_valid(pfn))
|
|
+ continue;
|
|
+
|
|
+ gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
|
|
+ GNTMAP_host_map,
|
|
+ grant_tx_handle[pending_idx]);
|
|
+ gop++;
|
|
+ }
|
|
+
|
|
+ } while (dp != dealloc_prod); /* repeat if more entries were produced meanwhile */
|
|
+
|
|
+ dealloc_cons = dc;
|
|
+
|
|
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
|
|
+ tx_unmap_ops, gop - tx_unmap_ops))
|
|
+ BUG();
|
|
+
|
|
+ /* Copy any entries that have been pending for too long. */
|
|
+ if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
|
|
+ !list_empty(&pending_inuse_head)) {
|
|
+ list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
|
|
+ if (time_after(inuse->alloc_time + HZ / 2, jiffies))
|
|
+ break;
|
|
+
|
|
+ pending_idx = inuse - pending_inuse;
|
|
+
|
|
+ pending_tx_info[pending_idx].netif->nr_copied_skbs++;
|
|
+
|
|
+ switch (copy_pending_req(pending_idx)) {
|
|
+ case 0: /* copy succeeded: complete the slot below */
|
|
+ list_move_tail(&inuse->list, &list);
|
|
+ continue;
|
|
+ case -EBUSY: /* take it off the in-use list without completing it */
|
|
+ list_del_init(&inuse->list);
|
|
+ continue;
|
|
+ case -ENOENT: /* leave it on the in-use list */
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ break; /* any other result ends the scan */
|
|
+ }
|
|
+ }
|
|
+
|
|
+ list_for_each_entry_safe(inuse, n, &list, list) {
|
|
+ pending_idx = inuse - pending_inuse;
|
|
+
|
|
+ netif = pending_tx_info[pending_idx].netif;
|
|
+
|
|
+ make_tx_response(netif, &pending_tx_info[pending_idx].req,
|
|
+ NETIF_RSP_OKAY);
|
|
+
|
|
+ /* Ready for next use. */
|
|
+ gnttab_reset_grant_page(mmap_pages[pending_idx]);
|
|
+
|
|
+ pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
|
|
+
|
|
+ netif_put(netif);
|
|
+
|
|
+ list_del_init(&inuse->list);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
+ * Fail a TX packet.
+ *
+ * Sends a NETIF_RSP_ERROR response for @txp and then for each further
+ * request on the ring, starting at the current netif->tx.req_cons and
+ * stopping when the consumer index reaches @end.  Afterwards the consumer
+ * index is stored back, the interface is rescheduled via
+ * netif_schedule_work() and netif_put() is called on @netif.
+ */
+static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
+{
+	RING_IDX cons = netif->tx.req_cons;
+
+	do {
+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+		/*
+		 * Ring indices are free-running and wrap around, so compare
+		 * for equality.  A magnitude comparison (cons >= end) stops
+		 * too early once 'end' has wrapped past zero and 'cons' has
+		 * not, leaving requests without a response.
+		 */
+		if (cons == end)
+			break;
+		txp = RING_GET_REQUEST(&netif->tx, cons++);
+	} while (1);
+	netif->tx.req_cons = cons;
+	netif_schedule_work(netif);
+	netif_put(netif);
+}
|
|
+
|
|
+/*
+ * Copy and validate the follow-on requests of a multi-request TX packet.
+ *
+ * @netif:      interface whose TX ring is read, starting at tx.req_cons
+ * @first:      the packet's first request; its size is reduced by the size
+ *              of every follow-on request that is accepted
+ * @txp:        output array receiving a private copy of each follow-on
+ *              request
+ * @work_to_do: number of requests that may be read from the ring
+ *
+ * Returns the number of follow-on requests (0 if @first does not carry
+ * NETTXF_more_data).  On a validation failure it returns the negated count
+ * of requests accounted for so far.
+ *
+ * NOTE(review): when the very first check fails with frags == 0 the result
+ * is -0, i.e. 0, which a caller testing "ret < 0" cannot tell apart from
+ * "no follow-on requests" -- confirm whether that case needs handling.
+ */
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
+				netif_tx_request_t *txp, int work_to_do)
+{
+	RING_IDX cons = netif->tx.req_cons;
+	int frags = 0;
+
+	/* Single-request packet: nothing to count. */
+	if (!(first->flags & NETTXF_more_data))
+		return 0;
+
+	do {
+		/* The flags promise another request that is not available. */
+		if (frags >= work_to_do) {
+			DPRINTK("Need more frags\n");
+			return -frags;
+		}
+
+		/* More requests than the txp array / an skb can hold. */
+		if (unlikely(frags >= MAX_SKB_FRAGS)) {
+			DPRINTK("Too many frags\n");
+			return -frags;
+		}
+
+		/* Work on a private copy so later checks use stable values. */
+		memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
+		       sizeof(*txp));
+		/* This request is larger than what is left of the packet. */
+		if (txp->size > first->size) {
+			DPRINTK("Frags galore\n");
+			return -frags;
+		}
+
+		first->size -= txp->size;
+		frags++;
+
+		/* The data of one request must not cross a page boundary. */
+		if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+			DPRINTK("txp->offset: %x, size: %u\n",
+				txp->offset, txp->size);
+			return -frags;
+		}
+	} while ((txp++)->flags & NETTXF_more_data);
+
+	return frags;
+}
|
|
+
|
|
+/*
+ * Queue one grant-map operation per fragment request of @skb.
+ *
+ * For every fragment slot that still needs a page, a free index is taken
+ * from pending_ring, a read-only host mapping of the request's grant is
+ * appended at @mop, the request and its owning @netif (with an extra
+ * reference) are recorded in pending_tx_info[], and the index is stashed in
+ * the fragment's page pointer.  Returns the position after the last
+ * operation written.
+ */
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+						  struct sk_buff *skb,
+						  netif_tx_request_t *txp,
+						  gnttab_map_grant_ref_t *mop)
+{
+	struct skb_shared_info *si = skb_shinfo(skb);
+	unsigned long hdr_idx = *((u16 *)skb->data);
+	unsigned long idx;
+	int slot;
+
+	/* Slot 0 is already taken when it shares the header's page. */
+	slot = ((unsigned long)si->frags[0].page == hdr_idx) ? 1 : 0;
+
+	while (slot < si->nr_frags) {
+		idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+		pending_cons++;
+
+		gnttab_set_map_op(mop, idx_to_kaddr(idx),
+				  GNTMAP_host_map | GNTMAP_readonly,
+				  txp->gref, netif->domid);
+		mop++;
+
+		memcpy(&pending_tx_info[idx].req, txp, sizeof(*txp));
+		netif_get(netif);
+		pending_tx_info[idx].netif = netif;
+		si->frags[slot].page = (void *)idx;
+
+		slot++;
+		txp++;
+	}
+
+	return mop;
+}
|
|
+
|
|
+/*
+ * Check the results of the grant-map operations belonging to @skb.
+ *
+ * *@mopp points at the map operation for the skb's header; the operations
+ * for its fragments follow.  For every successful mapping the P2M entry and
+ * grant handle are recorded.  If any mapping failed, an error response is
+ * sent for the failed request(s) and every request of the packet that did
+ * map successfully is released via netif_idx_release().
+ *
+ * On return *@mopp points past the last operation consumed.  Returns
+ * GNTST_okay (0) if everything mapped, otherwise the first error status.
+ */
+static int netbk_tx_check_mop(struct sk_buff *skb,
+			       gnttab_map_grant_ref_t **mopp)
+{
+	gnttab_map_grant_ref_t *mop = *mopp;
+	int pending_idx = *((u16 *)skb->data);
+	netif_t *netif = pending_tx_info[pending_idx].netif;
+	netif_tx_request_t *txp;
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	int nr_frags = shinfo->nr_frags;
+	int i, err, start;
+
+	/* Check status of header. */
+	err = mop->status;
+	if (unlikely(err != GNTST_okay)) {
+		txp = &pending_tx_info[pending_idx].req;
+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+		netif_put(netif);
+	} else {
+		set_phys_to_machine(idx_to_pfn(pending_idx),
+			FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+		grant_tx_handle[pending_idx] = mop->handle;
+	}
+
+	/* Skip first skb fragment if it is on same page as header fragment. */
+	start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+	for (i = start; i < nr_frags; i++) {
+		int j, newerr;
+
+		pending_idx = (unsigned long)shinfo->frags[i].page;
+
+		/* Check error status: if okay then remember grant handle. */
+		newerr = (++mop)->status;
+		if (likely(newerr == GNTST_okay)) {
+			set_phys_to_machine(idx_to_pfn(pending_idx),
+				FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+			grant_tx_handle[pending_idx] = mop->handle;
+			/* Had a previous error? Invalidate this fragment. */
+			if (unlikely(err != GNTST_okay))
+				netif_idx_release(pending_idx);
+			continue;
+		}
+
+		/* Error on this fragment: respond to client with an error. */
+		txp = &pending_tx_info[pending_idx].req;
+		make_tx_response(netif, txp, NETIF_RSP_ERROR);
+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+		netif_put(netif);
+
+		/* Not the first error? Preceding frags already invalidated. */
+		if (err != GNTST_okay)
+			continue;
+
+		/* First error: invalidate header and preceding fragments. */
+		pending_idx = *((u16 *)skb->data);
+		netif_idx_release(pending_idx);
+		for (j = start; j < i; j++) {
+			/*
+			 * Index with j, not i: frags[i] is the fragment that
+			 * just failed and has already been returned to
+			 * pending_ring above; the ones to release are the
+			 * successfully mapped fragments before it.
+			 */
+			pending_idx = (unsigned long)shinfo->frags[j].page;
+			netif_idx_release(pending_idx);
+		}
+
+		/* Remember the error: invalidate all subsequent fragments. */
+		err = newerr;
+	}
+
+	*mopp = mop + 1;
+	return err;
+}
|
|
+
|
|
+/*
+ * Turn the pending indices stashed in @skb's fragment slots into real
+ * fragments: each slot gets its page from mmap_pages[] and its offset/size
+ * from the saved request, the pending entry is time-stamped and appended to
+ * pending_inuse_head, and the skb length accounting grows by the fragment
+ * size.
+ */
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+	struct skb_shared_info *si = skb_shinfo(skb);
+	skb_frag_t *frag = si->frags;
+	skb_frag_t *end = frag + si->nr_frags;
+
+	for (; frag < end; frag++) {
+		unsigned long idx = (unsigned long)frag->page;
+		netif_tx_request_t *req = &pending_tx_info[idx].req;
+
+		/* Track how long this mapping has been in use. */
+		pending_inuse[idx].alloc_time = jiffies;
+		list_add_tail(&pending_inuse[idx].list,
+			      &pending_inuse_head);
+
+		frag->page = mmap_pages[idx];
+		frag->size = req->size;
+		frag->page_offset = req->offset;
+
+		skb->len += req->size;
+		skb->data_len += req->size;
+		skb->truesize += req->size;
+	}
+}
|
|
+
|
|
+/*
+ * Consume the chain of extra-info slots that follows a TX request.
+ *
+ * Each slot is copied off the ring and stored in @extras at index
+ * (type - 1); netif->tx.req_cons is advanced past every slot read.
+ * Returns the remaining @work_to_do, -EBADR if the chain runs past the
+ * available requests, or -EINVAL for an out-of-range type.
+ */
+int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
+		     int work_to_do)
+{
+	struct netif_extra_info extra;
+	RING_IDX cons = netif->tx.req_cons;
+
+	for (;;) {
+		if (unlikely(work_to_do-- <= 0)) {
+			DPRINTK("Missing extra info\n");
+			return -EBADR;
+		}
+
+		memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
+		       sizeof(extra));
+		/* The slot is consumed whether or not it is valid. */
+		netif->tx.req_cons = ++cons;
+
+		if (unlikely(!extra.type ||
+			     extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+			DPRINTK("Invalid extra type: %d\n", extra.type);
+			return -EINVAL;
+		}
+
+		memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
+
+		if (!(extra.flags & XEN_NETIF_EXTRA_FLAG_MORE))
+			break;
+	}
+
+	return work_to_do;
+}
|
|
+
|
|
+/*
+ * Apply a GSO extra-info descriptor to @skb.
+ *
+ * Rejects a zero segment size and any type other than
+ * XEN_NETIF_GSO_TYPE_TCPV4 with -EINVAL; otherwise fills in the skb's
+ * shared-info GSO fields and returns 0.
+ */
+static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+{
+	struct skb_shared_info *si;
+
+	if (gso->u.gso.size == 0) {
+		DPRINTK("GSO size must not be zero.\n");
+		return -EINVAL;
+	}
+
+	/* Currently only TCPv4 S.O. is supported. */
+	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+		DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
+		return -EINVAL;
+	}
+
+	si = skb_shinfo(skb);
+	si->gso_size = gso->u.gso.size;
+	/* DODGY: header must be checked, and gso_segs computed. */
+	si->gso_type = SKB_GSO_TCPV4 | SKB_GSO_DODGY;
+	si->gso_segs = 0;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Called after netfront has transmitted.
+ *
+ * Works in three phases:
+ *  1. net_tx_action_dealloc() retires previously released pending entries.
+ *  2. While enough pending slots are free and interfaces are scheduled, one
+ *     packet per iteration is pulled off a frontend's TX ring, validated,
+ *     given an skb, and its grant-map operations are queued in tx_map_ops;
+ *     the skb is parked on tx_queue.
+ *  3. All queued maps are issued in a single hypercall, then each parked skb
+ *     is checked, filled with header data and fragments, and passed to
+ *     netif_rx().
+ * Finally, in delayed-copy mode, the pending timer is re-armed for the
+ * oldest in-use entry.
+ */
+static void net_tx_action(unsigned long unused)
+{
+	struct sk_buff *skb;
+	netif_t *netif;
+	netif_tx_request_t txreq;
+	netif_tx_request_t txfrags[MAX_SKB_FRAGS];
+	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+	u16 pending_idx;
+	RING_IDX i;
+	gnttab_map_grant_ref_t *mop;
+	unsigned int data_len;
+	int ret, work_to_do;
+
+	net_tx_action_dealloc();
+
+	mop = tx_map_ops;
+	/* Only start a packet if a worst-case fragment count still fits. */
+	while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+		!list_empty(&net_schedule_list)) {
+		/* Get a netif from the list with work to do. */
+		netif = poll_net_schedule_list();
+		if (!netif)
+			continue;
+
+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+		if (!work_to_do) {
+			netif_put(netif);
+			continue;
+		}
+
+		i = netif->tx.req_cons;
+		rmb(); /* Ensure that we see the request before we copy it. */
+		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
+		/* Credit-based scheduling. */
+		if (txreq.size > netif->remaining_credit) {
+			unsigned long now = jiffies;
+			unsigned long next_credit =
+				netif->credit_timeout.expires +
+				msecs_to_jiffies(netif->credit_usec / 1000);
+
+			/* Timer could already be pending in rare cases. */
+			if (timer_pending(&netif->credit_timeout)) {
+				netif_put(netif);
+				continue;
+			}
+
+			/* Passed the point where we can replenish credit? */
+			if (time_after_eq(now, next_credit)) {
+				netif->credit_timeout.expires = now;
+				tx_add_credit(netif);
+			}
+
+			/* Still too big to send right now? Set a callback. */
+			if (txreq.size > netif->remaining_credit) {
+				netif->credit_timeout.data =
+					(unsigned long)netif;
+				netif->credit_timeout.function =
+					tx_credit_callback;
+				__mod_timer(&netif->credit_timeout,
+					    next_credit);
+				netif_put(netif);
+				continue;
+			}
+		}
+		netif->remaining_credit -= txreq.size;
+
+		/* The first request is now consumed. */
+		work_to_do--;
+		netif->tx.req_cons = ++i;
+
+		memset(extras, 0, sizeof(extras));
+		if (txreq.flags & NETTXF_extra_info) {
+			work_to_do = netbk_get_extras(netif, extras,
+						      work_to_do);
+			/* netbk_get_extras() advanced req_cons itself. */
+			i = netif->tx.req_cons;
+			if (unlikely(work_to_do < 0)) {
+				netbk_tx_err(netif, &txreq, i);
+				continue;
+			}
+		}
+
+		ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
+		if (unlikely(ret < 0)) {
+			/* -ret requests were looked at: fail those as well. */
+			netbk_tx_err(netif, &txreq, i - ret);
+			continue;
+		}
+		i += ret;
+
+		if (unlikely(txreq.size < ETH_HLEN)) {
+			DPRINTK("Bad packet size: %d\n", txreq.size);
+			netbk_tx_err(netif, &txreq, i);
+			continue;
+		}
+
+		/* No crossing a page as the payload mustn't fragment. */
+		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
+				txreq.offset, txreq.size,
+				(txreq.offset &~PAGE_MASK) + txreq.size);
+			netbk_tx_err(netif, &txreq, i);
+			continue;
+		}
+
+		/* Peek at the next free slot; it is claimed further down. */
+		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+		/*
+		 * Linear part: at most PKT_PROT_LEN bytes, unless no fragment
+		 * slot is left for the remainder of the first request.
+		 */
+		data_len = (txreq.size > PKT_PROT_LEN &&
+			    ret < MAX_SKB_FRAGS) ?
+			PKT_PROT_LEN : txreq.size;
+
+		skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
+				GFP_ATOMIC | __GFP_NOWARN);
+		if (unlikely(skb == NULL)) {
+			DPRINTK("Can't allocate a skb in start_xmit.\n");
+			netbk_tx_err(netif, &txreq, i);
+			break;
+		}
+
+		/* Packets passed to netif_rx() must have some headroom. */
+		skb_reserve(skb, 16 + NET_IP_ALIGN);
+
+		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+			struct netif_extra_info *gso;
+			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+			if (netbk_set_skb_gso(skb, gso)) {
+				kfree_skb(skb);
+				netbk_tx_err(netif, &txreq, i);
+				continue;
+			}
+		}
+
+		/* Queue the map operation for the header page. */
+		gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
+				  GNTMAP_host_map | GNTMAP_readonly,
+				  txreq.gref, netif->domid);
+		mop++;
+
+		memcpy(&pending_tx_info[pending_idx].req,
+		       &txreq, sizeof(txreq));
+		pending_tx_info[pending_idx].netif = netif;
+		/* Stash the header's pending index in the (still empty) data. */
+		*((u16 *)skb->data) = pending_idx;
+
+		__skb_put(skb, data_len);
+
+		skb_shinfo(skb)->nr_frags = ret;
+		if (data_len < txreq.size) {
+			/* Rest of the first request becomes fragment 0. */
+			skb_shinfo(skb)->nr_frags++;
+			skb_shinfo(skb)->frags[0].page =
+				(void *)(unsigned long)pending_idx;
+		} else {
+			/* Discriminate from any valid pending_idx value. */
+			skb_shinfo(skb)->frags[0].page = (void *)~0UL;
+		}
+
+		__skb_queue_tail(&tx_queue, skb);
+
+		/* Claim the pending slot peeked at above. */
+		pending_cons++;
+
+		mop = netbk_get_requests(netif, skb, txfrags, mop);
+
+		netif->tx.req_cons = i;
+		netif_schedule_work(netif);
+
+		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+			break;
+	}
+
+	/* Nothing was queued: skip the hypercall. */
+	if (mop == tx_map_ops)
+		goto out;
+
+	/* NOTE: some maps may fail with GNTST_eagain, which could be successfully
+	 * retried in the backend after a delay. However, we can also fail the tx
+	 * req and let the frontend resend the relevant packet again. This is fine
+	 * because it is unlikely that a network buffer will be paged out or shared,
+	 * and therefore it is unlikely to fail with GNTST_eagain. */
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+	BUG_ON(ret);
+
+	/* Walk the results in the same order the operations were queued. */
+	mop = tx_map_ops;
+	while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+		netif_tx_request_t *txp;
+
+		pending_idx = *((u16 *)skb->data);
+		netif = pending_tx_info[pending_idx].netif;
+		txp = &pending_tx_info[pending_idx].req;
+
+		/* Check the remap error code. */
+		if (unlikely(netbk_tx_check_mop(skb, &mop))) {
+			DPRINTK("netback grant failed.\n");
+			/* Frag slots still hold indices, not pages. */
+			skb_shinfo(skb)->nr_frags = 0;
+			kfree_skb(skb);
+			continue;
+		}
+
+		/* Copy the linear part out of the mapped header page. */
+		data_len = skb->len;
+		memcpy(skb->data,
+		       (void *)(idx_to_kaddr(pending_idx)|txp->offset),
+		       data_len);
+		if (data_len < txp->size) {
+			/* Append the packet payload as a fragment. */
+			txp->offset += data_len;
+			txp->size -= data_len;
+		} else {
+			/* Schedule a response immediately. */
+			netif_idx_release(pending_idx);
+		}
+
+		/*
+		 * Old frontends do not assert data_validated but we
+		 * can infer it from csum_blank so test both flags.
+		 */
+		if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			skb->proto_data_valid = 1;
+		} else {
+			skb->ip_summed = CHECKSUM_NONE;
+			skb->proto_data_valid = 0;
+		}
+		skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+		netbk_fill_frags(skb);
+
+		/*
+		 * If the initial fragment was < PKT_PROT_LEN then
+		 * pull through some bytes from the other fragments to
+		 * increase the linear region to PKT_PROT_LEN bytes.
+		 */
+		if (skb_headlen(skb) < PKT_PROT_LEN && skb_is_nonlinear(skb)) {
+			int target = min_t(int, skb->len, PKT_PROT_LEN);
+			__pskb_pull_tail(skb, target - skb_headlen(skb));
+		}
+
+		skb->dev = netif->dev;
+		skb->protocol = eth_type_trans(skb, skb->dev);
+
+		netif->stats.rx_bytes += skb->len;
+		netif->stats.rx_packets++;
+
+		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
+		    unlikely(skb_linearize(skb))) {
+			DPRINTK("Can't linearize skb in net_tx_action.\n");
+			kfree_skb(skb);
+			continue;
+		}
+
+		netif_rx(skb);
+		netif->dev->last_rx = jiffies;
+	}
+
+ out:
+	/* Re-arm the pending timer relative to the oldest in-use entry. */
+	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
+	    !list_empty(&pending_inuse_head)) {
+		struct netbk_tx_pending_inuse *oldest;
+
+		oldest = list_entry(pending_inuse_head.next,
+				    struct netbk_tx_pending_inuse, list);
+		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
+	}
+}
|
|
+
|
|
+/*
+ * Hand @pending_idx back for deallocation: append it to dealloc_ring and
+ * schedule net_tx_tasklet.  Producers are serialised by a function-local
+ * spinlock taken with interrupts disabled.
+ */
+static void netif_idx_release(u16 pending_idx)
+{
+	static DEFINE_SPINLOCK(_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&_lock, flags);
+	dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
+	/* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+	smp_wmb();
+	dealloc_prod++;
+	spin_unlock_irqrestore(&_lock, flags);
+
+	tasklet_schedule(&net_tx_tasklet);
+}
|
|
+
|
|
+/*
+ * Page-release callback for the netback mapping pages: looks up the page's
+ * pending index and releases it.  Only order-0 pages with a valid index are
+ * expected; anything else is a BUG.
+ */
+static void netif_page_release(struct page *page, unsigned int order)
+{
+	int pending_idx = netif_page_index(page);
+
+	BUG_ON(order != 0);
+	BUG_ON(pending_idx < 0);
+
+	netif_idx_release((u16)pending_idx);
+}
|
|
+
|
|
+/*
+ * Interrupt handler for an interface (@dev_id is the netif_t): puts the
+ * interface on the TX schedule list, kicks TX processing, and wakes the
+ * net device queue when the interface is schedulable and not full.
+ */
+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+	netif_t *netif = dev_id;
+
+	add_to_net_schedule_list_tail(netif);
+	maybe_schedule_tx_action();
+
+	if (!netif_schedulable(netif))
+		return IRQ_HANDLED;
+	if (netbk_queue_full(netif))
+		return IRQ_HANDLED;
+
+	netif_wake_queue(netif->dev);
+	return IRQ_HANDLED;
+}
|
|
+
|
|
+/*
+ * Post a TX response with status @st for request @txp on @netif's TX ring.
+ * A request flagged NETTXF_extra_info gets an additional NETIF_RSP_NULL
+ * response slot.  The responses are pushed and the frontend is notified if
+ * the ring macros ask for it.
+ */
+static void make_tx_response(netif_t *netif,
+			     netif_tx_request_t *txp,
+			     s8 st)
+{
+	RING_IDX prod = netif->tx.rsp_prod_pvt;
+	netif_tx_response_t *rsp = RING_GET_RESPONSE(&netif->tx, prod);
+	int notify;
+
+	rsp->id = txp->id;
+	rsp->status = st;
+	prod++;
+
+	if (txp->flags & NETTXF_extra_info) {
+		RING_GET_RESPONSE(&netif->tx, prod)->status = NETIF_RSP_NULL;
+		prod++;
+	}
+
+	netif->tx.rsp_prod_pvt = prod;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+	if (notify)
+		notify_remote_via_irq(netif->irq);
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+	/* Every consumed request is answered: look for newly queued ones. */
+	if (prod == netif->tx.req_cons) {
+		int more_to_do;
+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+		if (more_to_do)
+			add_to_net_schedule_list_tail(netif);
+	}
+#endif
+}
|
|
+
|
|
+/*
+ * Fill in the next private RX response slot of @netif and advance
+ * rsp_prod_pvt; the response is not pushed here.  The status field carries
+ * @size, or @st when @st is negative.  Returns the slot written.
+ */
+static netif_rx_response_t *make_rx_response(netif_t *netif,
+					     u16 id,
+					     s8 st,
+					     u16 offset,
+					     u16 size,
+					     u16 flags)
+{
+	RING_IDX prod = netif->rx.rsp_prod_pvt;
+	netif_rx_response_t *rsp = RING_GET_RESPONSE(&netif->rx, prod);
+
+	rsp->offset = offset;
+	rsp->flags = flags;
+	rsp->id = id;
+	rsp->status = (st < 0) ? (s16)st : (s16)size;
+
+	netif->rx.rsp_prod_pvt = prod + 1;
+
+	return rsp;
+}
|
|
+
|
|
+#ifdef NETBE_DEBUG_INTERRUPT
|
|
+/*
+ * Debug interrupt handler: dumps the private and shared ring indices of
+ * every interface on net_schedule_list, holding net_schedule_list_lock
+ * while walking the list.
+ */
+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+{
+	netif_t *netif;
+	int n = 0;
+
+	printk(KERN_ALERT "netif_schedule_list:\n");
+	spin_lock_irq(&net_schedule_list_lock);
+
+	list_for_each_entry(netif, &net_schedule_list, list) {
+		printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+		       "rx_resp_prod=%08x\n",
+		       n, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+		printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
+		       netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+		printk(KERN_ALERT " shared(rx_req_prod=%08x "
+		       "rx_resp_prod=%08x\n",
+		       netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
+		printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
+		       netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
+		printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
+		       netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
+		n++;
+	}
+
+	spin_unlock_irq(&net_schedule_list_lock);
+	printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+	return IRQ_HANDLED;
+}
|
|
+#endif
|
|
+
|
|
+/*
+ * Module initialisation: sets up the RX/TX queues and timers, allocates
+ * the pool of mapping pages, initialises the pending ring and schedule
+ * list, selects the skb copy mode and registers the accelerator and xenbus
+ * parts.  Returns -ENODEV when not running on Xen and -ENOMEM when the
+ * page pool cannot be allocated.
+ */
+static int __init netback_init(void)
+{
+	int i;
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	/* We can increase reservation by this much in net_rx_action(). */
+	balloon_update_driver_allowance(NET_RX_RING_SIZE);
+
+	skb_queue_head_init(&rx_queue);
+	skb_queue_head_init(&tx_queue);
+
+	init_timer(&net_timer);
+	net_timer.data = 0;
+	net_timer.function = net_alarm;
+
+	init_timer(&netbk_tx_pending_timer);
+	netbk_tx_pending_timer.data = 0;
+	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
+
+	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
+	if (mmap_pages == NULL) {
+		printk("%s: out of memory\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	/* Every slot starts out free: pending_ring[] is the identity map. */
+	pending_cons = 0;
+	pending_prod = MAX_PENDING_REQS;
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		struct page *page = mmap_pages[i];
+
+		SetPageForeign(page, netif_page_release);
+		netif_set_page_index(page, i);
+		INIT_LIST_HEAD(&pending_inuse[i].list);
+		pending_ring[i] = i;
+	}
+
+	spin_lock_init(&net_schedule_list_lock);
+	INIT_LIST_HEAD(&net_schedule_list);
+
+	/* Delayed copy needs GNTTABOP_unmap_and_replace; probe for it. */
+	if (!MODPARM_copy_skb)
+		netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
+	else if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
+					   NULL, 0))
+		netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
+	else
+		netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
+
+	netif_accel_init();
+
+	netif_xenbus_init();
+
+#ifdef NETBE_DEBUG_INTERRUPT
+	(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
+				      0,
+				      netif_be_dbg,
+				      SA_SHIRQ,
+				      "net-be-dbg",
+				      &netif_be_dbg);
+#endif
+
+	return 0;
+}
|
|
+
|
|
+/* netback_init() is the module's initialisation entry point. */
+module_init(netback_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netback/xenbus.c 2011-02-17 09:58:10.000000000 +0100
|
|
@@ -0,0 +1,455 @@
|
|
+/* Xenbus code for netif backend
|
|
+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
|
|
+ Copyright (C) 2005 XenSource Ltd
|
|
+
|
|
+ This program is free software; you can redistribute it and/or modify
|
|
+ it under the terms of the GNU General Public License as published by
|
|
+ the Free Software Foundation; either version 2 of the License, or
|
|
+ (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ GNU General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
+*/
|
|
+
|
|
+#include <stdarg.h>
|
|
+#include <linux/module.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include "common.h"
|
|
+
|
|
+#if 0
|
|
+#undef DPRINTK
|
|
+#define DPRINTK(fmt, args...) \
|
|
+ printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
|
|
+#endif
|
|
+
|
|
+
|
|
+/* Forward declarations for helpers defined further down in this file. */
+static int connect_rings(struct backend_info *);
+static void connect(struct backend_info *);
+static void backend_create_netif(struct backend_info *be);
+static void netback_disconnect(struct device *);
|
|
+
|
|
+/*
+ * Tear down the backend state attached to @dev: removes accelerators,
+ * disconnects the interface, frees the backend_info and clears the
+ * device's driver_data.  Always returns 0.
+ */
+static int netback_remove(struct xenbus_device *dev)
+{
+	struct backend_info *info = dev->dev.driver_data;
+
+	netback_remove_accelerators(info, dev);
+	netback_disconnect(&dev->dev);
+
+	dev->dev.driver_data = NULL;
+	kfree(info);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Disconnect the interface belonging to @xbdev_dev, if there is one:
+ * emits a KOBJ_OFFLINE uevent, disconnects the netif and forgets it.
+ */
+static void netback_disconnect(struct device *xbdev_dev)
+{
+	struct backend_info *info = xbdev_dev->driver_data;
+
+	if (!info->netif)
+		return;
+
+	kobject_uevent(&xbdev_dev->kobj, KOBJ_OFFLINE);
+	netif_disconnect(info->netif);
+	info->netif = NULL;
+}
|
|
+
|
|
+/**
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures and switch to InitWait.
+ *
+ * Advertises the backend's features (feature-sg, feature-gso-tcpv4,
+ * feature-rx-copy, feature-rx-flip) in one xenbus transaction, which is
+ * restarted for as long as ending it returns -EAGAIN, then probes
+ * accelerators, switches to XenbusStateInitWait and creates the interface.
+ *
+ * Returns 0 on success or a negative errno.  Once the backend_info is
+ * attached to @dev, every failure path cleans up through netback_remove().
+ */
+static int netback_probe(struct xenbus_device *dev,
+			 const struct xenbus_device_id *id)
+{
+	const char *message;
+	struct xenbus_transaction xbt;
+	int err;
+	int sg;
+	struct backend_info *be = kzalloc(sizeof(struct backend_info),
+					  GFP_KERNEL);
+	if (!be) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "allocating backend structure");
+		return -ENOMEM;
+	}
+
+	be->dev = dev;
+	dev->dev.driver_data = be;
+
+	/* Scatter-gather (and with it GSO) is off in always-copy mode. */
+	sg = 1;
+	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
+		sg = 0;
+
+	do {
+		err = xenbus_transaction_start(&xbt);
+		if (err) {
+			xenbus_dev_fatal(dev, err, "starting transaction");
+			goto fail;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+		if (err) {
+			message = "writing feature-sg";
+			goto abort_transaction;
+		}
+
+		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+				    "%d", sg);
+		if (err) {
+			message = "writing feature-gso-tcpv4";
+			goto abort_transaction;
+		}
+
+		/* We support rx-copy path. */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-copy", "%d", 1);
+		if (err) {
+			message = "writing feature-rx-copy";
+			goto abort_transaction;
+		}
+
+		/*
+		 * We don't support rx-flip path (except old guests who don't
+		 * grok this feature flag).
+		 */
+		err = xenbus_printf(xbt, dev->nodename,
+				    "feature-rx-flip", "%d", 0);
+		if (err) {
+			message = "writing feature-rx-flip";
+			goto abort_transaction;
+		}
+
+		err = xenbus_transaction_end(xbt, 0);
+	} while (err == -EAGAIN);
+
+	if (err) {
+		xenbus_dev_fatal(dev, err, "completing transaction");
+		goto fail;
+	}
+
+	netback_probe_accelerators(be, dev);
+
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
+	if (err)
+		goto fail;
+
+	/* This kicks hotplug scripts, so do it immediately. */
+	backend_create_netif(be);
+
+	return 0;
+
+abort_transaction:
+	/* Second argument 1: end the transaction as an abort. */
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(dev, err, "%s", message);
+fail:
+	DPRINTK("failed");
+	netback_remove(dev);
+	return err;
+}
|
|
+
|
|
+
|
|
+/**
+ * Handle the creation of the hotplug script environment.  We add the script
+ * and vif variables to the environment, for the benefit of the vif-* hotplug
+ * scripts.
+ *
+ * "script" is read from the device's xenstore node; failing to read it is
+ * reported as fatal and returned.  "vif" is only added while an interface
+ * exists: be->netif is NULL after netback_disconnect() and after a failed
+ * backend_create_netif(), and driver_data is NULL after netback_remove().
+ *
+ * Returns 0 on success or the negative errno from reading "script".
+ */
+static int netback_uevent(struct xenbus_device *xdev, char **envp,
+			  int num_envp, char *buffer, int buffer_size)
+{
+	struct backend_info *be = xdev->dev.driver_data;
+	int i = 0, length = 0;
+	char *val;
+
+	DPRINTK("netback_uevent");
+
+	val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
+	if (IS_ERR(val)) {
+		int err = PTR_ERR(val);
+		xenbus_dev_fatal(xdev, err, "reading script");
+		return err;
+	}
+
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+		       &length, "script=%s", val);
+	kfree(val);
+
+	/* Without an interface there is no name to export. */
+	if (be && be->netif)
+		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+			       &length, "vif=%s", be->netif->dev->name);
+
+	envp[i] = NULL;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Create the network interface for @be unless it already exists.
+ *
+ * Reads "handle" from the backend's xenstore node, allocates the netif and
+ * emits a KOBJ_ONLINE uevent.  Failures are reported via
+ * xenbus_dev_fatal() and leave be->netif NULL.
+ */
+static void backend_create_netif(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	netif_t *netif;
+	long handle;
+	int rc;
+
+	if (be->netif != NULL)
+		return;
+
+	rc = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
+	if (rc != 1) {
+		xenbus_dev_fatal(dev, rc, "reading handle");
+		return;
+	}
+
+	netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
+	if (IS_ERR(netif)) {
+		/* be->netif is still NULL here. */
+		xenbus_dev_fatal(dev, PTR_ERR(netif), "creating interface");
+		return;
+	}
+	be->netif = netif;
+
+	kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
+}
|
|
+
|
|
+
|
|
+/**
+ * Callback received when the frontend's state changes.
+ *
+ * Records the new state in the backend_info and reacts to it:
+ *  - Initialising: if we are Closed, go back to InitWait (reconnect);
+ *  - Connected:    create the interface if needed and connect;
+ *  - Closing:      disconnect and follow to Closing;
+ *  - Closed:       follow to Closed; unregister the device unless it is
+ *                  still online;
+ *  - Unknown:      unregister the device;
+ *  - anything else is reported as a fatal error.
+ */
+static void frontend_changed(struct xenbus_device *dev,
+			     enum xenbus_state frontend_state)
+{
+	struct backend_info *be = dev->dev.driver_data;
+
+	DPRINTK("%s", xenbus_strstate(frontend_state));
+
+	be->frontend_state = frontend_state;
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:
+		if (dev->state == XenbusStateClosed) {
+			printk(KERN_INFO "%s: %s: prepare for reconnect\n",
+			       __FUNCTION__, dev->nodename);
+			xenbus_switch_state(dev, XenbusStateInitWait);
+		}
+		break;
+
+	case XenbusStateInitialised:
+		break;
+
+	case XenbusStateConnected:
+		/* Already connected: nothing to do. */
+		if (dev->state == XenbusStateConnected)
+			break;
+
+		/* backend_create_netif() is idempotent */
+		backend_create_netif(be);
+		if (be->netif)
+			connect(be);
+		break;
+
+	case XenbusStateClosing:
+		netback_disconnect(&dev->dev);
+		xenbus_switch_state(dev, XenbusStateClosing);
+		break;
+
+	case XenbusStateClosed:
+		xenbus_switch_state(dev, XenbusStateClosed);
+		if (xenbus_dev_is_online(dev))
+			break;
+		/* fall through if not online */
+	case XenbusStateUnknown:
+		/* implies netback_disconnect() via netback_remove() */
+		device_unregister(&dev->dev);
+		break;
+
+	default:
+		xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
+				 frontend_state);
+		break;
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * Read the optional "rate" key ("<bytes>,<usec>", both decimal) from the
+ * backend's xenstore node into *@bytes and *@usec.  If the key is missing
+ * or malformed the outputs keep their defaults of ~0UL and 0 (unlimited);
+ * a malformed value additionally logs a warning.
+ */
+static void xen_net_read_rate(struct xenbus_device *dev,
+			      unsigned long *bytes, unsigned long *usec)
+{
+	unsigned long b, u;
+	char *ratestr, *pos, *end;
+
+	/* Default to unlimited bandwidth. */
+	*bytes = ~0UL;
+	*usec = 0;
+
+	ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
+	if (IS_ERR(ratestr))
+		return;
+
+	/* First field: byte count, terminated by a comma. */
+	b = simple_strtoul(ratestr, &end, 10);
+	if (end != ratestr && *end == ',') {
+		/* Second field: microseconds, up to the end of the string. */
+		pos = end + 1;
+		u = simple_strtoul(pos, &end, 10);
+		if (end != pos && *end == '\0') {
+			*bytes = b;
+			*usec = u;
+			kfree(ratestr);
+			return;
+		}
+	}
+
+	WPRINTK("Failed to parse network rate limit. Traffic unlimited.\n");
+	kfree(ratestr);
+}
|
|
+
|
|
+/*
+ * Read the "mac" key from the backend's xenstore node and parse it as
+ * ETH_ALEN colon-separated hexadecimal bytes into @mac.  Returns 0 on
+ * success, the xenbus_read() error if the key cannot be read, or -ENOENT
+ * if the string is malformed (in which case @mac may be partly written).
+ */
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+	char *macstr, *pos, *end;
+	int i, rc = 0;
+
+	macstr = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+	if (IS_ERR(macstr))
+		return PTR_ERR(macstr);
+
+	pos = macstr;
+	for (i = 0; i < ETH_ALEN; i++) {
+		/* ':' after every byte but the last, which ends the string. */
+		char expect = (i == ETH_ALEN - 1) ? '\0' : ':';
+
+		mac[i] = simple_strtoul(pos, &end, 16);
+		if (end == pos || *end != expect) {
+			rc = -ENOENT;
+			break;
+		}
+		pos = end + 1;
+	}
+
+	kfree(macstr);
+	return rc;
+}
|
|
+
|
|
+/*
+ * Bring the backend to the Connected state: map the rings, read the
+ * frontend MAC address and the rate limit, reset the credit, switch the
+ * xenbus state and wake the net device queue.  Returns early if the rings
+ * cannot be connected or the MAC cannot be parsed.
+ */
+static void connect(struct backend_info *be)
+{
+	struct xenbus_device *dev = be->dev;
+	netif_t *netif = be->netif;
+	int rc;
+
+	if (connect_rings(be))
+		return;
+
+	rc = xen_net_read_mac(dev, netif->fe_dev_addr);
+	if (rc) {
+		xenbus_dev_fatal(dev, rc, "parsing %s/mac", dev->nodename);
+		return;
+	}
+
+	xen_net_read_rate(dev, &netif->credit_bytes, &netif->credit_usec);
+	netif->remaining_credit = netif->credit_bytes;
+
+	xenbus_switch_state(dev, XenbusStateConnected);
+
+	netif_wake_queue(netif->dev);
+}
|
|
+
|
|
+
|
|
+/*
+ * Read the integer key @name from the frontend's xenstore directory
+ * (dev->otherend).  A failed read yields 0.
+ */
+static int netbk_read_fe_flag(struct xenbus_device *dev, const char *name)
+{
+	int val;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, name, "%d", &val) < 0)
+		val = 0;
+	return val;
+}
+
+/*
+ * Read the ring references, event channel and feature flags published by
+ * the frontend, configure the netif accordingly and map the shared rings.
+ * Returns 0 on success or a negative errno (already reported as fatal).
+ */
+static int connect_rings(struct backend_info *be)
+{
+	netif_t *netif = be->netif;
+	struct xenbus_device *dev = be->dev;
+	unsigned long tx_ring_ref, rx_ring_ref;
+	unsigned int evtchn, rx_copy;
+	int err;
+
+	DPRINTK("");
+
+	err = xenbus_gather(XBT_NIL, dev->otherend,
+			    "tx-ring-ref", "%lu", &tx_ring_ref,
+			    "rx-ring-ref", "%lu", &rx_ring_ref,
+			    "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+				 "reading %s/ring-ref and event-channel",
+				 dev->otherend);
+		return err;
+	}
+
+	err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
+			   &rx_copy);
+	if (err == -ENOENT) {
+		/* Key absent: the frontend did not ask for rx-copy. */
+		rx_copy = 0;
+	} else if (err < 0) {
+		xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
+				 dev->otherend);
+		return err;
+	}
+	netif->copying_receiver = !!rx_copy;
+
+	if (netif->dev->tx_queue_len != 0) {
+		if (netbk_read_fe_flag(dev, "feature-rx-notify"))
+			netif->can_queue = 1;
+		else
+			/* Must be non-zero for pfifo_fast to work. */
+			netif->dev->tx_queue_len = 1;
+	}
+
+	netif->can_sg = !!netbk_read_fe_flag(dev, "feature-sg");
+	netif->gso = !!netbk_read_fe_flag(dev, "feature-gso-tcpv4");
+	netif->csum = !netbk_read_fe_flag(dev, "feature-no-csum-offload");
+
+	/* Set dev->features */
+	netif_set_features(netif);
+
+	/* Map the shared frame, irq etc. */
+	err = netif_map(netif, tx_ring_ref, rx_ring_ref, evtchn);
+	if (err)
+		xenbus_dev_fatal(dev, err,
+				 "mapping shared-frames %lu/%lu port %u",
+				 tx_ring_ref, rx_ring_ref, evtchn);
+	return err;
+}
|
|
+
|
|
+
|
|
+/* ** Driver Registration ** */
|
|
+
|
|
+
|
|
+/* Device types handled by this driver; the empty entry ends the table. */
+static const struct xenbus_device_id netback_ids[] = {
+	{ "vif" },
+	{ "" }
+};
|
|
+
|
|
+
|
|
+/* xenbus driver descriptor wiring the callbacks defined in this file. */
+static struct xenbus_driver netback = {
+	.name = "vif",
+	.owner = THIS_MODULE,
+	.ids = netback_ids,
+	.probe = netback_probe,
+	.remove = netback_remove,
+	.uevent = netback_uevent,
+	.otherend_changed = frontend_changed,
+};
|
|
+
|
|
+
|
|
+/*
+ * Register the "vif" backend driver with xenbus.
+ * NOTE(review): the result of xenbus_register_backend() is discarded, so a
+ * failed registration goes unnoticed -- confirm whether that is intended.
+ */
+void netif_xenbus_init(void)
+{
+	xenbus_register_backend(&netback);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netfront/Makefile 2007-07-12 08:54:23.000000000 +0200
|
|
@@ -0,0 +1,4 @@
|
|
+
|
|
+obj-$(CONFIG_XEN_NETDEV_FRONTEND) := xennet.o
|
|
+
|
|
+xennet-objs := netfront.o accel.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netfront/accel.c 2009-05-04 10:01:03.000000000 +0200
|
|
@@ -0,0 +1,827 @@
|
|
+/******************************************************************************
|
|
+ * Virtual network driver for conversing with remote driver backends.
|
|
+ *
|
|
+ * Copyright (C) 2007 Solarflare Communications, Inc.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/netdevice.h>
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+#include "netfront.h"
|
|
+
|
|
+/* Debug message, tagged with the calling function name and source line. */
+#define DPRINTK(fmt, args...)						\
+	pr_debug("netfront/accel (%s:%d) " fmt, __FUNCTION__, __LINE__,	\
+		 ##args)
+/* Informational message carrying the "netfront/accel: " tag. */
+#define IPRINTK(fmt, args...)						\
+	printk(KERN_INFO "netfront/accel: " fmt, ##args)
+/* Warning message carrying the "netfront/accel: " tag. */
+#define WPRINTK(fmt, args...)						\
+	printk(KERN_WARNING "netfront/accel: " fmt, ##args)
|
|
+
|
|
+static int netfront_remove_accelerator(struct netfront_info *np,
|
|
+ struct xenbus_device *dev);
|
|
+static int netfront_load_accelerator(struct netfront_info *np,
|
|
+ struct xenbus_device *dev,
|
|
+ const char *frontend);
|
|
+
|
|
+static void netfront_accelerator_remove_watch(struct netfront_info *np);
|
|
+
|
|
+/*
|
|
+ * List of all netfront accelerator plugin modules available. Each
|
|
+ * list entry is of type struct netfront_accelerator.
|
|
+ */
|
|
+static struct list_head accelerators_list;
|
|
+
|
|
+/* Workqueue to process acceleration configuration changes */
|
|
+struct workqueue_struct *accel_watch_workqueue;
|
|
+
|
|
+/* Mutex to prevent concurrent loads and suspends, etc. */
|
|
+DEFINE_MUTEX(accelerator_mutex);
|
|
+
|
|
+/*
+ * One-time setup of the accelerator support: create the "net_accel"
+ * workqueue used for watch processing and start with an empty list of
+ * known accelerators.
+ */
+void netif_init_accel(void)
+{
+	accel_watch_workqueue = create_workqueue("net_accel");
+
+	INIT_LIST_HEAD(&accelerators_list);
+}
|
|
+
|
|
+/*
+ * Undo netif_init_accel(): drain and destroy the watch workqueue, then
+ * free every entry on accelerators_list together with its name string.
+ * An entry that still has vifs attached triggers BUG_ON().
+ */
+void netif_exit_accel(void)
+{
+	struct netfront_accelerator *accel, *next;
+
+	flush_workqueue(accel_watch_workqueue);
+	destroy_workqueue(accel_watch_workqueue);
+
+	/* No lock required as everything else should be quiet by now */
+	list_for_each_entry_safe(accel, next, &accelerators_list, link) {
+		BUG_ON(!list_empty(&accel->vif_states));
+
+		list_del(&accel->link);
+		kfree(accel->frontend);
+		kfree(accel);
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * Watch the configured accelerator and change plugin if it's modified.
+ *
+ * Work item behind the "accel-frontend" watch.  It reads that node from
+ * the backend's xenstore directory and, with accelerator_mutex held:
+ *  - if the read fails, detaches the vif from its accelerator;
+ *  - if no name has been recorded yet, requests the named accelerator;
+ *  - if the name differs from the recorded one, detaches the vif from
+ *    the old accelerator and requests the new one.
+ * The string just read (or NULL after a failed read) then replaces the
+ * recorded name.  When the request succeeded, the module of that name
+ * is requested once the mutex has been released.
+ */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+static void accel_watch_work(struct work_struct *context)
+#else
+static void accel_watch_work(void *context)
+#endif
+{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+	struct netfront_accel_vif_state *vif_state =
+		container_of(context, struct netfront_accel_vif_state,
+			     accel_work);
+#else
+	struct netfront_accel_vif_state *vif_state =
+		(struct netfront_accel_vif_state *)context;
+#endif
+	struct netfront_info *np = vif_state->np;
+	char *accel_frontend;
+	int accel_len, rc = -1;
+
+	mutex_lock(&accelerator_mutex);
+
+	accel_frontend = xenbus_read(XBT_NIL, np->xbdev->otherend,
+				     "accel-frontend", &accel_len);
+	if (IS_ERR(accel_frontend)) {
+		accel_frontend = NULL;
+		netfront_remove_accelerator(np, np->xbdev);
+	} else if (vif_state->accel_frontend == NULL) {
+		/* First time through: request the accelerator */
+		rc = netfront_load_accelerator(np, np->xbdev,
+					       accel_frontend);
+	} else if (strcmp(vif_state->accel_frontend, accel_frontend)) {
+		/*
+		 * The name has changed.  Compare the whole strings: a
+		 * comparison limited to the length of the new name
+		 * would treat a new name that is a prefix of the old
+		 * one (or an empty name) as "unchanged".
+		 */
+		netfront_remove_accelerator(np, np->xbdev);
+		rc = netfront_load_accelerator(np, np->xbdev,
+					       accel_frontend);
+	}
+
+	/*
+	 * Get rid of previous state and replace with the new name
+	 * (kfree() accepts NULL, so no guard is needed)
+	 */
+	kfree(vif_state->accel_frontend);
+	vif_state->accel_frontend = accel_frontend;
+
+	mutex_unlock(&accelerator_mutex);
+
+	if (rc == 0) {
+		DPRINTK("requesting module %s\n", accel_frontend);
+		request_module("%s", accel_frontend);
+		/*
+		 * Module should now call netfront_accelerator_loaded() once
+		 * it's up and running, and we can continue from there
+		 */
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * xenbus watch callback for "accel-frontend".  Does no work itself; it
+ * queues the owning vif's work item on accel_watch_workqueue.
+ */
+static void accel_watch_changed(struct xenbus_watch *watch,
+				const char **vec, unsigned int len)
+{
+	struct netfront_accel_vif_state *state;
+
+	state = container_of(watch, struct netfront_accel_vif_state,
+			     accel_watch);
+
+	queue_work(accel_watch_workqueue, &state->accel_work);
+}
|
|
+
|
|
+
|
|
+/*
+ * (Re)install the xenbus watch on the backend's "accel-frontend" node
+ * for this vif.  If registration fails the watch node pointer is reset
+ * to NULL, so a later removal finds nothing to unregister.
+ */
+void netfront_accelerator_add_watch(struct netfront_info *np)
+{
+	struct netfront_accel_vif_state *state = &np->accel_vif_state;
+	int rc;
+
+	/*
+	 * A watch may still be around, e.g. from before suspend/resume;
+	 * drop it before registering a new one
+	 */
+	netfront_accelerator_remove_watch(np);
+
+	/* Get a watch on the accelerator plugin */
+	rc = xenbus_watch_path2(np->xbdev, np->xbdev->otherend,
+				"accel-frontend", &state->accel_watch,
+				accel_watch_changed);
+	if (rc == 0)
+		return;
+
+	DPRINTK("%s: Failed to register accel watch: %d\n",
+		__FUNCTION__, rc);
+	state->accel_watch.node = NULL;
+}
|
|
+
|
|
+
|
|
+/*
+ * Wait for queued watch work to finish, then forget the accelerator
+ * name recorded by the watch handler.
+ */
+static void
+netfront_accelerator_purge_watch(struct netfront_accel_vif_state *vif_state)
+{
+	flush_workqueue(accel_watch_workqueue);
+
+	/* Clean up any state left from watch; kfree(NULL) is a no-op */
+	kfree(vif_state->accel_frontend);
+	vif_state->accel_frontend = NULL;
+}
|
|
+
|
|
+
|
|
+/*
+ * Unregister the "accel-frontend" watch of a vif, if one is installed
+ * (node pointer non-NULL), free its node string and purge the state the
+ * watch handler left behind.
+ */
+static
+void netfront_accelerator_remove_watch(struct netfront_info *np)
+{
+	struct netfront_accel_vif_state *state = &np->accel_vif_state;
+
+	/* Nothing to do when no watch is installed */
+	if (state->accel_watch.node == NULL)
+		return;
+
+	/* Get rid of watch on accelerator plugin */
+	unregister_xenbus_watch(&state->accel_watch);
+	kfree(state->accel_watch.node);
+	state->accel_watch.node = NULL;
+
+	netfront_accelerator_purge_watch(state);
+}
|
|
+
|
|
+
|
|
+/*
+ * Initialise the accel_vif_state field in the netfront state: no
+ * accelerator attached, back-pointers to the netfront state and the
+ * xenbus device filled in, and the watch work item bound to
+ * accel_watch_work().
+ */
+void init_accelerator_vif(struct netfront_info *np,
+			  struct xenbus_device *dev)
+{
+	struct netfront_accel_vif_state *state = &np->accel_vif_state;
+
+	np->accelerator = NULL;
+
+	/* It's assumed that these things don't change */
+	state->np = np;
+	state->dev = dev;
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+	INIT_WORK(&state->accel_work, accel_watch_work);
+#else
+	INIT_WORK(&state->accel_work, accel_watch_work, state);
+#endif
+}
|
|
+
|
|
+
|
|
+/*
+ * Return non-zero when the frontend description string is exactly the
+ * name stored in the accelerator, zero otherwise.  Would ultimately be
+ * nice to replace the string with a unique numeric identifier for each
+ * accelerator.
+ */
+static int match_accelerator(const char *frontend,
+			     struct netfront_accelerator *accelerator)
+{
+	return !strcmp(frontend, accelerator->frontend);
+}
|
|
+
|
|
+
|
|
+/*
+ * Attach a frontend vif to a netfront accelerator plugin module by
+ * recording the accelerator in the vif and putting the vif's state on
+ * the accelerator's vif_states list.  Must be called with the
+ * accelerator mutex held.
+ */
+static void add_accelerator_vif(struct netfront_accelerator *accelerator,
+				struct netfront_info *np)
+{
+	if (np->accelerator != NULL) {
+		/*
+		 * Already attached.  May get here legitimately if
+		 * suspend_cancel is called, but in that case
+		 * configuration should not have changed
+		 */
+		BUG_ON(np->accelerator != accelerator);
+		return;
+	}
+
+	np->accelerator = accelerator;
+	list_add(&np->accel_vif_state.link, &accelerator->vif_states);
+}
|
|
+
|
|
+
|
|
+/*
+ * Initialise the state to track an accelerator plugin module.
+ *
+ * Allocates a netfront_accelerator holding a private copy of the
+ * frontend name, an empty vif list and the given hooks (which may be
+ * NULL), and adds it to accelerators_list.
+ *
+ * On success returns 0 and stores the new entry in *result.  Returns
+ * -ENOMEM, leaving *result untouched, if either allocation fails.
+ *
+ * Must be called with the accelerator mutex held.
+ */
+static int init_accelerator(const char *frontend,
+			    struct netfront_accelerator **result,
+			    struct netfront_accel_hooks *hooks)
+{
+	struct netfront_accelerator *accelerator;
+
+	accelerator = kmalloc(sizeof(*accelerator), GFP_KERNEL);
+	if (!accelerator) {
+		DPRINTK("no memory for accelerator\n");
+		return -ENOMEM;
+	}
+
+	/* kstrdup() replaces the open-coded strlen/kmalloc/strlcpy */
+	accelerator->frontend = kstrdup(frontend, GFP_KERNEL);
+	if (!accelerator->frontend) {
+		DPRINTK("no memory for accelerator\n");
+		kfree(accelerator);
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&accelerator->vif_states);
+	spin_lock_init(&accelerator->vif_states_lock);
+
+	accelerator->hooks = hooks;
+
+	list_add(&accelerator->link, &accelerators_list);
+
+	*result = accelerator;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Copy the hooks currently stored in the vif's accelerator into the
+ * per-vif state.  Polling is disabled and the tx lock held around the
+ * update so that it does not race with the data path.
+ *
+ * Takes the vif_states_lock spinlock and may sleep.
+ */
+static void
+accelerator_set_vif_state_hooks(struct netfront_accel_vif_state *vif_state)
+{
+	struct net_device *netdev = vif_state->np->netdev;
+	struct netfront_accelerator *accel;
+	unsigned long irq_flags;
+
+	DPRINTK("%p\n",vif_state);
+
+	/* Make sure there are no data path operations going on */
+	netif_poll_disable(netdev);
+	netif_tx_lock_bh(netdev);
+
+	accel = vif_state->np->accelerator;
+
+	spin_lock_irqsave(&accel->vif_states_lock, irq_flags);
+	vif_state->hooks = accel->hooks;
+	spin_unlock_irqrestore(&accel->vif_states_lock, irq_flags);
+
+	netif_tx_unlock_bh(netdev);
+	netif_poll_enable(netdev);
+}
|
|
+
|
|
+
|
|
+/*
+ * Attach a vif to an accelerator that is already on the list.  If the
+ * accelerator has hooks and its new_device hook returns 0 for this
+ * vif, the hooks are wired into the vif state.
+ *
+ * Must be called with the accelerator mutex held.  Takes the
+ * vif_states_lock spinlock.
+ */
+static void accelerator_probe_new_vif(struct netfront_info *np,
+				      struct xenbus_device *dev,
+				      struct netfront_accelerator *accelerator)
+{
+	struct netfront_accel_hooks *hooks;
+
+	DPRINTK("\n");
+
+	/* Include this frontend device on the accelerator's list */
+	add_accelerator_vif(accelerator, np);
+
+	hooks = accelerator->hooks;
+	if (hooks == NULL)
+		return;
+
+	if (hooks->new_device(np->netdev, dev) != 0)
+		return;
+
+	accelerator_set_vif_state_hooks(&np->accel_vif_state);
+}
|
|
+
|
|
+
|
|
+/*
+ * Request that a particular netfront accelerator plugin is loaded.
+ * Usually called as a result of the vif configuration specifying
+ * which one to use.
+ *
+ * If an accelerator of that name is already on the list the vif is
+ * probed against it and 0 is returned.  Otherwise a new entry without
+ * hooks is created and the vif attached to it; the result of
+ * init_accelerator() is returned.
+ *
+ * Must be called with accelerator_mutex held.  Takes the
+ * vif_states_lock spinlock.
+ */
+static int netfront_load_accelerator(struct netfront_info *np,
+				     struct xenbus_device *dev,
+				     const char *frontend)
+{
+	struct netfront_accelerator *accel;
+	int err;
+
+	DPRINTK(" %s\n", frontend);
+
+	/* Is the requested accelerator already known? */
+	list_for_each_entry(accel, &accelerators_list, link) {
+		if (!match_accelerator(frontend, accel))
+			continue;
+
+		accelerator_probe_new_vif(np, dev, accel);
+		return 0;
+	}
+
+	/* Couldn't find it, so create a new one and load the module */
+	err = init_accelerator(frontend, &accel, NULL);
+	if (err < 0)
+		return err;
+
+	/* Include this frontend device on the accelerator's list */
+	add_accelerator_vif(accel, np);
+
+	return err;
+}
|
|
+
|
|
+
|
|
+/*
+ * Go through all the netfront vifs that have requested this
+ * accelerator and notify the accelerator plugin of each device.
+ * Called when an accelerator plugin module is first loaded and
+ * connects to netfront.
+ *
+ * Must be called with accelerator_mutex held.  Takes the
+ * vif_states_lock spinlock.
+ */
+static void
+accelerator_probe_vifs(struct netfront_accelerator *accelerator,
+		       struct netfront_accel_hooks *hooks)
+{
+	struct netfront_accel_vif_state *state, *next;
+
+	DPRINTK("%p\n", accelerator);
+
+	/*
+	 * Remember the hooks: they are needed for probing devices that
+	 * turn up later, and for wiring into each vif_state once the
+	 * plugin is ready to accelerate that vif
+	 */
+	BUG_ON(hooks == NULL);
+	accelerator->hooks = hooks;
+
+	/* Holds accelerator_mutex to iterate list */
+	list_for_each_entry_safe(state, next, &accelerator->vif_states,
+				 link) {
+		if (hooks->new_device(state->np->netdev, state->dev) != 0)
+			continue;
+
+		accelerator_set_vif_state_hooks(state);
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * Called by the netfront accelerator plugin module when it has
+ * loaded.
+ *
+ * Returns:
+ *   -EINVAL  when running in the initial Xen domain;
+ *   NETFRONT_ACCEL_VERSION (positive) when the caller's version is
+ *            higher than ours, leaving the decision to the caller;
+ *   -EPROTO  when the caller's version is lower than ours;
+ *   0        when the accelerator was found and its vifs probed, or
+ *            when it was newly recorded for later requests;
+ *   a negative error from init_accelerator() (e.g. -ENOMEM) when the
+ *            accelerator could not be recorded.
+ *
+ * Takes the accelerator_mutex and vif_states_lock spinlock.
+ */
+int netfront_accelerator_loaded(int version, const char *frontend,
+				struct netfront_accel_hooks *hooks)
+{
+	struct netfront_accelerator *accelerator;
+	int rc = 0;
+
+	if (is_initial_xendomain())
+		return -EINVAL;
+
+	if (version != NETFRONT_ACCEL_VERSION) {
+		if (version > NETFRONT_ACCEL_VERSION) {
+			/* Caller has higher version number, leave it
+			   up to them to decide whether to continue.
+			   They can re-call with a lower number if
+			   they're happy to be compatible with us */
+			return NETFRONT_ACCEL_VERSION;
+		} else {
+			/* We have a more recent version than caller.
+			   Currently reject, but may in future be able
+			   to be backward compatible */
+			return -EPROTO;
+		}
+	}
+
+	mutex_lock(&accelerator_mutex);
+
+	/*
+	 * Look through list of accelerators to see if it has already
+	 * been requested
+	 */
+	list_for_each_entry(accelerator, &accelerators_list, link) {
+		if (match_accelerator(frontend, accelerator)) {
+			accelerator_probe_vifs(accelerator, hooks);
+			goto out;
+		}
+	}
+
+	/*
+	 * If it wasn't in the list, add it now so that when it is
+	 * requested the caller will find it
+	 */
+	DPRINTK("Couldn't find matching accelerator (%s)\n",
+		frontend);
+
+	/* Report an allocation failure instead of claiming success */
+	rc = init_accelerator(frontend, &accelerator, hooks);
+
+ out:
+	mutex_unlock(&accelerator_mutex);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(netfront_accelerator_loaded);
|
|
+
|
|
+
|
|
+/*
+ * Clear the hooks pointer of a single vif state, with polling disabled
+ * and the tx lock held so no data path operation is in flight.
+ *
+ * Takes the vif_states_lock spinlock and may sleep.
+ */
+static void
+accelerator_remove_single_hook(struct netfront_accelerator *accelerator,
+			       struct netfront_accel_vif_state *vif_state)
+{
+	struct net_device *netdev = vif_state->np->netdev;
+	unsigned long irq_flags;
+
+	/* Make sure there are no data path operations going on */
+	netif_poll_disable(netdev);
+	netif_tx_lock_bh(netdev);
+
+	spin_lock_irqsave(&accelerator->vif_states_lock, irq_flags);
+	/*
+	 * Only the hooks go; the vif_state stays on the accelerator's
+	 * list, as that signifies this vif is interested in using that
+	 * accelerator if it becomes available again
+	 */
+	vif_state->hooks = NULL;
+	spin_unlock_irqrestore(&accelerator->vif_states_lock, irq_flags);
+
+	netif_tx_unlock_bh(netdev);
+	netif_poll_enable(netdev);
+}
|
|
+
|
|
+
|
|
+/*
+ * Safely remove the accelerator function hooks from every vif that
+ * currently has them: collect final statistics, clear the vif's hooks
+ * and call the accelerator's remove hook for the device.  Finally the
+ * accelerator's own hooks pointer is cleared.
+ *
+ * Must be called with the accelerator mutex held.  Takes the
+ * vif_states_lock spinlock.
+ */
+static void accelerator_remove_hooks(struct netfront_accelerator *accelerator)
+{
+	struct netfront_accel_vif_state *state, *next;
+	unsigned long irq_flags;
+
+	/* Mutex is held to iterate list */
+	list_for_each_entry_safe(state, next, &accelerator->vif_states,
+				 link) {
+		if (!state->hooks)
+			continue;
+
+		/* Last chance to get statistics from the accelerator */
+		spin_lock_irqsave(&accelerator->vif_states_lock, irq_flags);
+		state->hooks->get_stats(state->np->netdev,
+					&state->np->stats);
+		spin_unlock_irqrestore(&accelerator->vif_states_lock,
+				       irq_flags);
+
+		accelerator_remove_single_hook(accelerator, state);
+
+		accelerator->hooks->remove(state->dev);
+	}
+
+	accelerator->hooks = NULL;
+}
|
|
+
|
|
+
|
|
+/*
+ * Called by a netfront accelerator when it is unloaded.  This safely
+ * removes the hooks into the plugin and blocks until all devices have
+ * finished using it, so on return it is safe to unload.  A name that
+ * matches no known accelerator is ignored.
+ *
+ * Takes the accelerator mutex, and vif_states_lock spinlock.
+ */
+void netfront_accelerator_stop(const char *frontend)
+{
+	struct netfront_accelerator *accel;
+
+	mutex_lock(&accelerator_mutex);
+
+	list_for_each_entry(accel, &accelerators_list, link) {
+		if (match_accelerator(frontend, accel)) {
+			accelerator_remove_hooks(accel);
+			break;
+		}
+	}
+
+	mutex_unlock(&accelerator_mutex);
+}
+EXPORT_SYMBOL_GPL(netfront_accelerator_stop);
|
|
+
|
|
+
|
|
+/*
+ * Helper for call_remove and do_suspend.
+ *
+ * If the vif has hooks installed: fetch final statistics, clear the
+ * vif's hooks and return the result of the accelerator's remove hook.
+ * Returns 0 when the vif has no hooks.
+ *
+ * Must be called with the accelerator mutex held.  Takes the
+ * vif_states_lock spinlock.
+ */
+static int do_remove(struct netfront_info *np, struct xenbus_device *dev)
+{
+	struct netfront_accel_vif_state *state = &np->accel_vif_state;
+	struct netfront_accelerator *accel = np->accelerator;
+	unsigned long irq_flags;
+
+	if (!state->hooks)
+		return 0;
+
+	/* Last chance to get statistics from the accelerator */
+	spin_lock_irqsave(&accel->vif_states_lock, irq_flags);
+	state->hooks->get_stats(np->netdev, &np->stats);
+	spin_unlock_irqrestore(&accel->vif_states_lock, irq_flags);
+
+	/*
+	 * Try to do the opposite of accelerator_probe_new_vif to
+	 * ensure there's no state pointing back at the netdev
+	 */
+	accelerator_remove_single_hook(accel, state);
+
+	return accel->hooks->remove(dev);
+}
|
|
+
|
|
+
|
|
+/*
+ * Detach a vif from its accelerator, if it has one: take its state off
+ * the accelerator's vif list, run do_remove() and clear the vif's
+ * accelerator pointer.  Returns do_remove()'s result, or 0 when the
+ * vif was not attached to an accelerator.
+ *
+ * Must be called with the accelerator mutex held.  Takes the
+ * vif_states_lock spinlock
+ */
+static int netfront_remove_accelerator(struct netfront_info *np,
+				       struct xenbus_device *dev)
+{
+	struct netfront_accel_vif_state *own = &np->accel_vif_state;
+	struct netfront_accel_vif_state *pos;
+	int rc;
+
+	/* Check that we've got a device that was accelerated */
+	if (np->accelerator == NULL)
+		return 0;
+
+	/* Unlink our state only if it really is on the list */
+	list_for_each_entry(pos, &np->accelerator->vif_states, link) {
+		if (pos != own)
+			continue;
+
+		list_del(&own->link);
+		break;
+	}
+
+	rc = do_remove(np, dev);
+	np->accelerator = NULL;
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Remove the accelerator watch of a vif and then, under the
+ * accelerator mutex, detach the vif from its accelerator.  Returns the
+ * result of netfront_remove_accelerator().
+ *
+ * No lock pre-requisites.  Takes the accelerator mutex and the
+ * vif_states_lock spinlock.
+ */
+int netfront_accelerator_call_remove(struct netfront_info *np,
+				     struct xenbus_device *dev)
+{
+	int rc;
+
+	netfront_accelerator_remove_watch(np);
+
+	mutex_lock(&accelerator_mutex);
+	rc = netfront_remove_accelerator(np, dev);
+	mutex_unlock(&accelerator_mutex);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Suspend path: if the vif is attached to an accelerator, call the
+ * remove accelerator hook via do_remove(), but leave the vif_state on
+ * the accelerator's list in case there is a suspend_cancel.  Returns
+ * do_remove()'s result, or 0 for a vif without accelerator.
+ *
+ * No lock pre-requisites.  Takes the accelerator mutex and the
+ * vif_states_lock spinlock.
+ */
+int netfront_accelerator_suspend(struct netfront_info *np,
+				 struct xenbus_device *dev)
+{
+	int rc = 0;
+
+	mutex_lock(&accelerator_mutex);
+
+	/* Only a device that was accelerated needs any work */
+	if (np->accelerator != NULL)
+		rc = do_remove(np, dev);
+
+	mutex_unlock(&accelerator_mutex);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Suspend was cancelled: purge the state left by the watch handler
+ * and, if the device is in the Connected state, queue the watch work
+ * again so the configured accelerator is reinstated.  Always returns 0.
+ */
+int netfront_accelerator_suspend_cancel(struct netfront_info *np,
+					struct xenbus_device *dev)
+{
+	struct netfront_accel_vif_state *state = &np->accel_vif_state;
+
+	netfront_accelerator_purge_watch(state);
+
+	if (dev->state != XenbusStateConnected)
+		return 0;
+
+	/*
+	 * Gratuitously fire the watch handler to reinstate the
+	 * configured accelerator
+	 */
+	queue_work(accel_watch_workqueue, &state->accel_work);
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Resume path: if the vif is attached to an accelerator, look up the
+ * vif state belonging to the given device on the accelerator's list,
+ * unlink it and clear the vif's accelerator pointer.
+ *
+ * No lock pre-requisites.  Takes the accelerator mutex
+ */
+void netfront_accelerator_resume(struct netfront_info *np,
+				 struct xenbus_device *dev)
+{
+	struct netfront_accel_vif_state *state;
+
+	mutex_lock(&accelerator_mutex);
+
+	/* Only a device that was accelerated needs any work */
+	if (np->accelerator != NULL) {
+		/* Find the vif_state from the accelerator's list */
+		list_for_each_entry(state, &np->accelerator->vif_states,
+				    link) {
+			if (state->dev != dev)
+				continue;
+
+			BUG_ON(state != &np->accel_vif_state);
+
+			/*
+			 * Remove it from the accelerator's list so
+			 * state is consistent for probing new vifs
+			 * when they get connected
+			 */
+			list_del(&state->link);
+			np->accelerator = NULL;
+
+			break;
+		}
+	}
+
+	mutex_unlock(&accelerator_mutex);
+}
|
|
+
|
|
+
|
|
+/*
+ * Call the accelerator's check_ready hook for a device and return its
+ * result.  Returns 1 when the vif has no hooks or no accelerator, or
+ * when either changed before the spinlock was taken.
+ *
+ * No lock pre-requisites.  Takes the vif_states_lock spinlock
+ */
+int netfront_check_accelerator_queue_ready(struct net_device *dev,
+					   struct netfront_info *np)
+{
+	struct netfront_accelerator *accel = np->accelerator;
+	unsigned long irq_flags;
+	int ready = 1;
+
+	if (!np->accel_vif_state.hooks || !accel)
+		return ready;
+
+	spin_lock_irqsave(&accel->vif_states_lock, irq_flags);
+	/* Check again now that the lock is held */
+	if (np->accel_vif_state.hooks && np->accelerator == accel)
+		ready = np->accel_vif_state.hooks->check_ready(dev);
+	spin_unlock_irqrestore(&accel->vif_states_lock, irq_flags);
+
+	return ready;
+}
|
|
+
|
|
+
|
|
+/*
+ * Call the accelerator's stop_napi_irq hook for a device.  Does
+ * nothing when the vif has no hooks or no accelerator, or when either
+ * changed before the spinlock was taken.
+ *
+ * No lock pre-requisites.  Takes the vif_states_lock spinlock
+ */
+void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np,
+					     struct net_device *dev)
+{
+	struct netfront_accelerator *accel = np->accelerator;
+	unsigned long irq_flags;
+
+	if (!np->accel_vif_state.hooks || accel == NULL)
+		return;
+
+	spin_lock_irqsave(&accel->vif_states_lock, irq_flags);
+	/* Check again now that the lock is held */
+	if (np->accel_vif_state.hooks && np->accelerator == accel)
+		np->accel_vif_state.hooks->stop_napi_irq(dev);
+	spin_unlock_irqrestore(&accel->vif_states_lock, irq_flags);
+}
|
|
+
|
|
+
|
|
+/*
+ * Call the accelerator's get_stats hook for a device, passing the
+ * vif's stats structure, and return the hook's result.  Returns 0 when
+ * the vif has no hooks or no accelerator, or when either changed
+ * before the spinlock was taken.
+ *
+ * No lock pre-requisites.  Takes the vif_states_lock spinlock
+ */
+int netfront_accelerator_call_get_stats(struct netfront_info *np,
+					struct net_device *dev)
+{
+	struct netfront_accelerator *accel = np->accelerator;
+	unsigned long irq_flags;
+	int rc = 0;
+
+	if (!np->accel_vif_state.hooks || accel == NULL)
+		return rc;
+
+	spin_lock_irqsave(&accel->vif_states_lock, irq_flags);
+	/* Check again now that the lock is held */
+	if (np->accel_vif_state.hooks && np->accelerator == accel)
+		rc = np->accel_vif_state.hooks->get_stats(dev, &np->stats);
+	spin_unlock_irqrestore(&accel->vif_states_lock, irq_flags);
+
+	return rc;
+}
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netfront/netfront.c 2010-11-25 09:36:37.000000000 +0100
|
|
@@ -0,0 +1,2255 @@
|
|
+/******************************************************************************
|
|
+ * Virtual network driver for conversing with remote driver backends.
|
|
+ *
|
|
+ * Copyright (c) 2002-2005, K A Fraser
|
|
+ * Copyright (c) 2005, XenSource Ltd
|
|
+ * Copyright (C) 2007 Solarflare Communications, Inc.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/version.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/netdevice.h>
|
|
+#include <linux/inetdevice.h>
|
|
+#include <linux/etherdevice.h>
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/bitops.h>
|
|
+#include <linux/ethtool.h>
|
|
+#include <linux/in.h>
|
|
+#include <linux/if_ether.h>
|
|
+#include <linux/io.h>
|
|
+#include <linux/moduleparam.h>
|
|
+#include <net/sock.h>
|
|
+#include <net/pkt_sched.h>
|
|
+#include <net/arp.h>
|
|
+#include <net/route.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/io/netif.h>
|
|
+#include <xen/interface/memory.h>
|
|
+#include <xen/balloon.h>
|
|
+#include <asm/page.h>
|
|
+#include <asm/maddr.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+#include <xen/gnttab.h>
|
|
+
|
|
+/* Driver-private data overlaid on an skb's control buffer (skb->cb). */
+struct netfront_cb {
+	struct page *page;
+	unsigned int offset;
+};
+
+/* View the control buffer of @skb as a struct netfront_cb. */
+#define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))
|
|
+
|
|
+#include "netfront.h"
|
|
+
|
|
+/*
|
|
+ * Mutually-exclusive module options to select receive data path:
|
|
+ * rx_copy : Packets are copied by network backend into local memory
|
|
+ * rx_flip : Page containing packet data is transferred to our ownership
|
|
+ * For fully-virtualised guests there is no option - copying must be used.
|
|
+ * For paravirtualised guests, flipping is the default.
|
|
+ */
|
|
+#ifndef CONFIG_XEN
+/* Without CONFIG_XEN the choice is fixed: always copy, never flip. */
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#else
+/* Both options are off unless set on the module command line. */
+static int MODPARM_rx_copy;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#endif
|
|
+
|
|
+#define RX_COPY_THRESHOLD 256
|
|
+
|
|
+/*
+ * GSO/TSO compatibility layer.  If the kernel does not provide GSO, fake
+ * things up so that we never try to use it.
+ */
+#if defined(NETIF_F_GSO)
+#define HAVE_GSO 1
+#define HAVE_TSO 1 /* TSO is a subset of GSO */
+#define HAVE_CSUM_OFFLOAD 1
+
+/* Turn off all GSO feature bits on @dev except NETIF_F_GSO_ROBUST. */
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+	dev->features = (dev->features & ((1 << NETIF_F_GSO_SHIFT) - 1)) |
+			NETIF_F_GSO_ROBUST;
+}
+#elif defined(NETIF_F_TSO)
+#define HAVE_GSO 0
+#define HAVE_TSO 1
+
+/*
+ * Some older kernels cannot cope with incorrect checksums,
+ * particularly in netfilter.  I'm not sure there is 100% correlation
+ * with the presence of NETIF_F_TSO but it appears to be a good first
+ * approximation.
+ */
+#define HAVE_CSUM_OFFLOAD 0
+
+/* Map the GSO field names onto their TSO-era equivalents. */
+#define gso_size tso_size
+#define gso_segs tso_segs
+
+/* Turn off all TSO bits on @dev. */
+static inline void dev_disable_gso_features(struct net_device *dev)
+{
+	dev->features &= ~NETIF_F_TSO;
+}
+
+/* Non-zero when @skb carries a segmentation size. */
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->tso_size;
+}
+
+/* Non-zero when @features includes NETIF_F_TSO; @skb is not examined. */
+static inline int skb_gso_ok(struct sk_buff *skb, int features)
+{
+	return (features & NETIF_F_TSO);
+}
+
+/*
+ * A GSO skb needs software segmentation when the device lacks TSO or the
+ * skb's checksum state is not CHECKSUM_HW.
+ */
+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+{
+	if (!skb_is_gso(skb))
+		return 0;
+
+	return !skb_gso_ok(skb, dev->features) ||
+	       unlikely(skb->ip_summed != CHECKSUM_HW);
+}
+#else
+/* Neither GSO nor TSO: stub everything out. */
+#define HAVE_GSO 0
+#define HAVE_TSO 0
+#define HAVE_CSUM_OFFLOAD 0
+#define netif_needs_gso(dev, skb)	0
+#define dev_disable_gso_features(dev)	((void)0)
+#define ethtool_op_set_tso(dev, data)	(-ENOSYS)
+#endif
|
|
+
|
|
+#define GRANT_INVALID_REF 0 /* stored wherever no grant reference is currently assigned */
|
|
+/* One RX response together with storage for its extra-info segments. */
|
|
+struct netfront_rx_info {
|
|
+ struct netif_rx_response rx;
|
|
+ struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; /* NOTE(review): presumably one slot per extra-info type; confirm against the users */
|
|
+};
|
|
+
|
|
+/*
+ * Implement our own carrier flag: the network stack's version causes delays
+ * when the carrier is re-enabled (in particular, dev_activate() may not
+ * immediately be called, which can cause packet loss).
+ *
+ * The flag is the 'carrier' member of the structure passed as 'netif';
+ * these macros set it to 1, clear it to 0, and read it back.
+ */
|
|
+#define netfront_carrier_on(netif) ((netif)->carrier = 1)
|
|
+#define netfront_carrier_off(netif) ((netif)->carrier = 0)
|
|
+#define netfront_carrier_ok(netif) ((netif)->carrier)
|
|
+
|
|
+/*
|
|
+ * Access helpers for acquiring and freeing slots in tx_skbs[].
|
|
+ */
|
|
+
|
|
+/*
+ * Push slot @id onto the free list threaded through @list.  Entry 0 is the
+ * list head; a free entry holds the index of the next free slot, stored as
+ * a pointer-sized integer.
+ */
+static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
+{
+	struct sk_buff *old_head = list[0];
+
+	list[id] = old_head;
+	list[0] = (void *)(unsigned long)id;
+}
|
|
+
|
|
+/*
+ * Pop the first free slot from the free list threaded through @list and
+ * return its index.  The head (entry 0) is advanced to the slot's successor.
+ */
+static inline unsigned short get_id_from_freelist(struct sk_buff **list)
+{
+	unsigned int head = (unsigned int)(unsigned long)list[0];
+	struct sk_buff *next = list[head];
+
+	list[0] = next;
+	return head;
+}
|
|
+
|
|
+/* Reduce a free-running RX ring index to a slot number in the RX arrays. */
+static inline int xennet_rxidx(RING_IDX idx)
+{
+	int slot = idx & (NET_RX_RING_SIZE - 1);
+
+	return slot;
+}
|
|
+
|
|
+/*
+ * Take the skb recorded for RX ring index @ri out of np->rx_skbs[],
+ * leaving the slot NULL, and return it.
+ */
+static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
+						RING_IDX ri)
+{
+	struct sk_buff **slot = &np->rx_skbs[xennet_rxidx(ri)];
+	struct sk_buff *taken = *slot;
+
+	*slot = NULL;
+	return taken;
+}
|
|
+
|
|
+/*
+ * Take the grant reference recorded for RX ring index @ri out of
+ * np->grant_rx_ref[], leaving GRANT_INVALID_REF in the slot, and return it.
+ */
+static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
+					    RING_IDX ri)
+{
+	grant_ref_t *slot = &np->grant_rx_ref[xennet_rxidx(ri)];
+	grant_ref_t taken = *slot;
+
+	*slot = GRANT_INVALID_REF;
+	return taken;
+}
|
|
+
|
|
+/* Debug message, prefixed with the calling function name and line number. */
+#define DPRINTK(fmt, args...) \
+	pr_debug("netfront (%s:%d) " fmt, __func__, __LINE__, ##args)
+/* Informational message with the driver prefix. */
+#define IPRINTK(fmt, args...) printk(KERN_INFO "netfront: " fmt, ##args)
+/* Warning message with the driver prefix. */
+#define WPRINTK(fmt, args...) printk(KERN_WARNING "netfront: " fmt, ##args)
|
|
+
|
|
+static int setup_device(struct xenbus_device *, struct netfront_info *);
|
|
+static struct net_device *create_netdev(struct xenbus_device *);
|
|
+
|
|
+static void end_access(int, void *);
|
|
+static void netif_disconnect_backend(struct netfront_info *);
|
|
+
|
|
+static int network_connect(struct net_device *);
|
|
+static void network_tx_buf_gc(struct net_device *);
|
|
+static void network_alloc_rx_buffers(struct net_device *);
|
|
+static void send_fake_arp(struct net_device *);
|
|
+
|
|
+static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+
|
|
+#ifdef CONFIG_SYSFS
|
|
+static int xennet_sysfs_addif(struct net_device *netdev);
|
|
+static void xennet_sysfs_delif(struct net_device *netdev);
|
|
+#else /* !CONFIG_SYSFS */
|
|
+#define xennet_sysfs_addif(dev) (0)
|
|
+#define xennet_sysfs_delif(dev) do { } while(0)
|
|
+#endif
|
|
+
|
|
+/* Non-zero when @dev currently advertises scatter-gather (NETIF_F_SG). */
+static inline int xennet_can_sg(struct net_device *dev)
+{
+	int sg = dev->features & NETIF_F_SG;
+
+	return sg;
+}
|
|
+
|
|
+/**
+ * Entry point to this code when a new device is created.  Creates the net
+ * device for @dev, registers it with the network stack and adds its sysfs
+ * entries.  Returns 0 on success or a negative errno, in which case the
+ * net device has been freed again.
+ */
+static int __devinit netfront_probe(struct xenbus_device *dev,
+				    const struct xenbus_device_id *id)
+{
+	struct net_device *netdev;
+	struct netfront_info *info;
+	int err;
+
+	netdev = create_netdev(dev);
+	if (IS_ERR(netdev)) {
+		err = PTR_ERR(netdev);
+		xenbus_dev_fatal(dev, err, "creating netdev");
+		return err;
+	}
+
+	info = netdev_priv(netdev);
+	dev->dev.driver_data = info;
+
+	err = register_netdev(info->netdev);
+	if (err) {
+		printk(KERN_WARNING "%s: register_netdev err=%d\n",
+		       __FUNCTION__, err);
+		goto fail;
+	}
+
+	err = xennet_sysfs_addif(info->netdev);
+	if (!err)
+		return 0;
+
+	/* sysfs setup failed: undo the registration before freeing. */
+	unregister_netdev(info->netdev);
+	printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
+	       __FUNCTION__, err);
+
+ fail:
+	free_netdev(netdev);
+	dev->dev.driver_data = NULL;
+	return err;
+}
|
|
+
|
|
+/*
+ * Device removal: run the accelerator remove hook, disconnect from the
+ * backend, stop the RX refill timer, then remove the sysfs entries and
+ * unregister and free the net device.  Always returns 0.
+ */
+static int __devexit netfront_remove(struct xenbus_device *dev)
+{
+	struct netfront_info *np = dev->dev.driver_data;
+	struct net_device *netdev;
+
+	DPRINTK("%s\n", dev->nodename);
+
+	netfront_accelerator_call_remove(np, dev);
+	netif_disconnect_backend(np);
+	del_timer_sync(&np->rx_refill_timer);
+
+	netdev = np->netdev;
+	xennet_sysfs_delif(netdev);
+	unregister_netdev(netdev);
+	free_netdev(netdev);
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* Suspend callback: delegates to the accelerator suspend handler. */
+static int netfront_suspend(struct xenbus_device *dev)
+{
+	struct netfront_info *np = dev->dev.driver_data;
+	int rc = netfront_accelerator_suspend(np, dev);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/* Suspend-cancel callback: delegates to the accelerator handler. */
+static int netfront_suspend_cancel(struct xenbus_device *dev)
+{
+	struct netfront_info *np = dev->dev.driver_data;
+	int rc = netfront_accelerator_suspend_cancel(np, dev);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart.  This callback runs the accelerator resume handler and
+ * disconnects from the backend; the device-layer structures are left
+ * intact so that this is transparent to the rest of the kernel.
+ * Always returns 0.
+ */
+static int netfront_resume(struct xenbus_device *dev)
+{
+	struct netfront_info *np = dev->dev.driver_data;
+
+	DPRINTK("%s\n", dev->nodename);
+
+	/* The accelerator handler's return value is not checked here. */
+	netfront_accelerator_resume(np, dev);
+	netif_disconnect_backend(np);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Read the "mac" node under dev->nodename and parse it into @mac.
+ *
+ * The string must consist of exactly ETH_ALEN hexadecimal octets separated
+ * by ':' with nothing following the last one.  Each octet must fit in one
+ * byte; larger values are rejected rather than silently truncated.
+ *
+ * Returns 0 on success, the error from xenbus_read() if the node cannot be
+ * read, or -ENOENT if the string is malformed.
+ */
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+	char *s, *e, *macstr;
+	int err = 0;
+	int i;
+
+	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+	if (IS_ERR(macstr))
+		return PTR_ERR(macstr);
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		/* ':' must follow every octet except the last. */
+		char terminator = (i == ETH_ALEN - 1) ? '\0' : ':';
+		unsigned long octet = simple_strtoul(s, &e, 16);
+
+		if (s == e || *e != terminator || octet > 0xff) {
+			err = -ENOENT;
+			break;
+		}
+		mac[i] = octet;
+		s = e + 1;
+	}
+
+	kfree(macstr);
+	return err;
+}
|
|
+
|
|
+/*
+ * Common code used when first setting up, and when resuming.
+ *
+ * Reads the MAC address from xenstore if info->mac is not yet a valid
+ * Ethernet address, creates the rings and event channel via setup_device(),
+ * registers the accelerator watch, and then writes the ring references,
+ * event channel and feature flags under dev->nodename inside one xenbus
+ * transaction.  The transaction is restarted when ending it yields -EAGAIN.
+ *
+ * Returns 0 on success or a negative errno.  Failures of the MAC read and
+ * of the transaction are reported through xenbus_dev_fatal().  Failures
+ * after setup_device() succeeded also call
+ * netfront_accelerator_call_remove() and netif_disconnect_backend().
+ */
|
|
+static int talk_to_backend(struct xenbus_device *dev,
|
|
+ struct netfront_info *info)
|
|
+{
|
|
+ const char *message;
|
|
+ struct xenbus_transaction xbt;
|
|
+ int err;
|
|
+
|
|
+ /* Read mac only in the first setup. */
|
|
+ if (!is_valid_ether_addr(info->mac)) {
|
|
+ err = xen_net_read_mac(dev, info->mac);
|
|
+ if (err) {
|
|
+ xenbus_dev_fatal(dev, err, "parsing %s/mac",
|
|
+ dev->nodename);
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Create shared ring, alloc event channel. */
|
|
+ err = setup_device(dev, info);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ /* This will load an accelerator if one is configured when the
|
|
+ * watch fires */
|
|
+ netfront_accelerator_add_watch(info);
|
|
+
|
|
+again: /* restart point when ending the transaction returns -EAGAIN */
|
|
+ err = xenbus_transaction_start(&xbt);
|
|
+ if (err) {
|
|
+ xenbus_dev_fatal(dev, err, "starting transaction");
|
|
+ goto destroy_ring;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
|
|
+ info->tx_ring_ref);
|
|
+ if (err) {
|
|
+ message = "writing tx ring-ref";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+ err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
|
|
+ info->rx_ring_ref);
|
|
+ if (err) {
|
|
+ message = "writing rx ring-ref";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+ err = xenbus_printf(xbt, dev->nodename,
|
|
+ "event-channel", "%u",
|
|
+ irq_to_evtchn_port(info->irq));
|
|
+ if (err) {
|
|
+ message = "writing event-channel";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
|
|
+ info->copying_receiver);
|
|
+ if (err) {
|
|
+ message = "writing request-rx-copy";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
|
|
+ if (err) {
|
|
+ message = "writing feature-rx-notify";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload",
|
|
+ "%d", !HAVE_CSUM_OFFLOAD);
|
|
+ if (err) {
|
|
+ message = "writing feature-no-csum-offload";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
|
|
+ if (err) {
|
|
+ message = "writing feature-sg";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d",
|
|
+ HAVE_TSO);
|
|
+ if (err) {
|
|
+ message = "writing feature-gso-tcpv4";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_transaction_end(xbt, 0); /* second argument 0 here, 1 on the abort path below */
|
|
+ if (err) {
|
|
+ if (err == -EAGAIN)
|
|
+ goto again;
|
|
+ xenbus_dev_fatal(dev, err, "completing transaction");
|
|
+ goto destroy_ring;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+ abort_transaction: /* 'message' names the write that failed */
|
|
+ xenbus_transaction_end(xbt, 1);
|
|
+ xenbus_dev_fatal(dev, err, "%s", message);
|
|
+ destroy_ring: /* reached only after setup_device() succeeded */
|
|
+ netfront_accelerator_call_remove(info, dev);
|
|
+ netif_disconnect_backend(info);
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/*
+ * Allocate and grant the shared TX and RX ring pages for @info, copy the
+ * MAC address into the net device and bind the event channel to netif_int().
+ *
+ * On entry the ring references, ring pointers and irq in @info are reset.
+ * On success info->tx_ring_ref, info->rx_ring_ref and info->irq hold the
+ * new grant references and irq, and 0 is returned.
+ *
+ * On failure a negative errno is returned.  A ring page whose grant failed
+ * is freed here and its sring pointer cleared, so @info never keeps a
+ * pointer to freed memory.
+ *
+ * NOTE(review): resources acquired before a later step failed (e.g. a
+ * granted TX ring when the RX allocation fails) stay recorded in @info and
+ * are not released here; presumably netif_disconnect_backend() releases
+ * them - confirm against the callers.
+ */
+static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
+{
+	struct netif_tx_sring *txs;
+	struct netif_rx_sring *rxs;
+	int err;
+	struct net_device *netdev = info->netdev;
+
+	info->tx_ring_ref = GRANT_INVALID_REF;
+	info->rx_ring_ref = GRANT_INVALID_REF;
+	info->rx.sring = NULL;
+	info->tx.sring = NULL;
+	info->irq = 0;
+
+	txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
+	if (!txs) {
+		err = -ENOMEM;
+		xenbus_dev_fatal(dev, err, "allocating tx ring page");
+		goto fail;
+	}
+	SHARED_RING_INIT(txs);
+	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+
+	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+	if (err < 0) {
+		/* Never shared: drop the pointer stored above, then free. */
+		info->tx.sring = NULL;
+		free_page((unsigned long)txs);
+		goto fail;
+	}
+	info->tx_ring_ref = err;
+
+	rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
+	if (!rxs) {
+		err = -ENOMEM;
+		xenbus_dev_fatal(dev, err, "allocating rx ring page");
+		goto fail;
+	}
+	SHARED_RING_INIT(rxs);
+	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+
+	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+	if (err < 0) {
+		/* Never shared: drop the pointer stored above, then free. */
+		info->rx.sring = NULL;
+		free_page((unsigned long)rxs);
+		goto fail;
+	}
+	info->rx_ring_ref = err;
+
+	memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
+
+	/* A non-negative return value is the irq bound to the new port. */
+	err = bind_listening_port_to_irqhandler(
+		dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
+		netdev);
+	if (err < 0)
+		goto fail;
+	info->irq = err;
+
+	return 0;
+
+ fail:
+	return err;
+}
|
|
+
|
|
+/**
+ * Callback received when the backend's state changes.
+ *
+ * Only two backend states trigger work.  On InitWait, while this frontend
+ * is still Initialising, the device is connected, switched to Connected
+ * and a fake ARP is sent.  On Closing the frontend is closed.  All other
+ * states are ignored.
+ */
+static void backend_changed(struct xenbus_device *dev,
+			    enum xenbus_state backend_state)
+{
+	struct netfront_info *np = dev->dev.driver_data;
+	struct net_device *netdev = np->netdev;
+
+	DPRINTK("%s\n", xenbus_strstate(backend_state));
+
+	switch (backend_state) {
+	case XenbusStateInitWait:
+		if (dev->state == XenbusStateInitialising &&
+		    network_connect(netdev) == 0) {
+			xenbus_switch_state(dev, XenbusStateConnected);
+			send_fake_arp(netdev);
+		}
+		break;
+
+	case XenbusStateClosing:
+		xenbus_frontend_closed(dev);
+		break;
+
+	/* Nothing to do for the remaining states. */
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+	case XenbusStateReconfiguring:
+	case XenbusStateReconfigured:
+	case XenbusStateUnknown:
+	case XenbusStateClosed:
+		break;
+	}
+}
|
|
+
|
|
+/**
+ * Send a packet on a net device to encourage switches to learn the
+ * MAC. We send a fake ARP reply (ARPOP_REPLY) whose destination IP is
+ * INADDR_BROADCAST and whose target hardware address is the device's
+ * own address.
+ *
+ * Nothing is sent, and no error is reported, when CONFIG_INET is not
+ * set, when no source IP address can be selected for the device, or
+ * when the ARP skb cannot be created.
+ *
+ * @param dev device
+ */
|
|
+static void send_fake_arp(struct net_device *dev)
|
|
+{
|
|
+#ifdef CONFIG_INET
|
|
+ struct sk_buff *skb;
|
|
+ u32 src_ip, dst_ip;
|
|
+
|
|
+ dst_ip = INADDR_BROADCAST;
|
|
+ src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
|
|
+
|
|
+ /* No IP? Then nothing to do. */
|
|
+ if (src_ip == 0)
|
|
+ return;
|
|
+
|
|
+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
|
|
+ dst_ip, dev, src_ip,
|
|
+ /*dst_hw*/ NULL, /*src_hw*/ NULL,
|
|
+ /*target_hw*/ dev->dev_addr);
|
|
+ if (skb == NULL)
|
|
+ return;
|
|
+
|
|
+ dev_queue_xmit(skb);
|
|
+#endif
|
|
+}
|
|
+
|
|
+/*
+ * Non-zero while the number of TX requests produced but not yet consumed
+ * as responses is below TX_MAX_TARGET - MAX_SKB_FRAGS - 2.
+ */
+static inline int netfront_tx_slot_available(struct netfront_info *np)
+{
+	RING_IDX outstanding = np->tx.req_prod_pvt - np->tx.rsp_cons;
+
+	return outstanding < (TX_MAX_TARGET - MAX_SKB_FRAGS - 2);
+}
|
|
+
|
|
+
|
|
+/*
+ * Wake the TX queue of @dev if it is stopped, TX slots are available, the
+ * device is running and the accelerator reports its queue ready.
+ */
+static inline void network_maybe_wake_tx(struct net_device *dev)
+{
+	struct netfront_info *np = netdev_priv(dev);
+
+	if (likely(!netif_queue_stopped(dev)))
+		return;
+	if (!netfront_tx_slot_available(np))
+		return;
+	if (unlikely(!netif_running(dev)))
+		return;
+
+	if (netfront_check_accelerator_queue_ready(dev, np))
+		netif_wake_queue(dev);
+}
|
|
+
|
|
+
|
|
+/*
+ * Returns 1 when the TX queue of @dev is stopped, TX slots are available
+ * and the device is running; 0 otherwise.  Exported for other modules.
+ */
+int netfront_check_queue_ready(struct net_device *dev)
+{
+	struct netfront_info *np = netdev_priv(dev);
+
+	if (likely(!netif_queue_stopped(dev)))
+		return 0;
+	if (!netfront_tx_slot_available(np))
+		return 0;
+
+	return likely(netif_running(dev)) ? 1 : 0;
+}
+EXPORT_SYMBOL(netfront_check_queue_ready);
|
|
+
|
|
+
|
|
+/*
+ * Open the interface: clear the statistics and, if the carrier is up,
+ * refill the RX ring and schedule polling when responses are already
+ * pending.  Finally wake the TX queue if possible.  Always returns 0.
+ */
+static int network_open(struct net_device *dev)
+{
+	struct netfront_info *np = netdev_priv(dev);
+
+	memset(&np->stats, 0, sizeof(np->stats));
+
+	spin_lock_bh(&np->rx_lock);
+	if (!netfront_carrier_ok(np))
+		goto unlock;
+
+	network_alloc_rx_buffers(dev);
+	np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
+	if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
+		netfront_accelerator_call_stop_napi_irq(np, dev);
+		netif_rx_schedule(dev);
+	}
+
+ unlock:
+	spin_unlock_bh(&np->rx_lock);
+
+	network_maybe_wake_tx(dev);
+
+	return 0;
+}
|
|
+/*
+ * Reclaim transmit slots for which the backend has produced responses.
+ *
+ * For every response between tx.rsp_cons and the shared rsp_prod whose
+ * status is not NETIF_RSP_NULL, the grant for that request id is ended and
+ * released, the id is returned to the tx_skbs[] free list and the skb is
+ * freed.  rsp_event is then re-armed and the scan repeats if further
+ * responses arrived in the meantime.  Finally the TX queue is woken if
+ * possible.
+ *
+ * BUGs if the carrier is off or if a grant is still in use by the backend.
+ * NOTE(review): both callers visible in this file hold np->tx_lock;
+ * presumably that is a requirement - confirm.
+ */
|
|
+static void network_tx_buf_gc(struct net_device *dev)
|
|
+{
|
|
+ RING_IDX cons, prod;
|
|
+ unsigned short id;
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ struct sk_buff *skb;
|
|
+
|
|
+ BUG_ON(!netfront_carrier_ok(np));
|
|
+
|
|
+ do {
|
|
+ prod = np->tx.sring->rsp_prod;
|
|
+ rmb(); /* Ensure we see responses up to 'prod'. */
|
|
+
|
|
+ for (cons = np->tx.rsp_cons; cons != prod; cons++) {
|
|
+ struct netif_tx_response *txrsp;
|
|
+
|
|
+ txrsp = RING_GET_RESPONSE(&np->tx, cons);
|
|
+ if (txrsp->status == NETIF_RSP_NULL)
|
|
+ continue;
|
|
+
|
|
+ id = txrsp->id; /* id comes from the shared ring and is used unchecked as an index */
|
|
+ skb = np->tx_skbs[id];
|
|
+ if (unlikely(gnttab_query_foreign_access(
|
|
+ np->grant_tx_ref[id]) != 0)) {
|
|
+ printk(KERN_ALERT "network_tx_buf_gc: warning "
|
|
+ "-- grant still in use by backend "
|
|
+ "domain.\n");
|
|
+ BUG();
|
|
+ }
|
|
+ gnttab_end_foreign_access_ref(np->grant_tx_ref[id]);
|
|
+ gnttab_release_grant_reference(
|
|
+ &np->gref_tx_head, np->grant_tx_ref[id]);
|
|
+ np->grant_tx_ref[id] = GRANT_INVALID_REF;
|
|
+ add_id_to_freelist(np->tx_skbs, id);
|
|
+ dev_kfree_skb_irq(skb);
|
|
+ }
|
|
+
|
|
+ np->tx.rsp_cons = prod;
|
|
+
|
|
+ /*
|
|
+ * Set a new event, then check for race with update of tx_cons.
|
|
+ * Note that it is essential to schedule a callback, no matter
|
|
+ * how few buffers are pending. Even if there is space in the
|
|
+ * transmit ring, higher layers may be blocked because too much
|
|
+ * data is outstanding: in such cases notification from Xen is
|
|
+ * likely to be the only kick that we'll get.
|
|
+ */
|
|
+ np->tx.sring->rsp_event =
|
|
+ prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
|
|
+ mb(); /* order the rsp_event store before rsp_prod is re-read below */
|
|
+ } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
|
|
+
|
|
+ network_maybe_wake_tx(dev);
|
|
+}
|
|
+
|
|
+/*
+ * Timer callback for the RX refill timer.  @data carries the net_device
+ * pointer; the accelerator's NAPI interrupt is stopped and polling of the
+ * device is scheduled.
+ */
+static void rx_refill_timeout(unsigned long data)
+{
+	struct net_device *netdev = (struct net_device *)data;
+
+	netfront_accelerator_call_stop_napi_irq(netdev_priv(netdev), netdev);
+	netif_rx_schedule(netdev);
+}
|
|
+/*
+ * Top up the RX ring with fresh receive buffers.
+ *
+ * Each buffer is an skb with a small linear area (RX_COPY_THRESHOLD bytes
+ * plus headroom) and one newly allocated page attached as fragment 0.
+ * Buffers are first collected on np->rx_batch.  They are posted as RX
+ * requests once the batch is large enough, or when an allocation failed
+ * while buffers were already queued.  If nothing at all could be
+ * allocated, the refill timer is armed to retry later.
+ *
+ * In flip mode (!np->copying_receiver) each page is granted for transfer
+ * and given back to Xen via XENMEM_decrease_reservation; in copy mode the
+ * page is granted for foreign access instead.
+ *
+ * Does nothing while the carrier is off.
+ */
|
|
+static void network_alloc_rx_buffers(struct net_device *dev)
|
|
+{
|
|
+ unsigned short id;
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ struct sk_buff *skb;
|
|
+ struct page *page;
|
|
+ int i, batch_target, notify;
|
|
+ RING_IDX req_prod = np->rx.req_prod_pvt;
|
|
+ struct xen_memory_reservation reservation;
|
|
+ grant_ref_t ref;
|
|
+ unsigned long pfn;
|
|
+ void *vaddr;
|
|
+ int nr_flips;
|
|
+ netif_rx_request_t *req;
|
|
+
|
|
+ if (unlikely(!netfront_carrier_ok(np)))
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * Allocate skbuffs greedily, even though we batch updates to the
|
|
+ * receive ring. This creates a less bursty demand on the memory
|
|
+ * allocator, so should reduce the chance of failed allocation requests
|
|
+ * both for ourself and for other kernel subsystems.
|
|
+ */
|
|
+ batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
|
|
+ for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
|
|
+ /*
|
|
+ * Allocate an skb and a page. Do not use __dev_alloc_skb as
|
|
+ * that will allocate page-sized buffers which is not
|
|
+ * necessary here.
|
|
+ * 16 bytes added as necessary headroom for netif_receive_skb.
|
|
+ */
|
|
+ skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
|
|
+ GFP_ATOMIC | __GFP_NOWARN);
|
|
+ if (unlikely(!skb))
|
|
+ goto no_skb;
|
|
+
|
|
+ page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
|
|
+ if (!page) {
|
|
+ kfree_skb(skb);
|
|
+no_skb: /* also entered by goto from the failed alloc_skb() above */
|
|
+ /* Any skbuffs queued for refill? Force them out. */
|
|
+ if (i != 0)
|
|
+ goto refill;
|
|
+ /* Could not allocate any skbuffs. Try again later. */
|
|
+ mod_timer(&np->rx_refill_timer,
|
|
+ jiffies + (HZ/10));
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
|
|
+ skb_shinfo(skb)->frags[0].page = page;
|
|
+ skb_shinfo(skb)->nr_frags = 1;
|
|
+ __skb_queue_tail(&np->rx_batch, skb);
|
|
+ }
|
|
+
|
|
+ /* Is the batch large enough to be worthwhile? */
|
|
+ if (i < (np->rx_target/2)) {
|
|
+ if (req_prod > np->rx.sring->req_prod)
|
|
+ goto push;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Adjust our fill target if we risked running out of buffers. */
|
|
+ if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
|
|
+ ((np->rx_target *= 2) > np->rx_max_target))
|
|
+ np->rx_target = np->rx_max_target;
|
|
+
|
|
+ refill:
|
|
+ for (nr_flips = i = 0; ; i++) {
|
|
+ if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
|
|
+ break;
|
|
+
|
|
+ skb->dev = dev;
|
|
+
|
|
+ id = xennet_rxidx(req_prod + i);
|
|
+
|
|
+ BUG_ON(np->rx_skbs[id]);
|
|
+ np->rx_skbs[id] = skb;
|
|
+
|
|
+ ref = gnttab_claim_grant_reference(&np->gref_rx_head);
|
|
+ BUG_ON((signed short)ref < 0);
|
|
+ np->grant_rx_ref[id] = ref;
|
|
+
|
|
+ pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
|
|
+ vaddr = page_address(skb_shinfo(skb)->frags[0].page);
|
|
+
|
|
+ req = RING_GET_REQUEST(&np->rx, req_prod + i);
|
|
+ if (!np->copying_receiver) {
|
|
+ gnttab_grant_foreign_transfer_ref(ref,
|
|
+ np->xbdev->otherend_id,
|
|
+ pfn);
|
|
+ np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ /* Remove this page before passing
|
|
+ * back to Xen. */
|
|
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
|
+ MULTI_update_va_mapping(np->rx_mcl+i,
|
|
+ (unsigned long)vaddr,
|
|
+ __pte(0), 0);
|
|
+ }
|
|
+ nr_flips++;
|
|
+ } else {
|
|
+ gnttab_grant_foreign_access_ref(ref,
|
|
+ np->xbdev->otherend_id,
|
|
+ pfn_to_mfn(pfn),
|
|
+ 0);
|
|
+ }
|
|
+
|
|
+ req->id = id;
|
|
+ req->gref = ref;
|
|
+ }
|
|
+
|
|
+ if ( nr_flips != 0 ) {
|
|
+ /* Tell the balloon driver what is going on. */
|
|
+ balloon_update_driver_allowance(i);
|
|
+
|
|
+ set_xen_guest_handle(reservation.extent_start,
|
|
+ np->rx_pfn_array);
|
|
+ reservation.nr_extents = nr_flips;
|
|
+ reservation.extent_order = 0;
|
|
+ reservation.address_bits = 0;
|
|
+ reservation.domid = DOMID_SELF;
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ /* After all PTEs have been zapped, flush the TLB. */
|
|
+ np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
|
|
+ UVMF_TLB_FLUSH|UVMF_ALL;
|
|
+
|
|
+ /* Give away a batch of pages. */
|
|
+ np->rx_mcl[i].op = __HYPERVISOR_memory_op;
|
|
+ np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
|
|
+ np->rx_mcl[i].args[1] = (unsigned long)&reservation;
|
|
+
|
|
+ /* Zap PTEs and give away pages in one big
|
|
+ * multicall. */
|
|
+ if (unlikely(HYPERVISOR_multicall(np->rx_mcl, i+1)))
|
|
+ BUG();
|
|
+
|
|
+ /* Check return status of HYPERVISOR_memory_op(). */
|
|
+ if (unlikely(np->rx_mcl[i].result != i))
|
|
+ panic("Unable to reduce memory reservation\n");
|
|
+ while (nr_flips--)
|
|
+ BUG_ON(np->rx_mcl[nr_flips].result);
|
|
+ } else {
|
|
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
|
|
+ &reservation) != i)
|
|
+ panic("Unable to reduce memory reservation\n");
|
|
+ }
|
|
+ } else {
|
|
+ wmb();
|
|
+ }
|
|
+
|
|
+ /* Above is a suitable barrier to ensure backend will see requests. */
|
|
+ np->rx.req_prod_pvt = req_prod + i;
|
|
+ push:
|
|
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(np->irq);
|
|
+}
|
|
+/*
+ * Emit the TX requests needed for @skb beyond the first one.
+ *
+ * @tx is the request the caller has already filled in for the start of the
+ * linear area.  While the remaining linear data crosses a page boundary,
+ * the current request is trimmed to end at the boundary and a new request
+ * is claimed for the next page.  After that, one request is added per page
+ * fragment.  Every request that is followed by another gets
+ * NETTXF_more_data.
+ *
+ * Each added request takes its own tx_skbs[] slot holding an extra
+ * reference on the skb (skb_get) and a read-only grant on the page.
+ * np->tx.req_prod_pvt is advanced past the requests added here.
+ */
|
|
+static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
|
|
+ struct netif_tx_request *tx)
|
|
+{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ char *data = skb->data;
|
|
+ unsigned long mfn;
|
|
+ RING_IDX prod = np->tx.req_prod_pvt;
|
|
+ int frags = skb_shinfo(skb)->nr_frags;
|
|
+ unsigned int offset = offset_in_page(data);
|
|
+ unsigned int len = skb_headlen(skb);
|
|
+ unsigned int id;
|
|
+ grant_ref_t ref;
|
|
+ int i;
|
|
+
|
|
+ while (len > PAGE_SIZE - offset) { /* linear data continues into another page */
|
|
+ tx->size = PAGE_SIZE - offset;
|
|
+ tx->flags |= NETTXF_more_data;
|
|
+ len -= tx->size;
|
|
+ data += tx->size;
|
|
+ offset = 0;
|
|
+
|
|
+ id = get_id_from_freelist(np->tx_skbs);
|
|
+ np->tx_skbs[id] = skb_get(skb);
|
|
+ tx = RING_GET_REQUEST(&np->tx, prod++);
|
|
+ tx->id = id;
|
|
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
|
|
+ BUG_ON((signed short)ref < 0);
|
|
+
|
|
+ mfn = virt_to_mfn(data);
|
|
+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
|
|
+ mfn, GTF_readonly);
|
|
+
|
|
+ tx->gref = np->grant_tx_ref[id] = ref;
|
|
+ tx->offset = offset;
|
|
+ tx->size = len;
|
|
+ tx->flags = 0;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < frags; i++) { /* one request per page fragment */
|
|
+ skb_frag_t *frag = skb_shinfo(skb)->frags + i;
|
|
+
|
|
+ tx->flags |= NETTXF_more_data; /* flags the previous request: another one follows */
|
|
+
|
|
+ id = get_id_from_freelist(np->tx_skbs);
|
|
+ np->tx_skbs[id] = skb_get(skb);
|
|
+ tx = RING_GET_REQUEST(&np->tx, prod++);
|
|
+ tx->id = id;
|
|
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
|
|
+ BUG_ON((signed short)ref < 0);
|
|
+
|
|
+ mfn = pfn_to_mfn(page_to_pfn(frag->page));
|
|
+ gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
|
|
+ mfn, GTF_readonly);
|
|
+
|
|
+ tx->gref = np->grant_tx_ref[id] = ref;
|
|
+ tx->offset = frag->page_offset;
|
|
+ tx->size = frag->size;
|
|
+ tx->flags = 0;
|
|
+ }
|
|
+
|
|
+ np->tx.req_prod_pvt = prod;
|
|
+}
|
|
+/*
+ * Transmit @skb on @dev through the shared TX ring.
+ *
+ * If accelerator hooks are installed and their start_xmit() returns
+ * non-zero, the packet is treated as sent by the fast path.  Otherwise the
+ * skb is dropped (and counted in tx_dropped) when it would need more than
+ * MAX_SKB_FRAGS + 1 ring slots, when the carrier is off, when it spans
+ * several slots but the device lacks NETIF_F_SG, or when it needs software
+ * GSO.
+ *
+ * On the normal path the first request is built here, an optional GSO
+ * extra-info slot follows it, and xennet_make_frags() adds the remaining
+ * requests.  The ring is then pushed, completed slots are reclaimed, and
+ * the queue is stopped when no TX slot remains available.
+ *
+ * Always returns 0.
+ */
|
|
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
+{
|
|
+ unsigned short id;
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ struct netif_tx_request *tx;
|
|
+ struct netif_extra_info *extra;
|
|
+ char *data = skb->data;
|
|
+ RING_IDX i;
|
|
+ grant_ref_t ref;
|
|
+ unsigned long mfn;
|
|
+ int notify;
|
|
+ int frags = skb_shinfo(skb)->nr_frags;
|
|
+ unsigned int offset = offset_in_page(data);
|
|
+ unsigned int len = skb_headlen(skb);
|
|
+
|
|
+ /* Check the fast path, if hooks are available */
|
|
+ if (np->accel_vif_state.hooks &&
|
|
+ np->accel_vif_state.hooks->start_xmit(skb, dev)) {
|
|
+ /* Fast path has sent this packet */
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE; /* add the pages spanned by the linear area */
|
|
+ if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
|
|
+ printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
|
|
+ frags);
|
|
+ dump_stack();
|
|
+ goto drop;
|
|
+ }
|
|
+
|
|
+ spin_lock_irq(&np->tx_lock);
|
|
+
|
|
+ if (unlikely(!netfront_carrier_ok(np) ||
|
|
+ (frags > 1 && !xennet_can_sg(dev)) ||
|
|
+ netif_needs_gso(dev, skb))) {
|
|
+ spin_unlock_irq(&np->tx_lock);
|
|
+ goto drop;
|
|
+ }
|
|
+
|
|
+ i = np->tx.req_prod_pvt;
|
|
+
|
|
+ id = get_id_from_freelist(np->tx_skbs);
|
|
+ np->tx_skbs[id] = skb;
|
|
+
|
|
+ tx = RING_GET_REQUEST(&np->tx, i);
|
|
+
|
|
+ tx->id = id;
|
|
+ ref = gnttab_claim_grant_reference(&np->gref_tx_head);
|
|
+ BUG_ON((signed short)ref < 0);
|
|
+ mfn = virt_to_mfn(data);
|
|
+ gnttab_grant_foreign_access_ref(
|
|
+ ref, np->xbdev->otherend_id, mfn, GTF_readonly);
|
|
+ tx->gref = np->grant_tx_ref[id] = ref;
|
|
+ tx->offset = offset;
|
|
+ tx->size = len;
|
|
+
|
|
+ tx->flags = 0;
|
|
+ extra = NULL;
|
|
+
|
|
+ if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
|
|
+ tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
|
|
+#ifdef CONFIG_XEN
|
|
+ if (skb->proto_data_valid) /* remote but checksummed? */
|
|
+ tx->flags |= NETTXF_data_validated;
|
|
+#endif
|
|
+
|
|
+#if HAVE_TSO
|
|
+ if (skb_shinfo(skb)->gso_size) {
|
|
+ struct netif_extra_info *gso = (struct netif_extra_info *)
|
|
+ RING_GET_REQUEST(&np->tx, ++i);
|
|
+
|
|
+ if (extra)
|
|
+ extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
|
|
+ else
|
|
+ tx->flags |= NETTXF_extra_info;
|
|
+
|
|
+ gso->u.gso.size = skb_shinfo(skb)->gso_size;
|
|
+ gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
|
|
+ gso->u.gso.pad = 0;
|
|
+ gso->u.gso.features = 0;
|
|
+
|
|
+ gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
|
|
+ gso->flags = 0;
|
|
+ extra = gso;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ np->tx.req_prod_pvt = i + 1; /* xennet_make_frags() continues from this index */
|
|
+
|
|
+ xennet_make_frags(skb, dev, tx);
|
|
+ tx->size = skb->len; /* the first request carries the total packet length */
|
|
+
|
|
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(np->irq);
|
|
+
|
|
+ np->stats.tx_bytes += skb->len;
|
|
+ np->stats.tx_packets++;
|
|
+ dev->trans_start = jiffies;
|
|
+
|
|
+ /* Note: It is not safe to access skb after network_tx_buf_gc()! */
|
|
+ network_tx_buf_gc(dev);
|
|
+
|
|
+ if (!netfront_tx_slot_available(np))
|
|
+ netif_stop_queue(dev);
|
|
+
|
|
+ spin_unlock_irq(&np->tx_lock);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+ drop: /* reached with tx_lock not held */
|
|
+ np->stats.tx_dropped++;
|
|
+ dev_kfree_skb(skb);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+{
|
|
+ struct net_device *dev = dev_id;
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&np->tx_lock, flags);
|
|
+
|
|
+ if (likely(netfront_carrier_ok(np))) {
|
|
+ network_tx_buf_gc(dev);
|
|
+ /* Under tx_lock: protects access to rx shared-ring indexes. */
|
|
+ if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
|
|
+ netfront_accelerator_call_stop_napi_irq(np, dev);
|
|
+
|
|
+ netif_rx_schedule(dev);
|
|
+ dev->last_rx = jiffies;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&np->tx_lock, flags);
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
|
|
+ grant_ref_t ref)
|
|
+{
|
|
+ int new = xennet_rxidx(np->rx.req_prod_pvt);
|
|
+
|
|
+ BUG_ON(np->rx_skbs[new]);
|
|
+ np->rx_skbs[new] = skb;
|
|
+ np->grant_rx_ref[new] = ref;
|
|
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
|
|
+ RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
|
|
+ np->rx.req_prod_pvt++;
|
|
+}
|
|
+
|
|
+int xennet_get_extras(struct netfront_info *np,
|
|
+ struct netif_extra_info *extras, RING_IDX rp)
|
|
+
|
|
+{
|
|
+ struct netif_extra_info *extra;
|
|
+ RING_IDX cons = np->rx.rsp_cons;
|
|
+ int err = 0;
|
|
+
|
|
+ do {
|
|
+ struct sk_buff *skb;
|
|
+ grant_ref_t ref;
|
|
+
|
|
+ if (unlikely(cons + 1 == rp)) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("Missing extra info\n");
|
|
+ err = -EBADR;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ extra = (struct netif_extra_info *)
|
|
+ RING_GET_RESPONSE(&np->rx, ++cons);
|
|
+
|
|
+ if (unlikely(!extra->type ||
|
|
+ extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("Invalid extra type: %d\n",
|
|
+ extra->type);
|
|
+ err = -EINVAL;
|
|
+ } else {
|
|
+ memcpy(&extras[extra->type - 1], extra,
|
|
+ sizeof(*extra));
|
|
+ }
|
|
+
|
|
+ skb = xennet_get_rx_skb(np, cons);
|
|
+ ref = xennet_get_rx_ref(np, cons);
|
|
+ xennet_move_rx_slot(np, skb, ref);
|
|
+ } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
|
|
+
|
|
+ np->rx.rsp_cons = cons;
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int xennet_get_responses(struct netfront_info *np,
|
|
+ struct netfront_rx_info *rinfo, RING_IDX rp,
|
|
+ struct sk_buff_head *list,
|
|
+ int *pages_flipped_p)
|
|
+{
|
|
+ int pages_flipped = *pages_flipped_p;
|
|
+ struct mmu_update *mmu;
|
|
+ struct multicall_entry *mcl;
|
|
+ struct netif_rx_response *rx = &rinfo->rx;
|
|
+ struct netif_extra_info *extras = rinfo->extras;
|
|
+ RING_IDX cons = np->rx.rsp_cons;
|
|
+ struct sk_buff *skb = xennet_get_rx_skb(np, cons);
|
|
+ grant_ref_t ref = xennet_get_rx_ref(np, cons);
|
|
+ int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
|
|
+ int frags = 1;
|
|
+ int err = 0;
|
|
+ unsigned long ret;
|
|
+
|
|
+ if (rx->flags & NETRXF_extra_info) {
|
|
+ err = xennet_get_extras(np, extras, rp);
|
|
+ cons = np->rx.rsp_cons;
|
|
+ }
|
|
+
|
|
+ for (;;) {
|
|
+ unsigned long mfn;
|
|
+
|
|
+ if (unlikely(rx->status < 0 ||
|
|
+ rx->offset + rx->status > PAGE_SIZE)) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("rx->offset: %x, size: %u\n",
|
|
+ rx->offset, rx->status);
|
|
+ xennet_move_rx_slot(np, skb, ref);
|
|
+ err = -EINVAL;
|
|
+ goto next;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * This definitely indicates a bug, either in this driver or in
|
|
+ * the backend driver. In future this should flag the bad
|
|
+ * situation to the system controller to reboot the backend.
|
|
+ */
|
|
+ if (ref == GRANT_INVALID_REF) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("Bad rx response id %d.\n", rx->id);
|
|
+ err = -EINVAL;
|
|
+ goto next;
|
|
+ }
|
|
+
|
|
+ if (!np->copying_receiver) {
|
|
+ /* Memory pressure, insufficient buffer
|
|
+ * headroom, ... */
|
|
+ if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("Unfulfilled rx req "
|
|
+ "(id=%d, st=%d).\n",
|
|
+ rx->id, rx->status);
|
|
+ xennet_move_rx_slot(np, skb, ref);
|
|
+ err = -ENOMEM;
|
|
+ goto next;
|
|
+ }
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ /* Remap the page. */
|
|
+ struct page *page =
|
|
+ skb_shinfo(skb)->frags[0].page;
|
|
+ unsigned long pfn = page_to_pfn(page);
|
|
+ void *vaddr = page_address(page);
|
|
+
|
|
+ mcl = np->rx_mcl + pages_flipped;
|
|
+ mmu = np->rx_mmu + pages_flipped;
|
|
+
|
|
+ MULTI_update_va_mapping(mcl,
|
|
+ (unsigned long)vaddr,
|
|
+ pfn_pte_ma(mfn,
|
|
+ PAGE_KERNEL),
|
|
+ 0);
|
|
+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
|
|
+ | MMU_MACHPHYS_UPDATE;
|
|
+ mmu->val = pfn;
|
|
+
|
|
+ set_phys_to_machine(pfn, mfn);
|
|
+ }
|
|
+ pages_flipped++;
|
|
+ } else {
|
|
+ ret = gnttab_end_foreign_access_ref(ref);
|
|
+ BUG_ON(!ret);
|
|
+ }
|
|
+
|
|
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
|
|
+
|
|
+ __skb_queue_tail(list, skb);
|
|
+
|
|
+next:
|
|
+ if (!(rx->flags & NETRXF_more_data))
|
|
+ break;
|
|
+
|
|
+ if (cons + frags == rp) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("Need more frags\n");
|
|
+ err = -ENOENT;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ rx = RING_GET_RESPONSE(&np->rx, cons + frags);
|
|
+ skb = xennet_get_rx_skb(np, cons + frags);
|
|
+ ref = xennet_get_rx_ref(np, cons + frags);
|
|
+ frags++;
|
|
+ }
|
|
+
|
|
+ if (unlikely(frags > max)) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("Too many frags\n");
|
|
+ err = -E2BIG;
|
|
+ }
|
|
+
|
|
+ if (unlikely(err))
|
|
+ np->rx.rsp_cons = cons + frags;
|
|
+
|
|
+ *pages_flipped_p = pages_flipped;
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static RING_IDX xennet_fill_frags(struct netfront_info *np,
|
|
+ struct sk_buff *skb,
|
|
+ struct sk_buff_head *list)
|
|
+{
|
|
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
|
|
+ int nr_frags = shinfo->nr_frags;
|
|
+ RING_IDX cons = np->rx.rsp_cons;
|
|
+ skb_frag_t *frag = shinfo->frags + nr_frags;
|
|
+ struct sk_buff *nskb;
|
|
+
|
|
+ while ((nskb = __skb_dequeue(list))) {
|
|
+ struct netif_rx_response *rx =
|
|
+ RING_GET_RESPONSE(&np->rx, ++cons);
|
|
+
|
|
+ frag->page = skb_shinfo(nskb)->frags[0].page;
|
|
+ frag->page_offset = rx->offset;
|
|
+ frag->size = rx->status;
|
|
+
|
|
+ skb->data_len += rx->status;
|
|
+
|
|
+ skb_shinfo(nskb)->nr_frags = 0;
|
|
+ kfree_skb(nskb);
|
|
+
|
|
+ frag++;
|
|
+ nr_frags++;
|
|
+ }
|
|
+
|
|
+ shinfo->nr_frags = nr_frags;
|
|
+ return cons;
|
|
+}
|
|
+
|
|
+static int xennet_set_skb_gso(struct sk_buff *skb,
|
|
+ struct netif_extra_info *gso)
|
|
+{
|
|
+ if (!gso->u.gso.size) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("GSO size must not be zero.\n");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /* Currently only TCPv4 S.O. is supported. */
|
|
+ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+#if HAVE_TSO
|
|
+ skb_shinfo(skb)->gso_size = gso->u.gso.size;
|
|
+#if HAVE_GSO
|
|
+ skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
|
|
+
|
|
+ /* Header must be checked, and gso_segs computed. */
|
|
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
|
|
+#endif
|
|
+ skb_shinfo(skb)->gso_segs = 0;
|
|
+
|
|
+ return 0;
|
|
+#else
|
|
+ if (net_ratelimit())
|
|
+ WPRINTK("GSO unsupported by this kernel.\n");
|
|
+ return -EINVAL;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static int netif_poll(struct net_device *dev, int *pbudget)
|
|
+{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ struct sk_buff *skb;
|
|
+ struct netfront_rx_info rinfo;
|
|
+ struct netif_rx_response *rx = &rinfo.rx;
|
|
+ struct netif_extra_info *extras = rinfo.extras;
|
|
+ RING_IDX i, rp;
|
|
+ struct multicall_entry *mcl;
|
|
+ int work_done, budget, more_to_do = 1, accel_more_to_do = 1;
|
|
+ struct sk_buff_head rxq;
|
|
+ struct sk_buff_head errq;
|
|
+ struct sk_buff_head tmpq;
|
|
+ unsigned long flags;
|
|
+ unsigned int len;
|
|
+ int pages_flipped = 0;
|
|
+ int err;
|
|
+
|
|
+ spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */
|
|
+
|
|
+ if (unlikely(!netfront_carrier_ok(np))) {
|
|
+ spin_unlock(&np->rx_lock);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ skb_queue_head_init(&rxq);
|
|
+ skb_queue_head_init(&errq);
|
|
+ skb_queue_head_init(&tmpq);
|
|
+
|
|
+ if ((budget = *pbudget) > dev->quota)
|
|
+ budget = dev->quota;
|
|
+ rp = np->rx.sring->rsp_prod;
|
|
+ rmb(); /* Ensure we see queued responses up to 'rp'. */
|
|
+
|
|
+ i = np->rx.rsp_cons;
|
|
+ work_done = 0;
|
|
+ while ((i != rp) && (work_done < budget)) {
|
|
+ memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
|
|
+ memset(extras, 0, sizeof(rinfo.extras));
|
|
+
|
|
+ err = xennet_get_responses(np, &rinfo, rp, &tmpq,
|
|
+ &pages_flipped);
|
|
+
|
|
+ if (unlikely(err)) {
|
|
+err:
|
|
+ while ((skb = __skb_dequeue(&tmpq)))
|
|
+ __skb_queue_tail(&errq, skb);
|
|
+ np->stats.rx_errors++;
|
|
+ i = np->rx.rsp_cons;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ skb = __skb_dequeue(&tmpq);
|
|
+
|
|
+ if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
|
|
+ struct netif_extra_info *gso;
|
|
+ gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
|
|
+
|
|
+ if (unlikely(xennet_set_skb_gso(skb, gso))) {
|
|
+ __skb_queue_head(&tmpq, skb);
|
|
+ np->rx.rsp_cons += skb_queue_len(&tmpq);
|
|
+ goto err;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
|
|
+ NETFRONT_SKB_CB(skb)->offset = rx->offset;
|
|
+
|
|
+ len = rx->status;
|
|
+ if (len > RX_COPY_THRESHOLD)
|
|
+ len = RX_COPY_THRESHOLD;
|
|
+ skb_put(skb, len);
|
|
+
|
|
+ if (rx->status > len) {
|
|
+ skb_shinfo(skb)->frags[0].page_offset =
|
|
+ rx->offset + len;
|
|
+ skb_shinfo(skb)->frags[0].size = rx->status - len;
|
|
+ skb->data_len = rx->status - len;
|
|
+ } else {
|
|
+ skb_shinfo(skb)->frags[0].page = NULL;
|
|
+ skb_shinfo(skb)->nr_frags = 0;
|
|
+ }
|
|
+
|
|
+ i = xennet_fill_frags(np, skb, &tmpq);
|
|
+
|
|
+ /*
|
|
+ * Truesize must approximate the size of true data plus
|
|
+ * any supervisor overheads. Adding hypervisor overheads
|
|
+ * has been shown to significantly reduce achievable
|
|
+ * bandwidth with the default receive buffer size. It is
|
|
+ * therefore not wise to account for it here.
|
|
+ *
|
|
+ * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
|
|
+ * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
|
|
+ * add the size of the data pulled in xennet_fill_frags().
|
|
+ *
|
|
+ * We also adjust for any unused space in the main data
|
|
+ * area by subtracting (RX_COPY_THRESHOLD - len). This is
|
|
+ * especially important with drivers which split incoming
|
|
+ * packets into header and data, using only 66 bytes of
|
|
+ * the main data area (see the e1000 driver for example.)
|
|
+ * On such systems, without this last adjustment, our
|
|
+ * achievable receive throughput using the standard receive
|
|
+ * buffer size was cut by 25%(!!!).
|
|
+ */
|
|
+ skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
|
|
+ skb->len += skb->data_len;
|
|
+
|
|
+ /*
|
|
+ * Old backends do not assert data_validated but we
|
|
+ * can infer it from csum_blank so test both flags.
|
|
+ */
|
|
+ if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
|
|
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
|
|
+ else
|
|
+ skb->ip_summed = CHECKSUM_NONE;
|
|
+#ifdef CONFIG_XEN
|
|
+ skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
|
|
+ skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
|
|
+#endif
|
|
+ np->stats.rx_packets++;
|
|
+ np->stats.rx_bytes += skb->len;
|
|
+
|
|
+ __skb_queue_tail(&rxq, skb);
|
|
+
|
|
+ np->rx.rsp_cons = ++i;
|
|
+ work_done++;
|
|
+ }
|
|
+
|
|
+ if (pages_flipped) {
|
|
+ /* Some pages are no longer absent... */
|
|
+ balloon_update_driver_allowance(-pages_flipped);
|
|
+
|
|
+ /* Do all the remapping work and M2P updates. */
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ mcl = np->rx_mcl + pages_flipped;
|
|
+ mcl->op = __HYPERVISOR_mmu_update;
|
|
+ mcl->args[0] = (unsigned long)np->rx_mmu;
|
|
+ mcl->args[1] = pages_flipped;
|
|
+ mcl->args[2] = 0;
|
|
+ mcl->args[3] = DOMID_SELF;
|
|
+ err = HYPERVISOR_multicall_check(np->rx_mcl,
|
|
+ pages_flipped + 1,
|
|
+ NULL);
|
|
+ BUG_ON(err);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ while ((skb = __skb_dequeue(&errq)))
|
|
+ kfree_skb(skb);
|
|
+
|
|
+ while ((skb = __skb_dequeue(&rxq)) != NULL) {
|
|
+ struct page *page = NETFRONT_SKB_CB(skb)->page;
|
|
+ void *vaddr = page_address(page);
|
|
+ unsigned offset = NETFRONT_SKB_CB(skb)->offset;
|
|
+
|
|
+ memcpy(skb->data, vaddr + offset, skb_headlen(skb));
|
|
+
|
|
+ if (page != skb_shinfo(skb)->frags[0].page)
|
|
+ __free_page(page);
|
|
+
|
|
+ /* Ethernet work: Delayed to here as it peeks the header. */
|
|
+ skb->protocol = eth_type_trans(skb, dev);
|
|
+
|
|
+ /* Pass it up. */
|
|
+ netif_receive_skb(skb);
|
|
+ dev->last_rx = jiffies;
|
|
+ }
|
|
+
|
|
+ /* If we get a callback with very few responses, reduce fill target. */
|
|
+ /* NB. Note exponential increase, linear decrease. */
|
|
+ if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
|
|
+ ((3*np->rx_target) / 4)) &&
|
|
+ (--np->rx_target < np->rx_min_target))
|
|
+ np->rx_target = np->rx_min_target;
|
|
+
|
|
+ network_alloc_rx_buffers(dev);
|
|
+
|
|
+ if (work_done < budget) {
|
|
+ /* there's some spare capacity, try the accelerated path */
|
|
+ int accel_budget = budget - work_done;
|
|
+ int accel_budget_start = accel_budget;
|
|
+
|
|
+ if (np->accel_vif_state.hooks) {
|
|
+ accel_more_to_do =
|
|
+ np->accel_vif_state.hooks->netdev_poll
|
|
+ (dev, &accel_budget);
|
|
+ work_done += (accel_budget_start - accel_budget);
|
|
+ } else
|
|
+ accel_more_to_do = 0;
|
|
+ }
|
|
+
|
|
+ *pbudget -= work_done;
|
|
+ dev->quota -= work_done;
|
|
+
|
|
+ if (work_done < budget) {
|
|
+ local_irq_save(flags);
|
|
+
|
|
+ RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
|
|
+
|
|
+ if (!more_to_do && !accel_more_to_do &&
|
|
+ np->accel_vif_state.hooks) {
|
|
+ /*
|
|
+ * Slow path has nothing more to do, see if
|
|
+ * fast path is likewise
|
|
+ */
|
|
+ accel_more_to_do =
|
|
+ np->accel_vif_state.hooks->start_napi_irq(dev);
|
|
+ }
|
|
+
|
|
+ if (!more_to_do && !accel_more_to_do)
|
|
+ __netif_rx_complete(dev);
|
|
+
|
|
+ local_irq_restore(flags);
|
|
+ }
|
|
+
|
|
+ spin_unlock(&np->rx_lock);
|
|
+
|
|
+ return more_to_do | accel_more_to_do;
|
|
+}
|
|
+
|
|
+static void netif_release_tx_bufs(struct netfront_info *np)
|
|
+{
|
|
+ struct sk_buff *skb;
|
|
+ int i;
|
|
+
|
|
+ for (i = 1; i <= NET_TX_RING_SIZE; i++) {
|
|
+ if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
|
|
+ continue;
|
|
+
|
|
+ skb = np->tx_skbs[i];
|
|
+ gnttab_end_foreign_access_ref(np->grant_tx_ref[i]);
|
|
+ gnttab_release_grant_reference(
|
|
+ &np->gref_tx_head, np->grant_tx_ref[i]);
|
|
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
|
|
+ add_id_to_freelist(np->tx_skbs, i);
|
|
+ dev_kfree_skb_irq(skb);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void netif_release_rx_bufs_flip(struct netfront_info *np)
|
|
+{
|
|
+ struct mmu_update *mmu = np->rx_mmu;
|
|
+ struct multicall_entry *mcl = np->rx_mcl;
|
|
+ struct sk_buff_head free_list;
|
|
+ struct sk_buff *skb;
|
|
+ unsigned long mfn;
|
|
+ int xfer = 0, noxfer = 0, unused = 0;
|
|
+ int id, ref, rc;
|
|
+
|
|
+ skb_queue_head_init(&free_list);
|
|
+
|
|
+ spin_lock_bh(&np->rx_lock);
|
|
+
|
|
+ for (id = 0; id < NET_RX_RING_SIZE; id++) {
|
|
+ if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
|
|
+ unused++;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ skb = np->rx_skbs[id];
|
|
+ mfn = gnttab_end_foreign_transfer_ref(ref);
|
|
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
|
|
+ np->grant_rx_ref[id] = GRANT_INVALID_REF;
|
|
+ add_id_to_freelist(np->rx_skbs, id);
|
|
+
|
|
+ if (0 == mfn) {
|
|
+ struct page *page = skb_shinfo(skb)->frags[0].page;
|
|
+ balloon_release_driver_page(page);
|
|
+ skb_shinfo(skb)->nr_frags = 0;
|
|
+ dev_kfree_skb(skb);
|
|
+ noxfer++;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ /* Remap the page. */
|
|
+ struct page *page = skb_shinfo(skb)->frags[0].page;
|
|
+ unsigned long pfn = page_to_pfn(page);
|
|
+ void *vaddr = page_address(page);
|
|
+
|
|
+ MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
|
|
+ pfn_pte_ma(mfn, PAGE_KERNEL),
|
|
+ 0);
|
|
+ mcl++;
|
|
+ mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
|
|
+ | MMU_MACHPHYS_UPDATE;
|
|
+ mmu->val = pfn;
|
|
+ mmu++;
|
|
+
|
|
+ set_phys_to_machine(pfn, mfn);
|
|
+ }
|
|
+ __skb_queue_tail(&free_list, skb);
|
|
+ xfer++;
|
|
+ }
|
|
+
|
|
+ DPRINTK("%s: %d xfer, %d noxfer, %d unused\n",
|
|
+ __FUNCTION__, xfer, noxfer, unused);
|
|
+
|
|
+ if (xfer) {
|
|
+ /* Some pages are no longer absent... */
|
|
+ balloon_update_driver_allowance(-xfer);
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ /* Do all the remapping work and M2P updates. */
|
|
+ mcl->op = __HYPERVISOR_mmu_update;
|
|
+ mcl->args[0] = (unsigned long)np->rx_mmu;
|
|
+ mcl->args[1] = mmu - np->rx_mmu;
|
|
+ mcl->args[2] = 0;
|
|
+ mcl->args[3] = DOMID_SELF;
|
|
+ mcl++;
|
|
+ rc = HYPERVISOR_multicall_check(
|
|
+ np->rx_mcl, mcl - np->rx_mcl, NULL);
|
|
+ BUG_ON(rc);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ while ((skb = __skb_dequeue(&free_list)) != NULL)
|
|
+ dev_kfree_skb(skb);
|
|
+
|
|
+ spin_unlock_bh(&np->rx_lock);
|
|
+}
|
|
+
|
|
+static void netif_release_rx_bufs_copy(struct netfront_info *np)
|
|
+{
|
|
+ struct sk_buff *skb;
|
|
+ int i, ref;
|
|
+ int busy = 0, inuse = 0;
|
|
+
|
|
+ spin_lock_bh(&np->rx_lock);
|
|
+
|
|
+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
|
|
+ ref = np->grant_rx_ref[i];
|
|
+
|
|
+ if (ref == GRANT_INVALID_REF)
|
|
+ continue;
|
|
+
|
|
+ inuse++;
|
|
+
|
|
+ skb = np->rx_skbs[i];
|
|
+
|
|
+ if (!gnttab_end_foreign_access_ref(ref))
|
|
+ {
|
|
+ busy++;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ gnttab_release_grant_reference(&np->gref_rx_head, ref);
|
|
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
|
|
+ add_id_to_freelist(np->rx_skbs, i);
|
|
+
|
|
+ dev_kfree_skb(skb);
|
|
+ }
|
|
+
|
|
+ if (busy)
|
|
+ DPRINTK("%s: Unable to release %d of %d inuse grant references out of %ld total.\n",
|
|
+ __FUNCTION__, busy, inuse, NET_RX_RING_SIZE);
|
|
+
|
|
+ spin_unlock_bh(&np->rx_lock);
|
|
+}
|
|
+
|
|
+static int network_close(struct net_device *dev)
|
|
+{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ netif_stop_queue(np->netdev);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static struct net_device_stats *network_get_stats(struct net_device *dev)
|
|
+{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+
|
|
+ netfront_accelerator_call_get_stats(np, dev);
|
|
+ return &np->stats;
|
|
+}
|
|
+
|
|
+static int xennet_set_mac_address(struct net_device *dev, void *p)
|
|
+{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ struct sockaddr *addr = p;
|
|
+
|
|
+ if (netif_running(dev))
|
|
+ return -EBUSY;
|
|
+
|
|
+ if (!is_valid_ether_addr(addr->sa_data))
|
|
+ return -EADDRNOTAVAIL;
|
|
+
|
|
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
|
|
+ memcpy(np->mac, addr->sa_data, ETH_ALEN);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int xennet_change_mtu(struct net_device *dev, int mtu)
|
|
+{
|
|
+ int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
|
|
+
|
|
+ if (mtu > max)
|
|
+ return -EINVAL;
|
|
+ dev->mtu = mtu;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int xennet_set_sg(struct net_device *dev, u32 data)
|
|
+{
|
|
+ if (data) {
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ int val;
|
|
+
|
|
+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
|
|
+ "%d", &val) < 0)
|
|
+ val = 0;
|
|
+ if (!val)
|
|
+ return -ENOSYS;
|
|
+ } else if (dev->mtu > ETH_DATA_LEN)
|
|
+ dev->mtu = ETH_DATA_LEN;
|
|
+
|
|
+ return ethtool_op_set_sg(dev, data);
|
|
+}
|
|
+
|
|
+static int xennet_set_tso(struct net_device *dev, u32 data)
|
|
+{
|
|
+ if (data) {
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ int val;
|
|
+
|
|
+ if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
|
|
+ "feature-gso-tcpv4", "%d", &val) < 0)
|
|
+ val = 0;
|
|
+ if (!val)
|
|
+ return -ENOSYS;
|
|
+ }
|
|
+
|
|
+ return ethtool_op_set_tso(dev, data);
|
|
+}
|
|
+
|
|
+static void xennet_set_features(struct net_device *dev)
|
|
+{
|
|
+ dev_disable_gso_features(dev);
|
|
+ xennet_set_sg(dev, 0);
|
|
+
|
|
+ /* We need checksum offload to enable scatter/gather and TSO. */
|
|
+ if (!(dev->features & NETIF_F_IP_CSUM))
|
|
+ return;
|
|
+
|
|
+ if (xennet_set_sg(dev, 1))
|
|
+ return;
|
|
+
|
|
+ /* Before 2.6.9 TSO seems to be unreliable so do not enable it
|
|
+ * on older kernels.
|
|
+ */
|
|
+ if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9))
|
|
+ xennet_set_tso(dev, 1);
|
|
+}
|
|
+
|
|
+static void netfront_get_drvinfo(struct net_device *dev,
|
|
+ struct ethtool_drvinfo *info)
|
|
+{
|
|
+ strcpy(info->driver, "netfront");
|
|
+ strcpy(info->bus_info, dev->class_dev.dev->bus_id);
|
|
+}
|
|
+
|
|
+static int network_connect(struct net_device *dev)
|
|
+{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ int i, requeue_idx, err;
|
|
+ struct sk_buff *skb;
|
|
+ grant_ref_t ref;
|
|
+ netif_rx_request_t *req;
|
|
+ unsigned int feature_rx_copy, feature_rx_flip;
|
|
+
|
|
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
|
|
+ "feature-rx-copy", "%u", &feature_rx_copy);
|
|
+ if (err != 1)
|
|
+ feature_rx_copy = 0;
|
|
+ err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
|
|
+ "feature-rx-flip", "%u", &feature_rx_flip);
|
|
+ if (err != 1)
|
|
+ feature_rx_flip = 1;
|
|
+
|
|
+ /*
|
|
+ * Copy packets on receive path if:
|
|
+ * (a) This was requested by user, and the backend supports it; or
|
|
+ * (b) Flipping was requested, but this is unsupported by the backend.
|
|
+ */
|
|
+ np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
|
|
+ (MODPARM_rx_flip && !feature_rx_flip));
|
|
+
|
|
+ err = talk_to_backend(np->xbdev, np);
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ xennet_set_features(dev);
|
|
+
|
|
+ DPRINTK("device %s has %sing receive path.\n",
|
|
+ dev->name, np->copying_receiver ? "copy" : "flipp");
|
|
+
|
|
+ spin_lock_bh(&np->rx_lock);
|
|
+ spin_lock_irq(&np->tx_lock);
|
|
+
|
|
+ /*
|
|
+ * Recovery procedure:
|
|
+ * NB. Freelist index entries are always going to be less than
|
|
+ * PAGE_OFFSET, whereas pointers to skbs will always be equal or
|
|
+ * greater than PAGE_OFFSET: we use this property to distinguish
|
|
+ * them.
|
|
+ */
|
|
+
|
|
+ /* Step 1: Discard all pending TX packet fragments. */
|
|
+ netif_release_tx_bufs(np);
|
|
+
|
|
+ /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
|
|
+ for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
|
|
+ if (!np->rx_skbs[i])
|
|
+ continue;
|
|
+
|
|
+ skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
|
|
+ ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
|
|
+ req = RING_GET_REQUEST(&np->rx, requeue_idx);
|
|
+
|
|
+ if (!np->copying_receiver) {
|
|
+ gnttab_grant_foreign_transfer_ref(
|
|
+ ref, np->xbdev->otherend_id,
|
|
+ page_to_pfn(skb_shinfo(skb)->frags->page));
|
|
+ } else {
|
|
+ gnttab_grant_foreign_access_ref(
|
|
+ ref, np->xbdev->otherend_id,
|
|
+ pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
|
|
+ frags->page)),
|
|
+ 0);
|
|
+ }
|
|
+ req->gref = ref;
|
|
+ req->id = requeue_idx;
|
|
+
|
|
+ requeue_idx++;
|
|
+ }
|
|
+
|
|
+ np->rx.req_prod_pvt = requeue_idx;
|
|
+
|
|
+ /*
|
|
+ * Step 3: All public and private state should now be sane. Get
|
|
+ * ready to start sending and receiving packets and give the driver
|
|
+ * domain a kick because we've probably just requeued some
|
|
+ * packets.
|
|
+ */
|
|
+ netfront_carrier_on(np);
|
|
+ notify_remote_via_irq(np->irq);
|
|
+ network_tx_buf_gc(dev);
|
|
+ network_alloc_rx_buffers(dev);
|
|
+
|
|
+ spin_unlock_irq(&np->tx_lock);
|
|
+ spin_unlock_bh(&np->rx_lock);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void netif_uninit(struct net_device *dev)
|
|
+{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
+ netif_release_tx_bufs(np);
|
|
+ if (np->copying_receiver)
|
|
+ netif_release_rx_bufs_copy(np);
|
|
+ else
|
|
+ netif_release_rx_bufs_flip(np);
|
|
+ gnttab_free_grant_references(np->gref_tx_head);
|
|
+ gnttab_free_grant_references(np->gref_rx_head);
|
|
+}
|
|
+
|
|
+static struct ethtool_ops network_ethtool_ops =
|
|
+{
|
|
+ .get_drvinfo = netfront_get_drvinfo,
|
|
+ .get_tx_csum = ethtool_op_get_tx_csum,
|
|
+ .set_tx_csum = ethtool_op_set_tx_csum,
|
|
+ .get_sg = ethtool_op_get_sg,
|
|
+ .set_sg = xennet_set_sg,
|
|
+#if HAVE_TSO
|
|
+ .get_tso = ethtool_op_get_tso,
|
|
+ .set_tso = xennet_set_tso,
|
|
+#endif
|
|
+ .get_link = ethtool_op_get_link,
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_SYSFS
|
|
+static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
|
|
+{
|
|
+ struct net_device *netdev = container_of(cd, struct net_device,
|
|
+ class_dev);
|
|
+ struct netfront_info *info = netdev_priv(netdev);
|
|
+
|
|
+ return sprintf(buf, "%u\n", info->rx_min_target);
|
|
+}
|
|
+
|
|
+static ssize_t store_rxbuf_min(struct class_device *cd,
|
|
+ const char *buf, size_t len)
|
|
+{
|
|
+ struct net_device *netdev = container_of(cd, struct net_device,
|
|
+ class_dev);
|
|
+ struct netfront_info *np = netdev_priv(netdev);
|
|
+ char *endp;
|
|
+ unsigned long target;
|
|
+
|
|
+ if (!capable(CAP_NET_ADMIN))
|
|
+ return -EPERM;
|
|
+
|
|
+ target = simple_strtoul(buf, &endp, 0);
|
|
+ if (endp == buf)
|
|
+ return -EBADMSG;
|
|
+
|
|
+ if (target < RX_MIN_TARGET)
|
|
+ target = RX_MIN_TARGET;
|
|
+ if (target > RX_MAX_TARGET)
|
|
+ target = RX_MAX_TARGET;
|
|
+
|
|
+ spin_lock_bh(&np->rx_lock);
|
|
+ if (target > np->rx_max_target)
|
|
+ np->rx_max_target = target;
|
|
+ np->rx_min_target = target;
|
|
+ if (target > np->rx_target)
|
|
+ np->rx_target = target;
|
|
+
|
|
+ network_alloc_rx_buffers(netdev);
|
|
+
|
|
+ spin_unlock_bh(&np->rx_lock);
|
|
+ return len;
|
|
+}
|
|
+
|
|
+static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
|
|
+{
|
|
+ struct net_device *netdev = container_of(cd, struct net_device,
|
|
+ class_dev);
|
|
+ struct netfront_info *info = netdev_priv(netdev);
|
|
+
|
|
+ return sprintf(buf, "%u\n", info->rx_max_target);
|
|
+}
|
|
+
|
|
+static ssize_t store_rxbuf_max(struct class_device *cd,
|
|
+ const char *buf, size_t len)
|
|
+{
|
|
+ struct net_device *netdev = container_of(cd, struct net_device,
|
|
+ class_dev);
|
|
+ struct netfront_info *np = netdev_priv(netdev);
|
|
+ char *endp;
|
|
+ unsigned long target;
|
|
+
|
|
+ if (!capable(CAP_NET_ADMIN))
|
|
+ return -EPERM;
|
|
+
|
|
+ target = simple_strtoul(buf, &endp, 0);
|
|
+ if (endp == buf)
|
|
+ return -EBADMSG;
|
|
+
|
|
+ if (target < RX_MIN_TARGET)
|
|
+ target = RX_MIN_TARGET;
|
|
+ if (target > RX_MAX_TARGET)
|
|
+ target = RX_MAX_TARGET;
|
|
+
|
|
+ spin_lock_bh(&np->rx_lock);
|
|
+ if (target < np->rx_min_target)
|
|
+ np->rx_min_target = target;
|
|
+ np->rx_max_target = target;
|
|
+ if (target < np->rx_target)
|
|
+ np->rx_target = target;
|
|
+
|
|
+ network_alloc_rx_buffers(netdev);
|
|
+
|
|
+ spin_unlock_bh(&np->rx_lock);
|
|
+ return len;
|
|
+}
|
|
+
|
|
+static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
|
|
+{
|
|
+ struct net_device *netdev = container_of(cd, struct net_device,
|
|
+ class_dev);
|
|
+ struct netfront_info *info = netdev_priv(netdev);
|
|
+
|
|
+ return sprintf(buf, "%u\n", info->rx_target);
|
|
+}
|
|
+
|
|
+static const struct class_device_attribute xennet_attrs[] = {
|
|
+ __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
|
|
+ __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
|
|
+ __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
|
|
+};
|
|
+
|
|
+static int xennet_sysfs_addif(struct net_device *netdev)
|
|
+{
|
|
+ int i;
|
|
+ int error = 0;
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
|
|
+ error = class_device_create_file(&netdev->class_dev,
|
|
+ &xennet_attrs[i]);
|
|
+ if (error)
|
|
+ goto fail;
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+ fail:
|
|
+ while (--i >= 0)
|
|
+ class_device_remove_file(&netdev->class_dev,
|
|
+ &xennet_attrs[i]);
|
|
+ return error;
|
|
+}
|
|
+
|
|
+static void xennet_sysfs_delif(struct net_device *netdev)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
|
|
+ class_device_remove_file(&netdev->class_dev,
|
|
+ &xennet_attrs[i]);
|
|
+ }
|
|
+}
|
|
+
|
|
+#endif /* CONFIG_SYSFS */
|
|
+
|
|
+
|
|
+/*
|
|
+ * Nothing to do here. Virtual interface is point-to-point and the
|
|
+ * physical interface is probably promiscuous anyway.
|
|
+ */
|
|
+static void network_set_multicast_list(struct net_device *dev)
|
|
+{
|
|
+}
|
|
+
|
|
+static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
|
|
+{
|
|
+ int i, err = 0;
|
|
+ struct net_device *netdev = NULL;
|
|
+ struct netfront_info *np = NULL;
|
|
+
|
|
+ netdev = alloc_etherdev(sizeof(struct netfront_info));
|
|
+ if (!netdev) {
|
|
+ printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
|
|
+ __FUNCTION__);
|
|
+ return ERR_PTR(-ENOMEM);
|
|
+ }
|
|
+
|
|
+ np = netdev_priv(netdev);
|
|
+ np->xbdev = dev;
|
|
+
|
|
+ spin_lock_init(&np->tx_lock);
|
|
+ spin_lock_init(&np->rx_lock);
|
|
+
|
|
+ init_accelerator_vif(np, dev);
|
|
+
|
|
+ skb_queue_head_init(&np->rx_batch);
|
|
+ np->rx_target = RX_DFL_MIN_TARGET;
|
|
+ np->rx_min_target = RX_DFL_MIN_TARGET;
|
|
+ np->rx_max_target = RX_MAX_TARGET;
|
|
+
|
|
+ init_timer(&np->rx_refill_timer);
|
|
+ np->rx_refill_timer.data = (unsigned long)netdev;
|
|
+ np->rx_refill_timer.function = rx_refill_timeout;
|
|
+
|
|
+ /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
|
|
+ for (i = 0; i <= NET_TX_RING_SIZE; i++) {
|
|
+ np->tx_skbs[i] = (void *)((unsigned long) i+1);
|
|
+ np->grant_tx_ref[i] = GRANT_INVALID_REF;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < NET_RX_RING_SIZE; i++) {
|
|
+ np->rx_skbs[i] = NULL;
|
|
+ np->grant_rx_ref[i] = GRANT_INVALID_REF;
|
|
+ }
|
|
+
|
|
+ /* A grant for every tx ring slot */
|
|
+ if (gnttab_alloc_grant_references(TX_MAX_TARGET,
|
|
+ &np->gref_tx_head) < 0) {
|
|
+ printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
|
|
+ err = -ENOMEM;
|
|
+ goto exit;
|
|
+ }
|
|
+ /* A grant for every rx ring slot */
|
|
+ if (gnttab_alloc_grant_references(RX_MAX_TARGET,
|
|
+ &np->gref_rx_head) < 0) {
|
|
+ printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
|
|
+ err = -ENOMEM;
|
|
+ goto exit_free_tx;
|
|
+ }
|
|
+
|
|
+ netdev->open = network_open;
|
|
+ netdev->hard_start_xmit = network_start_xmit;
|
|
+ netdev->stop = network_close;
|
|
+ netdev->get_stats = network_get_stats;
|
|
+ netdev->poll = netif_poll;
|
|
+ netdev->set_multicast_list = network_set_multicast_list;
|
|
+ netdev->uninit = netif_uninit;
|
|
+ netdev->set_mac_address = xennet_set_mac_address;
|
|
+ netdev->change_mtu = xennet_change_mtu;
|
|
+ netdev->weight = 64;
|
|
+ netdev->features = NETIF_F_IP_CSUM;
|
|
+
|
|
+ SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
|
|
+ SET_MODULE_OWNER(netdev);
|
|
+ SET_NETDEV_DEV(netdev, &dev->dev);
|
|
+
|
|
+ np->netdev = netdev;
|
|
+
|
|
+ netfront_carrier_off(np);
|
|
+
|
|
+ return netdev;
|
|
+
|
|
+ exit_free_tx:
|
|
+ gnttab_free_grant_references(np->gref_tx_head);
|
|
+ exit:
|
|
+ free_netdev(netdev);
|
|
+ return ERR_PTR(err);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_INET
|
|
+/*
|
|
+ * We use this notifier to send out a fake ARP reply to reset switches and
|
|
+ * router ARP caches when an IP interface is brought up on a VIF.
|
|
+ */
|
|
+static int
|
|
+inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
|
|
+{
|
|
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
|
|
+ struct net_device *dev = ifa->ifa_dev->dev;
|
|
+
|
|
+ /* UP event and is it one of our devices? */
|
|
+ if (event == NETDEV_UP && dev->open == network_open)
|
|
+ send_fake_arp(dev);
|
|
+
|
|
+ return NOTIFY_DONE;
|
|
+}
|
|
+
|
|
+static struct notifier_block notifier_inetdev = {
|
|
+ .notifier_call = inetdev_notify,
|
|
+ .next = NULL,
|
|
+ .priority = 0
|
|
+};
|
|
+#endif
|
|
+
|
|
+
|
|
+static void netif_disconnect_backend(struct netfront_info *info)
|
|
+{
|
|
+ /* Stop old i/f to prevent errors whilst we rebuild the state. */
|
|
+ spin_lock_bh(&info->rx_lock);
|
|
+ spin_lock_irq(&info->tx_lock);
|
|
+ netfront_carrier_off(info);
|
|
+ spin_unlock_irq(&info->tx_lock);
|
|
+ spin_unlock_bh(&info->rx_lock);
|
|
+
|
|
+ if (info->irq)
|
|
+ unbind_from_irqhandler(info->irq, info->netdev);
|
|
+ info->irq = 0;
|
|
+
|
|
+ end_access(info->tx_ring_ref, info->tx.sring);
|
|
+ end_access(info->rx_ring_ref, info->rx.sring);
|
|
+ info->tx_ring_ref = GRANT_INVALID_REF;
|
|
+ info->rx_ring_ref = GRANT_INVALID_REF;
|
|
+ info->tx.sring = NULL;
|
|
+ info->rx.sring = NULL;
|
|
+}
|
|
+
|
|
+
|
|
+static void end_access(int ref, void *page)
|
|
+{
|
|
+ if (ref != GRANT_INVALID_REF)
|
|
+ gnttab_end_foreign_access(ref, (unsigned long)page);
|
|
+}
|
|
+
|
|
+
|
|
+/* ** Driver registration ** */
|
|
+
|
|
+
|
|
+static const struct xenbus_device_id netfront_ids[] = {
|
|
+ { "vif" },
|
|
+ { "" }
|
|
+};
|
|
+MODULE_ALIAS("xen:vif");
|
|
+
|
|
+
|
|
+static struct xenbus_driver netfront_driver = {
|
|
+ .name = "vif",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = netfront_ids,
|
|
+ .probe = netfront_probe,
|
|
+ .remove = __devexit_p(netfront_remove),
|
|
+ .suspend = netfront_suspend,
|
|
+ .suspend_cancel = netfront_suspend_cancel,
|
|
+ .resume = netfront_resume,
|
|
+ .otherend_changed = backend_changed,
|
|
+};
|
|
+
|
|
+
|
|
+static int __init netif_init(void)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ if (!is_running_on_xen())
|
|
+ return -ENODEV;
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ if (MODPARM_rx_flip && MODPARM_rx_copy) {
|
|
+ WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ if (!MODPARM_rx_flip && !MODPARM_rx_copy)
|
|
+ MODPARM_rx_copy = 1; /* Default is to copy. */
|
|
+#endif
|
|
+
|
|
+ netif_init_accel();
|
|
+
|
|
+ IPRINTK("Initialising virtual ethernet driver.\n");
|
|
+
|
|
+#ifdef CONFIG_INET
|
|
+ (void)register_inetaddr_notifier(&notifier_inetdev);
|
|
+#endif
|
|
+
|
|
+ err = xenbus_register_frontend(&netfront_driver);
|
|
+ if (err) {
|
|
+#ifdef CONFIG_INET
|
|
+ unregister_inetaddr_notifier(&notifier_inetdev);
|
|
+#endif
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+module_init(netif_init);
|
|
+
|
|
+
|
|
+static void __exit netif_exit(void)
|
|
+{
|
|
+#ifdef CONFIG_INET
|
|
+ unregister_inetaddr_notifier(&notifier_inetdev);
|
|
+#endif
|
|
+ xenbus_unregister_driver(&netfront_driver);
|
|
+
|
|
+ netif_exit_accel();
|
|
+}
|
|
+module_exit(netif_exit);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/netfront/netfront.h 2010-02-24 13:13:46.000000000 +0100
|
|
@@ -0,0 +1,274 @@
|
|
+/******************************************************************************
|
|
+ * Virtual network driver for conversing with remote driver backends.
|
|
+ *
|
|
+ * Copyright (c) 2002-2005, K A Fraser
|
|
+ * Copyright (c) 2005, XenSource Ltd
|
|
+ * Copyright (C) 2007 Solarflare Communications, Inc.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef NETFRONT_H
|
|
+#define NETFRONT_H
|
|
+
|
|
+#include <xen/interface/io/netif.h>
|
|
+#include <linux/netdevice.h>
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/list.h>
|
|
+
|
|
+#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
|
|
+#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
|
|
+
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Function pointer table for hooks into a network acceleration
|
|
+ * plugin. These are called at appropriate points from the netfront
|
|
+ * driver
|
|
+ */
|
|
+struct netfront_accel_hooks {
|
|
+ /*
|
|
+ * new_device: Accelerator hook to ask the plugin to support a
|
|
+ * new network interface
|
|
+ */
|
|
+ int (*new_device)(struct net_device *net_dev, struct xenbus_device *dev);
|
|
+ /*
|
|
+ * remove: Opposite of new_device
|
|
+ */
|
|
+ int (*remove)(struct xenbus_device *dev);
|
|
+ /*
|
|
+ * The net_device is being polled, check the accelerated
|
|
+ * hardware for any pending packets
|
|
+ */
|
|
+ int (*netdev_poll)(struct net_device *dev, int *pbudget);
|
|
+ /*
|
|
+ * start_xmit: Used to give the accelerated plugin the option
|
|
+ * of sending a packet. Returns non-zero if has done so, or
|
|
+ * zero to decline and force the packet onto normal send
|
|
+ * path
|
|
+ */
|
|
+ int (*start_xmit)(struct sk_buff *skb, struct net_device *dev);
|
|
+ /*
|
|
+ * start/stop_napi_interrupts Used by netfront to indicate
|
|
+ * when napi interrupts should be enabled or disabled
|
|
+ */
|
|
+ int (*start_napi_irq)(struct net_device *dev);
|
|
+ void (*stop_napi_irq)(struct net_device *dev);
|
|
+ /*
|
|
+ * Called before re-enabling the TX queue to check the fast
|
|
+ * path has slots too
|
|
+ */
|
|
+ int (*check_ready)(struct net_device *dev);
|
|
+ /*
|
|
+ * Get the fastpath network statistics
|
|
+ */
|
|
+ int (*get_stats)(struct net_device *dev,
|
|
+ struct net_device_stats *stats);
|
|
+};
|
|
+
|
|
+
|
|
+/* Version of API/protocol for communication between netfront and
|
|
+ acceleration plugin supported */
|
|
+#define NETFRONT_ACCEL_VERSION 0x00010003
|
|
+
|
|
+/*
|
|
+ * Per-netfront device state for the accelerator. This is used to
|
|
+ * allow efficient per-netfront device access to the accelerator
|
|
+ * hooks
|
|
+ */
|
|
+struct netfront_accel_vif_state {
|
|
+ struct list_head link;
|
|
+
|
|
+ struct xenbus_device *dev;
|
|
+ struct netfront_info *np;
|
|
+ struct netfront_accel_hooks *hooks;
|
|
+
|
|
+ /* Watch on the accelerator configuration value */
|
|
+ struct xenbus_watch accel_watch;
|
|
+ /* Work item to process change in accelerator */
|
|
+ struct work_struct accel_work;
|
|
+ /* The string from xenbus last time accel_watch fired */
|
|
+ char *accel_frontend;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Per-accelerator state stored in netfront. These form a list that
|
|
+ * is used to track which devices are accelerated by which plugins,
|
|
+ * and what plugins are available/have been requested
|
|
+ */
|
|
+struct netfront_accelerator {
|
|
+ /* Used to make a list */
|
|
+ struct list_head link;
|
|
+ /* ID of the accelerator */
|
|
+ int id;
|
|
+ /*
|
|
+ * String describing the accelerator. Currently this is the
|
|
+ * name of the accelerator module. This is provided by the
|
|
+ * backend accelerator through xenstore
|
|
+ */
|
|
+ char *frontend;
|
|
+ /* The hooks into the accelerator plugin module */
|
|
+ struct netfront_accel_hooks *hooks;
|
|
+
|
|
+ /*
|
|
+ * List of per-netfront device state (struct
|
|
+ * netfront_accel_vif_state) for each netfront device that is
|
|
+ * using this accelerator
|
|
+ */
|
|
+ struct list_head vif_states;
|
|
+ spinlock_t vif_states_lock;
|
|
+};
|
|
+/* Per-device netfront state: shared rings, locks, skb and grant tracking, accelerator state. */
|
|
+struct netfront_info {
|
|
+ struct list_head list;
|
|
+ struct net_device *netdev;
|
|
+
|
|
+ struct net_device_stats stats;
|
|
+
|
|
+ struct netif_tx_front_ring tx;
|
|
+ struct netif_rx_front_ring rx;
|
|
+
|
|
+ spinlock_t tx_lock;
|
|
+ spinlock_t rx_lock;
|
|
+
|
|
+ unsigned int irq;
|
|
+ unsigned int copying_receiver;
|
|
+ unsigned int carrier;
|
|
+
|
|
+ /* Receive-ring batched refills. */
|
|
+#define RX_MIN_TARGET 8
|
|
+#define RX_DFL_MIN_TARGET 64
|
|
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
|
|
+ unsigned rx_min_target, rx_max_target, rx_target;
|
|
+ struct sk_buff_head rx_batch;
|
|
+
|
|
+ struct timer_list rx_refill_timer;
|
|
+
|
|
+ /*
|
|
+ * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
|
|
+ * is an index into a chain of free entries.
|
|
+ */
|
|
+ struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
|
|
+ struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
|
|
+ /* Transmit-side cap is sized from the TX ring, mirroring RX_MAX_TARGET. */
|
|
+#define TX_MAX_TARGET min_t(int, NET_TX_RING_SIZE, 256)
|
|
+ grant_ref_t gref_tx_head;
|
|
+ grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
|
|
+ grant_ref_t gref_rx_head;
|
|
+ grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
|
|
+
|
|
+ struct xenbus_device *xbdev;
|
|
+ int tx_ring_ref;
|
|
+ int rx_ring_ref;
|
|
+ u8 mac[ETH_ALEN];
|
|
+
|
|
+ unsigned long rx_pfn_array[NET_RX_RING_SIZE];
|
|
+ struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
|
|
+ struct mmu_update rx_mmu[NET_RX_RING_SIZE];
|
|
+
|
|
+ /* Private pointer to state internal to accelerator module */
|
|
+ void *accel_priv;
|
|
+ /* The accelerator used by this netfront device */
|
|
+ struct netfront_accelerator *accelerator;
|
|
+ /* The accelerator state for this netfront device */
|
|
+ struct netfront_accel_vif_state accel_vif_state;
|
|
+};
|
|
+
|
|
+
|
|
+/* Exported Functions */
|
|
+
|
|
+/*
|
|
+ * Called by an accelerator plugin module when it has loaded.
|
|
+ *
|
|
+ * frontend: the string describing the accelerator, currently the module name
|
|
+ * hooks: the hooks for netfront to use to call into the accelerator
|
|
+ * version: the version of API between frontend and plugin requested
|
|
+ *
|
|
+ * return: 0 on success, <0 on error, >0 (with version supported) on
|
|
+ * version mismatch
|
|
+ */
|
|
+extern int netfront_accelerator_loaded(int version, const char *frontend,
|
|
+ struct netfront_accel_hooks *hooks);
|
|
+
|
|
+/*
|
|
+ * Called by an accelerator plugin module when it is about to unload.
|
|
+ *
|
|
+ * frontend: the string describing the accelerator. Must match the
|
|
+ * one passed to netfront_accelerator_loaded()
|
|
+ */
|
|
+extern void netfront_accelerator_stop(const char *frontend);
|
|
+
|
|
+/*
|
|
+ * Called by an accelerator before waking the net device's TX queue to
|
|
+ * ensure the slow path has available slots. Returns true if OK to
|
|
+ * wake, false if still busy
|
|
+ */
|
|
+extern int netfront_check_queue_ready(struct net_device *net_dev);
|
|
+
|
|
+
|
|
+/* Internal-to-netfront Functions */
|
|
+
|
|
+/*
|
|
+ * Call into accelerator and check to see if it has tx space before we
|
|
+ * wake the net device's TX queue. Returns true if OK to wake, false
|
|
+ * if still busy
|
|
+ */
|
|
+extern
|
|
+int netfront_check_accelerator_queue_ready(struct net_device *dev,
|
|
+ struct netfront_info *np);
|
|
+extern
|
|
+int netfront_accelerator_call_remove(struct netfront_info *np,
|
|
+ struct xenbus_device *dev);
|
|
+extern
|
|
+int netfront_accelerator_suspend(struct netfront_info *np,
|
|
+ struct xenbus_device *dev);
|
|
+extern
|
|
+int netfront_accelerator_suspend_cancel(struct netfront_info *np,
|
|
+ struct xenbus_device *dev);
|
|
+extern
|
|
+void netfront_accelerator_resume(struct netfront_info *np,
|
|
+ struct xenbus_device *dev);
|
|
+extern
|
|
+void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np,
|
|
+ struct net_device *dev);
|
|
+extern
|
|
+int netfront_accelerator_call_get_stats(struct netfront_info *np,
|
|
+ struct net_device *dev);
|
|
+extern
|
|
+void netfront_accelerator_add_watch(struct netfront_info *np);
|
|
+
|
|
+extern
|
|
+void netif_init_accel(void);
|
|
+extern
|
|
+void netif_exit_accel(void);
|
|
+
|
|
+extern
|
|
+void init_accelerator_vif(struct netfront_info *np,
|
|
+ struct xenbus_device *dev);
|
|
+#endif /* NETFRONT_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/Makefile 2008-07-21 11:00:33.000000000 +0200
|
|
@@ -0,0 +1,17 @@
|
|
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
|
|
+
|
|
+pciback-y := pci_stub.o pciback_ops.o xenbus.o
|
|
+pciback-y += conf_space.o conf_space_header.o \
|
|
+ conf_space_capability.o \
|
|
+ conf_space_capability_vpd.o \
|
|
+ conf_space_capability_pm.o \
|
|
+ conf_space_quirks.o
|
|
+pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
|
|
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
|
|
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
|
|
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
|
|
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
|
|
+
|
|
+ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y)
|
|
+EXTRA_CFLAGS += -DDEBUG
|
|
+endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space.c 2009-05-04 10:01:03.000000000 +0200
|
|
@@ -0,0 +1,435 @@
|
|
+/*
|
|
+ * PCI Backend - Functions for creating a virtual configuration space for
|
|
+ * exported PCI Devices.
|
|
+ * It's dangerous to allow PCI Driver Domains to change their
|
|
+ * device's resources (memory, i/o ports, interrupts). We need to
|
|
+ * restrict changes to certain PCI Configuration registers:
|
|
+ * BARs, INTERRUPT_PIN, most registers in the header...
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/pci.h>
|
|
+#include "pciback.h"
|
|
+#include "conf_space.h"
|
|
+#include "conf_space_quirks.h"
|
|
+
|
|
+static int permissive;
|
|
+module_param(permissive, bool, 0644);
|
|
+
|
|
+#define DEFINE_PCI_CONFIG(op,size,type) \
|
|
+int pciback_##op##_config_##size \
|
|
+(struct pci_dev *dev, int offset, type value, void *data) \
|
|
+{ \
|
|
+ return pci_##op##_config_##size (dev, offset, value); \
|
|
+}
|
|
+
|
|
+DEFINE_PCI_CONFIG(read, byte, u8 *)
|
|
+DEFINE_PCI_CONFIG(read, word, u16 *)
|
|
+DEFINE_PCI_CONFIG(read, dword, u32 *)
|
|
+
|
|
+DEFINE_PCI_CONFIG(write, byte, u8)
|
|
+DEFINE_PCI_CONFIG(write, word, u16)
|
|
+DEFINE_PCI_CONFIG(write, dword, u32)
|
|
+
|
|
+static int conf_space_read(struct pci_dev *dev,
|
|
+ const struct config_field_entry *entry,
|
|
+ int offset, u32 *value)
|
|
+{
|
|
+ int ret = 0;
|
|
+ const struct config_field *field = entry->field;
|
|
+
|
|
+ *value = 0;
|
|
+
|
|
+ switch (field->size) {
|
|
+ case 1:
|
|
+ if (field->u.b.read)
|
|
+ ret = field->u.b.read(dev, offset, (u8 *) value,
|
|
+ entry->data);
|
|
+ break;
|
|
+ case 2:
|
|
+ if (field->u.w.read)
|
|
+ ret = field->u.w.read(dev, offset, (u16 *) value,
|
|
+ entry->data);
|
|
+ break;
|
|
+ case 4:
|
|
+ if (field->u.dw.read)
|
|
+ ret = field->u.dw.read(dev, offset, value, entry->data);
|
|
+ break;
|
|
+ }
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int conf_space_write(struct pci_dev *dev,
|
|
+ const struct config_field_entry *entry,
|
|
+ int offset, u32 value)
|
|
+{
|
|
+ int ret = 0;
|
|
+ const struct config_field *field = entry->field;
|
|
+
|
|
+ switch (field->size) {
|
|
+ case 1:
|
|
+ if (field->u.b.write)
|
|
+ ret = field->u.b.write(dev, offset, (u8) value,
|
|
+ entry->data);
|
|
+ break;
|
|
+ case 2:
|
|
+ if (field->u.w.write)
|
|
+ ret = field->u.w.write(dev, offset, (u16) value,
|
|
+ entry->data);
|
|
+ break;
|
|
+ case 4:
|
|
+ if (field->u.dw.write)
|
|
+ ret = field->u.dw.write(dev, offset, value,
|
|
+ entry->data);
|
|
+ break;
|
|
+ }
|
|
+ return ret;
|
|
+}
|
|
+/* Mask covering the low size bytes: 0xff for 1, 0xffff for 2, 0xffffffff for any other size. */
|
|
+static inline u32 get_mask(int size)
|
|
+{
|
|
+ if (size == 1)
|
|
+ return 0xff;
|
|
+ else if (size == 2)
|
|
+ return 0xffff;
|
|
+ else
|
|
+ return 0xffffffff;
|
|
+}
|
|
+
|
|
+static inline int valid_request(int offset, int size)
|
|
+{
|
|
+ /* Accept only 1-, 2- or 4-byte requests whose offset is a multiple of size (no un-aligned requests) */
|
|
+ if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
|
|
+ return 1;
|
|
+ return 0;
|
|
+}
|
|
+/* Overlay new_val onto val under new_val_mask, both shifted by offset bytes (left if >= 0, right if < 0). */
|
|
+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
|
|
+ int offset)
|
|
+{
|
|
+ if (offset >= 0) {
|
|
+ new_val_mask <<= (offset * 8);
|
|
+ new_val <<= (offset * 8);
|
|
+ } else {
|
|
+ new_val_mask >>= (offset * -8);
|
|
+ new_val >>= (offset * -8);
|
|
+ }
|
|
+ val = (val & ~new_val_mask) | (new_val & new_val_mask); /* bits outside the shifted mask keep val */
|
|
+
|
|
+ return val;
|
|
+}
|
|
+/* Map PCIBIOS_* codes to XEN_PCI_ERR_* codes; any other value is returned unchanged. */
|
|
+static int pcibios_err_to_errno(int err)
|
|
+{
|
|
+ switch (err) {
|
|
+ case PCIBIOS_SUCCESSFUL:
|
|
+ return XEN_PCI_ERR_success;
|
|
+ case PCIBIOS_DEVICE_NOT_FOUND:
|
|
+ return XEN_PCI_ERR_dev_not_found;
|
|
+ case PCIBIOS_BAD_REGISTER_NUMBER:
|
|
+ return XEN_PCI_ERR_invalid_offset;
|
|
+ case PCIBIOS_FUNC_NOT_SUPPORTED:
|
|
+ return XEN_PCI_ERR_not_implemented;
|
|
+ case PCIBIOS_SET_FAILED:
|
|
+ return XEN_PCI_ERR_access_denied;
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+/* Read size bytes at offset: real config space first, then overlay every overlapping virtual field. */
|
|
+int pciback_config_read(struct pci_dev *dev, int offset, int size,
|
|
+ u32 * ret_val)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
|
|
+ const struct config_field_entry *cfg_entry;
|
|
+ const struct config_field *field;
|
|
+ int req_start, req_end, field_start, field_end;
|
|
+ /* if read fails for any reason, return 0 (as if device didn't respond) */
|
|
+ u32 value = 0, tmp_val;
|
|
+
|
|
+ if (unlikely(verbose_request))
|
|
+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
|
|
+ pci_name(dev), size, offset);
|
|
+
|
|
+ if (!valid_request(offset, size)) {
|
|
+ err = XEN_PCI_ERR_invalid_offset;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ /* Get the real value first, then modify as appropriate */
|
|
+ switch (size) {
|
|
+ case 1:
|
|
+ err = pci_read_config_byte(dev, offset, (u8 *) & value);
|
|
+ break;
|
|
+ case 2:
|
|
+ err = pci_read_config_word(dev, offset, (u16 *) & value);
|
|
+ break;
|
|
+ case 4:
|
|
+ err = pci_read_config_dword(dev, offset, &value);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
|
|
+ field = cfg_entry->field;
|
|
+
|
|
+ req_start = offset;
|
|
+ req_end = offset + size;
|
|
+ field_start = OFFSET(cfg_entry);
|
|
+ field_end = OFFSET(cfg_entry) + field->size;
|
|
+ /* Half-open interval overlap; also matches a field lying strictly inside the request. */
|
|
+ if (req_end > field_start
|
|
+ && field_end > req_start) {
|
|
+ err = conf_space_read(dev, cfg_entry, field_start,
|
|
+ &tmp_val);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ value = merge_value(value, tmp_val,
|
|
+ get_mask(field->size),
|
|
+ field_start - req_start);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ out:
|
|
+ if (unlikely(verbose_request))
|
|
+ printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
|
|
+ pci_name(dev), size, offset, value);
|
|
+
|
|
+ *ret_val = value;
|
|
+ return pcibios_err_to_errno(err);
|
|
+}
|
|
+/* Write value (size bytes) at offset via the overlapping virtual fields; unhandled writes need permissive mode. */
|
|
+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
|
|
+{
|
|
+ int err = 0, handled = 0;
|
|
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
|
|
+ const struct config_field_entry *cfg_entry;
|
|
+ const struct config_field *field;
|
|
+ u32 tmp_val;
|
|
+ int req_start, req_end, field_start, field_end;
|
|
+
|
|
+ if (unlikely(verbose_request))
|
|
+ printk(KERN_DEBUG
|
|
+ "pciback: %s: write request %d bytes at 0x%x = %x\n",
|
|
+ pci_name(dev), size, offset, value);
|
|
+
|
|
+ if (!valid_request(offset, size))
|
|
+ return XEN_PCI_ERR_invalid_offset;
|
|
+
|
|
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
|
|
+ field = cfg_entry->field;
|
|
+
|
|
+ req_start = offset;
|
|
+ req_end = offset + size;
|
|
+ field_start = OFFSET(cfg_entry);
|
|
+ field_end = OFFSET(cfg_entry) + field->size;
|
|
+ /* Half-open interval overlap; also matches a field lying strictly inside the request. */
|
|
+ if (req_end > field_start
|
|
+ && field_end > req_start) {
|
|
+ tmp_val = 0;
|
|
+
|
|
+ err = pciback_config_read(dev, field_start,
|
|
+ field->size, &tmp_val);
|
|
+ if (err)
|
|
+ break;
|
|
+
|
|
+ tmp_val = merge_value(tmp_val, value, get_mask(size),
|
|
+ req_start - field_start);
|
|
+
|
|
+ err = conf_space_write(dev, cfg_entry, field_start,
|
|
+ tmp_val);
|
|
+
|
|
+ /* handled is set true here, but not every byte
|
|
+ * may have been written! Properly detecting if
|
|
+ * every byte is handled is unnecessary as the
|
|
+ * flag is used to detect devices that need
|
|
+ * special helpers to work correctly.
|
|
+ */
|
|
+ handled = 1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (!handled && !err) {
|
|
+ /* By default, anything not specifically handled above is
|
|
+ * read-only. The permissive flag changes this behavior so
|
|
+ * that anything not specifically handled above is writable.
|
|
+ * This means that some fields may still be read-only because
|
|
+ * they have entries in the config_field list that intercept
|
|
+ * the write and do nothing. */
|
|
+ if (dev_data->permissive || permissive) {
|
|
+ switch (size) {
|
|
+ case 1:
|
|
+ err = pci_write_config_byte(dev, offset,
|
|
+ (u8) value);
|
|
+ break;
|
|
+ case 2:
|
|
+ err = pci_write_config_word(dev, offset,
|
|
+ (u16) value);
|
|
+ break;
|
|
+ case 4:
|
|
+ err = pci_write_config_dword(dev, offset,
|
|
+ (u32) value);
|
|
+ break;
|
|
+ }
|
|
+ } else if (!dev_data->warned_on_write) {
|
|
+ dev_data->warned_on_write = 1;
|
|
+ dev_warn(&dev->dev, "Driver tried to write to a "
|
|
+ "read-only configuration space field at offset "
|
|
+ "0x%x, size %d. This may be harmless, but if "
|
|
+ "you have problems with your device:\n"
|
|
+ "1) see permissive attribute in sysfs\n"
|
|
+ "2) report problems to the xen-devel "
|
|
+ "mailing list along with details of your "
|
|
+ "device obtained from lspci.\n", offset, size);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return pcibios_err_to_errno(err);
|
|
+}
|
|
+/* Clean, unlink and free every config field entry whose field has a clean hook; others stay listed. */
|
|
+void pciback_config_free_dyn_fields(struct pci_dev *dev)
|
|
+{
|
|
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
|
|
+ struct config_field_entry *cfg_entry, *t;
|
|
+ const struct config_field *field;
|
|
+
|
|
+ dev_dbg(&dev->dev,
|
|
+ "free-ing dynamically allocated virtual configuration space fields\n");
|
|
+ if (!dev_data)
|
|
+ return;
|
|
+
|
|
+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
|
|
+ field = cfg_entry->field;
|
|
+
|
|
+ if (field->clean) {
|
|
+ field->clean((struct config_field *)field);
|
|
+
|
|
+ /* kfree(NULL) is a no-op, so data needs no guard */
|
|
+ kfree(cfg_entry->data);
|
|
+
|
|
+ list_del(&cfg_entry->list);
|
|
+ kfree(cfg_entry);
|
|
+ }
|
|
+
|
|
+ }
|
|
+}
|
|
+
|
|
+void pciback_config_reset_dev(struct pci_dev *dev)
|
|
+{
|
|
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
|
|
+ const struct config_field_entry *cfg_entry;
|
|
+ const struct config_field *field;
|
|
+
|
|
+ dev_dbg(&dev->dev, "resetting virtual configuration space\n");
|
|
+ if (!dev_data)
|
|
+ return;
|
|
+
|
|
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
|
|
+ field = cfg_entry->field;
|
|
+
|
|
+ if (field->reset)
|
|
+ field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
|
|
+ }
|
|
+}
|
|
+
|
|
+void pciback_config_free_dev(struct pci_dev *dev)
|
|
+{
|
|
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
|
|
+ struct config_field_entry *cfg_entry, *t;
|
|
+ const struct config_field *field;
|
|
+
|
|
+ dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
|
|
+ if (!dev_data)
|
|
+ return;
|
|
+
|
|
+ list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
|
|
+ list_del(&cfg_entry->list);
|
|
+
|
|
+ field = cfg_entry->field;
|
|
+
|
|
+ if (field->release)
|
|
+ field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
|
|
+
|
|
+ kfree(cfg_entry);
|
|
+ }
|
|
+}
|
|
+/* Allocate an entry for field at base_offset and append it to dev's config_fields list; returns 0 or an error. */
|
|
+int pciback_config_add_field_offset(struct pci_dev *dev,
|
|
+ const struct config_field *field,
|
|
+ unsigned int base_offset)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
|
|
+ struct config_field_entry *cfg_entry;
|
|
+ void *tmp;
|
|
+
|
|
+ cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
|
|
+ if (!cfg_entry) {
|
|
+ err = -ENOMEM;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ cfg_entry->data = NULL;
|
|
+ cfg_entry->field = field;
|
|
+ cfg_entry->base_offset = base_offset;
|
|
+
|
|
+ /* Skip duplicate fields. NOTE(review): a non-zero pciback_field_is_dup() result is also returned to the caller - confirm intent. */
|
|
+ err = pciback_field_is_dup(dev,OFFSET(cfg_entry));
|
|
+ if (err)
|
|
+ goto out;
|
|
+ /* field->init yields the per-entry data pointer, or an errno encoded with ERR_PTR() */
|
|
+ if (field->init) {
|
|
+ tmp = field->init(dev, OFFSET(cfg_entry));
|
|
+
|
|
+ if (IS_ERR(tmp)) {
|
|
+ err = PTR_ERR(tmp);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ cfg_entry->data = tmp;
|
|
+ }
|
|
+
|
|
+ dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
|
|
+ OFFSET(cfg_entry));
|
|
+ list_add_tail(&cfg_entry->list, &dev_data->config_fields);
|
|
+ /* Every failure path jumps here before the entry is linked, so it is just freed. */
|
|
+ out:
|
|
+ if (err)
|
|
+ kfree(cfg_entry);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/* This sets up the device's virtual configuration space to keep track of
|
|
+ * certain registers (like the base address registers (BARs)) so that we can
|
|
+ * keep the client from manipulating them directly.
|
|
+ */
|
|
+int pciback_config_init_dev(struct pci_dev *dev)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
|
|
+
|
|
+ dev_dbg(&dev->dev, "initializing virtual configuration space\n");
|
|
+
|
|
+ INIT_LIST_HEAD(&dev_data->config_fields);
|
|
+
|
|
+ err = pciback_config_header_add_fields(dev);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ err = pciback_config_capability_add_fields(dev);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ err = pciback_config_quirks_init(dev);
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+int pciback_config_init(void)
|
|
+{
|
|
+ return pciback_config_capability_init();
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space.h 2008-10-29 09:55:56.000000000 +0100
|
|
@@ -0,0 +1,126 @@
|
|
+/*
|
|
+ * PCI Backend - Common data structures for overriding the configuration space
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
|
|
+#define __XEN_PCIBACK_CONF_SPACE_H__
|
|
+
|
|
+#include <linux/list.h>
|
|
+#include <linux/err.h>
|
|
+
|
|
+/* conf_field_init can return an errno in a ptr with ERR_PTR() */
|
|
+typedef void *(*conf_field_init) (struct pci_dev * dev, int offset);
|
|
+typedef void (*conf_field_reset) (struct pci_dev * dev, int offset, void *data);
|
|
+typedef void (*conf_field_free) (struct pci_dev * dev, int offset, void *data);
|
|
+
|
|
+typedef int (*conf_dword_write) (struct pci_dev * dev, int offset, u32 value,
|
|
+ void *data);
|
|
+typedef int (*conf_word_write) (struct pci_dev * dev, int offset, u16 value,
|
|
+ void *data);
|
|
+typedef int (*conf_byte_write) (struct pci_dev * dev, int offset, u8 value,
|
|
+ void *data);
|
|
+typedef int (*conf_dword_read) (struct pci_dev * dev, int offset, u32 * value,
|
|
+ void *data);
|
|
+typedef int (*conf_word_read) (struct pci_dev * dev, int offset, u16 * value,
|
|
+ void *data);
|
|
+typedef int (*conf_byte_read) (struct pci_dev * dev, int offset, u8 * value,
|
|
+ void *data);
|
|
+
|
|
+/* These are the fields within the configuration space which we
|
|
+ * are interested in intercepting reads/writes to and changing their
|
|
+ * values.
|
|
+ */
|
|
+struct config_field {
|
|
+ unsigned int offset;
|
|
+ unsigned int size;
|
|
+ unsigned int mask;
|
|
+ conf_field_init init;
|
|
+ conf_field_reset reset;
|
|
+ conf_field_free release;
|
|
+ void (*clean) (struct config_field * field);
|
|
+ union {
|
|
+ struct {
|
|
+ conf_dword_write write;
|
|
+ conf_dword_read read;
|
|
+ } dw;
|
|
+ struct {
|
|
+ conf_word_write write;
|
|
+ conf_word_read read;
|
|
+ } w;
|
|
+ struct {
|
|
+ conf_byte_write write;
|
|
+ conf_byte_read read;
|
|
+ } b;
|
|
+ } u;
|
|
+ struct list_head list;
|
|
+};
|
|
+
|
|
+struct config_field_entry {
|
|
+ struct list_head list;
|
|
+ const struct config_field *field;
|
|
+ unsigned int base_offset;
|
|
+ void *data;
|
|
+};
|
|
+
|
|
+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
|
|
+
|
|
+/* Add fields to a device - the add_fields helpers expect to get a pointer to
|
|
+ * the first entry in an array (of which the ending is marked by size==0)
|
|
+ */
|
|
+int pciback_config_add_field_offset(struct pci_dev *dev,
|
|
+ const struct config_field *field,
|
|
+ unsigned int offset);
|
|
+
|
|
+static inline int pciback_config_add_field(struct pci_dev *dev,
|
|
+ const struct config_field *field)
|
|
+{
|
|
+ return pciback_config_add_field_offset(dev, field, 0);
|
|
+}
|
|
+
|
|
+static inline int pciback_config_add_fields(struct pci_dev *dev,
|
|
+ const struct config_field *field)
|
|
+{
|
|
+ int i, err = 0;
|
|
+ for (i = 0; field[i].size != 0; i++) {
|
|
+ err = pciback_config_add_field(dev, &field[i]);
|
|
+ if (err)
|
|
+ break;
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
|
|
+ const struct config_field *field,
|
|
+ unsigned int offset)
|
|
+{
|
|
+ int i, err = 0;
|
|
+ for (i = 0; field[i].size != 0; i++) {
|
|
+ err = pciback_config_add_field_offset(dev, &field[i], offset);
|
|
+ if (err)
|
|
+ break;
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/* Read/Write the real configuration space */
|
|
+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 * value,
|
|
+ void *data);
|
|
+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 * value,
|
|
+ void *data);
|
|
+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 * value,
|
|
+ void *data);
|
|
+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
|
|
+ void *data);
|
|
+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
|
|
+ void *data);
|
|
+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
|
|
+ void *data);
|
|
+
|
|
+int pciback_config_capability_init(void);
|
|
+
|
|
+int pciback_config_header_add_fields(struct pci_dev *dev);
|
|
+int pciback_config_capability_add_fields(struct pci_dev *dev);
|
|
+
|
|
+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_capability.c 2008-10-29 09:55:56.000000000 +0100
|
|
@@ -0,0 +1,69 @@
|
|
+/*
|
|
+ * PCI Backend - Handles the virtual fields found on the capability lists
|
|
+ * in the configuration space.
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/pci.h>
|
|
+#include "pciback.h"
|
|
+#include "conf_space.h"
|
|
+#include "conf_space_capability.h"
|
|
+
|
|
+static LIST_HEAD(capabilities);
|
|
+
|
|
+static const struct config_field caplist_header[] = {
|
|
+ {
|
|
+ .offset = PCI_CAP_LIST_ID,
|
|
+ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
|
|
+ .u.w.read = pciback_read_config_word,
|
|
+ .u.w.write = NULL,
|
|
+ },
|
|
+ {}
|
|
+};
|
|
+
|
|
+static inline void register_capability(struct pciback_config_capability *cap)
|
|
+{
|
|
+ list_add_tail(&cap->cap_list, &capabilities);
|
|
+}
|
|
+
|
|
+int pciback_config_capability_add_fields(struct pci_dev *dev)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct pciback_config_capability *cap;
|
|
+ int cap_offset;
|
|
+
|
|
+ list_for_each_entry(cap, &capabilities, cap_list) {
|
|
+ cap_offset = pci_find_capability(dev, cap->capability);
|
|
+ if (cap_offset) {
|
|
+ dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
|
|
+ cap->capability, cap_offset);
|
|
+
|
|
+ err = pciback_config_add_fields_offset(dev,
|
|
+ caplist_header,
|
|
+ cap_offset);
|
|
+ if (err)
|
|
+ goto out;
|
|
+ err = pciback_config_add_fields_offset(dev,
|
|
+ cap->fields,
|
|
+ cap_offset);
|
|
+ if (err)
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+extern struct pciback_config_capability pciback_config_capability_vpd;
|
|
+extern struct pciback_config_capability pciback_config_capability_pm;
|
|
+
|
|
+int pciback_config_capability_init(void)
|
|
+{
|
|
+ register_capability(&pciback_config_capability_vpd);
|
|
+ register_capability(&pciback_config_capability_pm);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_capability.h 2008-10-29 09:55:56.000000000 +0100
|
|
@@ -0,0 +1,23 @@
|
|
+/*
|
|
+ * PCI Backend - Data structures for special overlays for structures on
|
|
+ * the capability list.
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
|
|
+#define __PCIBACK_CONFIG_CAPABILITY_H__
|
|
+
|
|
+#include <linux/pci.h>
|
|
+#include <linux/list.h>
|
|
+
|
|
+struct pciback_config_capability {
|
|
+ struct list_head cap_list;
|
|
+
|
|
+ int capability;
|
|
+
|
|
+ /* If the device has the capability found above, add these fields */
|
|
+ const struct config_field *fields;
|
|
+};
|
|
+
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_capability_msi.c 2008-09-15 13:40:15.000000000 +0200
|
|
@@ -0,0 +1,79 @@
|
|
+/*
|
|
+ * PCI Backend -- Configuration overlay for MSI capability
|
|
+ */
|
|
+#include <linux/pci.h>
|
|
+#include <linux/slab.h>
|
|
+#include "conf_space.h"
|
|
+#include "conf_space_capability.h"
|
|
+#include <xen/interface/io/pciif.h>
|
|
+#include "pciback.h"
|
|
+
|
|
+/*
+ * Enable MSI on @dev on behalf of the frontend.
+ *
+ * On success dev->irq is handed back in op->value and 0 is returned.  On
+ * failure the error is logged, op->value is cleared and
+ * XEN_PCI_ERR_op_failed is returned.
+ */
+int pciback_enable_msi(struct pciback_device *pdev,
+		       struct pci_dev *dev, struct xen_pci_op *op)
+{
+	int guest = pdev->xdev->otherend_id;
+	int rc = pci_enable_msi(dev);
+
+	if (!rc) {
+		op->value = dev->irq;
+		return 0;
+	}
+
+	printk("error enable msi for guest %x status %x\n", guest, rc);
+	op->value = 0;
+	return XEN_PCI_ERR_op_failed;
+}
|
|
+
|
|
+/*
+ * Disable MSI on @dev and report the device's dev->irq value, as it stands
+ * after the disable, back to the frontend in op->value.  Always returns 0.
+ */
+int pciback_disable_msi(struct pciback_device *pdev,
+			struct pci_dev *dev, struct xen_pci_op *op)
+{
+	pci_disable_msi(dev);
+	op->value = dev->irq;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Enable MSI-X on @dev using the entry table supplied in @op.
+ *
+ * On entry op->value holds the number of entries in op->msix_entries[].
+ * The entries are copied into a temporary kernel array, passed to
+ * pci_enable_msix(), and copied back afterwards.  On exit op->value holds
+ * the pci_enable_msix() result.
+ *
+ * Returns -EINVAL if more than SH_INFO_MAX_VEC entries are requested,
+ * -ENOMEM if the temporary array cannot be allocated, otherwise the value
+ * returned by pci_enable_msix().
+ */
+int pciback_enable_msix(struct pciback_device *pdev,
+			struct pci_dev *dev, struct xen_pci_op *op)
+{
+	/*
+	 * Take the entry count exactly once.  It is used for the bound check,
+	 * the allocation size and both copy loops; reading op->value again at
+	 * each use would let those disagree if the request changed in between.
+	 */
+	const unsigned int nvec = op->value;
+	struct msix_entry *entries;
+	unsigned int i;
+	int result;
+
+	if (nvec > SH_INFO_MAX_VEC)
+		return -EINVAL;
+
+	entries = kmalloc(nvec * sizeof(*entries), GFP_KERNEL);
+	if (entries == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < nvec; i++) {
+		entries[i].entry = op->msix_entries[i].entry;
+		entries[i].vector = op->msix_entries[i].vector;
+	}
+
+	result = pci_enable_msix(dev, entries, nvec);
+
+	/* Hand the (possibly updated) table back to the requester. */
+	for (i = 0; i < nvec; i++) {
+		op->msix_entries[i].entry = entries[i].entry;
+		op->msix_entries[i].vector = entries[i].vector;
+	}
+
+	kfree(entries);
+
+	op->value = result;
+
+	return result;
+}
|
|
+
|
|
+/*
+ * Disable MSI-X on @dev and report the device's dev->irq value, as it
+ * stands after the disable, back to the frontend in op->value.  Always
+ * returns 0.
+ */
+int pciback_disable_msix(struct pciback_device *pdev,
+			 struct pci_dev *dev, struct xen_pci_op *op)
+{
+	pci_disable_msix(dev);
+	op->value = dev->irq;
+
+	return 0;
+}
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_capability_pm.c 2008-10-29 09:55:56.000000000 +0100
|
|
@@ -0,0 +1,126 @@
|
|
+/*
|
|
+ * PCI Backend - Configuration space overlay for power management
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/pci.h>
|
|
+#include "conf_space.h"
|
|
+#include "conf_space_capability.h"
|
|
+
|
|
+/*
+ * Read the PM capabilities word and hide the PME support bits
+ * (PCI_PM_CAP_PME_MASK) from the caller.
+ *
+ * Returns the pci_read_config_word() result; *value is only written when
+ * the read succeeded.
+ */
+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
+			void *data)
+{
+	u16 raw;
+	int rc = pci_read_config_word(dev, offset, &raw);
+
+	if (!rc)
+		*value = raw & ~PCI_PM_CAP_PME_MASK;
+
+	return rc;
+}
|
|
+
|
|
+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
|
|
+ * Can't allow driver domain to enable PMEs - they're shared */
|
|
+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
|
|
+
|
|
+/*
+ * Handle a write to the PM control/status register.
+ *
+ * Only the bits in PM_OK_BITS are written through to the device; all other
+ * bits keep their current hardware value.  The requested power state is
+ * applied through pci_set_power_state() rather than by writing the state
+ * bits directly.
+ *
+ * Returns 0 on success, the config-space accessor's error if reading or
+ * writing the register fails, or PCIBIOS_SET_FAILED if the power state
+ * change is rejected.
+ */
+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
+			 void *data)
+{
+	u16 hw_value;
+	u16 allowed;
+	pci_power_t from_state, to_state;
+	int rc;
+
+	rc = pci_read_config_word(dev, offset, &hw_value);
+	if (rc)
+		return rc;
+
+	from_state = (pci_power_t)(hw_value & PCI_PM_CTRL_STATE_MASK);
+	to_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
+
+	/* Write through only if a permitted bit actually changes. */
+	allowed = new_value & PM_OK_BITS;
+	if ((hw_value & PM_OK_BITS) != allowed) {
+		allowed = (hw_value & ~PM_OK_BITS) | allowed;
+		rc = pci_write_config_word(dev, offset, allowed);
+		if (rc)
+			return rc;
+	}
+
+	/* Let pci core handle the power management change */
+	dev_dbg(&dev->dev, "set power state to %x\n", to_state);
+	if (pci_set_power_state(dev, to_state))
+		return PCIBIOS_SET_FAILED;
+
+	/*
+	 * A device may lose its config-space contents on a D3->D0 transition.
+	 * Some guests never rewrite their BARs, and those that try are
+	 * blocked by the BAR write handler, so restore the BARs here unless
+	 * the device advertises that no soft reset takes place.
+	 * Future: Should we re-initialise all first 64 bytes of config space?
+	 */
+	if (to_state == PCI_D0 &&
+	    (from_state == PCI_D3hot || from_state == PCI_D3cold) &&
+	    !(hw_value & PCI_PM_CTRL_NO_SOFT_RESET))
+		pci_restore_bars(dev);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Field initialiser for the PM control register: make sure PME generation
+ * is switched off by clearing PCI_PM_CTRL_PME_ENABLE if it is set.
+ *
+ * Returns ERR_PTR() of the last config-space accessor result, which is
+ * NULL when everything succeeded.
+ *
+ * NOTE(review): if the accessors report failure with positive PCIBIOS_*
+ * codes, ERR_PTR() of such a value would not satisfy IS_ERR() - confirm
+ * how the caller checks the result.
+ */
+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+{
+	u16 ctrl;
+	int rc = pci_read_config_word(dev, offset, &ctrl);
+
+	if (!rc && (ctrl & PCI_PM_CTRL_PME_ENABLE)) {
+		ctrl &= ~PCI_PM_CTRL_PME_ENABLE;
+		rc = pci_write_config_word(dev, offset, ctrl);
+	}
+
+	return ERR_PTR(rc);
+}
|
|
+
|
|
+static const struct config_field caplist_pm[] = {
|
|
+ {
|
|
+ .offset = PCI_PM_PMC,
|
|
+ .size = 2,
|
|
+ .u.w.read = pm_caps_read,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_PM_CTRL,
|
|
+ .size = 2,
|
|
+ .init = pm_ctrl_init,
|
|
+ .u.w.read = pciback_read_config_word,
|
|
+ .u.w.write = pm_ctrl_write,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_PM_PPB_EXTENSIONS,
|
|
+ .size = 1,
|
|
+ .u.b.read = pciback_read_config_byte,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_PM_DATA_REGISTER,
|
|
+ .size = 1,
|
|
+ .u.b.read = pciback_read_config_byte,
|
|
+ },
|
|
+ {}
|
|
+};
|
|
+
|
|
+struct pciback_config_capability pciback_config_capability_pm = {
|
|
+ .capability = PCI_CAP_ID_PM,
|
|
+ .fields = caplist_pm,
|
|
+};
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_capability_vpd.c 2008-10-29 09:55:56.000000000 +0100
|
|
@@ -0,0 +1,40 @@
|
|
+/*
|
|
+ * PCI Backend - Configuration space overlay for Vital Product Data
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/pci.h>
|
|
+#include "conf_space.h"
|
|
+#include "conf_space_capability.h"
|
|
+
|
|
+/*
+ * Write handler for the VPD address register.
+ *
+ * A value with PCI_VPD_ADDR_F set is refused with PCIBIOS_SET_FAILED, which
+ * disallows writes to the vital product data; any other value is written
+ * through to the device.
+ */
+static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
+			     void *data)
+{
+	if (!(value & PCI_VPD_ADDR_F))
+		return pci_write_config_word(dev, offset, value);
+
+	/* Disallow writes to the vital product data */
+	return PCIBIOS_SET_FAILED;
+}
|
|
+
|
|
+static const struct config_field caplist_vpd[] = {
|
|
+ {
|
|
+ .offset = PCI_VPD_ADDR,
|
|
+ .size = 2,
|
|
+ .u.w.read = pciback_read_config_word,
|
|
+ .u.w.write = vpd_address_write,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_VPD_DATA,
|
|
+ .size = 4,
|
|
+ .u.dw.read = pciback_read_config_dword,
|
|
+ .u.dw.write = NULL,
|
|
+ },
|
|
+ {}
|
|
+};
|
|
+
|
|
+struct pciback_config_capability pciback_config_capability_vpd = {
|
|
+ .capability = PCI_CAP_ID_VPD,
|
|
+ .fields = caplist_vpd,
|
|
+};
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_header.c 2010-03-02 09:56:10.000000000 +0100
|
|
@@ -0,0 +1,378 @@
|
|
+/*
|
|
+ * PCI Backend - Handles the virtual fields in the configuration space headers.
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/pci.h>
|
|
+#include "pciback.h"
|
|
+#include "conf_space.h"
|
|
+/*
+ * Per-field state for a virtualised BAR or expansion ROM register.
+ */
|
|
+struct pci_bar_info {
|
|
+ u32 val; /* value bar_read() returns while 'which' is 0 */
|
|
+ u32 len_val; /* value bar_read() returns while 'which' is non-zero */
|
|
+ int which; /* set to 1 by a sizing write, reset to 0 by any other write */
|
|
+};
|
|
+
|
|
+#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
|
|
+#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
|
|
+
|
|
+/*
+ * Read the PCI command register.
+ *
+ * The hardware value is read first.  If the device is enabled, the I/O and
+ * memory decode bits are additionally reported as set whenever any of the
+ * standard (non-ROM) resources is of the matching type.
+ *
+ * Returns the result of pciback_read_config_word().
+ */
+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+{
+	int rc = pciback_read_config_word(dev, offset, value, data);
+	int bar;
+
+	if (dev->is_enabled) {
+		for (bar = 0; bar < PCI_ROM_RESOURCE; bar++) {
+			unsigned long res_flags = dev->resource[bar].flags;
+
+			if (res_flags & IORESOURCE_IO)
+				*value |= PCI_COMMAND_IO;
+			if (res_flags & IORESOURCE_MEM)
+				*value |= PCI_COMMAND_MEMORY;
+		}
+	}
+
+	return rc;
+}
|
|
+
|
|
+/*
+ * Write the PCI command register, routing state changes through the PCI
+ * core before the raw value reaches the device:
+ *
+ *  - I/O or memory decoding requested on a disabled device enables it;
+ *  - neither requested on an enabled device disables it;
+ *  - bus mastering requested on a non-master device sets bus master;
+ *  - memory-write-invalidate is enabled via pci_set_mwi(), and the bit is
+ *    dropped from the written value if that fails.
+ *
+ * Returns the pci_enable_device() error if enabling fails, otherwise the
+ * result of the final pci_write_config_word().
+ */
+static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
+{
+	int rc;
+
+	if (is_enable_cmd(value)) {
+		if (!dev->is_enabled) {
+			if (unlikely(verbose_request))
+				printk(KERN_DEBUG "pciback: %s: enable\n",
+				       pci_name(dev));
+			rc = pci_enable_device(dev);
+			if (rc)
+				return rc;
+		}
+	} else if (dev->is_enabled) {
+		if (unlikely(verbose_request))
+			printk(KERN_DEBUG "pciback: %s: disable\n",
+			       pci_name(dev));
+		pci_disable_device(dev);
+	}
+
+	if (is_master_cmd(value) && !dev->is_busmaster) {
+		if (unlikely(verbose_request))
+			printk(KERN_DEBUG "pciback: %s: set bus master\n",
+			       pci_name(dev));
+		pci_set_master(dev);
+	}
+
+	if (value & PCI_COMMAND_INVALIDATE) {
+		if (unlikely(verbose_request))
+			printk(KERN_DEBUG
+			       "pciback: %s: enable memory-write-invalidate\n",
+			       pci_name(dev));
+		rc = pci_set_mwi(dev);
+		if (rc) {
+			printk(KERN_WARNING
+			       "pciback: %s: cannot enable memory-write-invalidate (%d)\n",
+			       pci_name(dev), rc);
+			value &= ~PCI_COMMAND_INVALIDATE;
+		}
+	}
+
+	return pci_write_config_word(dev, offset, value);
+}
|
|
+
|
|
+/*
+ * Write handler for the expansion ROM address register.
+ *
+ * Writing ~PCI_ROM_ADDRESS_ENABLE arms the sizing read-back.  Any other
+ * write disarms it and reaches the hardware only when it restores the
+ * recorded value to a register that currently holds something else.
+ *
+ * A write to obtain the length must happen as a 32-bit write; individual
+ * byte writes are not (yet) supported.
+ *
+ * Returns 0, or XEN_PCI_ERR_op_failed if no per-field data is attached.
+ */
+static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+	struct pci_bar_info *info = data;
+	u32 hw_val;
+
+	if (unlikely(!info)) {
+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
+		       pci_name(dev));
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	if (value == ~PCI_ROM_ADDRESS_ENABLE) {
+		info->which = 1;
+		return 0;
+	}
+
+	pci_read_config_dword(dev, offset, &hw_val);
+	if (hw_val != info->val && value == info->val) {
+		/* Allow restoration of bar value. */
+		pci_write_config_dword(dev, offset, info->val);
+	}
+	info->which = 0;
+
+	/* Do we need to support enabling/disabling the rom address here? */
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Write handler for a base address register.
+ *
+ * Only two kinds of write are honoured (needed for when the driver probes
+ * the resource usage):
+ *  - ~0, which arms the sizing read-back;
+ *  - the recorded BAR value, which is written to the hardware if the
+ *    register currently holds something else.
+ * Every other value is ignored apart from disarming the sizing read-back.
+ *
+ * A write to obtain the length must happen as a 32-bit write; individual
+ * byte writes are not (yet) supported.
+ *
+ * Returns 0, or XEN_PCI_ERR_op_failed if no per-field data is attached.
+ */
+static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
+{
+	struct pci_bar_info *info = data;
+	u32 hw_val;
+
+	if (unlikely(!info)) {
+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
+		       pci_name(dev));
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	if (value == ~0) {
+		info->which = 1;
+		return 0;
+	}
+
+	pci_read_config_dword(dev, offset, &hw_val);
+	if (hw_val != info->val && value == info->val) {
+		/* Allow restoration of bar value. */
+		pci_write_config_dword(dev, offset, info->val);
+	}
+	info->which = 0;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Read handler shared by the BAR and ROM fields: returns the recorded
+ * sizing value while a sizing write is pending (bar->which set), and the
+ * recorded BAR value otherwise.  The hardware is not accessed.
+ *
+ * Returns 0, or XEN_PCI_ERR_op_failed if no per-field data is attached.
+ */
+static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
+{
+	struct pci_bar_info *info = data;
+
+	if (unlikely(!info)) {
+		printk(KERN_WARNING "pciback: driver data not found for %s\n",
+		       pci_name(dev));
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	if (info->which)
+		*value = info->len_val;
+	else
+		*value = info->val;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Fill @bar_info with the two values a guest may read from the BAR (or
+ * expansion ROM register) at config-space @offset, taken from the
+ * kernel's resource table for @dev:
+ *
+ *   val     - the register's normal contents (address plus flag bits);
+ *   len_val - what the register reads back after all ones were written,
+ *             i.e. every writable address bit set and the read-only flag
+ *             bits unchanged.  A guest derives the region size from this.
+ *
+ * For the upper dword of a 64-bit memory BAR both values are the upper
+ * 32 bits of the corresponding 64-bit quantity.  A BAR without a resource
+ * reports 0 as its sizing value.
+ *
+ * @len_mask is accepted for the callers' benefit but is not used here.
+ */
+static inline void read_dev_bar(struct pci_dev *dev,
+				struct pci_bar_info *bar_info, int offset,
+				u32 len_mask)
+{
+	int pos;
+	struct resource *res = dev->resource;
+	u64 size;
+
+	if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
+		pos = PCI_ROM_RESOURCE;
+	else {
+		pos = (offset - PCI_BASE_ADDRESS_0) / 4;
+		/* Is this the upper half of the preceding 64-bit memory BAR? */
+		if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
+				PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
+			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
+				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
+			/* Widen first so the shifts are defined for any
+			 * width of resource_size_t. */
+			size = (u64)res[pos - 1].end - res[pos - 1].start + 1;
+			bar_info->val = (u64)res[pos - 1].start >> 32;
+			bar_info->len_val = (-size) >> 32;
+			return;
+		}
+	}
+
+	bar_info->val = res[pos].start |
+			(res[pos].flags & PCI_REGION_FLAG_MASK);
+
+	/* Unimplemented BAR: sizing must read back as all zeroes. */
+	if (!res[pos].flags) {
+		bar_info->len_val = 0;
+		return;
+	}
+
+	/* -size == ~(size - 1): all address bits above the region size. */
+	size = (u64)res[pos].end - res[pos].start + 1;
+	bar_info->len_val = (-size) |
+			    (res[pos].flags & PCI_REGION_FLAG_MASK);
+}
|
|
+
|
|
+/*
+ * Field initialiser for a BAR: allocate the per-field pci_bar_info and
+ * fill it from the device's resources, with sizing initially disarmed.
+ *
+ * Returns the new pci_bar_info, or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static void *bar_init(struct pci_dev *dev, int offset)
+{
+	struct pci_bar_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	read_dev_bar(dev, info, offset, ~0);
+	info->which = 0;
+
+	return info;
+}
|
|
+
|
|
+/*
+ * Field initialiser for the expansion ROM register: same as bar_init()
+ * but passes ~PCI_ROM_ADDRESS_ENABLE as the length mask.
+ *
+ * Returns the new pci_bar_info, or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static void *rom_init(struct pci_dev *dev, int offset)
+{
+	struct pci_bar_info *info;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (info == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	read_dev_bar(dev, info, offset, ~PCI_ROM_ADDRESS_ENABLE);
+	info->which = 0;
+
+	return info;
+}
|
|
+
|
|
+/*
+ * Field reset hook for BAR/ROM fields: disarm any pending sizing
+ * read-back so the next read returns the recorded BAR value.
+ */
+static void bar_reset(struct pci_dev *dev, int offset, void *data)
+{
+	struct pci_bar_info *info = data;
+
+	info->which = 0;
+}
|
|
+
|
|
+/*
+ * Field release hook for BAR/ROM fields: free the per-field data that
+ * bar_init()/rom_init() allocated.
+ */
+static void bar_release(struct pci_dev *dev, int offset, void *data)
+{
+	kfree(data);
+}
|
|
+
|
|
+/*
+ * Report the vendor ID cached in struct pci_dev instead of reading the
+ * hardware register.  Always returns 0.
+ */
+static int pciback_read_vendor(struct pci_dev *dev, int offset,
+			       u16 *value, void *data)
+{
+	*value = dev->vendor;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Report the device ID cached in struct pci_dev instead of reading the
+ * hardware register.  Always returns 0.
+ */
+static int pciback_read_device(struct pci_dev *dev, int offset,
+			       u16 *value, void *data)
+{
+	*value = dev->device;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Report dev->irq, truncated to 8 bits, as the interrupt line register
+ * instead of reading the hardware register.  Always returns 0.
+ */
+static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
+			  void *data)
+{
+	*value = (u8) dev->irq;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Write handler for the BIST register.
+ *
+ * The write is passed to the device only if it is exactly PCI_BIST_START,
+ * or if it leaves every bit other than PCI_BIST_START as it currently is
+ * in hardware.  Any other write is silently dropped.
+ *
+ * Returns the config-space accessor result (0 when the write is dropped).
+ */
+static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
+{
+	u8 hw_val;
+	int rc = pci_read_config_byte(dev, offset, &hw_val);
+
+	if (rc)
+		return rc;
+
+	if (value == PCI_BIST_START ||
+	    ((hw_val ^ value) & ~PCI_BIST_START) == 0)
+		rc = pci_write_config_byte(dev, offset, value);
+
+	return rc;
+}
|
|
+
|
|
+static const struct config_field header_common[] = {
|
|
+ {
|
|
+ .offset = PCI_VENDOR_ID,
|
|
+ .size = 2,
|
|
+ .u.w.read = pciback_read_vendor,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_DEVICE_ID,
|
|
+ .size = 2,
|
|
+ .u.w.read = pciback_read_device,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_COMMAND,
|
|
+ .size = 2,
|
|
+ .u.w.read = command_read,
|
|
+ .u.w.write = command_write,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_INTERRUPT_LINE,
|
|
+ .size = 1,
|
|
+ .u.b.read = interrupt_read,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_INTERRUPT_PIN,
|
|
+ .size = 1,
|
|
+ .u.b.read = pciback_read_config_byte,
|
|
+ },
|
|
+ {
|
|
+ /* Any side effects of letting driver domain control cache line? */
|
|
+ .offset = PCI_CACHE_LINE_SIZE,
|
|
+ .size = 1,
|
|
+ .u.b.read = pciback_read_config_byte,
|
|
+ .u.b.write = pciback_write_config_byte,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_LATENCY_TIMER,
|
|
+ .size = 1,
|
|
+ .u.b.read = pciback_read_config_byte,
|
|
+ },
|
|
+ {
|
|
+ .offset = PCI_BIST,
|
|
+ .size = 1,
|
|
+ .u.b.read = pciback_read_config_byte,
|
|
+ .u.b.write = bist_write,
|
|
+ },
|
|
+ {}
|
|
+};
|
|
+
|
|
+#define CFG_FIELD_BAR(reg_offset) \
|
|
+ { \
|
|
+ .offset = reg_offset, \
|
|
+ .size = 4, \
|
|
+ .init = bar_init, \
|
|
+ .reset = bar_reset, \
|
|
+ .release = bar_release, \
|
|
+ .u.dw.read = bar_read, \
|
|
+ .u.dw.write = bar_write, \
|
|
+ }
|
|
+
|
|
+#define CFG_FIELD_ROM(reg_offset) \
|
|
+ { \
|
|
+ .offset = reg_offset, \
|
|
+ .size = 4, \
|
|
+ .init = rom_init, \
|
|
+ .reset = bar_reset, \
|
|
+ .release = bar_release, \
|
|
+ .u.dw.read = bar_read, \
|
|
+ .u.dw.write = rom_write, \
|
|
+ }
|
|
+
|
|
+static const struct config_field header_0[] = {
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
|
|
+ CFG_FIELD_ROM(PCI_ROM_ADDRESS),
|
|
+ {}
|
|
+};
|
|
+
|
|
+static const struct config_field header_1[] = {
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
|
|
+ CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
|
|
+ CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
|
|
+ {}
|
|
+};
|
|
+
|
|
+/*
+ * Install the virtual header fields for @dev: the fields common to all
+ * header types, then the BAR/ROM fields matching the device's header type
+ * (type 0 for normal devices, type 1 for bridges).
+ *
+ * Returns 0 on success, the error from adding fields, or -EINVAL (after
+ * logging) for any other header type.
+ */
+int pciback_config_header_add_fields(struct pci_dev *dev)
+{
+	int rc = pciback_config_add_fields(dev, header_common);
+
+	if (rc)
+		return rc;
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL)
+		return pciback_config_add_fields(dev, header_0);
+
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		return pciback_config_add_fields(dev, header_1);
+
+	printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n",
+	       pci_name(dev), dev->hdr_type);
+	return -EINVAL;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_quirks.c 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,138 @@
|
|
+/*
|
|
+ * PCI Backend - Handle special overlays for broken devices.
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ * Author: Chris Bookholt <hap10@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/pci.h>
|
|
+#include "pciback.h"
|
|
+#include "conf_space.h"
|
|
+#include "conf_space_quirks.h"
|
|
+
|
|
+LIST_HEAD(pciback_quirks);
|
|
+
|
|
+/*
+ * Test whether @dev matches the ID entry @id.  Vendor, device, subvendor
+ * and subdevice each match if the entry holds PCI_ANY_ID or the device's
+ * value; the class matches if it agrees on all bits in id->class_mask.
+ *
+ * Returns @id on a match, NULL otherwise.
+ */
+static inline const struct pci_device_id *
+match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
+{
+	if (id->vendor != PCI_ANY_ID && id->vendor != dev->vendor)
+		return NULL;
+	if (id->device != PCI_ANY_ID && id->device != dev->device)
+		return NULL;
+	if (id->subvendor != PCI_ANY_ID &&
+	    id->subvendor != dev->subsystem_vendor)
+		return NULL;
+	if (id->subdevice != PCI_ANY_ID &&
+	    id->subdevice != dev->subsystem_device)
+		return NULL;
+	if ((id->class ^ dev->class) & id->class_mask)
+		return NULL;
+
+	return id;
+}
|
|
+
|
|
+/*
+ * Look up the first registered quirk whose device ID matches @dev.
+ *
+ * Returns the quirk, or NULL (after a debug message) if none matches.
+ */
+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev)
+{
+	struct pciback_config_quirk *q;
+
+	list_for_each_entry(q, &pciback_quirks, quirks_list) {
+		if (match_one_device(&q->devid, dev) != NULL)
+			return q;
+	}
+
+	printk(KERN_DEBUG
+	       "quirk didn't match any device pciback knows about\n");
+	return NULL;
+}
|
|
+
|
|
+/* Append @quirk to the global pciback_quirks list. */
+static inline void register_quirk(struct pciback_config_quirk *quirk)
+{
+	list_add_tail(&quirk->quirks_list, &pciback_quirks);
+}
|
|
+
|
|
+/*
+ * Check whether @dev already has a config field whose offset equals @reg.
+ *
+ * Returns 1 if such a field exists, 0 otherwise.
+ */
+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg)
+{
+	struct pciback_dev_data *priv = pci_get_drvdata(dev);
+	struct config_field_entry *entry;
+
+	list_for_each_entry(entry, &priv->config_fields, list) {
+		if (OFFSET(entry) == reg)
+			return 1;
+	}
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Add a quirk-defined, fully pass-through field to @dev.
+ *
+ * The read and write handlers of @field are set to the generic pciback
+ * accessors matching field->size (1, 2 or 4 bytes), after which the field
+ * is added to the device's config-space overlay.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported size, or the error
+ * reported by pciback_config_add_field().  Previously that result was
+ * discarded, so a failed add was reported to the caller as success.
+ */
+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
+				    *field)
+{
+	int err = 0;
+
+	switch (field->size) {
+	case 1:
+		field->u.b.read = pciback_read_config_byte;
+		field->u.b.write = pciback_write_config_byte;
+		break;
+	case 2:
+		field->u.w.read = pciback_read_config_word;
+		field->u.w.write = pciback_write_config_word;
+		break;
+	case 4:
+		field->u.dw.read = pciback_read_config_dword;
+		field->u.dw.write = pciback_write_config_dword;
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = pciback_config_add_field(dev, field);
+
+ out:
+	return err;
+}
|
|
+
|
|
+/*
+ * Create and register a quirk entry for @dev, keyed on its exact vendor,
+ * device, subsystem vendor and subsystem device IDs.  Class matching is
+ * left disabled (class and class_mask are 0).
+ *
+ * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
+ */
+int pciback_config_quirks_init(struct pci_dev *dev)
+{
+	struct pciback_config_quirk *q = kzalloc(sizeof(*q), GFP_ATOMIC);
+
+	if (q == NULL)
+		return -ENOMEM;
+
+	/*
+	 * kzalloc() has already zeroed devid.class, devid.class_mask and
+	 * devid.driver_data; only the identifying fields need filling in.
+	 */
+	q->devid.vendor = dev->vendor;
+	q->devid.device = dev->device;
+	q->devid.subvendor = dev->subsystem_vendor;
+	q->devid.subdevice = dev->subsystem_device;
+	q->pdev = dev;
+
+	register_quirk(q);
+
+	return 0;
+}
|
|
+
|
|
+/* Free a config_field with kfree(). */
+void pciback_config_field_free(struct config_field *field)
+{
+	kfree(field);
+}
|
|
+
|
|
+/*
+ * Unregister and free the quirk entry matching @dev.
+ *
+ * Returns 0 on success or -ENXIO if no matching quirk is registered.
+ */
+int pciback_config_quirk_release(struct pci_dev *dev)
+{
+	struct pciback_config_quirk *q = pciback_find_quirk(dev);
+
+	if (q == NULL)
+		return -ENXIO;
+
+	list_del(&q->quirks_list);
+	kfree(q);
+
+	return 0;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/conf_space_quirks.h 2007-06-12 13:13:45.000000000 +0200
|
|
@@ -0,0 +1,35 @@
|
|
+/*
|
|
+ * PCI Backend - Data structures for special overlays for broken devices.
|
|
+ *
|
|
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
|
|
+#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
|
|
+
|
|
+#include <linux/pci.h>
|
|
+#include <linux/list.h>
|
|
+/*
+ * One registered quirk: identifies a device for which extra config-space
+ * fields may be added.
+ */
|
|
+struct pciback_config_quirk {
|
|
+ struct list_head quirks_list; /* link in the pciback_quirks list */
|
|
+ struct pci_device_id devid; /* IDs a device is matched against */
|
|
+ struct pci_dev *pdev; /* device the quirk was created for */
|
|
+};
|
|
+
|
|
+struct pciback_config_quirk *pciback_find_quirk(struct pci_dev *dev);
|
|
+
|
|
+int pciback_config_quirks_add_field(struct pci_dev *dev, struct config_field
|
|
+ *field);
|
|
+
|
|
+int pciback_config_quirks_remove_field(struct pci_dev *dev, int reg);
|
|
+
|
|
+int pciback_config_quirks_init(struct pci_dev *dev);
|
|
+
|
|
+void pciback_config_field_free(struct config_field *field);
|
|
+
|
|
+int pciback_config_quirk_release(struct pci_dev *dev);
|
|
+
|
|
+int pciback_field_is_dup(struct pci_dev *dev, unsigned int reg);
|
|
+
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/controller.c 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,443 @@
|
|
+/*
|
|
+ * Copyright (C) 2007 Hewlett-Packard Development Company, L.P.
|
|
+ * Alex Williamson <alex.williamson@hp.com>
|
|
+ *
|
|
+ * PCI "Controller" Backend - virtualize PCI bus topology based on PCI
|
|
+ * controllers. Devices under the same PCI controller are exposed on the
|
|
+ * same virtual domain:bus. Within a bus, device slots are virtualized
|
|
+ * to compact the bus.
|
|
+ *
|
|
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
+ */
|
|
+
|
|
+#include <linux/acpi.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include "pciback.h"
|
|
+
|
|
+#define PCI_MAX_BUSSES 255
|
|
+#define PCI_MAX_SLOTS 32
|
|
+/*
+ * One exported device on a virtual bus.
+ */
|
|
+struct controller_dev_entry {
|
|
+ struct list_head list; /* link in controller_list_entry.dev_list */
|
|
+ struct pci_dev *dev; /* the real device being exported */
|
|
+ unsigned int devfn; /* virtual devfn assigned on the virtual bus */
|
|
+};
|
|
+/*
+ * One virtual domain:bus, created per real PCI controller that has at
+ * least one exported device.
+ */
|
|
+struct controller_list_entry {
|
|
+ struct list_head list; /* link in controller_dev_data.list */
|
|
+ struct pci_controller *controller; /* real controller this entry stands for */
|
|
+ unsigned int domain; /* virtual domain number */
|
|
+ unsigned int bus; /* virtual bus number */
|
|
+ unsigned int next_devfn; /* virtual devfn handed to the next added device */
|
|
+ struct list_head dev_list; /* controller_dev_entry items on this bus */
|
|
+};
|
|
+/*
+ * Per-pciback_device state of the controller backend (stored in
+ * pdev->pci_dev_data).
+ */
|
|
+struct controller_dev_data {
|
|
+ struct list_head list; /* controller_list_entry items */
|
|
+ unsigned int next_domain; /* virtual domain for the next new controller */
|
|
+ unsigned int next_bus; /* virtual bus for the next new controller */
|
|
+ spinlock_t lock; /* taken around accesses to the lists above */
|
|
+};
|
|
+/*
+ * Context handed to write_xenbus_resource() through acpi_walk_resources().
+ */
|
|
+struct walk_info {
|
|
+ struct pciback_device *pdev; /* backend whose xenbus node is written */
|
|
+ int resource_count; /* resources written so far for this root */
|
|
+ int root_num; /* index N used in the "root-N-resource-M" key */
|
|
+};
|
|
+
|
|
+/*
+ * Translate a virtual domain:bus:devfn, as seen by the frontend, into the
+ * real device exported at that position.
+ *
+ * Returns the device, or NULL if nothing is exported there.
+ */
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
+				    unsigned int domain, unsigned int bus,
+				    unsigned int devfn)
+{
+	struct controller_dev_data *ctl = pdev->pci_dev_data;
+	struct controller_list_entry *root;
+	struct controller_dev_entry *slot;
+	struct pci_dev *match = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&ctl->lock, irqflags);
+
+	list_for_each_entry(root, &ctl->list, list) {
+		if (root->domain == domain && root->bus == bus) {
+			list_for_each_entry(slot, &root->dev_list, list) {
+				if (slot->devfn == devfn) {
+					match = slot->dev;
+					goto unlock;
+				}
+			}
+		}
+	}
+
+unlock:
+	spin_unlock_irqrestore(&ctl->lock, irqflags);
+
+	return match;
+}
|
|
+
|
|
+/*
+ * Export @dev to the frontend.
+ *
+ * Devices behind the same real PCI controller share one virtual
+ * domain:bus; a new one is allocated the first time a controller is seen.
+ * Within a bus each device gets the next free slot (function 0).
+ *
+ * @devid and @publish_cb are currently unused by this backend.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or -ENOSPC (after
+ * flagging a fatal xenbus error) when the virtual bus has no slot left.
+ */
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
+			int devid, publish_pci_dev_cb publish_cb)
+{
+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+	struct controller_dev_entry *dev_entry;
+	struct controller_list_entry *cntrl_entry;
+	struct pci_controller *dev_controller = PCI_CONTROLLER(dev);
+	unsigned int full_domain = 0, full_bus = 0;
+	unsigned long flags;
+	int ret = 0, found = 0;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	/* Look to see if we already have a domain:bus for this controller */
+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+		if (cntrl_entry->controller == dev_controller) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		cntrl_entry = kmalloc(sizeof(*cntrl_entry), GFP_ATOMIC);
+		if (!cntrl_entry) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		cntrl_entry->controller = dev_controller;
+		cntrl_entry->next_devfn = PCI_DEVFN(0, 0);
+
+		cntrl_entry->domain = dev_data->next_domain;
+		cntrl_entry->bus = dev_data->next_bus++;
+		if (dev_data->next_bus > PCI_MAX_BUSSES) {
+			dev_data->next_domain++;
+			dev_data->next_bus = 0;
+		}
+
+		INIT_LIST_HEAD(&cntrl_entry->dev_list);
+
+		list_add_tail(&cntrl_entry->list, &dev_data->list);
+	}
+
+	/*
+	 * next_devfn advances by one slot per device.  Compare the raw slot
+	 * index: PCI_SLOT() masks its result to 0..31, so it can never
+	 * exceed PCI_MAX_SLOTS and would wrap back to slot 0 instead.
+	 *
+	 * While it seems unlikely, a full bus can actually happen if
+	 * a controller has P2P bridges under it.
+	 */
+	if ((cntrl_entry->next_devfn >> 3) >= PCI_MAX_SLOTS) {
+		full_domain = cntrl_entry->domain;
+		full_bus = cntrl_entry->bus;
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_ATOMIC);
+	if (!dev_entry) {
+		/* Don't leave behind a controller entry with no devices. */
+		if (list_empty(&cntrl_entry->dev_list)) {
+			list_del(&cntrl_entry->list);
+			kfree(cntrl_entry);
+		}
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	dev_entry->dev = dev;
+	dev_entry->devfn = cntrl_entry->next_devfn;
+
+	list_add_tail(&dev_entry->list, &cntrl_entry->dev_list);
+
+	cntrl_entry->next_devfn += PCI_DEVFN(1, 0);
+
+out:
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+
+	/*
+	 * Report the full bus only after dropping the lock, so the xenbus
+	 * call is not made with interrupts disabled.
+	 */
+	if (ret == -ENOSPC)
+		xenbus_dev_fatal(pdev->xdev, -ENOSPC, "Virtual bus %04x:%02x "
+				 "is full, no room to export %04x:%02x:%02x.%x",
+				 full_domain, full_bus,
+				 pci_domain_nr(dev->bus), dev->bus->number,
+				 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
+
+	/* TODO: Publish virtual domain:bus:slot.func here. */
+
+	return ret;
+}
|
|
+
|
|
+/*
+ * Stop exporting @dev: remove it from its virtual bus, drop the virtual
+ * bus as well if that was its last device, and hand the device back with
+ * pcistub_put_pci_dev().  Does nothing if @dev is not exported.
+ */
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
+{
+	struct controller_dev_data *dev_data = pdev->pci_dev_data;
+	struct controller_list_entry *cntrl_entry;
+	struct controller_dev_entry *dev_entry = NULL;
+	struct pci_dev *found_dev = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_data->lock, flags);
+
+	list_for_each_entry(cntrl_entry, &dev_data->list, list) {
+		if (cntrl_entry->controller != PCI_CONTROLLER(dev))
+			continue;
+
+		list_for_each_entry(dev_entry, &cntrl_entry->dev_list, list) {
+			if (dev_entry->dev == dev) {
+				found_dev = dev_entry->dev;
+				/*
+				 * Leave BOTH loops.  A plain break would let
+				 * the outer loop keep advancing, so that
+				 * cntrl_entry no longer refers to the owning
+				 * controller when it is used below.
+				 */
+				goto found;
+			}
+		}
+	}
+
+	/* Not exported through this backend. */
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+	return;
+
+found:
+	list_del(&dev_entry->list);
+	kfree(dev_entry);
+
+	if (list_empty(&cntrl_entry->dev_list)) {
+		list_del(&cntrl_entry->list);
+		kfree(cntrl_entry);
+	}
+
+	spin_unlock_irqrestore(&dev_data->lock, flags);
+	pcistub_put_pci_dev(found_dev);
+}
|
|
+
|
|
+/*
+ * Allocate and initialise the controller backend's per-device state and
+ * attach it to @pdev.  Virtual numbering starts at domain 0, bus 0.
+ *
+ * Returns 0 on success or -ENOMEM if the allocation fails.
+ */
+int pciback_init_devices(struct pciback_device *pdev)
+{
+	struct controller_dev_data *ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
+
+	if (ctl == NULL)
+		return -ENOMEM;
+
+	spin_lock_init(&ctl->lock);
+	INIT_LIST_HEAD(&ctl->list);
+
+	/* Starting domain:bus numbers */
+	ctl->next_domain = 0;
+	ctl->next_bus = 0;
+
+	pdev->pci_dev_data = ctl;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * acpi_walk_resources() callback: publish one ACPI resource of a PCI root
+ * to the guest through xenbus.
+ *
+ * Only producer memory or I/O address ranges of non-zero length that need
+ * address translation are published.  Such a resource is written, as a
+ * hex dump of the whole struct acpi_resource with its resource_source
+ * cleared, under the key "root-<root_num>-resource-<resource_count>", and
+ * resource_count in the walk_info passed via @data is incremented on
+ * success.
+ *
+ * Note that @res itself is modified (resource_source is cleared in place).
+ *
+ * Always returns AE_OK so that the walk continues; resources that cannot
+ * be published are skipped.
+ */
+static acpi_status write_xenbus_resource(struct acpi_resource *res, void *data)
+{
+	struct walk_info *info = data;
+	struct acpi_resource_address64 addr;
+	acpi_status status;
+	int len, err;
+	size_t i;
+	char str[32];
+	const unsigned char *ptr;
+	char *buf;
+
+	status = acpi_resource_to_address64(res, &addr);
+
+	/* Do we care about this range?  Let's check. */
+	if (!ACPI_SUCCESS(status) ||
+	    !(addr.resource_type == ACPI_MEMORY_RANGE ||
+	      addr.resource_type == ACPI_IO_RANGE) ||
+	    !addr.address_length || addr.producer_consumer != ACPI_PRODUCER)
+		return AE_OK;
+
+	/*
+	 * Furthermore, we really only care to tell the guest about
+	 * address ranges that require address translation of some sort.
+	 */
+	if (!(addr.resource_type == ACPI_MEMORY_RANGE &&
+	      addr.info.mem.translation) &&
+	    !(addr.resource_type == ACPI_IO_RANGE &&
+	      addr.info.io.translation))
+		return AE_OK;
+
+	/* Store the resource in xenbus for the guest */
+	len = snprintf(str, sizeof(str), "root-%d-resource-%d",
+		       info->root_num, info->resource_count);
+	if (unlikely(len >= (sizeof(str) - 1)))
+		return AE_OK;
+
+	/* Two hex digits per byte plus the terminating NUL. */
+	buf = kzalloc((sizeof(*res) * 2) + 1, GFP_KERNEL);
+	if (!buf)
+		return AE_OK;
+
+	/* Clean out resource_source */
+	res->data.address64.resource_source.index = 0xFF;
+	res->data.address64.resource_source.string_length = 0;
+	res->data.address64.resource_source.string_ptr = NULL;
+
+	ptr = (const unsigned char *)res;
+
+	/*
+	 * Turn the acpi_resource into an ASCII byte stream.  Each byte is
+	 * written at its final position rather than appended with strncat(),
+	 * which would rescan the buffer from the start for every byte.
+	 */
+	for (i = 0; i < sizeof(*res); i++)
+		sprintf(buf + 2 * i, "%02x", ptr[i]);
+
+	err = xenbus_printf(XBT_NIL, info->pdev->xdev->nodename,
+			    str, "%s", buf);
+
+	if (!err)
+		info->resource_count++;
+
+	kfree(buf);
+
+	return AE_OK;
+}
|
|
+
|
|
+/*
+ * Publish every virtual PCI root, plus the translated ACPI resources of
+ * its controller, to the frontend through xenbus.
+ *
+ * For each virtual domain:bus:
+ *   1. @publish_root_cb publishes the domain:bus itself;
+ *   2. the "root-%d" keys are scanned to find the index under which that
+ *      domain:bus is stored;
+ *   3. the controller's _CRS resources are walked, each relevant one being
+ *      written by write_xenbus_resource();
+ *   4. if any were written, their count goes to "root-%d-resources".
+ * Finally "root-resource-magic" is written to synchronise with the guest.
+ *
+ * Returns 0 on success.  On an early exit the most recent result is
+ * returned unchanged: a negative errno, or the xenbus_scanf() result when
+ * a scan did not yield the expected item count or no matching "root-%d"
+ * key was found.
+ *
+ * NOTE(review): the xenbus and ACPI calls below are made with the
+ * spinlock held - confirm none of them can sleep.
+ */
+int pciback_publish_pci_roots(struct pciback_device *pdev,
+			      publish_pci_root_cb publish_root_cb)
+{
+	struct controller_dev_data *ctl = pdev->pci_dev_data;
+	struct controller_list_entry *root;
+	struct walk_info walk;
+	unsigned int seen_domain, seen_bus;
+	char key[64];
+	int idx, nr_roots, n, rc = 0;
+
+	spin_lock(&ctl->lock);
+
+	list_for_each_entry(root, &ctl->list, list) {
+		/* First publish all the domain:bus info */
+		rc = publish_root_cb(pdev, root->domain, root->bus);
+		if (rc)
+			goto unlock;
+
+		/*
+		 * Now figure out which root-%d this belongs to
+		 * so we can associate resources with it.
+		 */
+		rc = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+				  "root_num", "%d", &nr_roots);
+		if (rc != 1)
+			goto unlock;
+
+		for (idx = 0; idx < nr_roots; idx++) {
+			n = snprintf(key, sizeof(key), "root-%d", idx);
+			if (unlikely(n >= (sizeof(key) - 1))) {
+				rc = -ENOMEM;
+				goto unlock;
+			}
+
+			rc = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
+					  key, "%x:%x", &seen_domain,
+					  &seen_bus);
+			if (rc != 2)
+				goto unlock;
+
+			/* Is this the one we just published? */
+			if (seen_domain == root->domain &&
+			    seen_bus == root->bus)
+				break;
+		}
+
+		/* Scanned every root-%d without finding ours. */
+		if (idx == nr_roots)
+			goto unlock;
+
+		walk.pdev = pdev;
+		walk.resource_count = 0;
+		walk.root_num = idx;
+
+		/* Let ACPI do the heavy lifting on decoding resources */
+		acpi_walk_resources(root->controller->acpi_handle,
+				    METHOD_NAME__CRS, write_xenbus_resource,
+				    &walk);
+
+		/* No resources.  OK.  On to the next one */
+		if (walk.resource_count == 0)
+			continue;
+
+		/* Store the number of resources we wrote for this root-%d */
+		n = snprintf(key, sizeof(key), "root-%d-resources", idx);
+		if (unlikely(n >= (sizeof(key) - 1))) {
+			rc = -ENOMEM;
+			goto unlock;
+		}
+
+		rc = xenbus_printf(XBT_NIL, pdev->xdev->nodename, key,
+				   "%d", walk.resource_count);
+		if (rc)
+			goto unlock;
+	}
+
+	/* Finally, write some magic to synchronize with the guest. */
+	n = snprintf(key, sizeof(key), "root-resource-magic");
+	if (unlikely(n >= (sizeof(key) - 1))) {
+		rc = -ENOMEM;
+		goto unlock;
+	}
+
+	rc = xenbus_printf(XBT_NIL, pdev->xdev->nodename, key,
+			   "%lx", (sizeof(struct acpi_resource) * 2) + 1);
+
+unlock:
+	spin_unlock(&ctl->lock);
+
+	return rc;
+}
|
|
+
|
|
+/*
+ * Tear down the controller backend state of @pdev: hand back every
+ * exported device with pcistub_put_pci_dev(), free all device and
+ * controller entries, then free the state itself and clear
+ * pdev->pci_dev_data.  The lists are walked without taking the lock.
+ */
+void pciback_release_devices(struct pciback_device *pdev)
+{
+	struct controller_dev_data *ctl = pdev->pci_dev_data;
+	struct controller_list_entry *root, *next_root;
+	struct controller_dev_entry *slot, *next_slot;
+
+	list_for_each_entry_safe(root, next_root, &ctl->list, list) {
+		list_for_each_entry_safe(slot, next_slot,
+					 &root->dev_list, list) {
+			list_del(&slot->list);
+			pcistub_put_pci_dev(slot->dev);
+			kfree(slot);
+		}
+		list_del(&root->list);
+		kfree(root);
+	}
+
+	kfree(ctl);
+	pdev->pci_dev_data = NULL;
+}
|
|
+
|
|
+/*
+ * Reverse lookup: find the virtual domain:bus:devfn under which the real
+ * device @pcidev is exported.  Devices are compared by real bus number,
+ * devfn and domain number.
+ *
+ * Returns 1 and fills *domain, *bus and *devfn if the device is exported,
+ * or 0 (leaving the outputs untouched) if it is not.
+ */
+int pciback_get_pcifront_dev(struct pci_dev *pcidev,
+			     struct pciback_device *pdev,
+			     unsigned int *domain, unsigned int *bus,
+			     unsigned int *devfn)
+{
+	struct controller_dev_data *ctl = pdev->pci_dev_data;
+	struct controller_list_entry *root;
+	struct controller_dev_entry *slot;
+	unsigned long irqflags;
+	int hit = 0;
+
+	spin_lock_irqsave(&ctl->lock, irqflags);
+
+	list_for_each_entry(root, &ctl->list, list) {
+		list_for_each_entry(slot, &root->dev_list, list) {
+			struct pci_dev *cand = slot->dev;
+
+			if (cand->bus->number != pcidev->bus->number)
+				continue;
+			if (cand->devfn != pcidev->devfn)
+				continue;
+			if (pci_domain_nr(cand->bus) !=
+			    pci_domain_nr(pcidev->bus))
+				continue;
+
+			*domain = root->domain;
+			*bus = root->bus;
+			*devfn = slot->devfn;
+			hit = 1;
+			goto unlock;
+		}
+	}
+
+unlock:
+	spin_unlock_irqrestore(&ctl->lock, irqflags);
+
+	return hit;
+}
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/passthrough.c 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,176 @@
|
|
+/*
|
|
+ * PCI Backend - Provides restricted access to the real PCI bus topology
|
|
+ * to the frontend
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/list.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include "pciback.h"
|
|
+
|
|
+struct passthrough_dev_data {
|
|
+ /* Access to dev_list must be protected by lock */
|
|
+ struct list_head dev_list;
|
|
+ spinlock_t lock;
|
|
+};
|
|
+
|
|
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn)
|
|
+{
|
|
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
|
+ struct pci_dev_entry *dev_entry;
|
|
+ struct pci_dev *dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&dev_data->lock, flags);
|
|
+
|
|
+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
|
|
+ if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
|
|
+ && bus == (unsigned int)dev_entry->dev->bus->number
|
|
+ && devfn == dev_entry->dev->devfn) {
|
|
+ dev = dev_entry->dev;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&dev_data->lock, flags);
|
|
+
|
|
+ return dev;
|
|
+}
|
|
+
|
|
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
|
|
+ int devid, publish_pci_dev_cb publish_cb)
|
|
+{
|
|
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
|
+ struct pci_dev_entry *dev_entry;
|
|
+ unsigned long flags;
|
|
+ unsigned int domain, bus, devfn;
|
|
+ int err;
|
|
+
|
|
+ dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
|
|
+ if (!dev_entry)
|
|
+ return -ENOMEM;
|
|
+ dev_entry->dev = dev;
|
|
+
|
|
+ spin_lock_irqsave(&dev_data->lock, flags);
|
|
+ list_add_tail(&dev_entry->list, &dev_data->dev_list);
|
|
+ spin_unlock_irqrestore(&dev_data->lock, flags);
|
|
+
|
|
+ /* Publish this device. */
|
|
+ domain = (unsigned int)pci_domain_nr(dev->bus);
|
|
+ bus = (unsigned int)dev->bus->number;
|
|
+ devfn = dev->devfn;
|
|
+ err = publish_cb(pdev, domain, bus, devfn, devid);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
|
|
+{
|
|
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
|
+ struct pci_dev_entry *dev_entry, *t;
|
|
+ struct pci_dev *found_dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&dev_data->lock, flags);
|
|
+
|
|
+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
|
|
+ if (dev_entry->dev == dev) {
|
|
+ list_del(&dev_entry->list);
|
|
+ found_dev = dev_entry->dev;
|
|
+ kfree(dev_entry);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&dev_data->lock, flags);
|
|
+
|
|
+ if (found_dev)
|
|
+ pcistub_put_pci_dev(found_dev);
|
|
+}
|
|
+
|
|
+int pciback_init_devices(struct pciback_device *pdev)
|
|
+{
|
|
+ struct passthrough_dev_data *dev_data;
|
|
+
|
|
+ dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
|
|
+ if (!dev_data)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ spin_lock_init(&dev_data->lock);
|
|
+
|
|
+ INIT_LIST_HEAD(&dev_data->dev_list);
|
|
+
|
|
+ pdev->pci_dev_data = dev_data;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int pciback_publish_pci_roots(struct pciback_device *pdev,
|
|
+ publish_pci_root_cb publish_root_cb)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
|
+ struct pci_dev_entry *dev_entry, *e;
|
|
+ struct pci_dev *dev;
|
|
+ int found;
|
|
+ unsigned int domain, bus;
|
|
+
|
|
+ spin_lock(&dev_data->lock);
|
|
+
|
|
+ list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
|
|
+ /* Only publish this device as a root if none of its
|
|
+ * parent bridges are exported
|
|
+ */
|
|
+ found = 0;
|
|
+ dev = dev_entry->dev->bus->self;
|
|
+ for (; !found && dev != NULL; dev = dev->bus->self) {
|
|
+ list_for_each_entry(e, &dev_data->dev_list, list) {
|
|
+ if (dev == e->dev) {
|
|
+ found = 1;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
|
|
+ bus = (unsigned int)dev_entry->dev->bus->number;
|
|
+
|
|
+ if (!found) {
|
|
+ err = publish_root_cb(pdev, domain, bus);
|
|
+ if (err)
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock(&dev_data->lock);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+void pciback_release_devices(struct pciback_device *pdev)
|
|
+{
|
|
+ struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
|
+ struct pci_dev_entry *dev_entry, *t;
|
|
+
|
|
+ list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
|
|
+ list_del(&dev_entry->list);
|
|
+ pcistub_put_pci_dev(dev_entry->dev);
|
|
+ kfree(dev_entry);
|
|
+ }
|
|
+
|
|
+ kfree(dev_data);
|
|
+ pdev->pci_dev_data = NULL;
|
|
+}
|
|
+
|
|
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
|
|
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
|
|
+
|
|
+{
|
|
+ *domain = pci_domain_nr(pcidev->bus);
|
|
+ *bus = pcidev->bus->number;
|
|
+ *devfn = pcidev->devfn;
|
|
+ return 1;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/pci_stub.c 2011-03-02 12:00:16.000000000 +0100
|
|
@@ -0,0 +1,1311 @@
|
|
+/*
|
|
+ * PCI Stub Driver - Grabs devices in backend to be exported later
|
|
+ *
|
|
+ * Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ * Chris Bookholt <hap10@epoch.ncsc.mil>
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/rwsem.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/kref.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/wait.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include "pciback.h"
|
|
+#include "conf_space.h"
|
|
+#include "conf_space_quirks.h"
|
|
+
|
|
+static char *pci_devs_to_hide = NULL;
|
|
+wait_queue_head_t aer_wait_queue;
|
|
+/*Add sem for sync AER handling and pciback remove/reconfigue ops,
|
|
+* We want to avoid in middle of AER ops, pciback devices is being removed
|
|
+*/
|
|
+static DECLARE_RWSEM(pcistub_sem);
|
|
+module_param_named(hide, pci_devs_to_hide, charp, 0444);
|
|
+
|
|
+struct pcistub_device_id {
|
|
+ struct list_head slot_list;
|
|
+ int domain;
|
|
+ unsigned char bus;
|
|
+ unsigned int devfn;
|
|
+};
|
|
+static LIST_HEAD(pcistub_device_ids);
|
|
+static DEFINE_SPINLOCK(device_ids_lock);
|
|
+
|
|
+struct pcistub_device {
|
|
+ struct kref kref;
|
|
+ struct list_head dev_list;
|
|
+ spinlock_t lock;
|
|
+
|
|
+ struct pci_dev *dev;
|
|
+ struct pciback_device *pdev; /* non-NULL if struct pci_dev is in use */
|
|
+};
|
|
+
|
|
/*
 * pcistub_devices_lock must be held for any access to the pcistub_devices
 * and seized_devices lists and to the initialize_devices flag.
 */
static DEFINE_SPINLOCK(pcistub_devices_lock);
static LIST_HEAD(pcistub_devices);

/*
 * Device initialisation is postponed until device_initcall time (see
 * pcistub_init_devices_late); devices seized before then are parked on
 * seized_devices and the flag stays 0.
 */
static int initialize_devices;
static LIST_HEAD(seized_devices);
|
|
+
|
|
+static struct pcistub_device *pcistub_device_alloc(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+
|
|
+ dev_dbg(&dev->dev, "pcistub_device_alloc\n");
|
|
+
|
|
+ psdev = kzalloc(sizeof(*psdev), GFP_ATOMIC);
|
|
+ if (!psdev)
|
|
+ return NULL;
|
|
+
|
|
+ psdev->dev = pci_dev_get(dev);
|
|
+ if (!psdev->dev) {
|
|
+ kfree(psdev);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ kref_init(&psdev->kref);
|
|
+ spin_lock_init(&psdev->lock);
|
|
+
|
|
+ return psdev;
|
|
+}
|
|
+
|
|
+/* Don't call this directly as it's called by pcistub_device_put */
|
|
+static void pcistub_device_release(struct kref *kref)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+
|
|
+ psdev = container_of(kref, struct pcistub_device, kref);
|
|
+
|
|
+ dev_dbg(&psdev->dev->dev, "pcistub_device_release\n");
|
|
+
|
|
+ /* Clean-up the device */
|
|
+ pciback_reset_device(psdev->dev);
|
|
+ pciback_config_free_dyn_fields(psdev->dev);
|
|
+ pciback_config_free_dev(psdev->dev);
|
|
+ kfree(pci_get_drvdata(psdev->dev));
|
|
+ pci_set_drvdata(psdev->dev, NULL);
|
|
+
|
|
+ pci_dev_put(psdev->dev);
|
|
+
|
|
+ kfree(psdev);
|
|
+}
|
|
+
|
|
+static inline void pcistub_device_get(struct pcistub_device *psdev)
|
|
+{
|
|
+ kref_get(&psdev->kref);
|
|
+}
|
|
+
|
|
+static inline void pcistub_device_put(struct pcistub_device *psdev)
|
|
+{
|
|
+ kref_put(&psdev->kref, pcistub_device_release);
|
|
+}
|
|
+
|
|
+static struct pcistub_device *pcistub_device_find(int domain, int bus,
|
|
+ int slot, int func)
|
|
+{
|
|
+ struct pcistub_device *psdev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
|
|
+ if (psdev->dev != NULL
|
|
+ && domain == pci_domain_nr(psdev->dev->bus)
|
|
+ && bus == psdev->dev->bus->number
|
|
+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
|
|
+ pcistub_device_get(psdev);
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* didn't find it */
|
|
+ psdev = NULL;
|
|
+
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+ return psdev;
|
|
+}
|
|
+
|
|
+static struct pci_dev *pcistub_device_get_pci_dev(struct pciback_device *pdev,
|
|
+ struct pcistub_device *psdev)
|
|
+{
|
|
+ struct pci_dev *pci_dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ pcistub_device_get(psdev);
|
|
+
|
|
+ spin_lock_irqsave(&psdev->lock, flags);
|
|
+ if (!psdev->pdev) {
|
|
+ psdev->pdev = pdev;
|
|
+ pci_dev = psdev->dev;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&psdev->lock, flags);
|
|
+
|
|
+ if (!pci_dev)
|
|
+ pcistub_device_put(psdev);
|
|
+
|
|
+ return pci_dev;
|
|
+}
|
|
+
|
|
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
|
|
+ int domain, int bus,
|
|
+ int slot, int func)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ struct pci_dev *found_dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
|
|
+ if (psdev->dev != NULL
|
|
+ && domain == pci_domain_nr(psdev->dev->bus)
|
|
+ && bus == psdev->dev->bus->number
|
|
+ && PCI_DEVFN(slot, func) == psdev->dev->devfn) {
|
|
+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+ return found_dev;
|
|
+}
|
|
+
|
|
+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
|
|
+ struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ struct pci_dev *found_dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
|
|
+ if (psdev->dev == dev) {
|
|
+ found_dev = pcistub_device_get_pci_dev(pdev, psdev);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+ return found_dev;
|
|
+}
|
|
+
|
|
+void pcistub_put_pci_dev(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev, *found_psdev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
|
|
+ if (psdev->dev == dev) {
|
|
+ found_psdev = psdev;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+
|
|
+ /*hold this lock for avoiding breaking link between
|
|
+ * pcistub and pciback when AER is in processing
|
|
+ */
|
|
+ down_write(&pcistub_sem);
|
|
+ /* Cleanup our device
|
|
+ * (so it's ready for the next domain)
|
|
+ */
|
|
+ pciback_reset_device(found_psdev->dev);
|
|
+ pciback_config_free_dyn_fields(found_psdev->dev);
|
|
+ pciback_config_reset_dev(found_psdev->dev);
|
|
+
|
|
+ spin_lock_irqsave(&found_psdev->lock, flags);
|
|
+ found_psdev->pdev = NULL;
|
|
+ spin_unlock_irqrestore(&found_psdev->lock, flags);
|
|
+
|
|
+ pcistub_device_put(found_psdev);
|
|
+ up_write(&pcistub_sem);
|
|
+}
|
|
+
|
|
+static int __devinit pcistub_match_one(struct pci_dev *dev,
|
|
+ struct pcistub_device_id *pdev_id)
|
|
+{
|
|
+ /* Match the specified device by domain, bus, slot, func and also if
|
|
+ * any of the device's parent bridges match.
|
|
+ */
|
|
+ for (; dev != NULL; dev = dev->bus->self) {
|
|
+ if (pci_domain_nr(dev->bus) == pdev_id->domain
|
|
+ && dev->bus->number == pdev_id->bus
|
|
+ && dev->devfn == pdev_id->devfn)
|
|
+ return 1;
|
|
+
|
|
+ /* Sometimes topmost bridge links to itself. */
|
|
+ if (dev == dev->bus->self)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int __devinit pcistub_match(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device_id *pdev_id;
|
|
+ unsigned long flags;
|
|
+ int found = 0;
|
|
+
|
|
+ spin_lock_irqsave(&device_ids_lock, flags);
|
|
+ list_for_each_entry(pdev_id, &pcistub_device_ids, slot_list) {
|
|
+ if (pcistub_match_one(dev, pdev_id)) {
|
|
+ found = 1;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ spin_unlock_irqrestore(&device_ids_lock, flags);
|
|
+
|
|
+ return found;
|
|
+}
|
|
+
|
|
+static int __devinit pcistub_init_device(struct pci_dev *dev)
|
|
+{
|
|
+ struct pciback_dev_data *dev_data;
|
|
+ int err = 0;
|
|
+
|
|
+ dev_dbg(&dev->dev, "initializing...\n");
|
|
+
|
|
+ /* The PCI backend is not intended to be a module (or to work with
|
|
+ * removable PCI devices (yet). If it were, pciback_config_free()
|
|
+ * would need to be called somewhere to free the memory allocated
|
|
+ * here and then to call kfree(pci_get_drvdata(psdev->dev)).
|
|
+ */
|
|
+ dev_data = kzalloc(sizeof(*dev_data), GFP_ATOMIC);
|
|
+ if (!dev_data) {
|
|
+ err = -ENOMEM;
|
|
+ goto out;
|
|
+ }
|
|
+ pci_set_drvdata(dev, dev_data);
|
|
+
|
|
+ dev_dbg(&dev->dev, "initializing config\n");
|
|
+
|
|
+ init_waitqueue_head(&aer_wait_queue);
|
|
+ err = pciback_config_init_dev(dev);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ /* HACK: Force device (& ACPI) to determine what IRQ it's on - we
|
|
+ * must do this here because pcibios_enable_device may specify
|
|
+ * the pci device's true irq (and possibly its other resources)
|
|
+ * if they differ from what's in the configuration space.
|
|
+ * This makes the assumption that the device's resources won't
|
|
+ * change after this point (otherwise this code may break!)
|
|
+ */
|
|
+ dev_dbg(&dev->dev, "enabling device\n");
|
|
+ err = pci_enable_device(dev);
|
|
+ if (err)
|
|
+ goto config_release;
|
|
+
|
|
+ /* Now disable the device (this also ensures some private device
|
|
+ * data is setup before we export)
|
|
+ */
|
|
+ dev_dbg(&dev->dev, "reset device\n");
|
|
+ pciback_reset_device(dev);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+ config_release:
|
|
+ pciback_config_free_dev(dev);
|
|
+
|
|
+ out:
|
|
+ pci_set_drvdata(dev, NULL);
|
|
+ kfree(dev_data);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Because some initialization still happens on
|
|
+ * devices during fs_initcall, we need to defer
|
|
+ * full initialization of our devices until
|
|
+ * device_initcall.
|
|
+ */
|
|
+static int __init pcistub_init_devices_late(void)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ unsigned long flags;
|
|
+ int err = 0;
|
|
+
|
|
+ pr_debug("pciback: pcistub_init_devices_late\n");
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ while (!list_empty(&seized_devices)) {
|
|
+ psdev = container_of(seized_devices.next,
|
|
+ struct pcistub_device, dev_list);
|
|
+ list_del(&psdev->dev_list);
|
|
+
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+
|
|
+ err = pcistub_init_device(psdev->dev);
|
|
+ if (err) {
|
|
+ dev_err(&psdev->dev->dev,
|
|
+ "error %d initializing device\n", err);
|
|
+ kfree(psdev);
|
|
+ psdev = NULL;
|
|
+ }
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ if (psdev)
|
|
+ list_add_tail(&psdev->dev_list, &pcistub_devices);
|
|
+ }
|
|
+
|
|
+ initialize_devices = 1;
|
|
+
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int __devinit pcistub_seize(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ unsigned long flags;
|
|
+ int err = 0;
|
|
+
|
|
+ psdev = pcistub_device_alloc(dev);
|
|
+ if (!psdev)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ if (initialize_devices) {
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+
|
|
+ /* don't want irqs disabled when calling pcistub_init_device */
|
|
+ err = pcistub_init_device(psdev->dev);
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ if (!err)
|
|
+ list_add(&psdev->dev_list, &pcistub_devices);
|
|
+ } else {
|
|
+ dev_dbg(&dev->dev, "deferring initialization\n");
|
|
+ list_add(&psdev->dev_list, &seized_devices);
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+
|
|
+ if (err)
|
|
+ pcistub_device_put(psdev);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int __devinit pcistub_probe(struct pci_dev *dev,
|
|
+ const struct pci_device_id *id)
|
|
+{
|
|
+ int err = 0;
|
|
+
|
|
+ dev_dbg(&dev->dev, "probing...\n");
|
|
+
|
|
+ if (pcistub_match(dev)) {
|
|
+
|
|
+ if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL
|
|
+ && dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) {
|
|
+ dev_err(&dev->dev, "can't export pci devices that "
|
|
+ "don't have a normal (0) or bridge (1) "
|
|
+ "header type!\n");
|
|
+ err = -ENODEV;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ dev_info(&dev->dev, "seizing device\n");
|
|
+ err = pcistub_seize(dev);
|
|
+#ifdef CONFIG_PCI_GUESTDEV
|
|
+ } else if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
|
|
+ if (!pci_is_guestdev(dev)) {
|
|
+ err = -ENODEV;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ dev_info(&dev->dev, "seizing device\n");
|
|
+ err = pcistub_seize(dev);
|
|
+#endif /* CONFIG_PCI_GUESTDEV */
|
|
+ } else
|
|
+ /* Didn't find the device */
|
|
+ err = -ENODEV;
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static void pcistub_remove(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev, *found_psdev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ dev_dbg(&dev->dev, "removing\n");
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+
|
|
+ pciback_config_quirk_release(dev);
|
|
+
|
|
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
|
|
+ if (psdev->dev == dev) {
|
|
+ found_psdev = psdev;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+
|
|
+ if (found_psdev) {
|
|
+ dev_dbg(&dev->dev, "found device to remove - in use? %p\n",
|
|
+ found_psdev->pdev);
|
|
+
|
|
+ if (found_psdev->pdev) {
|
|
+ printk(KERN_WARNING "pciback: ****** removing device "
|
|
+ "%s while still in-use! ******\n",
|
|
+ pci_name(found_psdev->dev));
|
|
+ printk(KERN_WARNING "pciback: ****** driver domain may "
|
|
+ "still access this device's i/o resources!\n");
|
|
+ printk(KERN_WARNING "pciback: ****** shutdown driver "
|
|
+ "domain before binding device\n");
|
|
+ printk(KERN_WARNING "pciback: ****** to other drivers "
|
|
+ "or domains\n");
|
|
+
|
|
+ pciback_release_pci_dev(found_psdev->pdev,
|
|
+ found_psdev->dev);
|
|
+ }
|
|
+
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+ list_del(&found_psdev->dev_list);
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+
|
|
+ /* the final put for releasing from the list */
|
|
+ pcistub_device_put(found_psdev);
|
|
+ }
|
|
+}
|
|
+
|
|
+static const struct pci_device_id pcistub_ids[] = {
|
|
+ {
|
|
+ .vendor = PCI_ANY_ID,
|
|
+ .device = PCI_ANY_ID,
|
|
+ .subvendor = PCI_ANY_ID,
|
|
+ .subdevice = PCI_ANY_ID,
|
|
+ },
|
|
+ {0,},
|
|
+};
|
|
+
|
|
+static void kill_domain_by_device(struct pcistub_device *psdev)
|
|
+{
|
|
+ struct xenbus_transaction xbt;
|
|
+ int err;
|
|
+ char nodename[1024];
|
|
+
|
|
+ if (!psdev)
|
|
+ dev_err(&psdev->dev->dev,
|
|
+ "device is NULL when do AER recovery/kill_domain\n");
|
|
+ sprintf(nodename, "/local/domain/0/backend/pci/%d/0",
|
|
+ psdev->pdev->xdev->otherend_id);
|
|
+ nodename[strlen(nodename)] = '\0';
|
|
+
|
|
+again:
|
|
+ err = xenbus_transaction_start(&xbt);
|
|
+ if (err)
|
|
+ {
|
|
+ dev_err(&psdev->dev->dev,
|
|
+ "error %d when start xenbus transaction\n", err);
|
|
+ return;
|
|
+ }
|
|
+ /*PV AER handlers will set this flag*/
|
|
+ xenbus_printf(xbt, nodename, "aerState" , "aerfail" );
|
|
+ err = xenbus_transaction_end(xbt, 0);
|
|
+ if (err)
|
|
+ {
|
|
+ if (err == -EAGAIN)
|
|
+ goto again;
|
|
+ dev_err(&psdev->dev->dev,
|
|
+ "error %d when end xenbus transaction\n", err);
|
|
+ return;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* For each aer recovery step error_detected, mmio_enabled, etc, front_end and
|
|
+ * backend need to have cooperation. In pciback, those steps will do similar
|
|
+ * jobs: send service request and waiting for front_end response.
|
|
+*/
|
|
+static pci_ers_result_t common_process(struct pcistub_device *psdev,
|
|
+ pci_channel_state_t state, int aer_cmd, pci_ers_result_t result)
|
|
+{
|
|
+ pci_ers_result_t res = result;
|
|
+ struct xen_pcie_aer_op *aer_op;
|
|
+ int ret;
|
|
+
|
|
+ /*with PV AER drivers*/
|
|
+ aer_op = &(psdev->pdev->sh_info->aer_op);
|
|
+ aer_op->cmd = aer_cmd ;
|
|
+ /*useful for error_detected callback*/
|
|
+ aer_op->err = state;
|
|
+ /*pcifront_end BDF*/
|
|
+ ret = pciback_get_pcifront_dev(psdev->dev, psdev->pdev,
|
|
+ &aer_op->domain, &aer_op->bus, &aer_op->devfn);
|
|
+ if (!ret) {
|
|
+ dev_err(&psdev->dev->dev,
|
|
+ "pciback: failed to get pcifront device\n");
|
|
+ return PCI_ERS_RESULT_NONE;
|
|
+ }
|
|
+ wmb();
|
|
+
|
|
+ dev_dbg(&psdev->dev->dev,
|
|
+ "pciback: aer_op %x dom %x bus %x devfn %x\n",
|
|
+ aer_cmd, aer_op->domain, aer_op->bus, aer_op->devfn);
|
|
+ /*local flag to mark there's aer request, pciback callback will use this
|
|
+ * flag to judge whether we need to check pci-front give aer service
|
|
+ * ack signal
|
|
+ */
|
|
+ set_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
|
|
+
|
|
+ /*It is possible that a pcifront conf_read_write ops request invokes
|
|
+ * the callback which cause the spurious execution of wake_up.
|
|
+ * Yet it is harmless and better than a spinlock here
|
|
+ */
|
|
+ set_bit(_XEN_PCIB_active,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags);
|
|
+ wmb();
|
|
+ notify_remote_via_irq(psdev->pdev->evtchn_irq);
|
|
+
|
|
+ ret = wait_event_timeout(aer_wait_queue, !(test_bit(_XEN_PCIB_active,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags)), 300*HZ);
|
|
+
|
|
+ if (!ret) {
|
|
+ if (test_bit(_XEN_PCIB_active,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags)) {
|
|
+ dev_err(&psdev->dev->dev,
|
|
+ "pcifront aer process not responding!\n");
|
|
+ clear_bit(_XEN_PCIB_active,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags);
|
|
+ aer_op->err = PCI_ERS_RESULT_NONE;
|
|
+ return res;
|
|
+ }
|
|
+ }
|
|
+ clear_bit(_PCIB_op_pending, (unsigned long *)&psdev->pdev->flags);
|
|
+
|
|
+ if ( test_bit( _XEN_PCIF_active,
|
|
+ (unsigned long*)&psdev->pdev->sh_info->flags)) {
|
|
+ dev_dbg(&psdev->dev->dev,
|
|
+ "schedule pci_conf service in pciback \n");
|
|
+ test_and_schedule_op(psdev->pdev);
|
|
+ }
|
|
+
|
|
+ res = (pci_ers_result_t)aer_op->err;
|
|
+ return res;
|
|
+}
|
|
+
|
|
+/*
|
|
+* pciback_slot_reset: it will send the slot_reset request to pcifront in case
|
|
+* of the device driver could provide this service, and then wait for pcifront
|
|
+* ack.
|
|
+* @dev: pointer to PCI devices
|
|
+* return value is used by aer_core do_recovery policy
|
|
+*/
|
|
+static pci_ers_result_t pciback_slot_reset(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ pci_ers_result_t result;
|
|
+
|
|
+ result = PCI_ERS_RESULT_RECOVERED;
|
|
+ dev_dbg(&dev->dev, "pciback_slot_reset(bus:%x,devfn:%x)\n",
|
|
+ dev->bus->number, dev->devfn);
|
|
+
|
|
+ down_write(&pcistub_sem);
|
|
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
|
|
+ dev->bus->number,
|
|
+ PCI_SLOT(dev->devfn),
|
|
+ PCI_FUNC(dev->devfn));
|
|
+
|
|
+ if ( !psdev || !psdev->pdev )
|
|
+ {
|
|
+ dev_err(&dev->dev,
|
|
+ "pciback device is not found/assigned\n");
|
|
+ goto end;
|
|
+ }
|
|
+
|
|
+ if ( !psdev->pdev->sh_info )
|
|
+ {
|
|
+ dev_err(&dev->dev, "pciback device is not connected or owned"
|
|
+ " by HVM, kill it\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ goto release;
|
|
+ }
|
|
+
|
|
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
|
|
+ dev_err(&dev->dev,
|
|
+ "guest with no AER driver should have been killed\n");
|
|
+ goto release;
|
|
+ }
|
|
+ result = common_process(psdev, 1, XEN_PCI_OP_aer_slotreset, result);
|
|
+
|
|
+ if (result == PCI_ERS_RESULT_NONE ||
|
|
+ result == PCI_ERS_RESULT_DISCONNECT) {
|
|
+ dev_dbg(&dev->dev,
|
|
+ "No AER slot_reset service or disconnected!\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ }
|
|
+release:
|
|
+ pcistub_device_put(psdev);
|
|
+end:
|
|
+ up_write(&pcistub_sem);
|
|
+ return result;
|
|
+
|
|
+}
|
|
+
|
|
+
|
|
+/*pciback_mmio_enabled: it will send the mmio_enabled request to pcifront
|
|
+* in case of the device driver could provide this service, and then wait
|
|
+* for pcifront ack.
|
|
+* @dev: pointer to PCI devices
|
|
+* return value is used by aer_core do_recovery policy
|
|
+*/
|
|
+
|
|
+static pci_ers_result_t pciback_mmio_enabled(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ pci_ers_result_t result;
|
|
+
|
|
+ result = PCI_ERS_RESULT_RECOVERED;
|
|
+ dev_dbg(&dev->dev, "pciback_mmio_enabled(bus:%x,devfn:%x)\n",
|
|
+ dev->bus->number, dev->devfn);
|
|
+
|
|
+ down_write(&pcistub_sem);
|
|
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
|
|
+ dev->bus->number,
|
|
+ PCI_SLOT(dev->devfn),
|
|
+ PCI_FUNC(dev->devfn));
|
|
+
|
|
+ if ( !psdev || !psdev->pdev )
|
|
+ {
|
|
+ dev_err(&dev->dev,
|
|
+ "pciback device is not found/assigned\n");
|
|
+ goto end;
|
|
+ }
|
|
+
|
|
+ if ( !psdev->pdev->sh_info )
|
|
+ {
|
|
+ dev_err(&dev->dev, "pciback device is not connected or owned"
|
|
+ " by HVM, kill it\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ goto release;
|
|
+ }
|
|
+
|
|
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
|
|
+ dev_err(&dev->dev,
|
|
+ "guest with no AER driver should have been killed\n");
|
|
+ goto release;
|
|
+ }
|
|
+ result = common_process(psdev, 1, XEN_PCI_OP_aer_mmio, result);
|
|
+
|
|
+ if (result == PCI_ERS_RESULT_NONE ||
|
|
+ result == PCI_ERS_RESULT_DISCONNECT) {
|
|
+ dev_dbg(&dev->dev,
|
|
+ "No AER mmio_enabled service or disconnected!\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ }
|
|
+release:
|
|
+ pcistub_device_put(psdev);
|
|
+end:
|
|
+ up_write(&pcistub_sem);
|
|
+ return result;
|
|
+}
|
|
+
|
|
+/*pciback_error_detected: it will send the error_detected request to pcifront
|
|
+* in case of the device driver could provide this service, and then wait
|
|
+* for pcifront ack.
|
|
+* @dev: pointer to PCI devices
|
|
+* @error: the current PCI connection state
|
|
+* return value is used by aer_core do_recovery policy
|
|
+*/
|
|
+
|
|
+static pci_ers_result_t pciback_error_detected(struct pci_dev *dev,
|
|
+ pci_channel_state_t error)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ pci_ers_result_t result;
|
|
+
|
|
+ result = PCI_ERS_RESULT_CAN_RECOVER;
|
|
+ dev_dbg(&dev->dev, "pciback_error_detected(bus:%x,devfn:%x)\n",
|
|
+ dev->bus->number, dev->devfn);
|
|
+
|
|
+ down_write(&pcistub_sem);
|
|
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
|
|
+ dev->bus->number,
|
|
+ PCI_SLOT(dev->devfn),
|
|
+ PCI_FUNC(dev->devfn));
|
|
+
|
|
+ if ( !psdev || !psdev->pdev )
|
|
+ {
|
|
+ dev_err(&dev->dev,
|
|
+ "pciback device is not found/assigned\n");
|
|
+ goto end;
|
|
+ }
|
|
+
|
|
+ if ( !psdev->pdev->sh_info )
|
|
+ {
|
|
+ dev_err(&dev->dev, "pciback device is not connected or owned"
|
|
+ " by HVM, kill it\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ goto release;
|
|
+ }
|
|
+
|
|
+ /*Guest owns the device yet no aer handler regiested, kill guest*/
|
|
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
|
|
+ dev_dbg(&dev->dev, "guest may have no aer driver, kill it\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ goto release;
|
|
+ }
|
|
+ result = common_process(psdev, error, XEN_PCI_OP_aer_detected, result);
|
|
+
|
|
+ if (result == PCI_ERS_RESULT_NONE ||
|
|
+ result == PCI_ERS_RESULT_DISCONNECT) {
|
|
+ dev_dbg(&dev->dev,
|
|
+ "No AER error_detected service or disconnected!\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ }
|
|
+release:
|
|
+ pcistub_device_put(psdev);
|
|
+end:
|
|
+ up_write(&pcistub_sem);
|
|
+ return result;
|
|
+}
|
|
+
|
|
+/*pciback_error_resume: it will send the error_resume request to pcifront
|
|
+* in case of the device driver could provide this service, and then wait
|
|
+* for pcifront ack.
|
|
+* @dev: pointer to PCI devices
|
|
+*/
|
|
+
|
|
+static void pciback_error_resume(struct pci_dev *dev)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+
|
|
+ dev_dbg(&dev->dev, "pciback_error_resume(bus:%x,devfn:%x)\n",
|
|
+ dev->bus->number, dev->devfn);
|
|
+
|
|
+ down_write(&pcistub_sem);
|
|
+ psdev = pcistub_device_find(pci_domain_nr(dev->bus),
|
|
+ dev->bus->number,
|
|
+ PCI_SLOT(dev->devfn),
|
|
+ PCI_FUNC(dev->devfn));
|
|
+
|
|
+ if ( !psdev || !psdev->pdev )
|
|
+ {
|
|
+ dev_err(&dev->dev,
|
|
+ "pciback device is not found/assigned\n");
|
|
+ goto end;
|
|
+ }
|
|
+
|
|
+ if ( !psdev->pdev->sh_info )
|
|
+ {
|
|
+ dev_err(&dev->dev, "pciback device is not connected or owned"
|
|
+ " by HVM, kill it\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ goto release;
|
|
+ }
|
|
+
|
|
+ if ( !test_bit(_XEN_PCIB_AERHANDLER,
|
|
+ (unsigned long *)&psdev->pdev->sh_info->flags) ) {
|
|
+ dev_err(&dev->dev,
|
|
+ "guest with no AER driver should have been killed\n");
|
|
+ kill_domain_by_device(psdev);
|
|
+ goto release;
|
|
+ }
|
|
+ common_process(psdev, 1, XEN_PCI_OP_aer_resume, PCI_ERS_RESULT_RECOVERED);
|
|
+release:
|
|
+ pcistub_device_put(psdev);
|
|
+end:
|
|
+ up_write(&pcistub_sem);
|
|
+ return;
|
|
+}
|
|
+
|
|
+/*add pciback AER handling*/
|
|
+static struct pci_error_handlers pciback_error_handler = {
|
|
+ .error_detected = pciback_error_detected,
|
|
+ .mmio_enabled = pciback_mmio_enabled,
|
|
+ .slot_reset = pciback_slot_reset,
|
|
+ .resume = pciback_error_resume,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Note: There is no MODULE_DEVICE_TABLE entry here because this isn't
|
|
+ * for a normal device. I don't want it to be loaded automatically.
|
|
+ */
|
|
+
|
|
+static struct pci_driver pciback_pci_driver = {
|
|
+ .name = "pciback",
|
|
+ .id_table = pcistub_ids,
|
|
+ .probe = pcistub_probe,
|
|
+ .remove = pcistub_remove,
|
|
+ .err_handler = &pciback_error_handler,
|
|
+};
|
|
+
|
|
/*
 * Parse a PCI slot given in hex as "domain:bus:slot.func" or, with the
 * domain omitted (then reported as 0), as "bus:slot.func".  Leading
 * whitespace is skipped.
 *
 * Returns 0 on success and -EINVAL if neither form matches.
 */
static inline int str_to_slot(const char *buf, int *domain, int *bus,
			      int *slot, int *func)
{
	int matched = sscanf(buf, " %x:%x:%x.%x", domain, bus, slot, func);

	if (matched == 4)
		return 0;
	if (matched < 0)
		return -EINVAL;

	/* try again without domain */
	*domain = 0;
	matched = sscanf(buf, " %x:%x.%x", bus, slot, func);

	return (matched == 3) ? 0 : -EINVAL;
}
|
|
+
|
|
/*
 * Parse a quirk description of the form
 * "dddd:bb:ss.f-rrrrrrrr:s:mmmmmmmm" (all fields hex): PCI slot, register
 * offset, field size and mask.
 *
 * Returns 0 when all seven fields are read, -EINVAL otherwise.
 */
static inline int str_to_quirk(const char *buf, int *domain, int *bus, int
			       *slot, int *func, int *reg, int *size, int *mask)
{
	int matched = sscanf(buf, " %04x:%02x:%02x.%1x-%08x:%1x:%08x",
			     domain, bus, slot, func, reg, size, mask);

	return (matched == 7) ? 0 : -EINVAL;
}
|
|
+
|
|
+static int pcistub_device_id_add(int domain, int bus, int slot, int func)
|
|
+{
|
|
+ struct pcistub_device_id *pci_dev_id;
|
|
+ unsigned long flags;
|
|
+
|
|
+ pci_dev_id = kmalloc(sizeof(*pci_dev_id), GFP_KERNEL);
|
|
+ if (!pci_dev_id)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ pci_dev_id->domain = domain;
|
|
+ pci_dev_id->bus = bus;
|
|
+ pci_dev_id->devfn = PCI_DEVFN(slot, func);
|
|
+
|
|
+ pr_debug("pciback: wants to seize %04x:%02x:%02x.%01x\n",
|
|
+ domain, bus, slot, func);
|
|
+
|
|
+ spin_lock_irqsave(&device_ids_lock, flags);
|
|
+ list_add_tail(&pci_dev_id->slot_list, &pcistub_device_ids);
|
|
+ spin_unlock_irqrestore(&device_ids_lock, flags);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int pcistub_device_id_remove(int domain, int bus, int slot, int func)
|
|
+{
|
|
+ struct pcistub_device_id *pci_dev_id, *t;
|
|
+ int devfn = PCI_DEVFN(slot, func);
|
|
+ int err = -ENOENT;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&device_ids_lock, flags);
|
|
+ list_for_each_entry_safe(pci_dev_id, t, &pcistub_device_ids, slot_list) {
|
|
+
|
|
+ if (pci_dev_id->domain == domain
|
|
+ && pci_dev_id->bus == bus && pci_dev_id->devfn == devfn) {
|
|
+ /* Don't break; here because it's possible the same
|
|
+ * slot could be in the list more than once
|
|
+ */
|
|
+ list_del(&pci_dev_id->slot_list);
|
|
+ kfree(pci_dev_id);
|
|
+
|
|
+ err = 0;
|
|
+
|
|
+ pr_debug("pciback: removed %04x:%02x:%02x.%01x from "
|
|
+ "seize list\n", domain, bus, slot, func);
|
|
+ }
|
|
+ }
|
|
+ spin_unlock_irqrestore(&device_ids_lock, flags);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Add a user-supplied quirk field (register offset, size, mask) to the
|
|
+ * seized device at domain:bus:slot.func.
|
|
+ *
|
|
+ * Returns 0 on success, -ENODEV when no stub device (or no pci_dev behind
|
|
+ * it) matches, -ENOMEM when the field cannot be allocated, or the error
|
|
+ * from pciback_config_quirks_add_field().
|
|
+ */
|
|
+static int pcistub_reg_add(int domain, int bus, int slot, int func, int reg,
|
|
+			   int size, int mask)
|
|
+{
|
|
+	int err = 0;
|
|
+	struct pcistub_device *psdev;
|
|
+	struct pci_dev *dev;
|
|
+	struct config_field *field;
|
|
+
|
|
+	psdev = pcistub_device_find(domain, bus, slot, func);
|
|
+	if (!psdev) {
|
|
+		err = -ENODEV;
|
|
+		goto out;
|
|
+	}
|
|
+	if (!psdev->dev) {
|
|
+		err = -ENODEV;
|
|
+		goto release;
|
|
+	}
|
|
+	dev = psdev->dev;
|
|
+
|
|
+	field = kzalloc(sizeof(*field), GFP_ATOMIC);
|
|
+	if (!field) {
|
|
+		err = -ENOMEM;
|
|
+		goto release;
|
|
+	}
|
|
+
|
|
+	field->offset = reg;
|
|
+	field->size = size;
|
|
+	field->mask = mask;
|
|
+	field->init = NULL;
|
|
+	field->reset = NULL;
|
|
+	field->release = NULL;
|
|
+	field->clean = pciback_config_field_free;
|
|
+
|
|
+	err = pciback_config_quirks_add_field(dev, field);
|
|
+	if (err)
|
|
+		kfree(field);
|
|
+
|
|
+release:
|
|
+	/* Balance the lookup above, as permissive_add() does. */
|
|
+	pcistub_device_put(psdev);
|
|
+out:
|
|
+	return err;
|
|
+}
|
|
+
|
|
+static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf,
|
|
+ size_t count)
|
|
+{
|
|
+ int domain, bus, slot, func;
|
|
+ int err;
|
|
+
|
|
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ err = pcistub_device_id_add(domain, bus, slot, func);
|
|
+
|
|
+ out:
|
|
+ if (!err)
|
|
+ err = count;
|
|
+ return err;
|
|
+}
|
|
+static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
|
|
+
|
|
+static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
|
|
+ size_t count)
|
|
+{
|
|
+ int domain, bus, slot, func;
|
|
+ int err;
|
|
+
|
|
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ err = pcistub_device_id_remove(domain, bus, slot, func);
|
|
+
|
|
+ out:
|
|
+ if (!err)
|
|
+ err = count;
|
|
+ return err;
|
|
+}
|
|
+static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
|
|
+
|
|
+static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
|
|
+{
|
|
+ struct pcistub_device_id *pci_dev_id;
|
|
+ size_t count = 0;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&device_ids_lock, flags);
|
|
+ list_for_each_entry(pci_dev_id, &pcistub_device_ids, slot_list) {
|
|
+ if (count >= PAGE_SIZE)
|
|
+ break;
|
|
+
|
|
+ count += scnprintf(buf + count, PAGE_SIZE - count,
|
|
+ "%04x:%02x:%02x.%01x\n",
|
|
+ pci_dev_id->domain, pci_dev_id->bus,
|
|
+ PCI_SLOT(pci_dev_id->devfn),
|
|
+ PCI_FUNC(pci_dev_id->devfn));
|
|
+ }
|
|
+ spin_unlock_irqrestore(&device_ids_lock, flags);
|
|
+
|
|
+ return count;
|
|
+}
|
|
+static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
|
|
+
|
|
+static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
|
|
+ size_t count)
|
|
+{
|
|
+ int domain, bus, slot, func, reg, size, mask;
|
|
+ int err;
|
|
+
|
|
+ err = str_to_quirk(buf, &domain, &bus, &slot, &func, ®, &size,
|
|
+ &mask);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ err = pcistub_reg_add(domain, bus, slot, func, reg, size, mask);
|
|
+
|
|
+ out:
|
|
+ if (!err)
|
|
+ err = count;
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf)
|
|
+{
|
|
+ int count = 0;
|
|
+ unsigned long flags;
|
|
+ extern struct list_head pciback_quirks;
|
|
+ struct pciback_config_quirk *quirk;
|
|
+ struct pciback_dev_data *dev_data;
|
|
+ const struct config_field *field;
|
|
+ const struct config_field_entry *cfg_entry;
|
|
+
|
|
+ spin_lock_irqsave(&device_ids_lock, flags);
|
|
+ list_for_each_entry(quirk, &pciback_quirks, quirks_list) {
|
|
+ if (count >= PAGE_SIZE)
|
|
+ goto out;
|
|
+
|
|
+ count += scnprintf(buf + count, PAGE_SIZE - count,
|
|
+ "%02x:%02x.%01x\n\t%04x:%04x:%04x:%04x\n",
|
|
+ quirk->pdev->bus->number,
|
|
+ PCI_SLOT(quirk->pdev->devfn),
|
|
+ PCI_FUNC(quirk->pdev->devfn),
|
|
+ quirk->devid.vendor, quirk->devid.device,
|
|
+ quirk->devid.subvendor,
|
|
+ quirk->devid.subdevice);
|
|
+
|
|
+ dev_data = pci_get_drvdata(quirk->pdev);
|
|
+
|
|
+ list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
|
|
+ field = cfg_entry->field;
|
|
+ if (count >= PAGE_SIZE)
|
|
+ goto out;
|
|
+
|
|
+ count += scnprintf(buf + count, PAGE_SIZE - count,
|
|
+ "\t\t%08x:%01x:%08x\n",
|
|
+ cfg_entry->base_offset + field->offset,
|
|
+ field->size, field->mask);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&device_ids_lock, flags);
|
|
+
|
|
+ return count;
|
|
+}
|
|
+static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
|
|
+
|
|
+static ssize_t permissive_add(struct device_driver *drv, const char *buf,
|
|
+ size_t count)
|
|
+{
|
|
+ int domain, bus, slot, func;
|
|
+ int err;
|
|
+ struct pcistub_device *psdev;
|
|
+ struct pciback_dev_data *dev_data;
|
|
+ err = str_to_slot(buf, &domain, &bus, &slot, &func);
|
|
+ if (err)
|
|
+ goto out;
|
|
+ psdev = pcistub_device_find(domain, bus, slot, func);
|
|
+ if (!psdev) {
|
|
+ err = -ENODEV;
|
|
+ goto out;
|
|
+ }
|
|
+ if (!psdev->dev) {
|
|
+ err = -ENODEV;
|
|
+ goto release;
|
|
+ }
|
|
+ dev_data = pci_get_drvdata(psdev->dev);
|
|
+ /* the driver data for a device should never be null at this point */
|
|
+ if (!dev_data) {
|
|
+ err = -ENXIO;
|
|
+ goto release;
|
|
+ }
|
|
+ if (!dev_data->permissive) {
|
|
+ dev_data->permissive = 1;
|
|
+ /* Let user know that what they're doing could be unsafe */
|
|
+ dev_warn(&psdev->dev->dev,
|
|
+ "enabling permissive mode configuration space accesses!\n");
|
|
+ dev_warn(&psdev->dev->dev,
|
|
+ "permissive mode is potentially unsafe!\n");
|
|
+ }
|
|
+ release:
|
|
+ pcistub_device_put(psdev);
|
|
+ out:
|
|
+ if (!err)
|
|
+ err = count;
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static ssize_t permissive_show(struct device_driver *drv, char *buf)
|
|
+{
|
|
+ struct pcistub_device *psdev;
|
|
+ struct pciback_dev_data *dev_data;
|
|
+ size_t count = 0;
|
|
+ unsigned long flags;
|
|
+ spin_lock_irqsave(&pcistub_devices_lock, flags);
|
|
+ list_for_each_entry(psdev, &pcistub_devices, dev_list) {
|
|
+ if (count >= PAGE_SIZE)
|
|
+ break;
|
|
+ if (!psdev->dev)
|
|
+ continue;
|
|
+ dev_data = pci_get_drvdata(psdev->dev);
|
|
+ if (!dev_data || !dev_data->permissive)
|
|
+ continue;
|
|
+ count +=
|
|
+ scnprintf(buf + count, PAGE_SIZE - count, "%s\n",
|
|
+ pci_name(psdev->dev));
|
|
+ }
|
|
+ spin_unlock_irqrestore(&pcistub_devices_lock, flags);
|
|
+ return count;
|
|
+}
|
|
+static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
|
|
+
|
|
+#ifdef CONFIG_PCI_MSI
|
|
+
|
|
+/*
|
|
+ * Report the otherend_id of the xenbus device that currently owns @dev.
|
|
+ *
|
|
+ * Returns -1 when @dev is not a seized device or has no pciback_device
|
|
+ * attached to it.
|
|
+ */
|
|
+int pciback_get_owner(struct pci_dev *dev)
|
|
+{
|
|
+	struct pcistub_device *psdev;
|
|
+	int owner = -1;
|
|
+
|
|
+	psdev = pcistub_device_find(pci_domain_nr(dev->bus), dev->bus->number,
|
|
+			PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
|
|
+	if (!psdev)
|
|
+		return -1;
|
|
+
|
|
+	if (psdev->pdev)
|
|
+		owner = psdev->pdev->xdev->otherend_id;
|
|
+
|
|
+	/* Drop the lookup reference, matching permissive_add(). */
|
|
+	pcistub_device_put(psdev);
|
|
+
|
|
+	return owner;
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void pcistub_exit(void)
|
|
+{
|
|
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_new_slot);
|
|
+ driver_remove_file(&pciback_pci_driver.driver,
|
|
+ &driver_attr_remove_slot);
|
|
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_slots);
|
|
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_quirks);
|
|
+ driver_remove_file(&pciback_pci_driver.driver, &driver_attr_permissive);
|
|
+
|
|
+ pci_unregister_driver(&pciback_pci_driver);
|
|
+ WARN_ON(unregister_msi_get_owner(pciback_get_owner));
|
|
+}
|
|
+
|
|
+static int __init pcistub_init(void)
|
|
+{
|
|
+ int pos = 0;
|
|
+ int err = 0;
|
|
+ int domain, bus, slot, func;
|
|
+ int parsed;
|
|
+
|
|
+ if (pci_devs_to_hide && *pci_devs_to_hide) {
|
|
+ do {
|
|
+ parsed = 0;
|
|
+
|
|
+ err = sscanf(pci_devs_to_hide + pos,
|
|
+ " (%x:%x:%x.%x) %n",
|
|
+ &domain, &bus, &slot, &func, &parsed);
|
|
+ if (err != 4) {
|
|
+ domain = 0;
|
|
+ err = sscanf(pci_devs_to_hide + pos,
|
|
+ " (%x:%x.%x) %n",
|
|
+ &bus, &slot, &func, &parsed);
|
|
+ if (err != 3)
|
|
+ goto parse_error;
|
|
+ }
|
|
+
|
|
+ err = pcistub_device_id_add(domain, bus, slot, func);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ /* if parsed<=0, we've reached the end of the string */
|
|
+ pos += parsed;
|
|
+ } while (parsed > 0 && pci_devs_to_hide[pos]);
|
|
+ }
|
|
+
|
|
+ /* If we're the first PCI Device Driver to register, we're the
|
|
+ * first one to get offered PCI devices as they become
|
|
+ * available (and thus we can be the first to grab them)
|
|
+ */
|
|
+ err = pci_register_driver(&pciback_pci_driver);
|
|
+ if (err < 0)
|
|
+ goto out;
|
|
+
|
|
+ err = driver_create_file(&pciback_pci_driver.driver,
|
|
+ &driver_attr_new_slot);
|
|
+ if (!err)
|
|
+ err = driver_create_file(&pciback_pci_driver.driver,
|
|
+ &driver_attr_remove_slot);
|
|
+ if (!err)
|
|
+ err = driver_create_file(&pciback_pci_driver.driver,
|
|
+ &driver_attr_slots);
|
|
+ if (!err)
|
|
+ err = driver_create_file(&pciback_pci_driver.driver,
|
|
+ &driver_attr_quirks);
|
|
+ if (!err)
|
|
+ err = driver_create_file(&pciback_pci_driver.driver,
|
|
+ &driver_attr_permissive);
|
|
+
|
|
+ if (!err)
|
|
+ err = register_msi_get_owner(pciback_get_owner);
|
|
+ if (err)
|
|
+ pcistub_exit();
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+
|
|
+ parse_error:
|
|
+ printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n",
|
|
+ pci_devs_to_hide + pos);
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+#ifndef MODULE
|
|
+/*
|
|
+ * fs_initcall happens before device_initcall
|
|
+ * so pciback *should* get called first (b/c we
|
|
+ * want to suck up any device before other drivers
|
|
+ * get a chance by being the first pci device
|
|
+ * driver to register)
|
|
+ */
|
|
+fs_initcall(pcistub_init);
|
|
+#endif
|
|
+
|
|
+static int __init pciback_init(void)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ err = pciback_config_init();
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+#ifdef MODULE
|
|
+ err = pcistub_init();
|
|
+ if (err < 0)
|
|
+ return err;
|
|
+#endif
|
|
+
|
|
+ pcistub_init_devices_late();
|
|
+ err = pciback_xenbus_register();
|
|
+ if (err)
|
|
+ pcistub_exit();
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static void __exit pciback_cleanup(void)
|
|
+{
|
|
+ pciback_xenbus_unregister();
|
|
+ pcistub_exit();
|
|
+}
|
|
+
|
|
+module_init(pciback_init);
|
|
+module_exit(pciback_cleanup);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/pciback.h 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,126 @@
|
|
+/*
|
|
+ * PCI Backend Common Data Structures & Function Declarations
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#ifndef __XEN_PCIBACK_H__
|
|
+#define __XEN_PCIBACK_H__
|
|
+
|
|
+#include <linux/pci.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/workqueue.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <xen/interface/io/pciif.h>
|
|
+
|
|
+struct pci_dev_entry {
|
|
+ struct list_head list;
|
|
+ struct pci_dev *dev;
|
|
+};
|
|
+
|
|
+#define _PDEVF_op_active (0)
|
|
+#define PDEVF_op_active (1<<(_PDEVF_op_active))
|
|
+#define _PCIB_op_pending (1)
|
|
+#define PCIB_op_pending (1<<(_PCIB_op_pending))
|
|
+
|
|
+struct pciback_device {
|
|
+ void *pci_dev_data;
|
|
+ spinlock_t dev_lock;
|
|
+
|
|
+ struct xenbus_device *xdev;
|
|
+
|
|
+ struct xenbus_watch be_watch;
|
|
+ u8 be_watching;
|
|
+
|
|
+ int evtchn_irq;
|
|
+
|
|
+ struct vm_struct *sh_area;
|
|
+ struct xen_pci_sharedinfo *sh_info;
|
|
+
|
|
+ unsigned long flags;
|
|
+
|
|
+ struct work_struct op_work;
|
|
+};
|
|
+
|
|
+struct pciback_dev_data {
|
|
+ struct list_head config_fields;
|
|
+ int permissive;
|
|
+ int warned_on_write;
|
|
+};
|
|
+
|
|
+/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
|
|
+struct pci_dev *pcistub_get_pci_dev_by_slot(struct pciback_device *pdev,
|
|
+ int domain, int bus,
|
|
+ int slot, int func);
|
|
+struct pci_dev *pcistub_get_pci_dev(struct pciback_device *pdev,
|
|
+ struct pci_dev *dev);
|
|
+void pcistub_put_pci_dev(struct pci_dev *dev);
|
|
+
|
|
+/* Ensure a device is turned off or reset */
|
|
+void pciback_reset_device(struct pci_dev *pdev);
|
|
+
|
|
+/* Access a virtual configuration space for a PCI device */
|
|
+int pciback_config_init(void);
|
|
+int pciback_config_init_dev(struct pci_dev *dev);
|
|
+void pciback_config_free_dyn_fields(struct pci_dev *dev);
|
|
+void pciback_config_reset_dev(struct pci_dev *dev);
|
|
+void pciback_config_free_dev(struct pci_dev *dev);
|
|
+int pciback_config_read(struct pci_dev *dev, int offset, int size,
|
|
+ u32 * ret_val);
|
|
+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value);
|
|
+
|
|
+/* Handle requests for specific devices from the frontend */
|
|
+typedef int (*publish_pci_dev_cb) (struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn, unsigned int devid);
|
|
+typedef int (*publish_pci_root_cb) (struct pciback_device * pdev,
|
|
+ unsigned int domain, unsigned int bus);
|
|
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
|
|
+ int devid, publish_pci_dev_cb publish_cb);
|
|
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev);
|
|
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn);
|
|
+
|
|
+/**
|
|
+* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in pciback
|
|
+* before sending aer request to pcifront, so that guest could identify
|
|
+* device, cooperate with pciback to finish aer recovery job if device driver
|
|
+* has the capability
|
|
+*/
|
|
+
|
|
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
|
|
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn);
|
|
+int pciback_init_devices(struct pciback_device *pdev);
|
|
+int pciback_publish_pci_roots(struct pciback_device *pdev,
|
|
+ publish_pci_root_cb cb);
|
|
+void pciback_release_devices(struct pciback_device *pdev);
|
|
+
|
|
+/* Handles events from front-end */
|
|
+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
|
|
+void pciback_do_op(void *data);
|
|
+
|
|
+int pciback_xenbus_register(void);
|
|
+void pciback_xenbus_unregister(void);
|
|
+
|
|
+#ifdef CONFIG_PCI_MSI
|
|
+int pciback_enable_msi(struct pciback_device *pdev,
|
|
+ struct pci_dev *dev, struct xen_pci_op *op);
|
|
+
|
|
+int pciback_disable_msi(struct pciback_device *pdev,
|
|
+ struct pci_dev *dev, struct xen_pci_op *op);
|
|
+
|
|
+
|
|
+int pciback_enable_msix(struct pciback_device *pdev,
|
|
+ struct pci_dev *dev, struct xen_pci_op *op);
|
|
+
|
|
+int pciback_disable_msix(struct pciback_device *pdev,
|
|
+ struct pci_dev *dev, struct xen_pci_op *op);
|
|
+#endif
|
|
+extern int verbose_request;
|
|
+
|
|
+void test_and_schedule_op(struct pciback_device *pdev);
|
|
+#endif
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/pciback_ops.c 2011-02-17 09:58:10.000000000 +0100
|
|
@@ -0,0 +1,142 @@
|
|
+/*
|
|
+ * PCI Backend Operations - respond to PCI requests from Frontend
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/wait.h>
|
|
+#include <asm/bitops.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include "pciback.h"
|
|
+
|
|
+int verbose_request = 0;
|
|
+module_param(verbose_request, int, 0644);
|
|
+
|
|
+/* Ensure a device is "turned off" and ready to be exported.
|
|
+ * (Also see pciback_config_reset to ensure virtual configuration space is
|
|
+ * ready to be re-exported)
|
|
+ */
|
|
+void pciback_reset_device(struct pci_dev *dev)
|
|
+{
|
|
+ u16 cmd;
|
|
+
|
|
+ /* Disable devices (but not bridges) */
|
|
+ if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
|
|
+#ifdef CONFIG_PCI_MSI
|
|
+ /* The guest could have been abruptly killed without
|
|
+ * disabling MSI/MSI-X interrupts.*/
|
|
+ if (dev->msix_enabled)
|
|
+ pci_disable_msix(dev);
|
|
+ if (dev->msi_enabled)
|
|
+ pci_disable_msi(dev);
|
|
+#endif
|
|
+ pci_disable_device(dev);
|
|
+
|
|
+ pci_write_config_word(dev, PCI_COMMAND, 0);
|
|
+
|
|
+ dev->is_enabled = 0;
|
|
+ dev->is_busmaster = 0;
|
|
+ } else {
|
|
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
|
|
+ if (cmd & (PCI_COMMAND_INVALIDATE)) {
|
|
+ cmd &= ~(PCI_COMMAND_INVALIDATE);
|
|
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
|
|
+
|
|
+ dev->is_busmaster = 0;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+extern wait_queue_head_t aer_wait_queue;
|
|
+extern struct workqueue_struct *pciback_wq;
|
|
+/*
|
|
+* Now the same evtchn is used for both pcifront conf_read_write request
|
|
+* as well as pcie aer front end ack. We use a new work_queue to schedule
|
|
+* pciback conf_read_write service for avoiding conflict with aer_core
|
|
+* do_recovery job which also use the system default work_queue
|
|
+*/
|
|
+void test_and_schedule_op(struct pciback_device *pdev)
|
|
+{
|
|
+ /* Check that frontend is requesting an operation and that we are not
|
|
+ * already processing a request */
|
|
+ if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
|
|
+ && !test_and_set_bit(_PDEVF_op_active, &pdev->flags))
|
|
+ {
|
|
+ queue_work(pciback_wq, &pdev->op_work);
|
|
+ }
|
|
+ /*_XEN_PCIB_active should have been cleared by pcifront. And also make
|
|
+ sure pciback is waiting for ack by checking _PCIB_op_pending*/
|
|
+ if (!test_bit(_XEN_PCIB_active,(unsigned long *)&pdev->sh_info->flags)
|
|
+ &&test_bit(_PCIB_op_pending, &pdev->flags)) {
|
|
+ wake_up(&aer_wait_queue);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Performing the configuration space reads/writes must not be done in atomic
|
|
+ * context because some of the pci_* functions can sleep (mostly due to ACPI
|
|
+ * use of semaphores). This function is intended to be called from a work
|
|
+ * queue in process context taking a struct pciback_device as a parameter */
|
|
+void pciback_do_op(void *data)
|
|
+{
|
|
+ struct pciback_device *pdev = data;
|
|
+ struct pci_dev *dev;
|
|
+ struct xen_pci_op *op = &pdev->sh_info->op;
|
|
+
|
|
+ dev = pciback_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
|
|
+
|
|
+ if (dev == NULL)
|
|
+ op->err = XEN_PCI_ERR_dev_not_found;
|
|
+ else
|
|
+ {
|
|
+ switch (op->cmd)
|
|
+ {
|
|
+ case XEN_PCI_OP_conf_read:
|
|
+ op->err = pciback_config_read(dev,
|
|
+ op->offset, op->size, &op->value);
|
|
+ break;
|
|
+ case XEN_PCI_OP_conf_write:
|
|
+ op->err = pciback_config_write(dev,
|
|
+ op->offset, op->size, op->value);
|
|
+ break;
|
|
+#ifdef CONFIG_PCI_MSI
|
|
+ case XEN_PCI_OP_enable_msi:
|
|
+ op->err = pciback_enable_msi(pdev, dev, op);
|
|
+ break;
|
|
+ case XEN_PCI_OP_disable_msi:
|
|
+ op->err = pciback_disable_msi(pdev, dev, op);
|
|
+ break;
|
|
+ case XEN_PCI_OP_enable_msix:
|
|
+ op->err = pciback_enable_msix(pdev, dev, op);
|
|
+ break;
|
|
+ case XEN_PCI_OP_disable_msix:
|
|
+ op->err = pciback_disable_msix(pdev, dev, op);
|
|
+ break;
|
|
+#endif
|
|
+ default:
|
|
+ op->err = XEN_PCI_ERR_not_implemented;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ /* Tell the driver domain that we're done. */
|
|
+ wmb();
|
|
+ clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
|
|
+ notify_remote_via_irq(pdev->evtchn_irq);
|
|
+
|
|
+ /* Mark that we're done. */
|
|
+ smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
|
|
+ clear_bit(_PDEVF_op_active, &pdev->flags);
|
|
+ smp_mb__after_clear_bit(); /* /before/ final check for work */
|
|
+
|
|
+ /* Check to see if the driver domain tried to start another request in
|
|
+ * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
|
|
+ */
|
|
+ test_and_schedule_op(pdev);
|
|
+}
|
|
+
|
|
+irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
|
|
+{
|
|
+ struct pciback_device *pdev = dev_id;
|
|
+
|
|
+ test_and_schedule_op(pdev);
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/slot.c 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,187 @@
|
|
+/*
|
|
+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
|
|
+ * to the frontend
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil> (vpci.c)
|
|
+ * Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
|
|
+ */
|
|
+
|
|
+#include <linux/list.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include "pciback.h"
|
|
+
|
|
+/* There are at most 32 slots in a pci bus. */
|
|
+#define PCI_SLOT_MAX 32
|
|
+
|
|
+#define PCI_BUS_NBR 2
|
|
+
|
|
+struct slot_dev_data {
|
|
+ /* Access to slots must be protected by lock */
|
|
+ struct pci_dev *slots[PCI_BUS_NBR][PCI_SLOT_MAX];
|
|
+ spinlock_t lock;
|
|
+};
|
|
+
|
|
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn)
|
|
+{
|
|
+ struct pci_dev *dev = NULL;
|
|
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (domain != 0 || PCI_FUNC(devfn) != 0)
|
|
+ return NULL;
|
|
+
|
|
+ if (PCI_SLOT(devfn) >= PCI_SLOT_MAX || bus >= PCI_BUS_NBR)
|
|
+ return NULL;
|
|
+
|
|
+ spin_lock_irqsave(&slot_dev->lock, flags);
|
|
+ dev = slot_dev->slots[bus][PCI_SLOT(devfn)];
|
|
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
|
|
+
|
|
+ return dev;
|
|
+}
|
|
+
|
|
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
|
|
+ int devid, publish_pci_dev_cb publish_cb)
|
|
+{
|
|
+ int err = 0, slot, bus;
|
|
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
|
|
+ unsigned long flags;
|
|
+
|
|
+ if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
|
|
+ err = -EFAULT;
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Can't export bridges on the virtual PCI bus");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ spin_lock_irqsave(&slot_dev->lock, flags);
|
|
+
|
|
+ /* Assign to a new slot on the virtual PCI bus */
|
|
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+ if (slot_dev->slots[bus][slot] == NULL) {
|
|
+ printk(KERN_INFO
|
|
+ "pciback: slot: %s: assign to virtual slot %d, bus %d\n",
|
|
+ pci_name(dev), slot, bus);
|
|
+ slot_dev->slots[bus][slot] = dev;
|
|
+ goto unlock;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ err = -ENOMEM;
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "No more space on root virtual PCI bus");
|
|
+
|
|
+ unlock:
|
|
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
|
|
+
|
|
+ /* Publish this device. */
|
|
+ if(!err)
|
|
+ err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, 0), devid);
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
|
|
+{
|
|
+ int slot, bus;
|
|
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
|
|
+ struct pci_dev *found_dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&slot_dev->lock, flags);
|
|
+
|
|
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+ if (slot_dev->slots[bus][slot] == dev) {
|
|
+ slot_dev->slots[bus][slot] = NULL;
|
|
+ found_dev = dev;
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&slot_dev->lock, flags);
|
|
+
|
|
+ if (found_dev)
|
|
+ pcistub_put_pci_dev(found_dev);
|
|
+}
|
|
+
|
|
+int pciback_init_devices(struct pciback_device *pdev)
|
|
+{
|
|
+ int slot, bus;
|
|
+ struct slot_dev_data *slot_dev;
|
|
+
|
|
+ slot_dev = kmalloc(sizeof(*slot_dev), GFP_KERNEL);
|
|
+ if (!slot_dev)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ spin_lock_init(&slot_dev->lock);
|
|
+
|
|
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++)
|
|
+ slot_dev->slots[bus][slot] = NULL;
|
|
+
|
|
+ pdev->pci_dev_data = slot_dev;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int pciback_publish_pci_roots(struct pciback_device *pdev,
|
|
+ publish_pci_root_cb publish_cb)
|
|
+{
|
|
+ /* The Virtual PCI bus has only one root */
|
|
+ return publish_cb(pdev, 0, 0);
|
|
+}
|
|
+
|
|
+void pciback_release_devices(struct pciback_device *pdev)
|
|
+{
|
|
+ int slot, bus;
|
|
+ struct slot_dev_data *slot_dev = pdev->pci_dev_data;
|
|
+ struct pci_dev *dev;
|
|
+
|
|
+ for (bus = 0; bus < PCI_BUS_NBR; bus++)
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+ dev = slot_dev->slots[bus][slot];
|
|
+ if (dev != NULL)
|
|
+ pcistub_put_pci_dev(dev);
|
|
+ }
|
|
+
|
|
+ kfree(slot_dev);
|
|
+ pdev->pci_dev_data = NULL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Translate the physical device @pcidev into the domain/bus/devfn under
|
|
+ * which it sits in @pdev's virtual slot table.
|
|
+ *
|
|
+ * Returns 1 and fills *domain, *bus and *devfn when the device is found,
|
|
+ * 0 (outputs untouched) otherwise.
|
|
+ */
|
|
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
|
|
+		unsigned int *domain, unsigned int *bus, unsigned int *devfn)
|
|
+{
|
|
+	int slot, busnr;
|
|
+	struct slot_dev_data *slot_dev = pdev->pci_dev_data;
|
|
+	struct pci_dev *dev;
|
|
+	int found = 0;
|
|
+	unsigned long flags;
|
|
+
|
|
+	spin_lock_irqsave(&slot_dev->lock, flags);
|
|
+
|
|
+	/* Step the bus index itself, not the caller's output pointer. */
|
|
+	for (busnr = 0; busnr < PCI_BUS_NBR; busnr++)
|
|
+		for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+			dev = slot_dev->slots[busnr][slot];
|
|
+			if (dev && dev->bus->number == pcidev->bus->number
|
|
+				&& dev->devfn == pcidev->devfn
|
|
+				&& pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)) {
|
|
+				found = 1;
|
|
+				*domain = 0;
|
|
+				*bus = busnr;
|
|
+				*devfn = PCI_DEVFN(slot,0);
|
|
+				goto out;
|
|
+			}
|
|
+		}
|
|
+out:
|
|
+	spin_unlock_irqrestore(&slot_dev->lock, flags);
|
|
+	return found;
|
|
+
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/vpci.c 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,242 @@
|
|
+/*
|
|
+ * PCI Backend - Provides a Virtual PCI bus (with real devices)
|
|
+ * to the frontend
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+
|
|
+#include <linux/list.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include "pciback.h"
|
|
+
|
|
+#define PCI_SLOT_MAX 32
|
|
+
|
|
+struct vpci_dev_data {
|
|
+ /* Access to dev_list must be protected by lock */
|
|
+ struct list_head dev_list[PCI_SLOT_MAX];
|
|
+ spinlock_t lock;
|
|
+};
|
|
+
|
|
+static inline struct list_head *list_first(struct list_head *head)
|
|
+{
|
|
+ return head->next;
|
|
+}
|
|
+
|
|
+struct pci_dev *pciback_get_pci_dev(struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn)
|
|
+{
|
|
+ struct pci_dev_entry *entry;
|
|
+ struct pci_dev *dev = NULL;
|
|
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (domain != 0 || bus != 0)
|
|
+ return NULL;
|
|
+
|
|
+ if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
|
|
+ spin_lock_irqsave(&vpci_dev->lock, flags);
|
|
+
|
|
+ list_for_each_entry(entry,
|
|
+ &vpci_dev->dev_list[PCI_SLOT(devfn)],
|
|
+ list) {
|
|
+ if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
|
|
+ dev = entry->dev;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
|
+ }
|
|
+ return dev;
|
|
+}
|
|
+
|
|
+static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
|
|
+{
|
|
+ if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
|
|
+ && l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
|
|
+ return 1;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Place @dev on the virtual PCI bus of @pdev and publish it to the frontend.
|
|
+ *
|
|
+ * Functions that share a physical slot with an already exported device are
|
|
+ * kept in the same virtual slot; otherwise the first empty virtual slot is
|
|
+ * used.  Returns 0 on success, -EFAULT for bridges, -ENOMEM when allocation
|
|
+ * fails or the bus is full, or the error returned by @publish_cb.
|
|
+ */
|
|
+int pciback_add_pci_dev(struct pciback_device *pdev, struct pci_dev *dev,
|
|
+			int devid, publish_pci_dev_cb publish_cb)
|
|
+{
|
|
+	int err = 0, slot, func;
|
|
+	struct pci_dev_entry *t, *dev_entry;
|
|
+	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
|
+	unsigned long flags;
|
|
+
|
|
+	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
|
|
+		err = -EFAULT;
|
|
+		xenbus_dev_fatal(pdev->xdev, err,
|
|
+				 "Can't export bridges on the virtual PCI bus");
|
|
+		goto out;
|
|
+	}
|
|
+
|
|
+	dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
|
|
+	if (!dev_entry) {
|
|
+		err = -ENOMEM;
|
|
+		xenbus_dev_fatal(pdev->xdev, err,
|
|
+				 "Error adding entry to virtual PCI bus");
|
|
+		goto out;
|
|
+	}
|
|
+
|
|
+	dev_entry->dev = dev;
|
|
+
|
|
+	spin_lock_irqsave(&vpci_dev->lock, flags);
|
|
+
|
|
+	/* Keep multi-function devices together on the virtual PCI bus */
|
|
+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+		if (!list_empty(&vpci_dev->dev_list[slot])) {
|
|
+			t = list_entry(list_first(&vpci_dev->dev_list[slot]),
|
|
+				       struct pci_dev_entry, list);
|
|
+
|
|
+			if (match_slot(dev, t->dev)) {
|
|
+				pr_info("pciback: vpci: %s: "
|
|
+					"assign to virtual slot %d func %d\n",
|
|
+					pci_name(dev), slot,
|
|
+					PCI_FUNC(dev->devfn));
|
|
+				list_add_tail(&dev_entry->list,
|
|
+					      &vpci_dev->dev_list[slot]);
|
|
+				func = PCI_FUNC(dev->devfn);
|
|
+				goto unlock;
|
|
+			}
|
|
+		}
|
|
+	}
|
|
+
|
|
+	/* Assign to a new slot on the virtual PCI bus */
|
|
+	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+		if (list_empty(&vpci_dev->dev_list[slot])) {
|
|
+			printk(KERN_INFO
|
|
+			       "pciback: vpci: %s: assign to virtual slot %d\n",
|
|
+			       pci_name(dev), slot);
|
|
+			list_add_tail(&dev_entry->list,
|
|
+				      &vpci_dev->dev_list[slot]);
|
|
+			func = PCI_FUNC(dev->devfn);
|
|
+			goto unlock;
|
|
+		}
|
|
+	}
|
|
+
|
|
+	err = -ENOMEM;
|
|
+	xenbus_dev_fatal(pdev->xdev, err,
|
|
+			 "No more space on root virtual PCI bus");
|
|
+
|
|
+unlock:
|
|
+	spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
|
+
|
|
+	/* Publish this device. */
|
|
+	if (!err)
|
|
+		err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
|
|
+	else
|
|
+		/* Never linked into a slot list, so nothing else frees it. */
|
|
+		kfree(dev_entry);
|
|
+
|
|
+out:
|
|
+	return err;
|
|
+}
|
|
+
|
|
+void pciback_release_pci_dev(struct pciback_device *pdev, struct pci_dev *dev)
|
|
+{
|
|
+ int slot;
|
|
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
|
+ struct pci_dev *found_dev = NULL;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&vpci_dev->lock, flags);
|
|
+
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+ struct pci_dev_entry *e, *tmp;
|
|
+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
|
|
+ list) {
|
|
+ if (e->dev == dev) {
|
|
+ list_del(&e->list);
|
|
+ found_dev = e->dev;
|
|
+ kfree(e);
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
|
+
|
|
+ if (found_dev)
|
|
+ pcistub_put_pci_dev(found_dev);
|
|
+}
|
|
+
|
|
+int pciback_init_devices(struct pciback_device *pdev)
|
|
+{
|
|
+ int slot;
|
|
+ struct vpci_dev_data *vpci_dev;
|
|
+
|
|
+ vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
|
|
+ if (!vpci_dev)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ spin_lock_init(&vpci_dev->lock);
|
|
+
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+ INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
|
|
+ }
|
|
+
|
|
+ pdev->pci_dev_data = vpci_dev;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+int pciback_publish_pci_roots(struct pciback_device *pdev,
|
|
+ publish_pci_root_cb publish_cb)
|
|
+{
|
|
+ /* The Virtual PCI bus has only one root */
|
|
+ return publish_cb(pdev, 0, 0);
|
|
+}
|
|
+
|
|
+void pciback_release_devices(struct pciback_device *pdev)
|
|
+{
|
|
+ int slot;
|
|
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
|
+
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+ struct pci_dev_entry *e, *tmp;
|
|
+ list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
|
|
+ list) {
|
|
+ list_del(&e->list);
|
|
+ pcistub_put_pci_dev(e->dev);
|
|
+ kfree(e);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ kfree(vpci_dev);
|
|
+ pdev->pci_dev_data = NULL;
|
|
+}
|
|
+
|
|
+int pciback_get_pcifront_dev(struct pci_dev *pcidev, struct pciback_device *pdev,
|
|
+ unsigned int *domain, unsigned int *bus, unsigned int *devfn)
|
|
+{
|
|
+ struct pci_dev_entry *entry;
|
|
+ struct pci_dev *dev = NULL;
|
|
+ struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
|
+ unsigned long flags;
|
|
+ int found = 0, slot;
|
|
+
|
|
+ spin_lock_irqsave(&vpci_dev->lock, flags);
|
|
+ for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
|
+ list_for_each_entry(entry,
|
|
+ &vpci_dev->dev_list[slot],
|
|
+ list) {
|
|
+ dev = entry->dev;
|
|
+ if (dev && dev->bus->number == pcidev->bus->number
|
|
+ && pci_domain_nr(dev->bus) == pci_domain_nr(pcidev->bus)
|
|
+ && dev->devfn == pcidev->devfn)
|
|
+ {
|
|
+ found = 1;
|
|
+ *domain = 0;
|
|
+ *bus = 0;
|
|
+ *devfn = PCI_DEVFN(slot, PCI_FUNC(pcidev->devfn));
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
|
+ return found;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pciback/xenbus.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,710 @@
|
|
+/*
|
|
+ * PCI Backend Xenbus Setup - handles setup with frontend and xend
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include "pciback.h"
|
|
+
|
|
+#define INVALID_EVTCHN_IRQ (-1)
|
|
+struct workqueue_struct *pciback_wq;
|
|
+
|
|
+static struct pciback_device *alloc_pdev(struct xenbus_device *xdev)
|
|
+{
|
|
+ struct pciback_device *pdev;
|
|
+
|
|
+ pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL);
|
|
+ if (pdev == NULL)
|
|
+ goto out;
|
|
+ dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
|
|
+
|
|
+ pdev->xdev = xdev;
|
|
+ xdev->dev.driver_data = pdev;
|
|
+
|
|
+ spin_lock_init(&pdev->dev_lock);
|
|
+
|
|
+ pdev->sh_area = NULL;
|
|
+ pdev->sh_info = NULL;
|
|
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
|
|
+ pdev->be_watching = 0;
|
|
+
|
|
+ INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
|
|
+
|
|
+ if (pciback_init_devices(pdev)) {
|
|
+ kfree(pdev);
|
|
+ pdev = NULL;
|
|
+ }
|
|
+ out:
|
|
+ return pdev;
|
|
+}
|
|
+
|
|
+static void pciback_disconnect(struct pciback_device *pdev)
|
|
+{ /* Undo pciback_do_attach(): unbind the event-channel IRQ, then unmap the shared ring. */
|
|
+ spin_lock(&pdev->dev_lock);
|
|
+
|
|
+ /* Ensure the guest can't trigger our handler before removing devices.
|
|
+ * evtchn_irq is reset to INVALID_EVTCHN_IRQ afterwards, so calling this
|
|
+ * function a second time skips the unbind.
|
|
+ * NOTE(review): dev_lock is a spinlock and stays held across
|
|
+ * unbind_from_irqhandler() and the flush_workqueue() below; confirm
|
|
+ * that neither call can sleep in this context. */
|
|
+ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
|
|
+ unbind_from_irqhandler(pdev->evtchn_irq, pdev);
|
|
+ pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
|
|
+ }
|
|
+
|
|
+ /* If the driver domain started an op, make sure we complete it
|
|
+ * before releasing the shared memory.
|
|
+ * pciback_wq is the file-wide workqueue, so this waits for the queued
|
|
+ * work of every pciback device, not only this one. */
|
|
+ flush_workqueue(pciback_wq);
|
|
+
|
|
+ if (pdev->sh_info != NULL) { /* only set once the ring has been mapped */
|
|
+ xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_area);
|
|
+ pdev->sh_info = NULL;
|
|
+ }
|
|
+
|
|
+ spin_unlock(&pdev->dev_lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Full teardown of a pciback_device: stop watching the backend node,
|
|
+ * disconnect from the frontend, release the exported devices, detach
|
|
+ * from the xenbus device and free the structure.
|
|
+ */
|
|
+static void free_pdev(struct pciback_device *pdev)
|
|
+{
|
|
+	struct xenbus_device *xdev;
|
|
+
|
|
+	if (pdev->be_watching)
|
|
+		unregister_xenbus_watch(&pdev->be_watch);
|
|
+
|
|
+	pciback_disconnect(pdev);
|
|
+	pciback_release_devices(pdev);
|
|
+
|
|
+	/* Break the pdev <-> xenbus device links in both directions. */
|
|
+	xdev = pdev->xdev;
|
|
+	xdev->dev.driver_data = NULL;
|
|
+	pdev->xdev = NULL;
|
|
+
|
|
+	kfree(pdev);
|
|
+}
|
|
+
|
|
+static int pciback_do_attach(struct pciback_device *pdev, int gnt_ref,
|
|
+ int remote_evtchn)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct vm_struct *area;
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev,
|
|
+ "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
|
|
+ gnt_ref, remote_evtchn);
|
|
+
|
|
+ area = xenbus_map_ring_valloc(pdev->xdev, gnt_ref);
|
|
+ if (IS_ERR(area)) {
|
|
+ err = PTR_ERR(area);
|
|
+ goto out;
|
|
+ }
|
|
+ pdev->sh_area = area;
|
|
+ pdev->sh_info = area->addr;
|
|
+
|
|
+ err = bind_interdomain_evtchn_to_irqhandler(
|
|
+ pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
|
|
+ SA_SAMPLE_RANDOM, "pciback", pdev);
|
|
+ if (err < 0) {
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error binding event channel to IRQ");
|
|
+ goto out;
|
|
+ }
|
|
+ pdev->evtchn_irq = err;
|
|
+ err = 0;
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "Attached!\n");
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int pciback_attach(struct pciback_device *pdev)
|
|
+{
|
|
+ int err = 0;
|
|
+ int gnt_ref, remote_evtchn;
|
|
+ char *magic = NULL;
|
|
+
|
|
+ spin_lock(&pdev->dev_lock);
|
|
+
|
|
+ /* Make sure we only do this setup once */
|
|
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
|
|
+ XenbusStateInitialised)
|
|
+ goto out;
|
|
+
|
|
+ /* Wait for frontend to state that it has published the configuration */
|
|
+ if (xenbus_read_driver_state(pdev->xdev->otherend) !=
|
|
+ XenbusStateInitialised)
|
|
+ goto out;
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
|
|
+
|
|
+ err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
|
|
+ "pci-op-ref", "%u", &gnt_ref,
|
|
+ "event-channel", "%u", &remote_evtchn,
|
|
+ "magic", NULL, &magic, NULL);
|
|
+ if (err) {
|
|
+ /* If configuration didn't get read correctly, wait longer */
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error reading configuration from frontend");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
|
|
+ xenbus_dev_fatal(pdev->xdev, -EFAULT,
|
|
+ "version mismatch (%s/%s) with pcifront - "
|
|
+ "halting pciback",
|
|
+ magic, XEN_PCI_MAGIC);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = pciback_do_attach(pdev, gnt_ref, remote_evtchn);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "Connecting...\n");
|
|
+
|
|
+ err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
|
|
+ if (err)
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error switching to connected state!");
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
|
|
+ out:
|
|
+ spin_unlock(&pdev->dev_lock);
|
|
+
|
|
+ if (magic)
|
|
+ kfree(magic);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int pciback_publish_pci_dev(struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus,
|
|
+ unsigned int devfn, unsigned int devid)
|
|
+{
|
|
+ int err;
|
|
+ int len;
|
|
+ char str[64];
|
|
+
|
|
+ len = snprintf(str, sizeof(str), "vdev-%d", devid);
|
|
+ if (unlikely(len >= (sizeof(str) - 1))) {
|
|
+ err = -ENOMEM;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
|
|
+ "%04x:%02x:%02x.%02x", domain, bus,
|
|
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int pciback_export_device(struct pciback_device *pdev,
|
|
+ int domain, int bus, int slot, int func,
|
|
+ int devid)
|
|
+{
|
|
+ struct pci_dev *dev;
|
|
+ int err = 0;
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
|
|
+ domain, bus, slot, func);
|
|
+
|
|
+ dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
|
|
+ if (!dev) {
|
|
+ err = -EINVAL;
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Couldn't locate PCI device "
|
|
+ "(%04x:%02x:%02x.%01x)! "
|
|
+ "perhaps already in-use?",
|
|
+ domain, bus, slot, func);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = pciback_add_pci_dev(pdev, dev, devid, pciback_publish_pci_dev);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ /* TODO: It'd be nice to export a bridge and have all of its children
|
|
+ * get exported with it. This may be best done in xend (which will
|
|
+ * have to calculate resource usage anyway) but we probably want to
|
|
+ * put something in here to ensure that if a bridge gets given to a
|
|
+ * driver domain, that all devices under that bridge are not given
|
|
+ * to other driver domains (as he who controls the bridge can disable
|
|
+ * it and stop the other devices from working).
|
|
+ */
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int pciback_remove_device(struct pciback_device *pdev,
|
|
+ int domain, int bus, int slot, int func)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct pci_dev *dev;
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
|
|
+ domain, bus, slot, func);
|
|
+
|
|
+ dev = pciback_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
|
|
+ if (!dev) {
|
|
+ err = -EINVAL;
|
|
+ dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
|
|
+ "(%04x:%02x:%02x.%01x)! not owned by this domain\n",
|
|
+ domain, bus, slot, func);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ pciback_release_pci_dev(pdev, dev);
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int pciback_publish_pci_root(struct pciback_device *pdev,
|
|
+ unsigned int domain, unsigned int bus)
|
|
+{
|
|
+ unsigned int d, b;
|
|
+ int i, root_num, len, err;
|
|
+ char str[64];
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
|
|
+
|
|
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
|
+ "root_num", "%d", &root_num);
|
|
+ if (err == 0 || err == -ENOENT)
|
|
+ root_num = 0;
|
|
+ else if (err < 0)
|
|
+ goto out;
|
|
+
|
|
+ /* Verify that we haven't already published this pci root */
|
|
+ for (i = 0; i < root_num; i++) {
|
|
+ len = snprintf(str, sizeof(str), "root-%d", i);
|
|
+ if (unlikely(len >= (sizeof(str) - 1))) {
|
|
+ err = -ENOMEM;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
|
+ str, "%x:%x", &d, &b);
|
|
+ if (err < 0)
|
|
+ goto out;
|
|
+ if (err != 2) {
|
|
+ err = -EINVAL;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (d == domain && b == bus) {
|
|
+ err = 0;
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ len = snprintf(str, sizeof(str), "root-%d", root_num);
|
|
+ if (unlikely(len >= (sizeof(str) - 1))) {
|
|
+ err = -ENOMEM;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
|
|
+ root_num, domain, bus);
|
|
+
|
|
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
|
|
+ "%04x:%02x", domain, bus);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
|
|
+ "root_num", "%d", (root_num + 1));
|
|
+
|
|
+ out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Process a hot-plug/hot-unplug request while the backend node is in
|
|
+ * XenbusStateReconfiguring.
|
|
+ *
|
|
+ * For every "state-<i>" below the backend node (i < "num_devs"):
|
|
+ *  - XenbusStateInitialising: export the device named by "dev-<i>",
|
|
+ *    publish the PCI roots and mark the substate Initialised;
|
|
+ *  - XenbusStateClosing: remove the device named by "vdev-<i>";
|
|
+ *  - anything else (including an unreadable substate): ignored.
|
|
+ * Afterwards the backend switches to XenbusStateReconfigured.
|
|
+ *
|
|
+ * Returns 0 on success, or when the backend is not in the Reconfiguring
|
|
+ * state, and a negative errno for the first failure otherwise.
|
|
+ */
|
|
+static int pciback_reconfigure(struct pciback_device *pdev)
|
|
+{
|
|
+	int err = 0;
|
|
+	int num_devs;
|
|
+	int domain, bus, slot, func;
|
|
+	int substate;
|
|
+	int i, len;
|
|
+	char state_str[64];
|
|
+	char dev_str[64];
|
|
+
|
|
+	spin_lock(&pdev->dev_lock);
|
|
+
|
|
+	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
|
|
+
|
|
+	/* Make sure we only reconfigure once */
|
|
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
|
|
+	    XenbusStateReconfiguring)
|
|
+		goto out;
|
|
+
|
|
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
|
|
+			   &num_devs);
|
|
+	if (err != 1) {
|
|
+		/* A short parse is not an errno yet; turn it into one. */
|
|
+		if (err >= 0)
|
|
+			err = -EINVAL;
|
|
+		xenbus_dev_fatal(pdev->xdev, err,
|
|
+				 "Error reading number of devices");
|
|
+		goto out;
|
|
+	}
|
|
+
|
|
+	for (i = 0; i < num_devs; i++) {
|
|
+		len = snprintf(state_str, sizeof(state_str), "state-%d", i);
|
|
+		if (unlikely(len >= (sizeof(state_str) - 1))) {
|
|
+			err = -ENOMEM;
|
|
+			xenbus_dev_fatal(pdev->xdev, err,
|
|
+					 "String overflow while reading "
|
|
+					 "configuration");
|
|
+			goto out;
|
|
+		}
|
|
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
|
|
+				   "%d", &substate);
|
|
+		/* An unreadable substate is treated as "nothing to do". */
|
|
+		if (err != 1)
|
|
+			substate = XenbusStateUnknown;
|
|
+
|
|
+		switch (substate) {
|
|
+		case XenbusStateInitialising:
|
|
+			dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
|
|
+
|
|
+			len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
|
|
+			if (unlikely(len >= (sizeof(dev_str) - 1))) {
|
|
+				err = -ENOMEM;
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "String overflow while "
|
|
+						 "reading configuration");
|
|
+				goto out;
|
|
+			}
|
|
+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
|
+					   dev_str, "%x:%x:%x.%x",
|
|
+					   &domain, &bus, &slot, &func);
|
|
+			if (err < 0) {
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "Error reading device "
|
|
+						 "configuration");
|
|
+				goto out;
|
|
+			}
|
|
+			if (err != 4) {
|
|
+				err = -EINVAL;
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "Error parsing pci device "
|
|
+						 "configuration");
|
|
+				goto out;
|
|
+			}
|
|
+
|
|
+			err = pciback_export_device(pdev, domain, bus, slot,
|
|
+						    func, i);
|
|
+			if (err)
|
|
+				goto out;
|
|
+
|
|
+			/* Publish pci roots. */
|
|
+			err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
|
|
+			if (err) {
|
|
+				/*
|
|
+				 * The literals are concatenated, so the
|
|
+				 * first one needs its trailing space.
|
|
+				 */
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "Error while publish PCI root "
|
|
+						 "buses for frontend");
|
|
+				goto out;
|
|
+			}
|
|
+
|
|
+			err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
|
|
+					    state_str, "%d",
|
|
+					    XenbusStateInitialised);
|
|
+			if (err) {
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "Error switching substate of "
|
|
+						 "dev-%d\n", i);
|
|
+				goto out;
|
|
+			}
|
|
+			break;
|
|
+
|
|
+		case XenbusStateClosing:
|
|
+			dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
|
|
+
|
|
+			/* Removal is keyed on the published "vdev-<i>" node. */
|
|
+			len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
|
|
+			if (unlikely(len >= (sizeof(dev_str) - 1))) {
|
|
+				err = -ENOMEM;
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "String overflow while "
|
|
+						 "reading configuration");
|
|
+				goto out;
|
|
+			}
|
|
+			err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
|
+					   dev_str, "%x:%x:%x.%x",
|
|
+					   &domain, &bus, &slot, &func);
|
|
+			if (err < 0) {
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "Error reading device "
|
|
+						 "configuration");
|
|
+				goto out;
|
|
+			}
|
|
+			if (err != 4) {
|
|
+				err = -EINVAL;
|
|
+				xenbus_dev_fatal(pdev->xdev, err,
|
|
+						 "Error parsing pci device "
|
|
+						 "configuration");
|
|
+				goto out;
|
|
+			}
|
|
+
|
|
+			err = pciback_remove_device(pdev, domain, bus, slot,
|
|
+						    func);
|
|
+			if (err)
|
|
+				goto out;
|
|
+
|
|
+			/* TODO: If at some point we implement support for pci
|
|
+			 * root hot-remove on pcifront side, we'll need to
|
|
+			 * remove unnecessary xenstore nodes of pci roots here.
|
|
+			 */
|
|
+
|
|
+			break;
|
|
+
|
|
+		default:
|
|
+			break;
|
|
+		}
|
|
+	}
|
|
+
|
|
+	err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(pdev->xdev, err,
|
|
+				 "Error switching to reconfigured state!");
|
|
+		goto out;
|
|
+	}
|
|
+
|
|
+ out:
|
|
+	spin_unlock(&pdev->dev_lock);
|
|
+
|
|
+	/* Report the outcome instead of unconditionally claiming success. */
|
|
+	return err;
|
|
+}
|
|
+
|
|
+static void pciback_frontend_changed(struct xenbus_device *xdev,
|
|
+ enum xenbus_state fe_state)
|
|
+{
|
|
+ struct pciback_device *pdev = xdev->dev.driver_data;
|
|
+
|
|
+ dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
|
|
+
|
|
+ switch (fe_state) {
|
|
+ case XenbusStateInitialised:
|
|
+ pciback_attach(pdev);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateReconfiguring:
|
|
+ pciback_reconfigure(pdev);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateConnected:
|
|
+ /* pcifront switched its state from reconfiguring to connected.
|
|
+ * Then switch to connected state.
|
|
+ */
|
|
+ xenbus_switch_state(xdev, XenbusStateConnected);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ pciback_disconnect(pdev);
|
|
+ xenbus_switch_state(xdev, XenbusStateClosing);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosed:
|
|
+ pciback_disconnect(pdev);
|
|
+ xenbus_switch_state(xdev, XenbusStateClosed);
|
|
+ if (xenbus_dev_is_online(xdev))
|
|
+ break;
|
|
+ /* fall through if not online */
|
|
+ case XenbusStateUnknown:
|
|
+ dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
|
|
+ device_unregister(&xdev->dev);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int pciback_setup_backend(struct pciback_device *pdev)
|
|
+{
|
|
+ /* Get configuration from xend (if available now) */
|
|
+ int domain, bus, slot, func;
|
|
+ int err = 0;
|
|
+ int i, num_devs;
|
|
+ char dev_str[64];
|
|
+ char state_str[64];
|
|
+
|
|
+ spin_lock(&pdev->dev_lock);
|
|
+
|
|
+ /* It's possible we could get the call to setup twice, so make sure
|
|
+ * we're not already connected.
|
|
+ */
|
|
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
|
|
+ XenbusStateInitWait)
|
|
+ goto out;
|
|
+
|
|
+ dev_dbg(&pdev->xdev->dev, "getting be setup\n");
|
|
+
|
|
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
|
|
+ &num_devs);
|
|
+ if (err != 1) {
|
|
+ if (err >= 0)
|
|
+ err = -EINVAL;
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error reading number of devices");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < num_devs; i++) {
|
|
+ int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
|
|
+ if (unlikely(l >= (sizeof(dev_str) - 1))) {
|
|
+ err = -ENOMEM;
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "String overflow while reading "
|
|
+ "configuration");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
|
|
+ "%x:%x:%x.%x", &domain, &bus, &slot, &func);
|
|
+ if (err < 0) {
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error reading device configuration");
|
|
+ goto out;
|
|
+ }
|
|
+ if (err != 4) {
|
|
+ err = -EINVAL;
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error parsing pci device "
|
|
+ "configuration");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = pciback_export_device(pdev, domain, bus, slot, func, i);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+ /* Switch substate of this device. */
|
|
+ l = snprintf(state_str, sizeof(state_str), "state-%d", i);
|
|
+ if (unlikely(l >= (sizeof(state_str) - 1))) {
|
|
+ err = -ENOMEM;
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "String overflow while reading "
|
|
+ "configuration");
|
|
+ goto out;
|
|
+ }
|
|
+ err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
|
|
+ "%d", XenbusStateInitialised);
|
|
+ if (err) {
|
|
+ xenbus_dev_fatal(pdev->xdev, err, "Error switching "
|
|
+ "substate of dev-%d\n", i);
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ err = pciback_publish_pci_roots(pdev, pciback_publish_pci_root);
|
|
+ if (err) {
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error while publish PCI root buses "
|
|
+ "for frontend");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
|
|
+ if (err)
|
|
+ xenbus_dev_fatal(pdev->xdev, err,
|
|
+ "Error switching to initialised state!");
|
|
+
|
|
+ out:
|
|
+ spin_unlock(&pdev->dev_lock);
|
|
+
|
|
+ if (!err)
|
|
+ /* see if pcifront is already configured (if not, we'll wait) */
|
|
+ pciback_attach(pdev);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Watch callback for the backend node: attempt backend setup whenever
|
|
+ * the node fires while the backend is in XenbusStateInitWait. @vec and
|
|
+ * @len are not used.
|
|
+ */
|
|
+static void pciback_be_watch(struct xenbus_watch *watch,
|
|
+			     const char **vec, unsigned int len)
|
|
+{
|
|
+	struct pciback_device *pdev;
|
|
+
|
|
+	pdev = container_of(watch, struct pciback_device, be_watch);
|
|
+
|
|
+	/* Every other state means there is nothing to set up. */
|
|
+	if (xenbus_read_driver_state(pdev->xdev->nodename) ==
|
|
+	    XenbusStateInitWait)
|
|
+		pciback_setup_backend(pdev);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Probe callback: allocate the per-device state, move to InitWait and
|
|
+ * start watching the backend node for configuration.
|
|
+ *
|
|
+ * Returns 0 on success or a negative errno. On failure the state
|
|
+ * allocated here is released again: pciback_xenbus_remove() is the only
|
|
+ * other place that frees it, and it is not expected to run for a device
|
|
+ * whose probe failed.
|
|
+ */
|
|
+static int pciback_xenbus_probe(struct xenbus_device *dev,
|
|
+				const struct xenbus_device_id *id)
|
|
+{
|
|
+	int err = 0;
|
|
+	struct pciback_device *pdev = alloc_pdev(dev);
|
|
+
|
|
+	if (pdev == NULL) {
|
|
+		err = -ENOMEM;
|
|
+		xenbus_dev_fatal(dev, err,
|
|
+				 "Error allocating pciback_device struct");
|
|
+		goto out;
|
|
+	}
|
|
+
|
|
+	/* wait for xend to configure us */
|
|
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+	if (err)
|
|
+		goto out_free;
|
|
+
|
|
+	/* watch the backend node for backend configuration information */
|
|
+	err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
|
|
+				pciback_be_watch);
|
|
+	if (err)
|
|
+		goto out_free;
|
|
+	pdev->be_watching = 1;
|
|
+
|
|
+	/* We need to force a call to our callback here in case
|
|
+	 * xend already configured us!
|
|
+	 */
|
|
+	pciback_be_watch(&pdev->be_watch, NULL, 0);
|
|
+
|
|
+	return 0;
|
|
+
|
|
+ out_free:
|
|
+	/*
|
|
+	 * be_watching is still 0 on this path, so free_pdev() skips the
|
|
+	 * watch; it also clears dev->dev.driver_data.
|
|
+	 */
|
|
+	free_pdev(pdev);
|
|
+ out:
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Remove callback: free the per-device state if there is any.
|
|
+ * Always returns 0.
|
|
+ */
|
|
+static int pciback_xenbus_remove(struct xenbus_device *dev)
|
|
+{
|
|
+	struct pciback_device *pdev = dev->dev.driver_data;
|
|
+
|
|
+	if (!pdev)
|
|
+		return 0;
|
|
+
|
|
+	free_pdev(pdev);
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+static const struct xenbus_device_id xenpci_ids[] = {
|
|
+ {"pci"},
|
|
+ {{0}},
|
|
+};
|
|
+
|
|
+static struct xenbus_driver xenbus_pciback_driver = {
|
|
+ .name = "pciback",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = xenpci_ids,
|
|
+ .probe = pciback_xenbus_probe,
|
|
+ .remove = pciback_xenbus_remove,
|
|
+ .otherend_changed = pciback_frontend_changed,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Create the pciback workqueue and register the backend driver with
|
|
+ * xenbus.
|
|
+ *
|
|
+ * Returns -ENODEV when not running on Xen, -EFAULT when the workqueue
|
|
+ * cannot be created, otherwise the result of xenbus_register_backend().
|
|
+ * The workqueue is destroyed again if registration fails.
|
|
+ */
|
|
+int __init pciback_xenbus_register(void)
|
|
+{
|
|
+	int err;
|
|
+
|
|
+	if (!is_running_on_xen())
|
|
+		return -ENODEV;
|
|
+
|
|
+	pciback_wq = create_workqueue("pciback_workqueue");
|
|
+	if (!pciback_wq) {
|
|
+		/* The literals are concatenated; keep the space after "create". */
|
|
+		printk(KERN_ERR "pciback_xenbus_register: create "
|
|
+		       "pciback_workqueue failed\n");
|
|
+		return -EFAULT;
|
|
+	}
|
|
+
|
|
+	err = xenbus_register_backend(&xenbus_pciback_driver);
|
|
+	if (err) {
|
|
+		/* Nothing will use the workqueue; do not leak it. */
|
|
+		destroy_workqueue(pciback_wq);
|
|
+		pciback_wq = NULL;
|
|
+	}
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Module exit: unregister the driver first, then destroy the workqueue.
|
|
+ * The remove path (free_pdev() -> pciback_disconnect()) flushes
|
|
+ * pciback_wq, so the workqueue must outlive the driver.
|
|
+ */
|
|
+void __exit pciback_xenbus_unregister(void)
|
|
+{
|
|
+	xenbus_unregister_driver(&xenbus_pciback_driver);
|
|
+	destroy_workqueue(pciback_wq);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pcifront/Makefile 2007-06-12 13:13:45.000000000 +0200
|
|
@@ -0,0 +1,7 @@
|
|
+obj-y += pcifront.o
|
|
+
|
|
+pcifront-y := pci_op.o xenbus.o pci.o
|
|
+
|
|
+ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y)
|
|
+EXTRA_CFLAGS += -DDEBUG
|
|
+endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pcifront/pci.c 2007-06-12 13:13:45.000000000 +0200
|
|
@@ -0,0 +1,46 @@
|
|
+/*
|
|
+ * PCI Frontend Operations - ensure only one PCI frontend runs at a time
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include "pcifront.h"
|
|
+
|
|
+DEFINE_SPINLOCK(pcifront_dev_lock);
|
|
+static struct pcifront_device *pcifront_dev = NULL;
|
|
+
|
|
+/*
|
|
+ * Install @pdev as the single active PCI frontend.
|
|
+ * Returns 0 on success, -EEXIST if a frontend is already installed.
|
|
+ */
|
|
+int pcifront_connect(struct pcifront_device *pdev)
|
|
+{
|
|
+	int err;
|
|
+
|
|
+	spin_lock(&pcifront_dev_lock);
|
|
+
|
|
+	if (pcifront_dev) {
|
|
+		dev_err(&pdev->xdev->dev, "PCI frontend already installed!\n");
|
|
+		err = -EEXIST;
|
|
+	} else {
|
|
+		dev_info(&pdev->xdev->dev, "Installing PCI frontend\n");
|
|
+		pcifront_dev = pdev;
|
|
+		err = 0;
|
|
+	}
|
|
+
|
|
+	spin_unlock(&pcifront_dev_lock);
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Uninstall @pdev if it is the currently installed frontend; do nothing
|
|
+ * otherwise.
|
|
+ */
|
|
+void pcifront_disconnect(struct pcifront_device *pdev)
|
|
+{
|
|
+	spin_lock(&pcifront_dev_lock);
|
|
+
|
|
+	if (pcifront_dev != pdev)
|
|
+		goto unlock;
|
|
+
|
|
+	dev_info(&pdev->xdev->dev,
|
|
+		 "Disconnecting PCI Frontend Buses\n");
|
|
+	pcifront_dev = NULL;
|
|
+
|
|
+ unlock:
|
|
+	spin_unlock(&pcifront_dev_lock);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pcifront/pci_op.c 2010-11-25 09:36:37.000000000 +0100
|
|
@@ -0,0 +1,670 @@
|
|
+/*
|
|
+ * PCI Frontend Operations - Communicates with frontend
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/version.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <asm/bitops.h>
|
|
+#include <linux/time.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include "pcifront.h"
|
|
+
|
|
+static int verbose_request = 0;
|
|
+module_param(verbose_request, int, 0644);
|
|
+
|
|
+#ifdef __ia64__
|
|
+/*
|
|
+ * Initialise the sysdata of root bus @domain:@bus and, when the backend
|
|
+ * published them, load the bus' resource windows from xenstore.
|
|
+ *
|
|
+ * Each "root-<i>-resource-<j>" value is read as an ASCII hex dump of a
|
|
+ * struct acpi_resource; "root-resource-magic" must equal the dump size
|
|
+ * (two characters per byte plus the terminating NUL). Every lookup
|
|
+ * failure simply returns with whatever has been set up so far.
|
|
+ */
|
|
+static void pcifront_init_sd(struct pcifront_sd *sd,
|
|
+			     unsigned int domain, unsigned int bus,
|
|
+			     struct pcifront_device *pdev)
|
|
+{
|
|
+	int err, i, j, k, len, root_num, res_count;
|
|
+	struct acpi_resource res;
|
|
+	unsigned int d, b, byte;
|
|
+	unsigned long magic;
|
|
+	char str[64], tmp[3];
|
|
+	unsigned char *buf, *bufp;
|
|
+	u8 *ptr;
|
|
+
|
|
+	memset(sd, 0, sizeof(*sd));
|
|
+
|
|
+	sd->segment = domain;
|
|
+	sd->node = -1;	/* Revisit for NUMA */
|
|
+	sd->platform_data = pdev;
|
|
+
|
|
+	/* Look for resources for this controller in xenbus. */
|
|
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "root_num",
|
|
+			   "%d", &root_num);
|
|
+	if (err != 1)
|
|
+		return;
|
|
+
|
|
+	/* Find the index i of the published root matching domain:bus. */
|
|
+	for (i = 0; i < root_num; i++) {
|
|
+		len = snprintf(str, sizeof(str), "root-%d", i);
|
|
+		if (unlikely(len >= (sizeof(str) - 1)))
|
|
+			return;
|
|
+
|
|
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
|
|
+				   str, "%x:%x", &d, &b);
|
|
+		if (err != 2)
|
|
+			return;
|
|
+
|
|
+		if (d == domain && b == bus)
|
|
+			break;
|
|
+	}
|
|
+
|
|
+	if (i == root_num)
|
|
+		return;
|
|
+
|
|
+	len = snprintf(str, sizeof(str), "root-resource-magic");
|
|
+
|
|
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
|
|
+			   str, "%lx", &magic);
|
|
+
|
|
+	if (err != 1)
|
|
+		return; /* No resources, nothing to do */
|
|
+
|
|
+	if (magic != (sizeof(res) * 2) + 1) {
|
|
+		printk(KERN_WARNING "pcifront: resource magic mismatch\n");
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	len = snprintf(str, sizeof(str), "root-%d-resources", i);
|
|
+	if (unlikely(len >= (sizeof(str) - 1)))
|
|
+		return;
|
|
+
|
|
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
|
|
+			   str, "%d", &res_count);
|
|
+
|
|
+	if (err != 1)
|
|
+		return; /* No resources, nothing to do */
|
|
+
|
|
+	/* kcalloc() rejects a count whose total size would overflow. */
|
|
+	sd->window = kcalloc(res_count, sizeof(*sd->window), GFP_KERNEL);
|
|
+	if (!sd->window)
|
|
+		return;
|
|
+
|
|
+	/* magic is also the size of the byte stream in xenbus */
|
|
+	buf = kmalloc(magic, GFP_KERNEL);
|
|
+	if (!buf) {
|
|
+		kfree(sd->window);
|
|
+		sd->window = NULL;
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	/* Read the resources out of xenbus */
|
|
+	for (j = 0; j < res_count; j++) {
|
|
+		memset(&res, 0, sizeof(res));
|
|
+		memset(buf, 0, magic);
|
|
+
|
|
+		len = snprintf(str, sizeof(str), "root-%d-resource-%d", i, j);
|
|
+		/* Stop here, but fall through to kfree(buf) below. */
|
|
+		if (unlikely(len >= (sizeof(str) - 1)))
|
|
+			break;
|
|
+
|
|
+		/*
|
|
+		 * NOTE(review): "%s" does not bound the copy into buf;
|
|
+		 * this relies on the backend never publishing more than
|
|
+		 * magic - 1 characters. Confirm.
|
|
+		 */
|
|
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
|
|
+				   "%s", buf);
|
|
+		if (err != 1) {
|
|
+			printk(KERN_WARNING "pcifront: error reading "
|
|
+			       "resource %d on bus %04x:%02x\n",
|
|
+			       j, domain, bus);
|
|
+			continue;
|
|
+		}
|
|
+
|
|
+		bufp = buf;
|
|
+		ptr = (u8 *)&res;
|
|
+		memset(tmp, 0, sizeof(tmp));
|
|
+
|
|
+		/* Copy ASCII byte stream into structure */
|
|
+		for (k = 0; k < magic - 1; k += 2) {
|
|
+			memcpy(tmp, bufp, 2);
|
|
+			bufp += 2;
|
|
+
|
|
+			sscanf(tmp, "%02x", &byte);
|
|
+			*ptr = byte;
|
|
+			ptr++;
|
|
+		}
|
|
+
|
|
+		xen_add_resource(sd, domain, bus, &res);
|
|
+		sd->windows++;
|
|
+	}
|
|
+
|
|
+	kfree(buf);
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Map a XEN_PCI_ERR_* code to the corresponding PCIBIOS_* code.
|
|
+ * Values without a mapping are returned unchanged.
|
|
+ */
|
|
+static int errno_to_pcibios_err(int errno)
|
|
+{
|
|
+	int ret = errno;
|
|
+
|
|
+	switch (errno) {
|
|
+	case XEN_PCI_ERR_success:
|
|
+		ret = PCIBIOS_SUCCESSFUL;
|
|
+		break;
|
|
+
|
|
+	case XEN_PCI_ERR_dev_not_found:
|
|
+		ret = PCIBIOS_DEVICE_NOT_FOUND;
|
|
+		break;
|
|
+
|
|
+	case XEN_PCI_ERR_invalid_offset:
|
|
+	case XEN_PCI_ERR_op_failed:
|
|
+		ret = PCIBIOS_BAD_REGISTER_NUMBER;
|
|
+		break;
|
|
+
|
|
+	case XEN_PCI_ERR_not_implemented:
|
|
+		ret = PCIBIOS_FUNC_NOT_SUPPORTED;
|
|
+		break;
|
|
+
|
|
+	case XEN_PCI_ERR_access_denied:
|
|
+		ret = PCIBIOS_SET_FAILED;
|
|
+		break;
|
|
+	}
|
|
+
|
|
+	return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Queue pdev->op_work when _XEN_PCIB_active is set in the shared flags
|
|
+ * and _PDEVB_op_active was not already set in pdev->flags.
|
|
+ */
|
|
+static inline void schedule_pcifront_aer_op(struct pcifront_device *pdev)
|
|
+{
|
|
+	if (!test_bit(_XEN_PCIB_active,
|
|
+		      (unsigned long *)&pdev->sh_info->flags))
|
|
+		return;
|
|
+
|
|
+	/* Claim the op_active bit; whoever already holds it does the work. */
|
|
+	if (test_and_set_bit(_PDEVB_op_active, &pdev->flags))
|
|
+		return;
|
|
+
|
|
+	dev_dbg(&pdev->xdev->dev, "schedule aer frontend job\n");
|
|
+	schedule_work(&pdev->op_work);
|
|
+}
|
|
+
|
|
+static int do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct xen_pci_op *active_op = &pdev->sh_info->op;
|
|
+ unsigned long irq_flags;
|
|
+ evtchn_port_t port = pdev->evtchn;
|
|
+ s64 ns, ns_timeout;
|
|
+ struct timeval tv;
|
|
+
|
|
+ spin_lock_irqsave(&pdev->sh_info_lock, irq_flags);
|
|
+
|
|
+ memcpy(active_op, op, sizeof(struct xen_pci_op));
|
|
+
|
|
+ /* Go */
|
|
+ wmb();
|
|
+ set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
|
|
+ notify_remote_via_evtchn(port);
|
|
+
|
|
+ /*
|
|
+ * We set a poll timeout of 3 seconds but give up on return after
|
|
+ * 2 seconds. It is better to time out too late rather than too early
|
|
+ * (in the latter case we end up continually re-executing poll() with a
|
|
+ * timeout in the past). 1s difference gives plenty of slack for error.
|
|
+ */
|
|
+ do_gettimeofday(&tv);
|
|
+ ns_timeout = timeval_to_ns(&tv) + 2 * (s64)NSEC_PER_SEC;
|
|
+
|
|
+ clear_evtchn(port);
|
|
+
|
|
+ while (test_bit(_XEN_PCIF_active,
|
|
+ (unsigned long *)&pdev->sh_info->flags)) {
|
|
+ if (HYPERVISOR_poll(&port, 1, jiffies + 3*HZ))
|
|
+ BUG();
|
|
+ clear_evtchn(port);
|
|
+ do_gettimeofday(&tv);
|
|
+ ns = timeval_to_ns(&tv);
|
|
+ if (ns > ns_timeout) {
|
|
+ dev_err(&pdev->xdev->dev,
|
|
+ "pciback not responding!!!\n");
|
|
+ clear_bit(_XEN_PCIF_active,
|
|
+ (unsigned long *)&pdev->sh_info->flags);
|
|
+ err = XEN_PCI_ERR_dev_not_found;
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * We might lose backend service request since we
|
|
+ * reuse same evtchn with pci_conf backend response. So re-schedule
|
|
+ * aer pcifront service.
|
|
+ */
|
|
+ if (test_bit(_XEN_PCIB_active,
|
|
+ (unsigned long*)&pdev->sh_info->flags)) {
|
|
+ dev_err(&pdev->xdev->dev,
|
|
+ "schedule aer pcifront service\n");
|
|
+ schedule_pcifront_aer_op(pdev);
|
|
+ }
|
|
+
|
|
+ memcpy(op, active_op, sizeof(struct xen_pci_op));
|
|
+
|
|
+ err = op->err;
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&pdev->sh_info_lock, irq_flags);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/* Access to this function is spinlocked in drivers/pci/access.c */
|
|
+static int pcifront_bus_read(struct pci_bus *bus, unsigned int devfn,
|
|
+ int where, int size, u32 * val)
|
|
+{
|
|
+ int err = 0;
|
|
+ struct xen_pci_op op = {
|
|
+ .cmd = XEN_PCI_OP_conf_read,
|
|
+ .domain = pci_domain_nr(bus),
|
|
+ .bus = bus->number,
|
|
+ .devfn = devfn,
|
|
+ .offset = where,
|
|
+ .size = size,
|
|
+ };
|
|
+ struct pcifront_sd *sd = bus->sysdata;
|
|
+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
|
|
+
|
|
+ if (verbose_request)
|
|
+ dev_info(&pdev->xdev->dev,
|
|
+ "read dev=%04x:%02x:%02x.%01x - offset %x size %d\n",
|
|
+ pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
|
|
+ PCI_FUNC(devfn), where, size);
|
|
+
|
|
+ err = do_pci_op(pdev, &op);
|
|
+
|
|
+ if (likely(!err)) {
|
|
+ if (verbose_request)
|
|
+ dev_info(&pdev->xdev->dev, "read got back value %x\n",
|
|
+ op.value);
|
|
+
|
|
+ *val = op.value;
|
|
+ } else if (err == -ENODEV) {
|
|
+ /* No device here, pretend that it just returned 0 */
|
|
+ err = 0;
|
|
+ *val = 0;
|
|
+ }
|
|
+
|
|
+ return errno_to_pcibios_err(err);
|
|
+}
|
|
+
|
|
+/* Access to this function is spinlocked in drivers/pci/access.c */
|
|
+static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
|
|
+ int where, int size, u32 val)
|
|
+{
|
|
+ struct xen_pci_op op = {
|
|
+ .cmd = XEN_PCI_OP_conf_write,
|
|
+ .domain = pci_domain_nr(bus),
|
|
+ .bus = bus->number,
|
|
+ .devfn = devfn,
|
|
+ .offset = where,
|
|
+ .size = size,
|
|
+ .value = val,
|
|
+ };
|
|
+ struct pcifront_sd *sd = bus->sysdata;
|
|
+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
|
|
+
|
|
+ if (verbose_request)
|
|
+ dev_info(&pdev->xdev->dev,
|
|
+ "write dev=%04x:%02x:%02x.%01x - "
|
|
+ "offset %x size %d val %x\n",
|
|
+ pci_domain_nr(bus), bus->number,
|
|
+ PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
|
|
+
|
|
+ return errno_to_pcibios_err(do_pci_op(pdev, &op));
|
|
+}
|
|
+
|
|
+struct pci_ops pcifront_bus_ops = {
|
|
+ .read = pcifront_bus_read,
|
|
+ .write = pcifront_bus_write,
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_PCI_MSI
|
|
+int pci_frontend_enable_msix(struct pci_dev *dev,
|
|
+ struct msix_entry *entries,
|
|
+ int nvec)
|
|
+{
|
|
+ int err;
|
|
+ int i;
|
|
+ struct xen_pci_op op = {
|
|
+ .cmd = XEN_PCI_OP_enable_msix,
|
|
+ .domain = pci_domain_nr(dev->bus),
|
|
+ .bus = dev->bus->number,
|
|
+ .devfn = dev->devfn,
|
|
+ .value = nvec,
|
|
+ };
|
|
+ struct pcifront_sd *sd = dev->bus->sysdata;
|
|
+ struct pcifront_device *pdev = pcifront_get_pdev(sd);
|
|
+
|
|
+ if (nvec > SH_INFO_MAX_VEC) {
|
|
+ printk("too much vector for pci frontend%x\n", nvec);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < nvec; i++) {
|
|
+ op.msix_entries[i].entry = entries[i].entry;
|
|
+ op.msix_entries[i].vector = entries[i].vector;
|
|
+ }
|
|
+
|
|
+ err = do_pci_op(pdev, &op);
|
|
+
|
|
+ if (!err) {
|
|
+ if (!op.value) {
|
|
+ /* we get the result */
|
|
+ for ( i = 0; i < nvec; i++)
|
|
+ entries[i].vector = op.msix_entries[i].vector;
|
|
+ return 0;
|
|
+ }
|
|
+ else {
|
|
+ printk("enable msix get value %x\n", op.value);
|
|
+ return op.value;
|
|
+ }
|
|
+ }
|
|
+ else {
|
|
+ printk("enable msix get err %x\n", err);
|
|
+ return err;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Send XEN_PCI_OP_disable_msix for @dev to the backend. A failure is
|
|
+ * only logged; nothing is reported to the caller.
|
|
+ */
|
|
+void pci_frontend_disable_msix(struct pci_dev* dev)
|
|
+{
|
|
+	struct xen_pci_op op = {
|
|
+		.cmd    = XEN_PCI_OP_disable_msix,
|
|
+		.domain = pci_domain_nr(dev->bus),
|
|
+		.bus    = dev->bus->number,
|
|
+		.devfn  = dev->devfn,
|
|
+	};
|
|
+	struct pcifront_sd *sd = dev->bus->sysdata;
|
|
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
|
|
+	int rc = do_pci_op(pdev, &op);
|
|
+
|
|
+	/* What should do for error ? */
|
|
+	if (!rc)
|
|
+		return;
|
|
+
|
|
+	printk("pci_disable_msix get err %x\n", rc);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Send XEN_PCI_OP_enable_msi for @dev to the backend.
|
|
+ * On success stores the value returned by the backend in dev->irq and
|
|
+ * returns 0; on any failure logs it and returns -EINVAL.
|
|
+ */
|
|
+int pci_frontend_enable_msi(struct pci_dev *dev)
|
|
+{
|
|
+	struct xen_pci_op op = {
|
|
+		.cmd    = XEN_PCI_OP_enable_msi,
|
|
+		.domain = pci_domain_nr(dev->bus),
|
|
+		.bus    = dev->bus->number,
|
|
+		.devfn  = dev->devfn,
|
|
+	};
|
|
+	struct pcifront_sd *sd = dev->bus->sysdata;
|
|
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
|
|
+
|
|
+	if (unlikely(do_pci_op(pdev, &op))) {
|
|
+		printk("pci frontend enable msi failed for dev %x:%x \n",
|
|
+		       op.bus, op.devfn);
|
|
+		return -EINVAL;
|
|
+	}
|
|
+
|
|
+	dev->irq = op.value;
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
+ * Send an XEN_PCI_OP_disable_msi request for @dev.
+ *
+ * On success dev->irq is set to the value carried by the reply; every
+ * failure is only logged.
+ */
+void pci_frontend_disable_msi(struct pci_dev *dev)
+{
+	int rc;
+	struct xen_pci_op op = {
+		.cmd	= XEN_PCI_OP_disable_msi,
+		.domain	= pci_domain_nr(dev->bus),
+		.bus	= dev->bus->number,
+		.devfn	= dev->devfn,
+	};
+	struct pcifront_sd *sd = dev->bus->sysdata;
+	struct pcifront_device *pdev = pcifront_get_pdev(sd);
+
+	rc = do_pci_op(pdev, &op);
+
+	if (rc == XEN_PCI_ERR_dev_not_found) {
+		/* XXX No response from backend, what shall we do? */
+		printk("get no response from backend for disable MSI\n");
+		return;
+	}
+
+	if (unlikely(rc)) {
+		/* how can pciback notify us fail? */
+		printk("get fake response frombackend \n");
+		return;
+	}
+
+	dev->irq = op.value;
+}
|
|
+#endif /* CONFIG_PCI_MSI */
|
|
+
|
|
+/*
+ * pci_walk_bus() callback: claim each resource of @dev exactly as it was
+ * found, because the backend won't allow changes.  Only resources that
+ * have no parent yet and carry both a start address and flags are claimed.
+ * @data is the owning pcifront_device (used for debug output only).
+ */
+static void pcifront_claim_resource(struct pci_dev *dev, void *data)
+{
+	struct pcifront_device *pdev = data;
+	struct resource *res;
+	int idx;
+
+	for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
+		res = &dev->resource[idx];
+
+		if (res->parent || !res->start || !res->flags)
+			continue;
+
+		dev_dbg(&pdev->xdev->dev, "claiming resource %s/%d\n",
+			pci_name(dev), idx);
+		pci_claim_resource(dev, idx);
+	}
+}
|
|
+
|
|
+/*
+ * Create and scan root bus @domain:@bus on behalf of @pdev.
+ *
+ * The new bus is recorded on pdev->root_buses, the resources of its devices
+ * are claimed and the devices are then added.
+ *
+ * Returns 0 on success, -EINVAL for a non-zero @domain when the kernel was
+ * built without CONFIG_PCI_DOMAINS, or -ENOMEM when an allocation or the
+ * bus scan itself fails.
+ */
+int __devinit pcifront_scan_root(struct pcifront_device *pdev,
+		unsigned int domain, unsigned int bus)
+{
+	struct pci_bus *root;
+	struct pcifront_sd *sd = NULL;
+	struct pci_bus_entry *entry = NULL;
+	int err = 0;
+
+#ifndef CONFIG_PCI_DOMAINS
+	if (domain != 0) {
+		dev_err(&pdev->xdev->dev,
+			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
+		dev_err(&pdev->xdev->dev,
+			"Please compile with CONFIG_PCI_DOMAINS\n");
+		err = -EINVAL;
+		goto err_out;
+	}
+#endif
+
+	dev_info(&pdev->xdev->dev, "Creating PCI Frontend Bus %04x:%02x\n",
+		 domain, bus);
+
+	/* Allocate both bookkeeping objects up front; a single check covers both. */
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+	if (!entry || !sd) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	pcifront_init_sd(sd, domain, bus, pdev);
+
+	root = pci_scan_bus_parented(&pdev->xdev->dev, bus,
+				     &pcifront_bus_ops, sd);
+	if (!root) {
+		dev_err(&pdev->xdev->dev,
+			"Error creating PCI Frontend Bus!\n");
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	pcifront_setup_root_resources(root, sd);
+
+	entry->bus = root;
+	list_add(&entry->list, &pdev->root_buses);
+
+	/* Claim resources before going "live" with our devices */
+	pci_walk_bus(root, pcifront_claim_resource, pdev);
+
+	pci_bus_add_devices(root);
+
+	return 0;
+
+ err_out:
+	/* kfree() accepts NULL, so this path serves every failure point. */
+	kfree(entry);
+	kfree(sd);
+
+	return err;
+}
|
|
+
|
|
+/*
+ * Rescan root bus @domain:@bus for functions that are not known yet.
+ *
+ * When the bus does not exist it is created through pcifront_scan_root().
+ * Otherwise every devfn without a pci_dev is probed, and afterwards the
+ * resources are claimed and the new devices added.
+ *
+ * Returns 0 on success, -EINVAL for a non-zero @domain when the kernel was
+ * built without CONFIG_PCI_DOMAINS, or whatever pcifront_scan_root()
+ * returns when the bus had to be created.
+ */
+int __devinit pcifront_rescan_root(struct pcifront_device *pdev,
+		unsigned int domain, unsigned int bus)
+{
+	struct pci_bus *root;
+	struct pci_dev *found;
+	unsigned int devfn;
+
+#ifndef CONFIG_PCI_DOMAINS
+	if (domain != 0) {
+		dev_err(&pdev->xdev->dev,
+			"PCI Root in non-zero PCI Domain! domain=%d\n", domain);
+		dev_err(&pdev->xdev->dev,
+			"Please compile with CONFIG_PCI_DOMAINS\n");
+		return -EINVAL;
+	}
+#endif
+
+	dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
+		 domain, bus);
+
+	root = pci_find_bus(domain, bus);
+	if (!root) {
+		/* If the bus is unknown, create it. */
+		return pcifront_scan_root(pdev, domain, bus);
+	}
+
+	/*
+	 * Rescan the bus for newly attached functions and add them.
+	 * Handling of PCI bridge attachment is omitted because pciback
+	 * prevents bridges from being exported.
+	 */
+	for (devfn = 0; devfn < 0x100; devfn++) {
+		found = pci_get_slot(root, devfn);
+		if (!found) {
+			found = pci_scan_single_device(root, devfn);
+			if (found)
+				dev_info(&pdev->xdev->dev,
+					 "New device on %04x:%02x:%02x.%02x found.\n",
+					 domain, bus,
+					 PCI_SLOT(devfn), PCI_FUNC(devfn));
+		} else {
+			/* Device is already known; drop the lookup reference. */
+			pci_dev_put(found);
+		}
+	}
+
+	/* Claim resources before going "live" with our devices */
+	pci_walk_bus(root, pcifront_claim_resource, pdev);
+
+	/* Create SysFS and notify udev of the devices. Aka: "going live" */
+	pci_bus_add_devices(root);
+
+	return 0;
+}
|
|
+
|
|
+/* Remove every device on @bus, always taking the current head of the list. */
+static void free_root_bus_devs(struct pci_bus *bus)
+{
+	struct pci_dev *victim;
+
+	for (;;) {
+		if (list_empty(&bus->devices))
+			break;
+
+		victim = list_entry(bus->devices.next, struct pci_dev,
+				    bus_list);
+		dev_dbg(&victim->dev, "removing device\n");
+		pci_remove_bus_device(victim);
+	}
+}
|
|
+
|
|
+/*
+ * Tear down every root bus recorded on pdev->root_buses: remove its
+ * devices, free its sysdata, unregister the bridge device, remove the bus
+ * and finally free the list entry itself.
+ */
+void pcifront_free_roots(struct pcifront_device *pdev)
+{
+	struct pci_bus_entry *entry, *next;
+	struct pci_bus *bus;
+
+	dev_dbg(&pdev->xdev->dev, "cleaning up root buses\n");
+
+	list_for_each_entry_safe(entry, next, &pdev->root_buses, list) {
+		list_del(&entry->list);
+		bus = entry->bus;
+
+		free_root_bus_devs(bus);
+
+		kfree(bus->sysdata);
+
+		device_unregister(bus->bridge);
+		pci_remove_bus(bus);
+
+		kfree(entry);
+	}
+}
|
|
+
|
|
+/*
+ * Dispatch one AER request to the driver bound to the affected device.
+ *
+ * @cmd:   XEN_PCI_OP_aer_* request code.
+ * @pdev:  frontend device; the target bus/devfn are read from
+ *         pdev->sh_info->aer_op.
+ * @state: channel state passed to the driver's error_detected() callback.
+ *
+ * Returns the driver callback's result, or PCI_ERS_RESULT_NONE when the
+ * device or its driver is missing, the driver has no error handlers, the
+ * requested callback is not implemented, or @cmd is unknown.
+ */
+static pci_ers_result_t pcifront_common_process(int cmd, struct pcifront_device *pdev,
+		pci_channel_state_t state)
+{
+	pci_ers_result_t result = PCI_ERS_RESULT_NONE;
+	struct pci_driver *pdrv;
+	int bus = pdev->sh_info->aer_op.bus;
+	int devfn = pdev->sh_info->aer_op.devfn;
+	struct pci_dev *pcidev;
+
+	dev_dbg(&pdev->xdev->dev,
+		"pcifront AER process: cmd %x (bus:%x, devfn%x)\n",
+		cmd, bus, devfn);
+
+	/* Takes a reference on the device; dropped on every exit path. */
+	pcidev = pci_get_bus_and_slot(bus, devfn);
+	if (!pcidev || !pcidev->driver) {
+		pci_dev_put(pcidev);
+		dev_err(&pdev->xdev->dev, "AER device or driver is NULL\n");
+		return result;
+	}
+	pdrv = pcidev->driver;
+
+	if (get_driver(&pdrv->driver)) {
+		if (pdrv->err_handler && pdrv->err_handler->error_detected) {
+			dev_dbg(&pcidev->dev,
+				"trying to call AER service\n");
+			/*
+			 * Only error_detected was checked above; the other
+			 * callbacks may be absent, so test each one before
+			 * calling it.  A missing callback leaves the result
+			 * at PCI_ERS_RESULT_NONE.
+			 */
+			switch (cmd) {
+			case XEN_PCI_OP_aer_detected:
+				result = pdrv->err_handler->error_detected(pcidev, state);
+				break;
+			case XEN_PCI_OP_aer_mmio:
+				if (pdrv->err_handler->mmio_enabled)
+					result = pdrv->err_handler->mmio_enabled(pcidev);
+				break;
+			case XEN_PCI_OP_aer_slotreset:
+				if (pdrv->err_handler->slot_reset)
+					result = pdrv->err_handler->slot_reset(pcidev);
+				break;
+			case XEN_PCI_OP_aer_resume:
+				if (pdrv->err_handler->resume)
+					pdrv->err_handler->resume(pcidev);
+				break;
+			default:
+				dev_err(&pdev->xdev->dev,
+					"bad request in aer recovery operation!\n");
+				break;
+			}
+		}
+		put_driver(&pdrv->driver);
+	}
+
+	pci_dev_put(pcidev);
+
+	return result;
+}
|
|
+
|
|
+
|
|
+/*
+ * Work function servicing one AER request found in the shared info page.
+ * @data is the pcifront_device the work item was initialised with.
+ */
+void pcifront_do_aer(void *data)
+{
+	struct pcifront_device *pdev = data;
+	struct xen_pci_sharedinfo *shared = pdev->sh_info;
+	int cmd = shared->aer_op.cmd;
+	pci_channel_state_t state = (pci_channel_state_t)shared->aer_op.err;
+
+	/*
+	 * If a pci_conf op is in progress, we have to wait until it is done
+	 * before servicing the aer op.
+	 */
+	dev_dbg(&pdev->xdev->dev,
+		"pcifront service aer bus %x devfn %x\n", shared->aer_op.bus,
+		shared->aer_op.devfn);
+
+	shared->aer_op.err = pcifront_common_process(cmd, pdev, state);
+
+	/* Store the result before the active flag is cleared and the peer notified. */
+	wmb();
+	clear_bit(_XEN_PCIB_active, (unsigned long *)&shared->flags);
+	notify_remote_via_evtchn(pdev->evtchn);
+
+	/* In case we lost an aer request during the few lines above. */
+	smp_mb__before_clear_bit();
+	clear_bit(_PDEVB_op_active, &pdev->flags);
+	smp_mb__after_clear_bit();
+
+	schedule_pcifront_aer_op(pdev);
+}
|
|
+
|
|
+/*
+ * Interrupt handler bound to the frontend's event channel: hands the
+ * pcifront_device passed as @dev to schedule_pcifront_aer_op() and reports
+ * the interrupt as handled.
+ */
+irqreturn_t pcifront_handler_aer(int irq, void *dev, struct pt_regs *regs)
+{
+	struct pcifront_device *frontend = dev;
+
+	schedule_pcifront_aer_op(frontend);
+
+	return IRQ_HANDLED;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pcifront/pcifront.h 2010-10-05 09:58:12.000000000 +0200
|
|
@@ -0,0 +1,56 @@
|
|
+/*
|
|
+ * PCI Frontend - Common data structures & function declarations
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#ifndef __XEN_PCIFRONT_H__
|
|
+#define __XEN_PCIFRONT_H__
|
|
+
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/pci.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/io/pciif.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <xen/pcifront.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <linux/workqueue.h>
|
|
+
|
|
+struct pci_bus_entry {
|
|
+ struct list_head list;
|
|
+ struct pci_bus *bus;
|
|
+};
|
|
+
|
|
+#define _PDEVB_op_active (0)
|
|
+#define PDEVB_op_active (1 << (_PDEVB_op_active))
|
|
+
|
|
+struct pcifront_device {
|
|
+ struct xenbus_device *xdev;
|
|
+ struct list_head root_buses;
|
|
+ spinlock_t dev_lock;
|
|
+
|
|
+ int evtchn;
|
|
+ int gnt_ref;
|
|
+ int irq;
|
|
+
|
|
+ /* Lock this when doing any operations in sh_info */
|
|
+ spinlock_t sh_info_lock;
|
|
+ struct xen_pci_sharedinfo *sh_info;
|
|
+ struct work_struct op_work;
|
|
+ unsigned long flags;
|
|
+
|
|
+};
|
|
+
|
|
+int pcifront_connect(struct pcifront_device *pdev);
|
|
+void pcifront_disconnect(struct pcifront_device *pdev);
|
|
+
|
|
+int pcifront_scan_root(struct pcifront_device *pdev,
|
|
+ unsigned int domain, unsigned int bus);
|
|
+int pcifront_rescan_root(struct pcifront_device *pdev,
|
|
+ unsigned int domain, unsigned int bus);
|
|
+void pcifront_free_roots(struct pcifront_device *pdev);
|
|
+
|
|
+void pcifront_do_aer( void *data);
|
|
+
|
|
+irqreturn_t pcifront_handler_aer(int irq, void *dev, struct pt_regs *regs);
|
|
+
|
|
+#endif /* __XEN_PCIFRONT_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/pcifront/xenbus.c 2010-10-05 09:58:12.000000000 +0200
|
|
@@ -0,0 +1,483 @@
|
|
+/*
|
|
+ * PCI Frontend Xenbus Setup - handles setup with backend (imports page/evtchn)
|
|
+ *
|
|
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
|
+ */
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/mm.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include "pcifront.h"
|
|
+
|
|
+#ifndef __init_refok
|
|
+#define __init_refok
|
|
+#endif
|
|
+
|
|
+#define INVALID_GRANT_REF (0)
|
|
+#define INVALID_EVTCHN (-1)
|
|
+
|
|
+/*
+ * Allocate and initialise the per-device state for @xdev.
+ *
+ * Allocates the pcifront_device and its shared info page, sets the
+ * AER-handler flag in the shared flags word, links the structure to @xdev
+ * through dev.driver_data and resets evtchn, gnt_ref and irq to their
+ * "invalid" sentinels.
+ *
+ * Returns the new device, or NULL if either allocation fails.
+ */
+static struct pcifront_device *alloc_pdev(struct xenbus_device *xdev)
+{
+	struct pcifront_device *pdev;
+
+	pdev = kzalloc(sizeof(struct pcifront_device), GFP_KERNEL);
+	if (pdev == NULL)
+		goto out;
+
+	/*
+	 * This page is later granted to the backend (pcifront_publish_info),
+	 * so hand out a zeroed page rather than whatever the allocator
+	 * returns.  This also leaves sh_info->flags at 0.
+	 */
+	pdev->sh_info =
+	    (struct xen_pci_sharedinfo *)get_zeroed_page(GFP_KERNEL);
+	if (pdev->sh_info == NULL) {
+		kfree(pdev);
+		pdev = NULL;
+		goto out;
+	}
+
+	/* Flag for registering PV AER handler */
+	set_bit(_XEN_PCIB_AERHANDLER, (void *)&pdev->sh_info->flags);
+
+	xdev->dev.driver_data = pdev;
+	pdev->xdev = xdev;
+
+	INIT_LIST_HEAD(&pdev->root_buses);
+
+	spin_lock_init(&pdev->dev_lock);
+	spin_lock_init(&pdev->sh_info_lock);
+
+	/* Sentinels checked by free_pdev() to decide what must be released. */
+	pdev->evtchn = INVALID_EVTCHN;
+	pdev->gnt_ref = INVALID_GRANT_REF;
+	pdev->irq = -1;
+
+	INIT_WORK(&pdev->op_work, pcifront_do_aer, pdev);
+
+	dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
+		pdev, pdev->sh_info);
+ out:
+	return pdev;
+}
|
|
+
|
|
+/*
+ * Release everything alloc_pdev() and pcifront_publish_info() set up for
+ * @pdev: root buses, IRQ binding, pending AER work, event channel, the
+ * shared page (through the grant when one exists) and the structure itself.
+ */
+static void free_pdev(struct pcifront_device *pdev)
+{
+	dev_dbg(&pdev->xdev->dev, "freeing pdev @ 0x%p\n", pdev);
+
+	pcifront_free_roots(pdev);
+
+	/*
+	 * Unbind the interrupt handler before flushing: the handler queues
+	 * op_work, so flushing first would let it queue new work that then
+	 * runs after pdev has been freed.  irq starts out as -1, hence >= 0.
+	 */
+	if (pdev->irq >= 0)
+		unbind_from_irqhandler(pdev->irq, pdev);
+
+	/* For PCIE_AER error handling job */
+	/*
+	 * NOTE(review): pcifront_do_aer() calls schedule_pcifront_aer_op()
+	 * itself at its end; whether that can requeue the work past this
+	 * flush depends on code outside this file - confirm.
+	 */
+	flush_scheduled_work();
+
+	if (pdev->evtchn != INVALID_EVTCHN)
+		xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
+
+	if (pdev->gnt_ref != INVALID_GRANT_REF)
+		gnttab_end_foreign_access(pdev->gnt_ref,
+					  (unsigned long)pdev->sh_info);
+	else
+		free_page((unsigned long)pdev->sh_info);
+
+	pdev->xdev->dev.driver_data = NULL;
+
+	kfree(pdev);
+}
|
|
+
|
|
+/*
+ * Grant the shared page, allocate and bind the event channel, then publish
+ * "pci-op-ref", "event-channel" and "magic" under the device's xenstore
+ * node and switch to XenbusStateInitialised.
+ *
+ * The xenstore transaction is retried for as long as ending it reports
+ * -EAGAIN.  Returns 0 on success or a negative errno.  Nothing is undone
+ * here on failure; gnt_ref, evtchn and irq stay recorded in @pdev.
+ */
+static int pcifront_publish_info(struct pcifront_device *pdev)
+{
+	struct xenbus_device *xdev = pdev->xdev;
+	struct xenbus_transaction trans;
+	int err;
+
+	err = xenbus_grant_ring(xdev, virt_to_mfn(pdev->sh_info));
+	if (err < 0)
+		return err;
+	pdev->gnt_ref = err;
+
+	err = xenbus_alloc_evtchn(xdev, &pdev->evtchn);
+	if (err)
+		return err;
+
+	err = bind_caller_port_to_irqhandler(pdev->evtchn,
+					     pcifront_handler_aer,
+					     SA_SAMPLE_RANDOM,
+					     "pcifront", pdev);
+	if (err < 0) {
+		xenbus_dev_fatal(xdev, err,
+				 "Failed to bind event channel");
+		return err;
+	}
+	pdev->irq = err;
+
+	do {
+		err = xenbus_transaction_start(&trans);
+		if (err) {
+			xenbus_dev_fatal(xdev, err,
+					 "Error writing configuration for backend "
+					 "(start transaction)");
+			return err;
+		}
+
+		err = xenbus_printf(trans, xdev->nodename,
+				    "pci-op-ref", "%u", pdev->gnt_ref);
+		if (!err)
+			err = xenbus_printf(trans, xdev->nodename,
+					    "event-channel", "%u", pdev->evtchn);
+		if (!err)
+			err = xenbus_printf(trans, xdev->nodename,
+					    "magic", XEN_PCI_MAGIC);
+
+		if (err) {
+			/* Abort the transaction; nothing gets published. */
+			xenbus_transaction_end(trans, 1);
+			xenbus_dev_fatal(xdev, err,
+					 "Error writing configuration for backend");
+			return err;
+		}
+
+		err = xenbus_transaction_end(trans, 0);
+	} while (err == -EAGAIN);
+
+	if (err) {
+		xenbus_dev_fatal(xdev, err,
+				 "Error completing transaction "
+				 "for backend");
+		return err;
+	}
+
+	xenbus_switch_state(xdev, XenbusStateInitialised);
+
+	dev_dbg(&xdev->dev, "publishing successful!\n");
+
+	return err;
+}
|
|
+
|
|
+/*
+ * Connect to the backend and scan every PCI root it advertises.
+ *
+ * Runs only while our own xenbus state is XenbusStateInitialised.  The
+ * number of roots is read from "root_num" and each "root-<i>" entry is
+ * scanned; when "root_num" is absent, root 0000:00 is tried instead.  On
+ * success the device is switched to XenbusStateConnected.
+ *
+ * Returns 0 on success, -EFAULT when called in the wrong state, or a
+ * negative errno from the failing step.
+ *
+ * NOTE(review): dev_lock is a spinlock and stays held across
+ * pcifront_scan_root(), which allocates with GFP_KERNEL - confirm this
+ * cannot sleep in atomic context.
+ */
+static int __devinit pcifront_try_connect(struct pcifront_device *pdev)
+{
+	int err = -EFAULT;
+	int i, num_roots, len;
+	char str[64];
+	unsigned int domain, bus;
+
+	spin_lock(&pdev->dev_lock);
+
+	/* Only connect once */
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+	    XenbusStateInitialised)
+		goto out;
+
+	err = pcifront_connect(pdev);
+	if (err) {
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error connecting PCI Frontend");
+		goto out;
+	}
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
+			   "root_num", "%d", &num_roots);
+	if (err == -ENOENT) {
+		xenbus_dev_error(pdev->xdev, err,
+				 "No PCI Roots found, trying 0000:00");
+		err = pcifront_scan_root(pdev, 0, 0);
+		if (err) {
+			/*
+			 * Previously this error was overwritten below and the
+			 * device went Connected without any root bus.
+			 */
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root 0000:00");
+			goto out;
+		}
+		num_roots = 0;
+	} else if (err != 1) {
+		if (err == 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of PCI roots");
+		goto out;
+	}
+
+	for (i = 0; i < num_roots; i++) {
+		len = snprintf(str, sizeof(str), "root-%d", i);
+		if (unlikely(len >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
+				   "%x:%x", &domain, &bus);
+		if (err != 2) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error reading PCI root %d", i);
+			goto out;
+		}
+
+		err = pcifront_scan_root(pdev, domain, bus);
+		if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root %04x:%02x",
+					 domain, bus);
+			goto out;
+		}
+	}
+
+	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+
+ out:
+	spin_unlock(&pdev->dev_lock);
+	return err;
+}
|
|
+
|
|
+/*
+ * Move the frontend to XenbusStateClosed unless its own state is already
+ * XenbusStateClosing or beyond.  When it was Connected, the root buses are
+ * freed and pcifront_disconnect() is called first.
+ *
+ * Returns 0 when nothing had to be done, otherwise the result of
+ * xenbus_switch_state().
+ */
+static int pcifront_try_disconnect(struct pcifront_device *pdev)
+{
+	enum xenbus_state prev_state;
+	int err = 0;
+
+	spin_lock(&pdev->dev_lock);
+
+	prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
+
+	if (prev_state < XenbusStateClosing) {
+		if (prev_state == XenbusStateConnected) {
+			pcifront_free_roots(pdev);
+			pcifront_disconnect(pdev);
+		}
+
+		err = xenbus_switch_state(pdev->xdev, XenbusStateClosed);
+	}
+
+	spin_unlock(&pdev->dev_lock);
+
+	return err;
+}
|
|
+
|
|
+/*
+ * Rescan all PCI roots advertised by the backend for newly attached
+ * devices.
+ *
+ * Runs only while our own xenbus state is XenbusStateReconfiguring.  The
+ * number of roots is read from "root_num" and each "root-<i>" entry is
+ * rescanned; when "root_num" is absent, root 0000:00 is tried instead.  On
+ * success the device is switched back to XenbusStateConnected.
+ *
+ * Returns 0 on success, -EFAULT when called in the wrong state, or a
+ * negative errno from the failing step.
+ *
+ * NOTE(review): dev_lock is a spinlock and stays held across
+ * pcifront_rescan_root(), which can reach a GFP_KERNEL allocation through
+ * pcifront_scan_root() - confirm this cannot sleep in atomic context.
+ */
+static int __devinit pcifront_attach_devices(struct pcifront_device *pdev)
+{
+	int err = -EFAULT;
+	int i, num_roots, len;
+	unsigned int domain, bus;
+	char str[64];
+
+	spin_lock(&pdev->dev_lock);
+
+	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+	    XenbusStateReconfiguring)
+		goto out;
+
+	err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
+			   "root_num", "%d", &num_roots);
+	if (err == -ENOENT) {
+		xenbus_dev_error(pdev->xdev, err,
+				 "No PCI Roots found, trying 0000:00");
+		err = pcifront_rescan_root(pdev, 0, 0);
+		if (err) {
+			/* Do not report Connected after a failed rescan. */
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root 0000:00");
+			goto out;
+		}
+		num_roots = 0;
+	} else if (err != 1) {
+		if (err == 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+				 "Error reading number of PCI roots");
+		goto out;
+	}
+
+	for (i = 0; i < num_roots; i++) {
+		len = snprintf(str, sizeof(str), "root-%d", i);
+		if (unlikely(len >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
+				   "%x:%x", &domain, &bus);
+		if (err != 2) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error reading PCI root %d", i);
+			goto out;
+		}
+
+		err = pcifront_rescan_root(pdev, domain, bus);
+		if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+					 "Error scanning PCI root %04x:%02x",
+					 domain, bus);
+			goto out;
+		}
+	}
+
+	/*
+	 * Take the return value from the state switch: without this, a
+	 * "root_num" of 0 left the positive scanf count (1) in err.
+	 */
+	err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+
+ out:
+	spin_unlock(&pdev->dev_lock);
+	return err;
+}
|
|
+
|
|
+/*
+ * Remove every device the backend has marked as closing.
+ *
+ * Runs only while our own xenbus state is XenbusStateConnected.  For each
+ * index below "num_devs" whose "state-<i>" reads XenbusStateClosing, the
+ * address in "vdev-<i>" is looked up and the matching device removed; a
+ * bus or device that cannot be found is skipped.  Afterwards the frontend
+ * switches to XenbusStateReconfiguring.
+ *
+ * Returns 0 when called in another state, a negative errno when a
+ * xenstore entry cannot be read, otherwise the result of
+ * xenbus_switch_state().
+ */
+static int pcifront_detach_devices(struct pcifront_device *pdev)
+{
+	struct xenbus_device *xdev = pdev->xdev;
+	unsigned int domain, bus, slot, func;
+	struct pci_bus *pci_bus;
+	struct pci_dev *pci_dev;
+	char key[64];
+	int err = 0;
+	int idx, num_devs;
+
+	spin_lock(&pdev->dev_lock);
+
+	if (xenbus_read_driver_state(xdev->nodename) !=
+	    XenbusStateConnected)
+		goto out;
+
+	err = xenbus_scanf(XBT_NIL, xdev->otherend, "num_devs", "%d",
+			   &num_devs);
+	if (err != 1) {
+		if (err >= 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(xdev, err,
+				 "Error reading number of PCI devices");
+		goto out;
+	}
+
+	/* Find devices being detached and remove them. */
+	for (idx = 0; idx < num_devs; idx++) {
+		int n, state;
+
+		n = snprintf(key, sizeof(key), "state-%d", idx);
+		if (unlikely(n >= (sizeof(key) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		/* An unreadable state counts as "not closing". */
+		err = xenbus_scanf(XBT_NIL, xdev->otherend, key, "%d",
+				   &state);
+		if (err != 1 || state != XenbusStateClosing)
+			continue;
+
+		/* Remove device. */
+		n = snprintf(key, sizeof(key), "vdev-%d", idx);
+		if (unlikely(n >= (sizeof(key) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = xenbus_scanf(XBT_NIL, xdev->otherend, key,
+				   "%x:%x:%x.%x", &domain, &bus, &slot, &func);
+		if (err != 4) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(xdev, err,
+					 "Error reading PCI device %d", idx);
+			goto out;
+		}
+
+		pci_bus = pci_find_bus(domain, bus);
+		if (!pci_bus) {
+			dev_dbg(&xdev->dev, "Cannot get bus %04x:%02x\n",
+				domain, bus);
+			continue;
+		}
+
+		pci_dev = pci_get_slot(pci_bus, PCI_DEVFN(slot, func));
+		if (!pci_dev) {
+			dev_dbg(&xdev->dev,
+				"Cannot get PCI device %04x:%02x:%02x.%02x\n",
+				domain, bus, slot, func);
+			continue;
+		}
+
+		pci_remove_bus_device(pci_dev);
+		/* Drop the reference taken by pci_get_slot(). */
+		pci_dev_put(pci_dev);
+
+		dev_dbg(&xdev->dev,
+			"PCI device %04x:%02x:%02x.%02x removed.\n",
+			domain, bus, slot, func);
+	}
+
+	err = xenbus_switch_state(xdev, XenbusStateReconfiguring);
+
+ out:
+	spin_unlock(&pdev->dev_lock);
+	return err;
+}
|
|
+
|
|
+/*
+ * xenbus "otherend_changed" callback: react to a backend state change.
+ * The return values of the helpers are not examined here.
+ */
+static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
+		enum xenbus_state be_state)
+{
+	struct pcifront_device *pdev = xdev->dev.driver_data;
+
+	switch (be_state) {
+	case XenbusStateConnected:
+		pcifront_try_connect(pdev);
+		break;
+
+	case XenbusStateClosing:
+		dev_warn(&xdev->dev, "backend going away!\n");
+		pcifront_try_disconnect(pdev);
+		break;
+
+	case XenbusStateReconfiguring:
+		pcifront_detach_devices(pdev);
+		break;
+
+	case XenbusStateReconfigured:
+		pcifront_attach_devices(pdev);
+		break;
+
+	/* States that need no action from the frontend. */
+	case XenbusStateUnknown:
+	case XenbusStateInitialising:
+	case XenbusStateInitWait:
+	case XenbusStateInitialised:
+	case XenbusStateClosed:
+	default:
+		break;
+	}
+}
|
|
+
|
|
+/*
+ * xenbus probe callback: allocate the per-device state and publish the
+ * frontend's details.  The state is freed again if publishing fails.
+ *
+ * Returns 0 on success, -ENOMEM when allocation fails, or the error from
+ * pcifront_publish_info().
+ */
+static int pcifront_xenbus_probe(struct xenbus_device *xdev,
+		const struct xenbus_device_id *id)
+{
+	struct pcifront_device *pdev = alloc_pdev(xdev);
+	int err;
+
+	if (pdev == NULL) {
+		xenbus_dev_fatal(xdev, -ENOMEM,
+				 "Error allocating pcifront_device struct");
+		return -ENOMEM;
+	}
+
+	err = pcifront_publish_info(pdev);
+	if (err)
+		free_pdev(pdev);
+
+	return err;
+}
|
|
+
|
|
+/*
+ * xenbus remove callback: free the per-device state if there is any.
+ * Always returns 0.
+ */
+static int pcifront_xenbus_remove(struct xenbus_device *xdev)
+{
+	struct pcifront_device *pdev = xdev->dev.driver_data;
+
+	if (pdev)
+		free_pdev(pdev);
+
+	return 0;
+}
|
|
+
|
|
+/* Device types handled by this driver; the zeroed entry ends the table. */
+static const struct xenbus_device_id xenpci_ids[] = {
+	{"pci"},
+	{{0}},
+};
+MODULE_ALIAS("xen:pci");
+
+/* Frontend driver description registered by pcifront_init(). */
+static struct xenbus_driver xenbus_pcifront_driver = {
+	.name			= "pcifront",
+	.owner			= THIS_MODULE,
+	.ids			= xenpci_ids,
+	.probe			= pcifront_xenbus_probe,
+	.remove			= pcifront_xenbus_remove,
+	.otherend_changed	= pcifront_backend_changed,
+};
|
|
+
|
|
+/*
+ * Register the frontend driver with xenbus.
+ * Returns -ENODEV when not running on Xen, otherwise the result of
+ * xenbus_register_frontend().
+ */
+static int __init pcifront_init(void)
+{
+	if (is_running_on_xen())
+		return xenbus_register_frontend(&xenbus_pcifront_driver);
+
+	return -ENODEV;
+}
|
|
+
|
|
+/* Initialize after the Xen PCI Frontend Stub is initialized */
|
|
+subsys_initcall(pcifront_init);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/privcmd/Makefile 2007-07-10 09:42:30.000000000 +0200
|
|
@@ -0,0 +1,3 @@
|
|
+
|
|
+obj-y += privcmd.o
|
|
+obj-$(CONFIG_COMPAT) += compat_privcmd.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/privcmd/compat_privcmd.c 2010-01-27 14:01:48.000000000 +0100
|
|
@@ -0,0 +1,144 @@
|
|
+/*
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
+ *
|
|
+ * Copyright (C) IBM Corp. 2006
|
|
+ *
|
|
+ * Authors: Jimi Xenidis <jimix@watson.ibm.com>
|
|
+ */
|
|
+
|
|
+#include <linux/config.h>
|
|
+#include <linux/compat.h>
|
|
+#include <linux/ioctl.h>
|
|
+#include <linux/syscalls.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <xen/public/privcmd.h>
|
|
+#include <xen/compat_ioctl.h>
|
|
+
|
|
+int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ switch (cmd) {
|
|
+ case IOCTL_PRIVCMD_MMAP_32: {
|
|
+ struct privcmd_mmap *p;
|
|
+ struct privcmd_mmap_32 *p32;
|
|
+ struct privcmd_mmap_32 n32;
|
|
+
|
|
+ p32 = compat_ptr(arg);
|
|
+ p = compat_alloc_user_space(sizeof(*p));
|
|
+ if (copy_from_user(&n32, p32, sizeof(n32)) ||
|
|
+ put_user(n32.num, &p->num) ||
|
|
+ put_user(n32.dom, &p->dom) ||
|
|
+ put_user(compat_ptr(n32.entry), &p->entry))
|
|
+ return -EFAULT;
|
|
+
|
|
+ ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAP, (unsigned long)p);
|
|
+ }
|
|
+ break;
|
|
+ case IOCTL_PRIVCMD_MMAPBATCH_32: {
|
|
+ struct privcmd_mmapbatch *p;
|
|
+ struct privcmd_mmapbatch_32 *p32;
|
|
+ struct privcmd_mmapbatch_32 n32;
|
|
+#ifdef xen_pfn32_t
|
|
+ xen_pfn_t *__user arr;
|
|
+ xen_pfn32_t *__user arr32;
|
|
+ unsigned int i;
|
|
+#endif
|
|
+
|
|
+ p32 = compat_ptr(arg);
|
|
+ p = compat_alloc_user_space(sizeof(*p));
|
|
+ if (copy_from_user(&n32, p32, sizeof(n32)) ||
|
|
+ put_user(n32.num, &p->num) ||
|
|
+ put_user(n32.dom, &p->dom) ||
|
|
+ put_user(n32.addr, &p->addr))
|
|
+ return -EFAULT;
|
|
+#ifdef xen_pfn32_t
|
|
+ arr = compat_alloc_user_space(n32.num * sizeof(*arr)
|
|
+ + sizeof(*p));
|
|
+ arr32 = compat_ptr(n32.arr);
|
|
+ for (i = 0; i < n32.num; ++i) {
|
|
+ xen_pfn32_t mfn;
|
|
+
|
|
+ if (get_user(mfn, arr32 + i) || put_user(mfn, arr + i))
|
|
+ return -EFAULT;
|
|
+ }
|
|
+
|
|
+ if (put_user(arr, &p->arr))
|
|
+ return -EFAULT;
|
|
+#else
|
|
+ if (put_user(compat_ptr(n32.arr), &p->arr))
|
|
+ return -EFAULT;
|
|
+#endif
|
|
+
|
|
+ ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, (unsigned long)p);
|
|
+
|
|
+#ifdef xen_pfn32_t
|
|
+ for (i = 0; !ret && i < n32.num; ++i) {
|
|
+ xen_pfn_t mfn;
|
|
+
|
|
+ if (get_user(mfn, arr + i) || put_user(mfn, arr32 + i))
|
|
+ ret = -EFAULT;
|
|
+ else if (mfn != (xen_pfn32_t)mfn)
|
|
+ ret = -ERANGE;
|
|
+ }
|
|
+#endif
|
|
+ }
|
|
+ break;
|
|
+ case IOCTL_PRIVCMD_MMAPBATCH_V2_32: {
|
|
+ struct privcmd_mmapbatch_v2 *p;
|
|
+ struct privcmd_mmapbatch_v2_32 *p32;
|
|
+ struct privcmd_mmapbatch_v2_32 n32;
|
|
+#ifdef xen_pfn32_t
|
|
+ xen_pfn_t *__user arr;
|
|
+ const xen_pfn32_t *__user arr32;
|
|
+ unsigned int i;
|
|
+#endif
|
|
+
|
|
+ p32 = compat_ptr(arg);
|
|
+ p = compat_alloc_user_space(sizeof(*p));
|
|
+ if (copy_from_user(&n32, p32, sizeof(n32)) ||
|
|
+ put_user(n32.num, &p->num) ||
|
|
+ put_user(n32.dom, &p->dom) ||
|
|
+ put_user(n32.addr, &p->addr) ||
|
|
+ put_user(compat_ptr(n32.err), &p->err))
|
|
+ return -EFAULT;
|
|
+#ifdef xen_pfn32_t
|
|
+ arr = compat_alloc_user_space(n32.num * sizeof(*arr)
|
|
+ + sizeof(*p));
|
|
+ arr32 = compat_ptr(n32.arr);
|
|
+ for (i = 0; i < n32.num; ++i) {
|
|
+ xen_pfn32_t mfn;
|
|
+
|
|
+ if (get_user(mfn, arr32 + i) || put_user(mfn, arr + i))
|
|
+ return -EFAULT;
|
|
+ }
|
|
+
|
|
+ if (put_user(arr, &p->arr))
|
|
+ return -EFAULT;
|
|
+#else
|
|
+ if (put_user(compat_ptr(n32.arr), &p->arr))
|
|
+ return -EFAULT;
|
|
+#endif
|
|
+
|
|
+ ret = sys_ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH_V2, (unsigned long)p);
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ ret = -EINVAL;
|
|
+ break;
|
|
+ }
|
|
+ return ret;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/privcmd/privcmd.c 2010-01-27 14:01:48.000000000 +0100
|
|
@@ -0,0 +1,491 @@
|
|
+/******************************************************************************
|
|
+ * privcmd.c
|
|
+ *
|
|
+ * Interface to privileged domain-0 commands.
|
|
+ *
|
|
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/mman.h>
|
|
+#include <linux/swap.h>
|
|
+#include <linux/smp_lock.h>
|
|
+#include <linux/highmem.h>
|
|
+#include <linux/pagemap.h>
|
|
+#include <linux/seq_file.h>
|
|
+#include <asm/hypervisor.h>
|
|
+
|
|
+#include <asm/pgalloc.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <asm/tlb.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/public/privcmd.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/xen_proc.h>
|
|
+#include <xen/features.h>
|
|
+
|
|
+static struct proc_dir_entry *privcmd_intf;
|
|
+static struct proc_dir_entry *capabilities_intf;
|
|
+
|
|
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
|
+/*
+ * Per-PTE callback handed to apply_to_page_range(): accept the entry only
+ * while it is still empty, otherwise report -EBUSY.  pmd_page, addr and
+ * data are not used.
+ */
+static int enforce_singleshot_mapping_fn(pte_t *pte, struct page *pmd_page,
+					 unsigned long addr, void *data)
+{
+	if (!pte_none(*pte))
+		return -EBUSY;
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Check that none of the npages pages starting at addr in vma's address
+ * space has a PTE installed yet.  Returns non-zero when the whole range is
+ * still unmapped, 0 when apply_to_page_range() reported any error.
+ */
+static inline int enforce_singleshot_mapping(struct vm_area_struct *vma,
+					     unsigned long addr,
+					     unsigned long npages)
+{
+	unsigned long len = npages << PAGE_SHIFT;
+	int rc;
+
+	rc = apply_to_page_range(vma->vm_mm, addr, len,
+				 enforce_singleshot_mapping_fn, NULL);
+
+	return rc == 0;
+}
|
|
+#else
|
|
+#define enforce_singleshot_mapping(vma, addr, npages) \
|
|
+ privcmd_enforce_singleshot_mapping(vma)
|
|
+#endif
|
|
+
|
|
+/*
+ * ioctl entry point of the privcmd device.
+ *
+ * @file: file the ioctl was issued on (not referenced here).
+ * @cmd:  IOCTL_PRIVCMD_HYPERCALL, IOCTL_PRIVCMD_MMAP,
+ *        IOCTL_PRIVCMD_MMAPBATCH or IOCTL_PRIVCMD_MMAPBATCH_V2.
+ * @data: user-space address of the command specific argument structure.
+ *
+ * Returns the hypercall's result for IOCTL_PRIVCMD_HYPERCALL.  The mapping
+ * commands return 0 on success or a negative errno: -EPERM outside the
+ * initial domain, -EFAULT on a failed user copy, -ENOMEM, -EINVAL for a bad
+ * range, and for the batch commands -ENOENT when at least one frame failed
+ * with -ENOENT.  Unknown commands yield -EINVAL.
+ *
+ * All three mapping commands stage the caller's request in a list of kernel
+ * pages (each page starts with a struct list_head, the payload follows) and
+ * only afterwards take mm->mmap_sem for writing.  Failure paths that run
+ * before the semaphore is taken must therefore not release it.
+ */
+static long privcmd_ioctl(struct file *file,
+			  unsigned int cmd, unsigned long data)
+{
+	long ret;
+	void __user *udata = (void __user *) data;
+	unsigned long i, addr, nr, nr_pages;
+	int paged_out;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	LIST_HEAD(pagelist);
+	struct list_head *l, *l2;
+
+	switch (cmd) {
+	case IOCTL_PRIVCMD_HYPERCALL: {
+		privcmd_hypercall_t hypercall;
+
+		if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
+			return -EFAULT;
+
+		ret = -ENOSYS;
+#if defined(__i386__)
+		/*
+		 * op is scaled by 32 (shll $5) and added to hypercall_page,
+		 * so it has to stay below PAGE_SIZE >> 5.
+		 */
+		if (hypercall.op >= (PAGE_SIZE >> 5))
+			break;
+		__asm__ __volatile__ (
+			"pushl %%ebx; pushl %%ecx; pushl %%edx; "
+			"pushl %%esi; pushl %%edi; "
+			"movl 8(%%eax),%%ebx ;"
+			"movl 16(%%eax),%%ecx ;"
+			"movl 24(%%eax),%%edx ;"
+			"movl 32(%%eax),%%esi ;"
+			"movl 40(%%eax),%%edi ;"
+			"movl (%%eax),%%eax ;"
+			"shll $5,%%eax ;"
+			"addl $hypercall_page,%%eax ;"
+			"call *%%eax ;"
+			"popl %%edi; popl %%esi; popl %%edx; "
+			"popl %%ecx; popl %%ebx"
+			: "=a" (ret) : "0" (&hypercall) : "memory" );
+#elif defined (__x86_64__)
+		/* Same bound as above; out-of-range ops fall out as -ENOSYS. */
+		if (hypercall.op < (PAGE_SIZE >> 5)) {
+			long ign1, ign2, ign3;
+			__asm__ __volatile__ (
+				"movq %8,%%r10; movq %9,%%r8;"
+				"shll $5,%%eax ;"
+				"addq $hypercall_page,%%rax ;"
+				"call *%%rax"
+				: "=a" (ret), "=D" (ign1),
+				  "=S" (ign2), "=d" (ign3)
+				: "0" ((unsigned int)hypercall.op),
+				"1" (hypercall.arg[0]),
+				"2" (hypercall.arg[1]),
+				"3" (hypercall.arg[2]),
+				"g" (hypercall.arg[3]),
+				"g" (hypercall.arg[4])
+				: "r8", "r10", "memory" );
+		}
+#else
+		ret = privcmd_hypercall(&hypercall);
+#endif
+	}
+	break;
+
+	case IOCTL_PRIVCMD_MMAP: {
+#define MMAP_NR_PER_PAGE \
+	(unsigned long)((PAGE_SIZE - sizeof(*l)) / sizeof(*msg))
+		privcmd_mmap_t mmapcmd;
+		privcmd_mmap_entry_t *msg;
+		privcmd_mmap_entry_t __user *p;
+
+		if (!is_initial_xendomain())
+			return -EPERM;
+
+		if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
+			return -EFAULT;
+
+		if (mmapcmd.num <= 0)
+			return -EINVAL;
+
+		/*
+		 * Stage all entries in kernel pages.  mmap_sem is not held
+		 * yet, so failures here must skip the up_write() below and
+		 * go straight to freeing the staged pages.
+		 */
+		p = mmapcmd.entry;
+		for (i = 0; i < mmapcmd.num;) {
+			nr = min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
+
+			ret = -ENOMEM;
+			l = (struct list_head *) __get_free_page(GFP_KERNEL);
+			if (l == NULL)
+				goto mmap_free;
+
+			INIT_LIST_HEAD(l);
+			list_add_tail(l, &pagelist);
+			msg = (privcmd_mmap_entry_t*)(l + 1);
+
+			ret = -EFAULT;
+			if (copy_from_user(msg, p, nr*sizeof(*msg)))
+				goto mmap_free;
+			i += nr;
+			p += nr;
+		}
+
+		l = pagelist.next;
+		msg = (privcmd_mmap_entry_t*)(l + 1);
+
+		down_write(&mm->mmap_sem);
+
+		/* The first entry must start exactly at the start of a VMA. */
+		vma = find_vma(mm, msg->va);
+		ret = -EINVAL;
+		if (!vma || (msg->va != vma->vm_start))
+			goto mmap_out;
+
+		addr = vma->vm_start;
+
+		/* Pass 1: validate every entry before touching any PTE. */
+		i = 0;
+		list_for_each(l, &pagelist) {
+			nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
+
+			msg = (privcmd_mmap_entry_t*)(l + 1);
+			while (i<nr) {
+
+				/* Do not allow range to wrap the address space. */
+				if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
+				    (((unsigned long)msg->npages << PAGE_SHIFT) >= -addr))
+					goto mmap_out;
+
+				/* Range chunks must be contiguous in va space. */
+				if ((msg->va != addr) ||
+				    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
+					goto mmap_out;
+
+				addr += msg->npages << PAGE_SHIFT;
+				msg++;
+				i++;
+			}
+		}
+
+		/* Refuse to map over anything that is already populated. */
+		if (!enforce_singleshot_mapping(vma, vma->vm_start,
+						(addr - vma->vm_start) >> PAGE_SHIFT))
+			goto mmap_out;
+
+		/* Pass 2: install the mappings. */
+		addr = vma->vm_start;
+		i = 0;
+		list_for_each(l, &pagelist) {
+			nr = i + min(mmapcmd.num - i, MMAP_NR_PER_PAGE);
+
+			msg = (privcmd_mmap_entry_t*)(l + 1);
+			while (i < nr) {
+				if ((ret = direct_remap_pfn_range(
+					     vma,
+					     msg->va & PAGE_MASK,
+					     msg->mfn,
+					     msg->npages << PAGE_SHIFT,
+					     vma->vm_page_prot,
+					     mmapcmd.dom)) < 0)
+					goto mmap_out;
+
+				addr += msg->npages << PAGE_SHIFT;
+				msg++;
+				i++;
+			}
+		}
+
+		ret = 0;
+
+	mmap_out:
+		up_write(&mm->mmap_sem);
+	mmap_free:
+		list_for_each_safe(l,l2,&pagelist)
+			free_page((unsigned long)l);
+	}
+#undef MMAP_NR_PER_PAGE
+	break;
+
+	case IOCTL_PRIVCMD_MMAPBATCH: {
+#define MMAPBATCH_NR_PER_PAGE \
+	(unsigned long)((PAGE_SIZE - sizeof(*l)) / sizeof(*mfn))
+		privcmd_mmapbatch_t m;
+		xen_pfn_t __user *p;
+		xen_pfn_t *mfn;
+
+		if (!is_initial_xendomain())
+			return -EPERM;
+
+		if (copy_from_user(&m, udata, sizeof(m)))
+			return -EFAULT;
+
+		/* Reject empty, truncated or address-space wrapping requests. */
+		nr_pages = m.num;
+		addr = m.addr;
+		if (m.num <= 0 || nr_pages > (LONG_MAX >> PAGE_SHIFT) ||
+		    addr != m.addr || nr_pages > (-addr >> PAGE_SHIFT))
+			return -EINVAL;
+
+		/* Stage the frame array; mmap_sem is not held yet. */
+		p = m.arr;
+		for (i=0; i<nr_pages; ) {
+			nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+
+			ret = -ENOMEM;
+			l = (struct list_head *)__get_free_page(GFP_KERNEL);
+			if (l == NULL)
+				goto mmapbatch_out;
+
+			INIT_LIST_HEAD(l);
+			list_add_tail(l, &pagelist);
+
+			mfn = (unsigned long*)(l + 1);
+			ret = -EFAULT;
+			if (copy_from_user(mfn, p, nr*sizeof(*mfn)))
+				goto mmapbatch_out;
+
+			i += nr; p+= nr;
+		}
+
+		down_write(&mm->mmap_sem);
+
+		vma = find_vma(mm, addr);
+		ret = -EINVAL;
+		if (!vma ||
+		    addr < vma->vm_start ||
+		    addr + (nr_pages << PAGE_SHIFT) > vma->vm_end ||
+		    !enforce_singleshot_mapping(vma, addr, nr_pages)) {
+			up_write(&mm->mmap_sem);
+			goto mmapbatch_out;
+		}
+
+		/* Map frame by frame; ret counts the frames that failed. */
+		i = 0;
+		ret = 0;
+		paged_out = 0;
+		list_for_each(l, &pagelist) {
+			nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+			mfn = (unsigned long *)(l + 1);
+
+			while (i<nr) {
+				int rc;
+
+				rc = direct_remap_pfn_range(vma, addr & PAGE_MASK,
+							    *mfn, PAGE_SIZE,
+							    vma->vm_page_prot, m.dom);
+				if(rc < 0) {
+					/*
+					 * Flag the failed frame in place:
+					 * 0x80000000 for -ENOENT,
+					 * 0xf0000000 for any other error.
+					 */
+					if (rc == -ENOENT)
+					{
+						*mfn |= 0x80000000U;
+						paged_out = 1;
+					}
+					else
+						*mfn |= 0xf0000000U;
+					ret++;
+				}
+				mfn++; i++; addr += PAGE_SIZE;
+			}
+		}
+
+		up_write(&mm->mmap_sem);
+		if (ret > 0) {
+			/* Hand the flagged frame array back to the caller. */
+			p = m.arr;
+			i = 0;
+			if (paged_out)
+				ret = -ENOENT;
+			else
+				ret = 0;
+			list_for_each(l, &pagelist) {
+				nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+				mfn = (unsigned long *)(l + 1);
+				if (copy_to_user(p, mfn, nr*sizeof(*mfn)))
+					ret = -EFAULT;
+				i += nr; p += nr;
+			}
+		}
+	mmapbatch_out:
+		list_for_each_safe(l,l2,&pagelist)
+			free_page((unsigned long)l);
+	}
+	break;
+
+	case IOCTL_PRIVCMD_MMAPBATCH_V2: {
+		privcmd_mmapbatch_v2_t m;
+		const xen_pfn_t __user *p;
+		xen_pfn_t *mfn;
+		int *err;
+
+		if (!is_initial_xendomain())
+			return -EPERM;
+
+		if (copy_from_user(&m, udata, sizeof(m)))
+			return -EFAULT;
+
+		nr_pages = m.num;
+		addr = m.addr;
+		if (m.num <= 0 || nr_pages > (ULONG_MAX >> PAGE_SHIFT) ||
+		    addr != m.addr || nr_pages > (-addr >> PAGE_SHIFT))
+			return -EINVAL;
+
+		/* Stage the frame array; mmap_sem is not held yet. */
+		p = m.arr;
+		for (i = 0; i < nr_pages; i += nr, p += nr) {
+			nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+
+			ret = -ENOMEM;
+			l = (struct list_head *)__get_free_page(GFP_KERNEL);
+			if (l == NULL)
+				goto mmapbatch_v2_out;
+
+			INIT_LIST_HEAD(l);
+			list_add_tail(l, &pagelist);
+
+			mfn = (void *)(l + 1);
+			ret = -EFAULT;
+			if (copy_from_user(mfn, p, nr * sizeof(*mfn)))
+				goto mmapbatch_v2_out;
+		}
+
+		down_write(&mm->mmap_sem);
+
+		vma = find_vma(mm, addr);
+		ret = -EINVAL;
+		if (!vma ||
+		    addr < vma->vm_start ||
+		    addr + (nr_pages << PAGE_SHIFT) > vma->vm_end ||
+		    !enforce_singleshot_mapping(vma, addr, nr_pages)) {
+			up_write(&mm->mmap_sem);
+			goto mmapbatch_v2_out;
+		}
+
+		i = 0;
+		ret = 0;
+		paged_out = 0;
+		list_for_each(l, &pagelist) {
+			nr = i + min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+			/*
+			 * The per-frame error codes are written over the
+			 * staged frame numbers: err trails mfn in the same
+			 * page, which is safe because an int is never wider
+			 * than a xen_pfn_t (checked at build time).
+			 */
+			mfn = (void *)(l + 1);
+			err = (void *)(l + 1);
+			BUILD_BUG_ON(sizeof(*err) > sizeof(*mfn));
+
+			while (i < nr) {
+				int rc;
+
+				rc = direct_remap_pfn_range(vma, addr & PAGE_MASK,
+							    *mfn, PAGE_SIZE,
+							    vma->vm_page_prot, m.dom);
+				if (rc < 0) {
+					if (rc == -ENOENT)
+						paged_out = 1;
+					ret++;
+				} else
+					BUG_ON(rc > 0);
+				*err++ = rc;
+				mfn++; i++; addr += PAGE_SIZE;
+			}
+		}
+
+		up_write(&mm->mmap_sem);
+
+		if (ret > 0) {
+			/* At least one frame failed: report every status. */
+			int __user *p = m.err;
+
+			ret = paged_out ? -ENOENT : 0;
+			i = 0;
+			list_for_each(l, &pagelist) {
+				nr = min(nr_pages - i, MMAPBATCH_NR_PER_PAGE);
+				err = (void *)(l + 1);
+				if (copy_to_user(p, err, nr * sizeof(*err)))
+					ret = -EFAULT;
+				i += nr; p += nr;
+			}
+		} else if (clear_user(m.err, nr_pages * sizeof(*m.err)))
+			ret = -EFAULT;
+
+	mmapbatch_v2_out:
+		list_for_each_safe(l, l2, &pagelist)
+			free_page((unsigned long)l);
+#undef MMAPBATCH_NR_PER_PAGE
+	}
+	break;
+
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
|
|
+
|
|
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
|
+static struct page *privcmd_nopage(struct vm_area_struct *vma,
|
|
+ unsigned long address,
|
|
+ int *type)
|
|
+{ /* .nopage handler of privcmd_vm_ops; vma, address and type are ignored */
|
|
+ return NOPAGE_SIGBUS; /* every fault is answered with NOPAGE_SIGBUS */
|
|
+}
|
|
+
|
|
+static struct vm_operations_struct privcmd_vm_ops = { /* installed on the VMA by privcmd_mmap() */
|
|
+ .nopage = privcmd_nopage /* unconditionally returns NOPAGE_SIGBUS */
|
|
+};
|
|
+
|
|
+/*
+ * mmap handler of the privcmd file: prepares the VMA but maps nothing.
+ * Returns -ENOSYS for auto-translated guests and 0 otherwise.  The file
+ * argument is not used.
+ */
+static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	/* Auto-translate guests are not supported. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -ENOSYS;
+
+	vma->vm_private_data = NULL;
+	vma->vm_ops = &privcmd_vm_ops;
+
+	/* VM_DONTCOPY is essential for Xen as copy_page_range is broken. */
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+
+	return 0;
+}
|
|
+#endif
|
|
+
|
|
+static const struct file_operations privcmd_file_ops = { /* assigned to the "privcmd" proc entry in privcmd_init() */
|
|
+ .unlocked_ioctl = privcmd_ioctl,
|
|
+ .mmap = privcmd_mmap, /* NOTE(review): defined in this file only when HAVE_ARCH_PRIVCMD_MMAP is unset; presumably arch code provides it otherwise - confirm */
|
|
+};
|
|
+
|
|
+/*
+ * read_proc handler of the "capabilities" proc entry.  Writes "control_d\n"
+ * into page when running as the initial Xen domain and an empty string
+ * otherwise, flags end-of-file and returns the number of characters
+ * produced.  start, off, count and data are not used.
+ */
+static int capabilities_read(char *page, char **start, off_t off,
+			     int count, int *eof, void *data)
+{
+	int len;
+
+	*page = 0;
+	len = is_initial_xendomain() ? sprintf(page, "control_d\n") : 0;
+	*eof = 1;
+
+	return len;
+}
|
|
+
|
|
+/*
+ * Create the "privcmd" and "capabilities" Xen proc entries and attach
+ * their handlers.  Returns -ENODEV when not running on Xen, 0 otherwise;
+ * a proc entry that could not be created is silently left unhooked.
+ */
+static int __init privcmd_init(void)
+{
+	struct proc_dir_entry *entry;
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	entry = create_xen_proc_entry("privcmd", 0400);
+	privcmd_intf = entry;
+	if (entry)
+		entry->proc_fops = &privcmd_file_ops;
+
+	entry = create_xen_proc_entry("capabilities", 0400 );
+	capabilities_intf = entry;
+	if (entry)
+		entry->read_proc = capabilities_read;
+
+	return 0;
+}
|
|
+
|
|
+__initcall(privcmd_init);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsiback/Makefile 2008-07-21 11:00:33.000000000 +0200
|
|
@@ -0,0 +1,4 @@
|
|
+obj-$(CONFIG_XEN_SCSI_BACKEND) := xen-scsibk.o
|
|
+
|
|
+xen-scsibk-y := interface.o scsiback.o xenbus.o translate.o emulate.o
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsiback/common.h 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,186 @@
|
|
+/*
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * Based on the blkback driver code.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __SCSIIF__BACKEND__COMMON_H__
|
|
+#define __SCSIIF__BACKEND__COMMON_H__
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/wait.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/blkdev.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_cmnd.h>
|
|
+#include <scsi/scsi_host.h>
|
|
+#include <scsi/scsi_device.h>
|
|
+#include <scsi/scsi_dbg.h>
|
|
+#include <scsi/scsi_eh.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/setup.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <asm/delay.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/io/ring.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+#include <xen/interface/io/vscsiif.h>
|
|
+
|
|
+
|
|
+#define DPRINTK(_f, _a...) \
|
|
+ pr_debug("(file=%s, line=%d) " _f, \
|
|
+ __FILE__ , __LINE__ , ## _a )
|
|
+
|
|
+struct ids_tuple { /* SCSI address tuple; the "translate from" key of struct v2p_entry */
|
|
+ unsigned int hst; /* host */
|
|
+ unsigned int chn; /* channel */
|
|
+ unsigned int tgt; /* target */
|
|
+ unsigned int lun; /* LUN */
|
|
+};
|
|
+
|
|
+struct v2p_entry { /* one translation-table node of a struct vscsibk_info */
|
|
+ struct ids_tuple v; /* translate from */
|
|
+ struct scsi_device *sdev; /* translate to */
|
|
+ struct list_head l; /* linkage into vscsibk_info.v2p_entry_lists */
|
|
+};
|
|
+
|
|
+struct vscsibk_info { /* per-backend state; presumably one instance per frontend connection - confirm */
|
|
+ struct xenbus_device *dev;
|
|
+
|
|
+ domid_t domid;
|
|
+ unsigned int evtchn;
|
|
+ unsigned int irq;
|
|
+
|
|
+ int feature;
|
|
+
|
|
+ struct vscsiif_back_ring ring;
|
|
+ struct vm_struct *ring_area;
|
|
+ grant_handle_t shmem_handle;
|
|
+ grant_ref_t shmem_ref;
|
|
+
|
|
+ spinlock_t ring_lock;
|
|
+ atomic_t nr_unreplied_reqs; /* incremented by scsiback_get(), decremented by scsiback_put() */
|
|
+
|
|
+ spinlock_t v2p_lock; /* taken (irqsave) while walking v2p_entry_lists */
|
|
+ struct list_head v2p_entry_lists; /* list of struct v2p_entry, linked through member l */
|
|
+
|
|
+ struct task_struct *kthread;
|
|
+ wait_queue_head_t waiting_to_free; /* woken by scsiback_put() when nr_unreplied_reqs reaches zero */
|
|
+ wait_queue_head_t wq;
|
|
+ unsigned int waiting_reqs;
|
|
+ struct page **mmap_pages;
|
|
+
|
|
+};
|
|
+
|
|
+typedef struct { /* state of one request while the backend processes it */
|
|
+ unsigned char act;
|
|
+ struct vscsibk_info *info; /* backend instance the request belongs to */
|
|
+ struct scsi_device *sdev;
|
|
+
|
|
+ uint16_t rqid;
|
|
+
|
|
+ uint16_t v_chn, v_tgt; /* virtual channel/target; matched against v2p_entry.v by the REPORT LUNS emulation */
|
|
+
|
|
+ uint8_t nr_segments; /* number of entries in sgl */
|
|
+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE]; /* command block; cmnd[0] is the opcode used to index the emulation tables */
|
|
+ uint8_t cmd_len;
|
|
+
|
|
+ uint8_t sc_data_direction;
|
|
+ uint16_t timeout_per_command;
|
|
+
|
|
+ uint32_t request_bufflen;
|
|
+ struct scatterlist *sgl; /* data buffer as a scatterlist of nr_segments entries */
|
|
+ grant_ref_t gref[VSCSIIF_SG_TABLESIZE];
|
|
+
|
|
+ int32_t rslt; /* result handed to scsiback_do_resp_with_sense() */
|
|
+ uint32_t resid; /* residual count handed to scsiback_do_resp_with_sense() */
|
|
+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE]; /* sense data handed to scsiback_do_resp_with_sense() */
|
|
+
|
|
+ struct list_head free_list;
|
|
+} pending_req_t;
|
|
+
|
|
+
|
|
+
|
|
+#define scsiback_get(_b) (atomic_inc(&(_b)->nr_unreplied_reqs)) /* count one more unreplied request on _b */
|
|
+#define scsiback_put(_b) /* drop one; wake waiting_to_free when the count reaches zero */ \
|
|
+ do { \
|
|
+ if (atomic_dec_and_test(&(_b)->nr_unreplied_reqs)) \
|
|
+ wake_up(&(_b)->waiting_to_free);\
|
|
+ } while (0)
|
|
+
|
|
+#define VSCSIIF_TIMEOUT (900*HZ)
|
|
+
|
|
+#define VSCSI_TYPE_HOST 1
|
|
+
|
|
+irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
|
|
+int scsiback_init_sring(struct vscsibk_info *info,
|
|
+ unsigned long ring_ref, unsigned int evtchn);
|
|
+int scsiback_schedule(void *data);
|
|
+
|
|
+
|
|
+struct vscsibk_info *vscsibk_info_alloc(domid_t domid);
|
|
+void scsiback_free(struct vscsibk_info *info);
|
|
+void scsiback_disconnect(struct vscsibk_info *info);
|
|
+int __init scsiback_interface_init(void);
|
|
+void scsiback_interface_exit(void);
|
|
+int scsiback_xenbus_init(void);
|
|
+void scsiback_xenbus_unregister(void);
|
|
+
|
|
+void scsiback_init_translation_table(struct vscsibk_info *info);
|
|
+
|
|
+int scsiback_add_translation_entry(struct vscsibk_info *info,
|
|
+ struct scsi_device *sdev, struct ids_tuple *v);
|
|
+
|
|
+int scsiback_del_translation_entry(struct vscsibk_info *info,
|
|
+ struct ids_tuple *v);
|
|
+struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
|
|
+ struct ids_tuple *v);
|
|
+void scsiback_release_translation_entry(struct vscsibk_info *info);
|
|
+
|
|
+
|
|
+void scsiback_cmd_exec(pending_req_t *pending_req);
|
|
+void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
|
|
+ uint32_t resid, pending_req_t *pending_req);
|
|
+void scsiback_fast_flush_area(pending_req_t *req);
|
|
+
|
|
+void scsiback_rsp_emulation(pending_req_t *pending_req);
|
|
+void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req);
|
|
+void scsiback_emulation_init(void);
|
|
+
|
|
+
|
|
+#endif /* __SCSIIF__BACKEND__COMMON_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsiback/emulate.c 2011-02-02 12:19:11.000000000 +0100
|
|
@@ -0,0 +1,484 @@
|
|
+/*
|
|
+ * Xen SCSI backend driver
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+/*
|
|
+* Patched to support >2TB drives + allow tape & autoloader operations
|
|
+* 2010, Samuel Kvasnica, IMS Nanofabrication AG
|
|
+*/
|
|
+
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_cmnd.h>
|
|
+#include <scsi/scsi_device.h>
|
|
+#include "common.h"
|
|
+
|
|
+/* Following SCSI commands are not defined in scsi/scsi.h */
|
|
+#define EXTENDED_COPY 0x83 /* EXTENDED COPY command */
|
|
+#define REPORT_ALIASES 0xa3 /* REPORT ALIASES command */
|
|
+#define CHANGE_ALIASES 0xa4 /* CHANGE ALIASES command */
|
|
+#define SET_PRIORITY 0xa4 /* SET PRIORITY command */
|
|
+
|
|
+
|
|
+/*
|
|
+ The bitmap in order to control emulation.
|
|
+ (Bit 3 to 7 are reserved for future use.)
|
|
+*/
|
|
+#define VSCSIIF_NEED_CMD_EXEC 0x01 /* If this bit is set, cmd exec */
|
|
+ /* is required. */
|
|
+#define VSCSIIF_NEED_EMULATE_REQBUF 0x02 /* If this bit is set, need */
|
|
+ /* emulation request buff before */
|
|
+ /* cmd exec. */
|
|
+#define VSCSIIF_NEED_EMULATE_RSPBUF 0x04 /* If this bit is set, need */
|
|
+ /* emulation resp buff after */
|
|
+ /* cmd exec. */
|
|
+
|
|
+/* Additional Sense Code (ASC) used */
|
|
+#define NO_ADDITIONAL_SENSE 0x0
|
|
+#define LOGICAL_UNIT_NOT_READY 0x4
|
|
+#define UNRECOVERED_READ_ERR 0x11
|
|
+#define PARAMETER_LIST_LENGTH_ERR 0x1a
|
|
+#define INVALID_OPCODE 0x20
|
|
+#define ADDR_OUT_OF_RANGE 0x21
|
|
+#define INVALID_FIELD_IN_CDB 0x24
|
|
+#define INVALID_FIELD_IN_PARAM_LIST 0x26
|
|
+#define POWERON_RESET 0x29
|
|
+#define SAVING_PARAMS_UNSUP 0x39
|
|
+#define THRESHOLD_EXCEEDED 0x5d
|
|
+#define LOW_POWER_COND_ON 0x5e
|
|
+
|
|
+
|
|
+
|
|
+/* Number of SCSI op_codes */
|
|
+#define VSCSI_MAX_SCSI_OP_CODE 256
|
|
+static unsigned char bitmap[VSCSI_MAX_SCSI_OP_CODE];
|
|
+
|
|
+#define NO_EMULATE(cmd) \
|
|
+ bitmap[cmd] = VSCSIIF_NEED_CMD_EXEC; \
|
|
+ pre_function[cmd] = NULL; \
|
|
+ post_function[cmd] = NULL
|
|
+
|
|
+
|
|
+
|
|
+/*
|
|
+ Emulation routines for each SCSI op_code.
|
|
+*/
|
|
+static void (*pre_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
|
|
+static void (*post_function[VSCSI_MAX_SCSI_OP_CODE])(pending_req_t *, void *);
|
|
+
|
|
+
|
|
+static const int check_condition_result =
|
|
+ (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
|
|
+
|
|
+/*
+ * Fill in the fixed-format sense fields of data: response code, sense key,
+ * additional length, ASC and ASCQ.  Only those five bytes are written; the
+ * rest of the buffer is left as it was.
+ */
+static void scsiback_mk_sense_buffer(uint8_t *data, uint8_t key,
+				     uint8_t asc, uint8_t asq)
+{
+	enum {
+		VSB_SENSE_RESPONSE_CODE	= 0,
+		VSB_SENSE_KEY		= 2,
+		VSB_SENSE_ADD_LENGTH	= 7,
+		VSB_SENSE_ASC		= 12,
+		VSB_SENSE_ASCQ		= 13
+	};
+
+	data[VSB_SENSE_RESPONSE_CODE] = 0x70;	/* fixed, current */
+	data[VSB_SENSE_KEY] = key;
+	data[VSB_SENSE_ADD_LENGTH] = 0xa;	/* implies 18 byte sense buffer */
+	data[VSB_SENSE_ASC] = asc;
+	data[VSB_SENSE_ASCQ] = asq;
+}
|
|
+
|
|
+/*
+ * Pre-emulation hook for opcodes that are not supported: complete the
+ * request with CHECK CONDITION, sense key ILLEGAL REQUEST and ASC
+ * INVALID_OPCODE.  data is not used.
+ */
+static void resp_not_supported_cmd(pending_req_t *pending_req, void *data)
+{
+	uint8_t *sense = pending_req->sense_buffer;
+
+	scsiback_mk_sense_buffer(sense, ILLEGAL_REQUEST, INVALID_OPCODE, 0);
+
+	pending_req->rslt = check_condition_result;
+	pending_req->resid = 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Copy buflen bytes from buf into the first nr_sg entries of the
+ * scatterlist sg, filling each entry up to its length.
+ *
+ * Returns 0 once all bytes have been stored.  Returns -ENOMEM (after a
+ * warning) when an entry without a page is met or when the list is used up
+ * before the remaining byte count has reached zero.
+ */
+static int __copy_to_sg(struct scatterlist *sg, unsigned int nr_sg,
+	       void *buf, unsigned int buflen)
+{
+	const unsigned char *src = buf;
+	unsigned int remaining = buflen;
+	unsigned int idx;
+
+	for (idx = 0; idx < nr_sg; idx++, sg++) {
+		unsigned int chunk;
+		unsigned long pfn;
+		void *dst;
+
+		if (sg->page == NULL) {
+			printk(KERN_WARNING "%s: inconsistent length field in "
+			       "scatterlist\n", __FUNCTION__);
+			return -ENOMEM;
+		}
+
+		chunk = min_t(unsigned int, sg->length, remaining);
+
+		pfn = page_to_pfn(sg->page);
+		dst = pfn_to_kaddr(pfn) + (sg->offset);
+		memcpy(dst, src, chunk);
+
+		remaining -= chunk;
+		if (remaining == 0)
+			return 0;
+
+		src += chunk;
+	}
+
+	printk(KERN_WARNING "%s: no space in scatterlist\n",
+	       __FUNCTION__);
+	return -ENOMEM;
+}
|
|
+
|
|
+/*
+ * Gather the contents of the first nr_sg entries of the scatterlist sg
+ * into buf, which can hold buflen bytes.
+ *
+ * Returns 0 when every entry has been copied.  Returns -ENOMEM (after a
+ * warning) when an entry without a page is met or when an entry is larger
+ * than the space still left in buf.
+ */
+static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg,
+		 void *buf, unsigned int buflen)
+{
+	unsigned char *dst = buf;
+	unsigned int space = buflen;
+	unsigned int idx;
+
+	for (idx = 0; idx < nr_sg; idx++, sg++) {
+		unsigned int seg_len;
+		unsigned long pfn;
+		void *src;
+
+		if (sg->page == NULL) {
+			printk(KERN_WARNING "%s: inconsistent length field in "
+			       "scatterlist\n", __FUNCTION__);
+			return -ENOMEM;
+		}
+
+		seg_len = sg->length;
+		if (space < seg_len) {
+			printk(KERN_WARNING
+			       "%s: no space in destination buffer\n",
+			       __FUNCTION__);
+			return -ENOMEM;
+		}
+
+		pfn = page_to_pfn(sg->page);
+		src = pfn_to_kaddr(pfn) + (sg->offset);
+		memcpy(dst, src, seg_len);
+
+		space -= seg_len;
+		dst += seg_len;
+	}
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Count all entries of info's translation table, regardless of channel
+ * and target.  The table is walked with v2p_lock held.
+ */
+static int __nr_luns_under_host(struct vscsibk_info *info)
+{
+	struct list_head *pos;
+	unsigned long flags;
+	int count = 0;
+
+	spin_lock_irqsave(&info->v2p_lock, flags);
+	list_for_each(pos, &info->v2p_entry_lists)
+		count++;
+	spin_unlock_irqrestore(&info->v2p_lock, flags);
+
+	return count;
+}
|
|
+
|
|
+
|
|
+/* REPORT LUNS Define*/
|
|
+#define VSCSI_REPORT_LUNS_HEADER 8
|
|
+#define VSCSI_REPORT_LUNS_RETRY 3
|
|
+
|
|
+/* quoted scsi_debug.c/resp_report_luns() */
|
|
+/*
+ * Emulate REPORT LUNS for the virtual channel/target of pending_req.
+ *
+ * Builds the response (8 byte header followed by one struct scsi_lun per
+ * translation-table entry matching v_chn/v_tgt), copies as much of it as
+ * fits into the request's scatterlist and fills in rslt, resid and the
+ * sense buffer.  Any failure is reported to the caller as CHECK CONDITION
+ * with ILLEGAL REQUEST / INVALID_FIELD_IN_CDB.  data is not used.
+ *
+ * The buffer is sized from the table before v2p_lock is taken for the
+ * fill pass, so the table may have grown in between; in that case the
+ * whole count/allocate/fill sequence is redone, at most
+ * VSCSI_REPORT_LUNS_RETRY times.
+ */
+static void __report_luns(pending_req_t *pending_req, void *data)
+{
+	struct vscsibk_info *info = pending_req->info;
+	unsigned int channel = pending_req->v_chn;
+	unsigned int target = pending_req->v_tgt;
+	unsigned int nr_seg = pending_req->nr_segments;
+	unsigned char *cmd = (unsigned char *)pending_req->cmnd;
+
+	unsigned char *buff = NULL;
+	/* Must be wide enough for 8 * nr_luns + 8; a char would wrap. */
+	unsigned int alloc_len;
+	unsigned int alloc_luns;
+	unsigned int req_bufflen;
+	unsigned int actual_len;
+	unsigned int lun_list_len;
+	unsigned int retry_cnt = 0;
+	unsigned int lun_cnt;
+	unsigned int i;
+	int select_report = (int)cmd[2];
+	int lun, upper, err;
+
+	struct v2p_entry *entry;
+	struct list_head *head = &(info->v2p_entry_lists);
+	unsigned long flags;
+
+	struct scsi_lun *one_lun;
+
+	/* Allocation length, big endian in CDB bytes 6..9. */
+	req_bufflen = ((unsigned int)cmd[6] << 24) |
+		      ((unsigned int)cmd[7] << 16) |
+		      ((unsigned int)cmd[8] << 8) |
+		       (unsigned int)cmd[9];
+	if ((req_bufflen < 4) || (select_report != 0))
+		goto fail;
+
+retry:
+	/* Recomputed on every attempt so that a retry sees the grown table. */
+	alloc_luns = __nr_luns_under_host(info);
+	alloc_len  = sizeof(struct scsi_lun) * alloc_luns
+				+ VSCSI_REPORT_LUNS_HEADER;
+
+	if ((buff = kmalloc(alloc_len, GFP_KERNEL)) == NULL) {
+		printk(KERN_ERR "scsiback:%s kmalloc err\n", __FUNCTION__);
+		goto fail;
+	}
+
+	memset(buff, 0, alloc_len);
+
+	lun_cnt = 0;
+	one_lun = (struct scsi_lun *) &buff[VSCSI_REPORT_LUNS_HEADER];
+	spin_lock_irqsave(&info->v2p_lock, flags);
+	list_for_each_entry(entry, head, l) {
+		if ((entry->v.chn == channel) &&
+		    (entry->v.tgt == target)) {
+
+			/* check overflow */
+			if (lun_cnt >= alloc_luns) {
+				spin_unlock_irqrestore(&info->v2p_lock,
+							flags);
+
+				if (retry_cnt < VSCSI_REPORT_LUNS_RETRY) {
+					retry_cnt++;
+					kfree(buff);
+					buff = NULL;
+					goto retry;
+				}
+
+				goto fail;
+			}
+
+			lun = entry->v.lun;
+			upper = (lun >> 8) & 0x3f;
+			if (upper)
+				one_lun[lun_cnt].scsi_lun[0] = upper;
+			one_lun[lun_cnt].scsi_lun[1] = lun & 0xff;
+			lun_cnt++;
+		}
+	}
+
+	spin_unlock_irqrestore(&info->v2p_lock, flags);
+
+	/* LUN list length: 4 bytes, big endian, at the start of the header. */
+	lun_list_len = sizeof(struct scsi_lun) * lun_cnt;
+	buff[0] = (lun_list_len >> 24) & 0xff;
+	buff[1] = (lun_list_len >> 16) & 0xff;
+	buff[2] = (lun_list_len >> 8) & 0xff;
+	buff[3] = lun_list_len & 0xff;
+
+	actual_len = lun_list_len + VSCSI_REPORT_LUNS_HEADER;
+
+	/* From here on req_bufflen is the capacity of the scatterlist. */
+	req_bufflen = 0;
+	for (i = 0; i < nr_seg; i++)
+		req_bufflen += pending_req->sgl[i].length;
+
+	err = __copy_to_sg(pending_req->sgl, nr_seg, buff,
+				min(req_bufflen, actual_len));
+	if (err)
+		goto fail;
+
+	memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
+	pending_req->rslt = 0x00;
+	pending_req->resid = req_bufflen - min(req_bufflen, actual_len);
+
+	kfree(buff);
+	return;
+
+fail:
+	scsiback_mk_sense_buffer(pending_req->sense_buffer, ILLEGAL_REQUEST,
+		INVALID_FIELD_IN_CDB, 0);
+	pending_req->rslt  = check_condition_result;
+	pending_req->resid = 0;
+	kfree(buff);	/* kfree(NULL) is a no-op */
+	return;
+}
|
|
+
|
|
+
|
|
+
|
|
+/*
+ * Run the pre-emulation hook registered for the request's opcode, if the
+ * opcode is flagged VSCSIIF_NEED_EMULATE_REQBUF and a hook is set.
+ *
+ * Returns
+ *   0: no native driver call is needed, the caller should reply at once;
+ *   1: the command has to be passed to the native driver (either without
+ *      emulation or after the request buffer has been modified).
+ */
+int __pre_do_emulation(pending_req_t *pending_req, void *data)
+{
+	const uint8_t opcode = pending_req->cmnd[0];
+	void (*hook)(pending_req_t *, void *) = pre_function[opcode];
+
+	if (hook != NULL && (bitmap[opcode] & VSCSIIF_NEED_EMULATE_REQBUF))
+		hook(pending_req, data);
+
+	return (bitmap[opcode] & VSCSIIF_NEED_CMD_EXEC) ? 1 : 0;
+}
|
|
+
|
|
+/*
+ * Run the post-emulation hook registered for the request's opcode, if the
+ * opcode is flagged VSCSIIF_NEED_EMULATE_RSPBUF and a hook is set.  The
+ * hook is called with a NULL data argument.
+ */
+void scsiback_rsp_emulation(pending_req_t *pending_req)
+{
+	const uint8_t opcode = pending_req->cmnd[0];
+	void (*hook)(pending_req_t *, void *) = post_function[opcode];
+
+	if (hook == NULL || !(bitmap[opcode] & VSCSIIF_NEED_EMULATE_RSPBUF))
+		return;
+
+	hook(pending_req, NULL);
+}
|
|
+
|
|
+
|
|
+/*
+ * Dispatch a request: run pre-emulation, then either hand the command to
+ * scsiback_cmd_exec() or, when no native execution is required, flush the
+ * request's mapped area and send the response prepared by the emulation.
+ */
+void scsiback_req_emulation_or_cmdexec(pending_req_t *pending_req)
+{
+	if (!__pre_do_emulation(pending_req, NULL)) {
+		/* Fully emulated: reply without touching the device. */
+		scsiback_fast_flush_area(pending_req);
+		scsiback_do_resp_with_sense(pending_req->sense_buffer,
+		  pending_req->rslt, pending_req->resid, pending_req);
+		return;
+	}
+
+	scsiback_cmd_exec(pending_req);
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ Following are not customizable functions.
|
|
+*/
|
|
+/*
+ * Set up the per-opcode emulation tables (bitmap, pre_function and
+ * post_function): every opcode is rejected by default, the opcodes listed
+ * in passthrough[] go straight to the native driver, and REPORT LUNS is
+ * emulated by __report_luns().
+ */
+void scsiback_emulation_init(void)
+{
+	/*
+	 * Commands that do not require emulation.
+	 * (See scsi/scsi.h for definition of SCSI op_code.)
+	 * Entries that are commented out are not passed through and keep
+	 * the default handling.
+	 */
+	static const unsigned char passthrough[] = {
+		TEST_UNIT_READY,		/*0x00*/ /* sd,st */
+		REZERO_UNIT,			/*0x01*/ /* st */
+		REQUEST_SENSE,			/*0x03*/
+		FORMAT_UNIT,			/*0x04*/
+		READ_BLOCK_LIMITS,		/*0x05*/ /* st */
+		/* REASSIGN_BLOCKS,		     0x07 */
+		INITIALIZE_ELEMENT_STATUS,	/*0x07*/ /* ch */
+		READ_6,				/*0x08*/ /* sd,st */
+		WRITE_6,			/*0x0a*/ /* sd,st */
+		SEEK_6,				/*0x0b*/
+		/* READ_REVERSE,		     0x0f */
+		WRITE_FILEMARKS,		/*0x10*/ /* st */
+		SPACE,				/*0x11*/ /* st */
+		INQUIRY,			/*0x12*/
+		/* RECOVER_BUFFERED_DATA,	     0x14 */
+		MODE_SELECT,			/*0x15*/ /* st */
+		/* RESERVE,			     0x16 */
+		/* RELEASE,			     0x17 */
+		/* COPY,			     0x18 */
+		ERASE,				/*0x19*/ /* st */
+		MODE_SENSE,			/*0x1a*/ /* st */
+		START_STOP,			/*0x1b*/ /* sd,st */
+		RECEIVE_DIAGNOSTIC,		/*0x1c*/
+		SEND_DIAGNOSTIC,		/*0x1d*/
+		ALLOW_MEDIUM_REMOVAL,		/*0x1e*/
+
+		/* SET_WINDOW,			     0x24 */
+		READ_CAPACITY,			/*0x25*/ /* sd */
+		READ_10,			/*0x28*/ /* sd */
+		WRITE_10,			/*0x2a*/ /* sd */
+		SEEK_10,			/*0x2b*/ /* st */
+		POSITION_TO_ELEMENT,		/*0x2b*/ /* ch */
+		/* WRITE_VERIFY,		     0x2e */
+		/* VERIFY,			     0x2f */
+		/* SEARCH_HIGH,			     0x30 */
+		/* SEARCH_EQUAL,		     0x31 */
+		/* SEARCH_LOW,			     0x32 */
+		SET_LIMITS,			/*0x33*/
+		PRE_FETCH,			/*0x34*/ /* st! */
+		READ_POSITION,			/*0x34*/ /* st */
+		SYNCHRONIZE_CACHE,		/*0x35*/ /* sd */
+		LOCK_UNLOCK_CACHE,		/*0x36*/
+		READ_DEFECT_DATA,		/*0x37*/
+		MEDIUM_SCAN,			/*0x38*/
+		/* COMPARE,			     0x39 */
+		/* COPY_VERIFY,			     0x3a */
+		WRITE_BUFFER,			/*0x3b*/
+		READ_BUFFER,			/*0x3c*/ /* osst */
+		/* UPDATE_BLOCK,		     0x3d */
+		/* READ_LONG,			     0x3e */
+		/* WRITE_LONG,			     0x3f */
+		/* CHANGE_DEFINITION,		     0x40 */
+		/* WRITE_SAME,			     0x41 */
+		READ_TOC,			/*0x43*/ /* sr */
+		LOG_SELECT,			/*0x4c*/
+		LOG_SENSE,			/*0x4d*/ /* st! */
+		/* MODE_SELECT_10,		     0x55 */
+		/* RESERVE_10,			     0x56 */
+		/* RELEASE_10,			     0x57 */
+		MODE_SENSE_10,			/*0x5a*/ /* scsi_lib */
+		/* PERSISTENT_RESERVE_IN,	     0x5e */
+		/* PERSISTENT_RESERVE_OUT,	     0x5f */
+		/* REPORT_LUNS			     0xa0: full emulation, see below */
+#ifdef MAINTENANCE_IN
+		MAINTENANCE_IN,			/*0xa3*/ /* IFT alua */
+		MAINTENANCE_OUT,		/*0xa4*/ /* IFT alua */
+#endif
+		MOVE_MEDIUM,			/*0xa5*/ /* ch */
+		EXCHANGE_MEDIUM,		/*0xa6*/ /* ch */
+		/* READ_12,			     0xa8 */
+		/* WRITE_12,			     0xaa */
+		/* WRITE_VERIFY_12,		     0xae */
+		/* SEARCH_HIGH_12,		     0xb0 */
+		/* SEARCH_EQUAL_12,		     0xb1 */
+		/* SEARCH_LOW_12,		     0xb2 */
+		READ_ELEMENT_STATUS,		/*0xb8*/ /* ch */
+		SEND_VOLUME_TAG,		/*0xb6*/ /* ch */
+		/* WRITE_LONG_2,		     0xea */
+		READ_16,			/*0x88*/ /* sd >2TB */
+		WRITE_16,			/*0x8a*/ /* sd >2TB */
+		VERIFY_16,			/*0x8f*/
+		SERVICE_ACTION_IN,		/*0x9e*/ /* sd >2TB */
+		/* st: QFA_REQUEST_BLOCK, QFA_SEEK_BLOCK might be needed ? */
+	};
+	unsigned int op;
+	unsigned int n;
+
+	/*
+	 * Default state: the pre-emulation hook rejects the command as
+	 * unsupported, there is no post-emulation hook, and the native
+	 * driver is not called (VSCSIIF_NEED_CMD_EXEC stays clear).
+	 */
+	for (op = 0; op < VSCSI_MAX_SCSI_OP_CODE; op++) {
+		bitmap[op] = (VSCSIIF_NEED_EMULATE_REQBUF |
+			      VSCSIIF_NEED_EMULATE_RSPBUF);
+		pre_function[op] = resp_not_supported_cmd;
+		post_function[op] = NULL;
+	}
+
+	/* Register appropriate opcodes in passthrough[] as you need. */
+	for (n = 0; n < sizeof(passthrough) / sizeof(passthrough[0]); n++) {
+		NO_EMULATE(passthrough[n]);
+	}
+
+	/*
+	  Following commands require emulation.
+	*/
+	pre_function[REPORT_LUNS] = __report_luns;
+	bitmap[REPORT_LUNS] = (VSCSIIF_NEED_EMULATE_REQBUF |
+			       VSCSIIF_NEED_EMULATE_RSPBUF);
+
+	return;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsiback/interface.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,182 @@
|
|
+/*
|
|
+ * interface management.
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * Based on the blkback driver code.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_host.h>
|
|
+#include <scsi/scsi_device.h>
|
|
+#include "common.h"
|
|
+
|
|
+#include <xen/evtchn.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/delay.h>
|
|
+
|
|
+
|
|
+/* Slab cache from which every struct vscsibk_info is allocated. */
|
|
+static kmem_cache_t *scsiback_cachep;
|
|
+
|
|
+/*
|
|
+ * Allocate and initialise the per-frontend state for domain @domid.
|
|
+ * Returns the new object, or ERR_PTR(-ENOMEM) when the cache is exhausted.
|
|
+ */
|
|
+struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
|
|
+{
|
|
+	struct vscsibk_info *vinfo = kmem_cache_alloc(scsiback_cachep, GFP_KERNEL);
|
|
+
|
|
+	if (vinfo == NULL)
|
|
+		return ERR_PTR(-ENOMEM);
|
|
+
|
|
+	/* Start from an all-zero object, then set up the non-trivial members. */
|
|
+	memset(vinfo, 0, sizeof(*vinfo));
|
|
+
|
|
+	init_waitqueue_head(&vinfo->waiting_to_free);
|
|
+	init_waitqueue_head(&vinfo->wq);
|
|
+	atomic_set(&vinfo->nr_unreplied_reqs, 0);
|
|
+	spin_lock_init(&vinfo->ring_lock);
|
|
+	vinfo->domid = domid;
|
|
+
|
|
+	return vinfo;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Map the frontend's ring grant @ring_ref at info->ring_area->addr.
|
|
+ * On success the reference and grant handle are recorded in @info and 0 is
|
|
+ * returned; any grant-table status other than GNTST_okay yields -EINVAL.
|
|
+ */
|
|
+static int map_frontend_page(struct vscsibk_info *info,
|
|
+			     unsigned long ring_ref)
|
|
+{
|
|
+	struct gnttab_map_grant_ref map_op;
|
|
+
|
|
+	gnttab_set_map_op(&map_op, (unsigned long)info->ring_area->addr,
|
|
+			  GNTMAP_host_map, ring_ref, info->domid);
|
|
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
|
|
+
|
|
+	if (map_op.status != GNTST_okay) {
|
|
+		printk(KERN_ERR "scsiback: Grant table operation failure %d!\n", (int)map_op.status);
|
|
+		return -EINVAL;
|
|
+	}
|
|
+
|
|
+	info->shmem_ref = ring_ref;
|
|
+	info->shmem_handle = map_op.handle;
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Undo map_frontend_page(): unmap the ring page using the handle saved in
|
|
+ * info->shmem_handle.
|
|
+ */
|
|
+static void unmap_frontend_page(struct vscsibk_info *info)
|
|
+{
|
|
+	struct gnttab_unmap_grant_ref unmap_op;
|
|
+	int rc;
|
|
+
|
|
+	gnttab_set_unmap_op(&unmap_op, (unsigned long)info->ring_area->addr,
|
|
+			    GNTMAP_host_map, info->shmem_handle);
|
|
+
|
|
+	/* A failing unmap hypercall is treated as fatal. */
|
|
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1);
|
|
+	BUG_ON(rc);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Connect to a frontend: reserve a VM area, map the shared ring page granted
|
|
+ * as @ring_ref, initialise the back ring and bind @evtchn to scsiback_intr().
|
|
+ *
|
|
+ * Returns 0 on success, -1 if an irq is already bound, or a negative errno.
|
|
+ * On failure everything acquired here is released again.
|
|
+ */
|
|
+int scsiback_init_sring(struct vscsibk_info *info,
|
|
+		unsigned long ring_ref, unsigned int evtchn)
|
|
+{
|
|
+	struct vscsiif_sring *sring;
|
|
+	int err;
|
|
+
|
|
+	if (info->irq) {
|
|
+		printk(KERN_ERR "scsiback: Already connected through?\n");
|
|
+		return -1;
|
|
+	}
|
|
+
|
|
+	info->ring_area = alloc_vm_area(PAGE_SIZE);
|
|
+	/* Test the allocation result; 'info' itself was already dereferenced. */
|
|
+	if (!info->ring_area)
|
|
+		return -ENOMEM;
|
|
+
|
|
+	err = map_frontend_page(info, ring_ref);
|
|
+	if (err)
|
|
+		goto free_vm;
|
|
+
|
|
+	sring = (struct vscsiif_sring *) info->ring_area->addr;
|
|
+	BACK_RING_INIT(&info->ring, sring, PAGE_SIZE);
|
|
+
|
|
+	err = bind_interdomain_evtchn_to_irqhandler(
|
|
+			info->domid, evtchn,
|
|
+			scsiback_intr, 0, "vscsiif-backend", info);
|
|
+
|
|
+	if (err < 0)
|
|
+		goto unmap_page;
|
|
+
|
|
+	info->irq = err;
|
|
+
|
|
+	return 0;
|
|
+
|
|
+unmap_page:
|
|
+	unmap_frontend_page(info);
|
|
+	/*
|
|
+	 * scsiback_disconnect() uses ring.sring to decide whether the ring
|
|
+	 * is still mapped; clear it so the page is not unmapped and freed
|
|
+	 * a second time.
|
|
+	 */
|
|
+	info->ring.sring = NULL;
|
|
+free_vm:
|
|
+	free_vm_area(info->ring_area);
|
|
+	info->ring_area = NULL;
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Tear down a connection: stop the worker thread (if any), wait until
|
|
+ * nr_unreplied_reqs has dropped to zero, then release the irq binding and
|
|
+ * the ring mapping. Each step is skipped when its resource is not set.
|
|
+ */
|
|
+void scsiback_disconnect(struct vscsibk_info *info)
|
|
+{
|
|
+ if (info->kthread) {
|
|
+ kthread_stop(info->kthread);
|
|
+ info->kthread = NULL;
|
|
+ }
|
|
+
|
|
+ /* Sleep on waiting_to_free until the unreplied-request counter is zero. */
|
|
+ wait_event(info->waiting_to_free,
|
|
+ atomic_read(&info->nr_unreplied_reqs) == 0);
|
|
+
|
|
+ if (info->irq) {
|
|
+ unbind_from_irqhandler(info->irq, info);
|
|
+ info->irq = 0;
|
|
+ }
|
|
+
|
|
+ /* ring.sring doubles as the "ring page is mapped" flag. */
|
|
+ if (info->ring.sring) {
|
|
+ unmap_frontend_page(info);
|
|
+ free_vm_area(info->ring_area);
|
|
+ info->ring.sring = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Return @info to the slab cache it was allocated from. */
|
|
+void scsiback_free(struct vscsibk_info *info)
|
|
+{
|
|
+ kmem_cache_free(scsiback_cachep, info);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Create the slab cache used for struct vscsibk_info objects.
|
|
+ * Returns 0 on success or -ENOMEM if the cache cannot be created.
|
|
+ */
|
|
+int __init scsiback_interface_init(void)
|
|
+{
|
|
+	scsiback_cachep = kmem_cache_create("vscsiif_cache",
|
|
+			sizeof(struct vscsibk_info), 0, 0, NULL, NULL);
|
|
+	if (scsiback_cachep != NULL)
|
|
+		return 0;
|
|
+
|
|
+	printk(KERN_ERR "scsiback: can't init scsi cache\n");
|
|
+	return -ENOMEM;
|
|
+}
|
|
+
|
|
+/* Destroy the cache created by scsiback_interface_init(). */
|
|
+void scsiback_interface_exit(void)
|
|
+{
|
|
+ kmem_cache_destroy(scsiback_cachep);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsiback/scsiback.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,747 @@
|
|
+/*
|
|
+ * Xen SCSI backend driver
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * Based on the blkback driver code.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/delay.h>
|
|
+#include <xen/balloon.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_cmnd.h>
|
|
+#include <scsi/scsi_host.h>
|
|
+#include <scsi/scsi_device.h>
|
|
+#include <scsi/scsi_dbg.h>
|
|
+#include <scsi/scsi_eh.h>
|
|
+
|
|
+#include "common.h"
|
|
+
|
|
+
|
|
+/* Free list of pending_req_t objects, guarded by pending_free_lock. */
|
|
+struct list_head pending_free;
|
|
+DEFINE_SPINLOCK(pending_free_lock);
|
|
+/* Woken by free_req() when the free list goes from empty to non-empty. */
|
|
+DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
|
|
+
|
|
+/* Size of the pending_reqs pool; settable through the "reqs" parameter. */
|
|
+int vscsiif_reqs = VSCSIIF_BACK_MAX_PENDING_REQS;
|
|
+module_param_named(reqs, vscsiif_reqs, int, 0);
|
|
+MODULE_PARM_DESC(reqs, "Number of scsiback requests to allocate");
|
|
+
|
|
+/* When non-zero, scsiback_cmd_done() logs the status of failed commands. */
|
|
+static unsigned int log_print_stat = 0;
|
|
+/* Parameter type must match the variable's type (unsigned int -> uint). */
|
|
+module_param(log_print_stat, uint, 0644);
|
|
+
|
|
+/* Marker stored in pending_grant_handles[] for slots with no live mapping. */
|
|
+#define SCSIBACK_INVALID_HANDLE (~0)
|
|
+
|
|
+/* Request pool (vscsiif_reqs entries), allocated in scsiback_init(). */
|
|
+static pending_req_t *pending_reqs;
|
|
+/* Per-request page and grant-handle slots, VSCSIIF_SG_TABLESIZE per request. */
|
|
+static struct page **pending_pages;
|
|
+static grant_handle_t *pending_grant_handles;
|
|
+
|
|
+/*
|
|
+ * Index of segment @seg of @req within the flat per-segment arrays; every
|
|
+ * request owns VSCSIIF_SG_TABLESIZE consecutive slots.
|
|
+ */
|
|
+static int vaddr_pagenr(pending_req_t *req, int seg)
|
|
+{
|
|
+	return seg + VSCSIIF_SG_TABLESIZE * (req - pending_reqs);
|
|
+}
|
|
+
|
|
+/* Kernel virtual address of the page backing segment @seg of @req. */
|
|
+static unsigned long vaddr(pending_req_t *req, int seg)
|
|
+{
|
|
+	struct page *pg = pending_pages[vaddr_pagenr(req, seg)];
|
|
+
|
|
+	return (unsigned long)pfn_to_kaddr(page_to_pfn(pg));
|
|
+}
|
|
+
|
|
+/* Lvalue for the grant handle slot of segment _seg of request _req. */
|
|
+#define pending_handle(_req, _seg) \
|
|
+ (pending_grant_handles[vaddr_pagenr(_req, _seg)])
|
|
+
|
|
+
|
|
+/*
|
|
+ * Unmap every grant still mapped for @req and free its scatterlist.
|
|
+ * Segments whose handle is SCSIBACK_INVALID_HANDLE are skipped. Does nothing
|
|
+ * for a request without segments.
|
|
+ */
|
|
+void scsiback_fast_flush_area(pending_req_t *req)
|
|
+{
|
|
+	struct gnttab_unmap_grant_ref unmap[VSCSIIF_SG_TABLESIZE];
|
|
+	unsigned int i, invcount = 0;
|
|
+	grant_handle_t handle;
|
|
+	int err;
|
|
+
|
|
+	if (!req->nr_segments)
|
|
+		return;
|
|
+
|
|
+	for (i = 0; i < req->nr_segments; i++) {
|
|
+		handle = pending_handle(req, i);
|
|
+		if (handle == SCSIBACK_INVALID_HANDLE)
|
|
+			continue;
|
|
+		/*
|
|
+		 * Pack the operations densely: the hypercall below consumes
|
|
+		 * the first 'invcount' entries, so index by invcount, not i,
|
|
+		 * or skipped segments would leave uninitialised holes.
|
|
+		 */
|
|
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
|
|
+				    GNTMAP_host_map, handle);
|
|
+		pending_handle(req, i) = SCSIBACK_INVALID_HANDLE;
|
|
+		invcount++;
|
|
+	}
|
|
+
|
|
+	if (invcount) {
|
|
+		err = HYPERVISOR_grant_table_op(
|
|
+			GNTTABOP_unmap_grant_ref, unmap, invcount);
|
|
+		BUG_ON(err);
|
|
+	}
|
|
+
|
|
+	kfree(req->sgl);
|
|
+	/* Do not leave a dangling pointer behind in the recycled request. */
|
|
+	req->sgl = NULL;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Take one request off the free list.
|
|
+ * Returns NULL when the list is empty. @info is not used.
|
|
+ */
|
|
+static pending_req_t * alloc_req(struct vscsibk_info *info)
|
|
+{
|
|
+	unsigned long irq_flags;
|
|
+	pending_req_t *found;
|
|
+
|
|
+	spin_lock_irqsave(&pending_free_lock, irq_flags);
|
|
+	if (list_empty(&pending_free)) {
|
|
+		found = NULL;
|
|
+	} else {
|
|
+		found = list_entry(pending_free.next, pending_req_t, free_list);
|
|
+		list_del(&found->free_list);
|
|
+	}
|
|
+	spin_unlock_irqrestore(&pending_free_lock, irq_flags);
|
|
+
|
|
+	return found;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Put @req back on the free list and, if the list was empty beforehand,
|
|
+ * wake anyone sleeping on pending_free_wq.
|
|
+ */
|
|
+static void free_req(pending_req_t *req)
|
|
+{
|
|
+	unsigned long irq_flags;
|
|
+	int pool_was_drained;
|
|
+
|
|
+	spin_lock_irqsave(&pending_free_lock, irq_flags);
|
|
+	pool_was_drained = list_empty(&pending_free);
|
|
+	list_add(&req->free_list, &pending_free);
|
|
+	spin_unlock_irqrestore(&pending_free_lock, irq_flags);
|
|
+
|
|
+	if (!pool_was_drained)
|
|
+		return;
|
|
+
|
|
+	wake_up(&pending_free_wq);
|
|
+}
|
|
+
|
|
+
|
|
+/* Flag that requests are waiting and wake the sleeper on info->wq. */
|
|
+static void scsiback_notify_work(struct vscsibk_info *info)
|
|
+{
|
|
+ info->waiting_reqs = 1;
|
|
+ wake_up(&info->wq);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Post the response for @pending_req on the shared ring and recycle the
|
|
+ * request.
|
|
+ *
|
|
+ * @sense_buffer: sense data to report, or NULL for none. It is read as a
|
|
+ *                buffer of VSCSIIF_SENSE_BUFFERSIZE bytes.
|
|
+ * @result:       SCSI result word stored in the response.
|
|
+ * @resid:        residual byte count stored in the response.
|
|
+ *
|
|
+ * @pending_req is handed to free_req() before returning, so the caller must
|
|
+ * not touch it afterwards.
|
|
+ */
|
|
+void scsiback_do_resp_with_sense(char *sense_buffer, int32_t result,
|
|
+			uint32_t resid, pending_req_t *pending_req)
|
|
+{
|
|
+	vscsiif_response_t *ring_res;
|
|
+	struct vscsibk_info *info = pending_req->info;
|
|
+	int notify;
|
|
+	int more_to_do = 1;
|
|
+	struct scsi_sense_hdr sshdr;
|
|
+	unsigned long flags;
|
|
+
|
|
+	DPRINTK("%s\n",__FUNCTION__);
|
|
+
|
|
+	spin_lock_irqsave(&info->ring_lock, flags);
|
|
+
|
|
+	ring_res = RING_GET_RESPONSE(&info->ring, info->ring.rsp_prod_pvt);
|
|
+	info->ring.rsp_prod_pvt++;
|
|
+
|
|
+	ring_res->rslt = result;
|
|
+	ring_res->rqid = pending_req->rqid;
|
|
+	ring_res->residual_len = resid;
|
|
+
|
|
+	/*
|
|
+	 * Default to "no sense data" so that a stale length left in the ring
|
|
+	 * slot is never reported when the buffer is absent or unparsable.
|
|
+	 */
|
|
+	ring_res->sense_len = 0;
|
|
+
|
|
+	/* sizeof(sense_buffer) would be the pointer size, not the buffer's. */
|
|
+	if (sense_buffer != NULL &&
|
|
+	    scsi_normalize_sense((const u8 *)sense_buffer,
|
|
+				 VSCSIIF_SENSE_BUFFERSIZE, &sshdr)) {
|
|
+		/*
|
|
+		 * Byte 7 holds the additional length. Read it as unsigned:
|
|
+		 * with a signed char a value >= 0x80 would make 'len'
|
|
+		 * negative, slip past the clamp and reach memcpy() as a
|
|
+		 * huge size.
|
|
+		 */
|
|
+		int len = 8 + (unsigned char)sense_buffer[7];
|
|
+
|
|
+		if (len > VSCSIIF_SENSE_BUFFERSIZE)
|
|
+			len = VSCSIIF_SENSE_BUFFERSIZE;
|
|
+
|
|
+		memcpy(ring_res->sense_buffer, sense_buffer, len);
|
|
+		ring_res->sense_len = len;
|
|
+	}
|
|
+
|
|
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&info->ring, notify);
|
|
+	if (info->ring.rsp_prod_pvt == info->ring.req_cons) {
|
|
+		RING_FINAL_CHECK_FOR_REQUESTS(&info->ring, more_to_do);
|
|
+	} else if (RING_HAS_UNCONSUMED_REQUESTS(&info->ring)) {
|
|
+		more_to_do = 1;
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&info->ring_lock, flags);
|
|
+
|
|
+	if (more_to_do)
|
|
+		scsiback_notify_work(info);
|
|
+
|
|
+	if (notify)
|
|
+		notify_remote_via_irq(info->irq);
|
|
+
|
|
+	free_req(pending_req);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Log the device address, the decoded result bytes and the first CDB byte of
|
|
+ * a failed command; also dump the sense data when the status byte signals
|
|
+ * CHECK_CONDITION.
|
|
+ */
|
|
+static void scsiback_print_status(char *sense_buffer, int errors,
|
|
+					pending_req_t *pending_req)
|
|
+{
|
|
+	struct scsi_device *sdev = pending_req->sdev;
|
|
+
|
|
+	/*
|
|
+	 * One printk for the whole line: a second KERN_ERR in the middle of
|
|
+	 * a line would be emitted as literal text.
|
|
+	 */
|
|
+	printk(KERN_ERR "scsiback: %d:%d:%d:%d "
|
|
+	       "status = 0x%02x, message = 0x%02x, host = 0x%02x, driver = 0x%02x\n",
|
|
+	       sdev->host->host_no, sdev->channel, sdev->id, sdev->lun,
|
|
+	       status_byte(errors), msg_byte(errors),
|
|
+	       host_byte(errors), driver_byte(errors));
|
|
+
|
|
+	printk(KERN_ERR "scsiback: cmnd[0]=0x%02X\n",
|
|
+			pending_req->cmnd[0]);
|
|
+
|
|
+	if (CHECK_CONDITION & status_byte(errors))
|
|
+		__scsi_print_sense("scsiback", sense_buffer, SCSI_SENSE_BUFFERSIZE);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Completion callback passed to blk_execute_rq_nowait(): report the outcome
|
|
+ * of @req to the frontend and release everything tied to the command.
|
|
+ */
|
|
+static void scsiback_cmd_done(struct request *req, int uptodate)
|
|
+{
|
|
+	pending_req_t *pending_req = req->end_io_data;
|
|
+	/*
|
|
+	 * Keep our own copy: the response call below returns pending_req to
|
|
+	 * the free pool, after which its fields may be overwritten.
|
|
+	 */
|
|
+	struct vscsibk_info *info = pending_req->info;
|
|
+	unsigned char *sense_buffer;
|
|
+	unsigned int resid;
|
|
+	int errors;
|
|
+
|
|
+	sense_buffer = req->sense;
|
|
+	resid        = req->data_len;
|
|
+	errors       = req->errors;
|
|
+
|
|
+	if (errors != 0) {
|
|
+		if (log_print_stat)
|
|
+			scsiback_print_status(sense_buffer, errors, pending_req);
|
|
+	}
|
|
+
|
|
+	/* Host mode passes responses through without emulation. */
|
|
+	if (info->feature != VSCSI_TYPE_HOST)
|
|
+		scsiback_rsp_emulation(pending_req);
|
|
+
|
|
+	scsiback_fast_flush_area(pending_req);
|
|
+	scsiback_do_resp_with_sense(sense_buffer, errors, resid, pending_req);
|
|
+	/* pending_req must not be dereferenced from here on. */
|
|
+	scsiback_put(info);
|
|
+
|
|
+	__blk_put_request(req->q, req);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Map the data pages granted by the frontend for @ring_req and build
|
|
+ * pending_req->sgl from the segment descriptors.
|
|
+ *
|
|
+ * On success pending_req->request_bufflen holds the summed segment length
|
|
+ * and 0 is returned. Returns -ENOMEM if the scatterlist cannot be allocated,
|
|
+ * or if a grant fails to map or a segment does not fit inside one page (the
|
|
+ * mappings are then flushed again).
|
|
+ */
|
|
+static int scsiback_gnttab_data_map(vscsiif_request_t *ring_req,
|
|
+ pending_req_t *pending_req)
|
|
+{
|
|
+ u32 flags;
|
|
+ int write;
|
|
+ int i, err = 0;
|
|
+ unsigned int data_len = 0;
|
|
+ struct gnttab_map_grant_ref map[VSCSIIF_SG_TABLESIZE];
|
|
+ struct vscsibk_info *info = pending_req->info;
|
|
+
|
|
+ int data_dir = (int)pending_req->sc_data_direction;
|
|
+ unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
|
|
+
|
|
+ write = (data_dir == DMA_TO_DEVICE);
|
|
+
|
|
+ if (nr_segments) {
|
|
+ /* sgl is freed again in scsiback_fast_flush_area() */
|
|
+ pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments,
|
|
+ GFP_KERNEL);
|
|
+ if (!pending_req->sgl) {
|
|
+ printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ /* Build one map operation per segment. */
|
|
+ for (i = 0; i < nr_segments; i++) {
|
|
+ flags = GNTMAP_host_map;
|
|
+ /* Data headed to the device is only read here: map read-only. */
|
|
+ if (write)
|
|
+ flags |= GNTMAP_readonly;
|
|
+ gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
|
|
+ ring_req->seg[i].gref,
|
|
+ info->domid);
|
|
+ }
|
|
+
|
|
+ err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments);
|
|
+ BUG_ON(err);
|
|
+
|
|
+ for (i = 0; i < nr_segments; i++) {
|
|
+ struct page *pg;
|
|
+
|
|
+ /* Retry maps with GNTST_eagain */
|
|
+ if (unlikely(map[i].status == GNTST_eagain))
|
|
+ gnttab_check_GNTST_eagain_while(GNTTABOP_map_grant_ref, &map[i]);
|
|
+ if (unlikely(map[i].status != GNTST_okay)) {
|
|
+ printk(KERN_ERR "scsiback: invalid buffer -- could not remap it\n");
|
|
+ map[i].handle = SCSIBACK_INVALID_HANDLE;
|
|
+ err |= 1;
|
|
+ }
|
|
+
|
|
+ /* Record the handle (valid or not) so the flush knows what to unmap. */
|
|
+ pending_handle(pending_req, i) = map[i].handle;
|
|
+
|
|
+ /* After any failure only the handles of the remaining segments are recorded. */
|
|
+ if (err)
|
|
+ continue;
|
|
+
|
|
+ pg = pending_pages[vaddr_pagenr(pending_req, i)];
|
|
+
|
|
+ set_phys_to_machine(page_to_pfn(pg),
|
|
+ FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
|
|
+
|
|
+ pending_req->sgl[i].page = pg;
|
|
+ pending_req->sgl[i].offset = ring_req->seg[i].offset;
|
|
+ pending_req->sgl[i].length = ring_req->seg[i].length;
|
|
+ data_len += pending_req->sgl[i].length;
|
|
+
|
|
+ /* Validate the private copies made above, not the ring contents. */
|
|
+ barrier();
|
|
+ if (pending_req->sgl[i].offset >= PAGE_SIZE ||
|
|
+ pending_req->sgl[i].length > PAGE_SIZE ||
|
|
+ pending_req->sgl[i].offset + pending_req->sgl[i].length > PAGE_SIZE)
|
|
+ err |= 1;
|
|
+
|
|
+ }
|
|
+
|
|
+ if (err)
|
|
+ goto fail_flush;
|
|
+ }
|
|
+
|
|
+ pending_req->request_bufflen = data_len;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+fail_flush:
|
|
+ scsiback_fast_flush_area(pending_req);
|
|
+ return -ENOMEM;
|
|
+}
|
|
+
|
|
+/* quoted scsi_lib.c/scsi_merge_bio */
|
|
+/*
|
|
+ * Attach @bio to @rq: it becomes the request's first bio, or is appended
|
|
+ * behind the current tail when the queue's back_merge_fn accepts it.
|
|
+ * Returns 0 on success, -EINVAL when the merge is refused.
|
|
+ */
|
|
+static int scsiback_merge_bio(struct request *rq, struct bio *bio)
|
|
+{
|
|
+ struct request_queue *q = rq->q;
|
|
+
|
|
+ bio->bi_flags &= ~(1 << BIO_SEG_VALID);
|
|
+ if (rq_data_dir(rq) == WRITE)
|
|
+ bio->bi_rw |= (1 << BIO_RW);
|
|
+
|
|
+ blk_queue_bounce(q, &bio);
|
|
+
|
|
+ if (!rq->bio)
|
|
+ blk_rq_bio_prep(q, rq, bio);
|
|
+ else if (!q->back_merge_fn(q, rq, bio))
|
|
+ return -EINVAL;
|
|
+ else {
|
|
+ /* Chain the bio at the tail and account for its sectors. */
|
|
+ rq->biotail->bi_next = bio;
|
|
+ rq->biotail = bio;
|
|
+ rq->hard_nr_sectors += bio_sectors(bio);
|
|
+ rq->nr_sectors = rq->hard_nr_sectors;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/* quoted scsi_lib.c/scsi_bi_endio */
|
|
+/*
|
|
+ * bi_end_io handler for bios built by request_map_sg().
|
|
+ * Returns 1 while the bio still has bytes outstanding; once bi_size is zero
|
|
+ * the bio reference is dropped and 0 is returned.
|
|
+ */
|
|
+static int scsiback_bi_endio(struct bio *bio, unsigned int bytes_done, int error)
|
|
+{
|
|
+	int unfinished = (bio->bi_size != 0);
|
|
+
|
|
+	if (!unfinished)
|
|
+		bio_put(bio);
|
|
+
|
|
+	return unfinished;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/* quoted scsi_lib.c/scsi_req_map_sg . */
|
|
+/*
|
|
+ * Turn the first @count entries of pending_req->sgl into bios and attach
|
|
+ * them to @rq. On success rq->data_len is set to the summed segment length
|
|
+ * and 0 is returned. On failure the bios already attached to @rq are ended
|
|
+ * and a negative errno is returned.
|
|
+ */
|
|
+static int request_map_sg(struct request *rq, pending_req_t *pending_req, unsigned int count)
|
|
+{
|
|
+ struct request_queue *q = rq->q;
|
|
+ int nr_pages;
|
|
+ unsigned int nsegs = count;
|
|
+
|
|
+ unsigned int data_len = 0, len, bytes, off;
|
|
+ struct page *page;
|
|
+ struct bio *bio = NULL;
|
|
+ int i, err, nr_vecs = 0;
|
|
+
|
|
+ for (i = 0; i < nsegs; i++) {
|
|
+ page = pending_req->sgl[i].page;
|
|
+ off = (unsigned int)pending_req->sgl[i].offset;
|
|
+ len = (unsigned int)pending_req->sgl[i].length;
|
|
+ data_len += len;
|
|
+
|
|
+ /* Number of pages this segment touches. */
|
|
+ nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+ while (len > 0) {
|
|
+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
|
|
+
|
|
+ if (!bio) {
|
|
+ nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
|
|
+ nr_pages -= nr_vecs;
|
|
+ bio = bio_alloc(GFP_KERNEL, nr_vecs);
|
|
+ if (!bio) {
|
|
+ err = -ENOMEM;
|
|
+ goto free_bios;
|
|
+ }
|
|
+ bio->bi_end_io = scsiback_bi_endio;
|
|
+ }
|
|
+
|
|
+ if (bio_add_pc_page(q, bio, page, bytes, off) !=
|
|
+ bytes) {
|
|
+ bio_put(bio);
|
|
+ err = -EINVAL;
|
|
+ goto free_bios;
|
|
+ }
|
|
+
|
|
+ /* The bio is full: hand it over to the request. */
|
|
+ if (bio->bi_vcnt >= nr_vecs) {
|
|
+ err = scsiback_merge_bio(rq, bio);
|
|
+ if (err) {
|
|
+ bio_endio(bio, bio->bi_size, 0);
|
|
+ goto free_bios;
|
|
+ }
|
|
+ bio = NULL;
|
|
+ }
|
|
+
|
|
+ page++;
|
|
+ len -= bytes;
|
|
+ off = 0;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ rq->buffer = rq->data = NULL;
|
|
+ rq->data_len = data_len;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+free_bios:
|
|
+ while ((bio = rq->bio) != NULL) {
|
|
+ rq->bio = bio->bi_next;
|
|
+ /*
|
|
+ * call endio instead of bio_put in case it was bounced
|
|
+ */
|
|
+ bio_endio(bio, bio->bi_size, 0);
|
|
+ }
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Submit the SCSI command held in @pending_req to the physical device as a
|
|
+ * REQ_BLOCK_PC request. Completion is reported through scsiback_cmd_done().
|
|
+ * If the data buffers cannot be attached to the request, the command is
|
|
+ * failed back to the frontend with DRIVER_ERROR.
|
|
+ */
|
|
+void scsiback_cmd_exec(pending_req_t *pending_req)
|
|
+{
|
|
+	int cmd_len  = (int)pending_req->cmd_len;
|
|
+	int data_dir = (int)pending_req->sc_data_direction;
|
|
+	unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
|
|
+	unsigned int timeout;
|
|
+	struct request *rq;
|
|
+	int write;
|
|
+
|
|
+	DPRINTK("%s\n",__FUNCTION__);
|
|
+
|
|
+	/*
|
|
+	 * Honour the timeout requested by the frontend, so that the backend
|
|
+	 * does not time out earlier than the frontend does.
|
|
+	 */
|
|
+	if (pending_req->timeout_per_command)
|
|
+		timeout = pending_req->timeout_per_command * HZ;
|
|
+	else
|
|
+		timeout = VSCSIIF_TIMEOUT;
|
|
+
|
|
+	write = (data_dir == DMA_TO_DEVICE);
|
|
+	rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
|
|
+
|
|
+	rq->flags  |= REQ_BLOCK_PC;
|
|
+	rq->cmd_len = cmd_len;
|
|
+	memcpy(rq->cmd, pending_req->cmnd, cmd_len);
|
|
+
|
|
+	memset(pending_req->sense_buffer, 0, VSCSIIF_SENSE_BUFFERSIZE);
|
|
+	rq->sense       = pending_req->sense_buffer;
|
|
+	rq->sense_len = 0;
|
|
+
|
|
+	/* not allowed to retry in backend. */
|
|
+	rq->retries   = 0;
|
|
+	rq->timeout   = timeout;
|
|
+	rq->end_io_data = pending_req;
|
|
+
|
|
+	if (nr_segments && request_map_sg(rq, pending_req, nr_segments)) {
|
|
+		printk(KERN_ERR "scsiback: SG Request Map Error\n");
|
|
+		/*
|
|
+		 * Do not drop the command silently: release the block
|
|
+		 * request, unmap the granted pages and answer the frontend,
|
|
+		 * which also returns pending_req to the free pool.
|
|
+		 */
|
|
+		blk_put_request(rq);
|
|
+		scsiback_fast_flush_area(pending_req);
|
|
+		scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
|
|
+					    0, pending_req);
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	scsiback_get(pending_req->info);
|
|
+	blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Handle a reset request: reset the translated device and send the result
|
|
+ * of scsi_reset_provider() back as the response.
|
|
+ */
|
|
+static void scsiback_device_reset_exec(pending_req_t *pending_req)
|
|
+{
|
|
+	struct scsi_device *sdev = pending_req->sdev;
|
|
+	struct vscsibk_info *info = pending_req->info;
|
|
+	int rc;
|
|
+
|
|
+	scsiback_get(info);
|
|
+
|
|
+	rc = scsi_reset_provider(sdev, SCSI_TRY_RESET_DEVICE);
|
|
+	scsiback_do_resp_with_sense(NULL, rc, 0, pending_req);
|
|
+
|
|
+	scsiback_put(info);
|
|
+}
|
|
+
|
|
+
|
|
+/* Event-channel interrupt handler: just kick the worker for this interface. */
|
|
+irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
|
|
+{
|
|
+	struct vscsibk_info *info = dev_id;
|
|
+
|
|
+	scsiback_notify_work(info);
|
|
+
|
|
+	return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Copy one request from the shared ring into @pending_req, validating each
|
|
+ * field after it has been copied, translate the virtual device address and
|
|
+ * map the data buffers.
|
|
+ *
|
|
+ * Returns 0 on success, -ENODEV when no device is registered for the
|
|
+ * virtual address, or -EINVAL for an out-of-range field or a buffer that
|
|
+ * cannot be mapped.
|
|
+ */
|
|
+static int prepare_pending_reqs(struct vscsibk_info *info,
|
|
+ vscsiif_request_t *ring_req, pending_req_t *pending_req)
|
|
+{
|
|
+ struct scsi_device *sdev;
|
|
+ struct ids_tuple vir;
|
|
+ int err = -EINVAL;
|
|
+
|
|
+ DPRINTK("%s\n",__FUNCTION__);
|
|
+
|
|
+ pending_req->rqid = ring_req->rqid;
|
|
+ pending_req->act = ring_req->act;
|
|
+
|
|
+ pending_req->info = info;
|
|
+
|
|
+ pending_req->v_chn = vir.chn = ring_req->channel;
|
|
+ pending_req->v_tgt = vir.tgt = ring_req->id;
|
|
+ vir.lun = ring_req->lun;
|
|
+
|
|
+ rmb();
|
|
+ sdev = scsiback_do_translation(info, &vir);
|
|
+ if (!sdev) {
|
|
+ pending_req->sdev = NULL;
|
|
+ DPRINTK("scsiback: doesn't exist.\n");
|
|
+ err = -ENODEV;
|
|
+ goto invalid_value;
|
|
+ }
|
|
+ pending_req->sdev = sdev;
|
|
+
|
|
+ /* request range check from frontend */
|
|
+ pending_req->sc_data_direction = ring_req->sc_data_direction;
|
|
+ /* Each check below tests the private copy, taken before the barrier. */
|
|
+ barrier();
|
|
+ if ((pending_req->sc_data_direction != DMA_BIDIRECTIONAL) &&
|
|
+ (pending_req->sc_data_direction != DMA_TO_DEVICE) &&
|
|
+ (pending_req->sc_data_direction != DMA_FROM_DEVICE) &&
|
|
+ (pending_req->sc_data_direction != DMA_NONE)) {
|
|
+ DPRINTK("scsiback: invalid parameter data_dir = %d\n",
|
|
+ pending_req->sc_data_direction);
|
|
+ err = -EINVAL;
|
|
+ goto invalid_value;
|
|
+ }
|
|
+
|
|
+ pending_req->nr_segments = ring_req->nr_segments;
|
|
+ barrier();
|
|
+ if (pending_req->nr_segments > VSCSIIF_SG_TABLESIZE) {
|
|
+ DPRINTK("scsiback: invalid parameter nr_seg = %d\n",
|
|
+ pending_req->nr_segments);
|
|
+ err = -EINVAL;
|
|
+ goto invalid_value;
|
|
+ }
|
|
+
|
|
+ pending_req->cmd_len = ring_req->cmd_len;
|
|
+ barrier();
|
|
+ if (pending_req->cmd_len > VSCSIIF_MAX_COMMAND_SIZE) {
|
|
+ DPRINTK("scsiback: invalid parameter cmd_len = %d\n",
|
|
+ pending_req->cmd_len);
|
|
+ err = -EINVAL;
|
|
+ goto invalid_value;
|
|
+ }
|
|
+ memcpy(pending_req->cmnd, ring_req->cmnd, pending_req->cmd_len);
|
|
+
|
|
+ pending_req->timeout_per_command = ring_req->timeout_per_command;
|
|
+
|
|
+ /* Any mapping failure is reported to the caller as -EINVAL. */
|
|
+ if(scsiback_gnttab_data_map(ring_req, pending_req)) {
|
|
+ DPRINTK("scsiback: invalid buffer\n");
|
|
+ err = -EINVAL;
|
|
+ goto invalid_value;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+invalid_value:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Consume requests from the ring until the producer index sampled on entry
|
|
+ * is reached, the consumer index would overflow, or no free pending_req is
|
|
+ * left. Each request is either dispatched or answered with an error.
|
|
+ * Returns non-zero when more work remains.
|
|
+ */
|
|
+static int scsiback_do_cmd_fn(struct vscsibk_info *info)
|
|
+{
|
|
+ struct vscsiif_back_ring *ring = &info->ring;
|
|
+ vscsiif_request_t *ring_req;
|
|
+
|
|
+ pending_req_t *pending_req;
|
|
+ RING_IDX rc, rp;
|
|
+ int err, more_to_do = 0;
|
|
+
|
|
+ DPRINTK("%s\n",__FUNCTION__);
|
|
+
|
|
+ rc = ring->req_cons;
|
|
+ rp = ring->sring->req_prod;
|
|
+ /* Read the producer index before reading the requests themselves. */
|
|
+ rmb();
|
|
+
|
|
+ while ((rc != rp)) {
|
|
+ if (RING_REQUEST_CONS_OVERFLOW(ring, rc))
|
|
+ break;
|
|
+ pending_req = alloc_req(info);
|
|
+ if (NULL == pending_req) {
|
|
+ /* Pool exhausted: come back once a request has been freed. */
|
|
+ more_to_do = 1;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ ring_req = RING_GET_REQUEST(ring, rc);
|
|
+ ring->req_cons = ++rc;
|
|
+
|
|
+ err = prepare_pending_reqs(info, ring_req,
|
|
+ pending_req);
|
|
+ if (err == -EINVAL) {
|
|
+ scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
|
|
+ 0, pending_req);
|
|
+ continue;
|
|
+ } else if (err == -ENODEV) {
|
|
+ scsiback_do_resp_with_sense(NULL, (DID_NO_CONNECT << 16),
|
|
+ 0, pending_req);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (pending_req->act == VSCSIIF_ACT_SCSI_CDB) {
|
|
+
|
|
+ /* Host mode passes commands through without emulation. */
|
|
+ if (info->feature == VSCSI_TYPE_HOST)
|
|
+ scsiback_cmd_exec(pending_req);
|
|
+ else
|
|
+ scsiback_req_emulation_or_cmdexec(pending_req);
|
|
+
|
|
+ } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) {
|
|
+ scsiback_device_reset_exec(pending_req);
|
|
+ } else {
|
|
+ printk(KERN_ERR "scsiback: invalid parameter for request\n");
|
|
+ scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24),
|
|
+ 0, pending_req);
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (RING_HAS_UNCONSUMED_REQUESTS(ring))
|
|
+ more_to_do = 1;
|
|
+
|
|
+ /* Yield point for this unbounded loop. */
|
|
+ cond_resched();
|
|
+
|
|
+ return more_to_do;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Worker thread body for one interface (@data is its struct vscsibk_info).
|
|
+ * Sleeps until work is flagged and a free pending_req exists, then processes
|
|
+ * the ring; loops until kthread_should_stop() and returns 0.
|
|
+ */
|
|
+int scsiback_schedule(void *data)
|
|
+{
|
|
+ struct vscsibk_info *info = (struct vscsibk_info *)data;
|
|
+
|
|
+ DPRINTK("%s\n",__FUNCTION__);
|
|
+
|
|
+ while (!kthread_should_stop()) {
|
|
+ wait_event_interruptible(
|
|
+ info->wq,
|
|
+ info->waiting_reqs || kthread_should_stop());
|
|
+ wait_event_interruptible(
|
|
+ pending_free_wq,
|
|
+ !list_empty(&pending_free) || kthread_should_stop());
|
|
+
|
|
+ /* Clear the flag before scanning the ring. */
|
|
+ info->waiting_reqs = 0;
|
|
+ smp_mb();
|
|
+
|
|
+ if (scsiback_do_cmd_fn(info))
|
|
+ info->waiting_reqs = 1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Module initialisation: allocate the request pool with its page and grant
|
|
+ * handle arrays, create the interface cache, populate the free list and
|
|
+ * register with xenbus.
|
|
+ *
|
|
+ * Returns 0 on success, -ENODEV when not running on Xen, and -ENOMEM for
|
|
+ * every other failure.
|
|
+ */
|
|
+static int __init scsiback_init(void)
|
|
+{
|
|
+	int i, mmap_pages;
|
|
+
|
|
+	if (!is_running_on_xen())
|
|
+		return -ENODEV;
|
|
+
|
|
+	mmap_pages = vscsiif_reqs * VSCSIIF_SG_TABLESIZE;
|
|
+
|
|
+	pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
|
|
+					vscsiif_reqs, GFP_KERNEL);
|
|
+	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
|
|
+					mmap_pages, GFP_KERNEL);
|
|
+	pending_pages         = alloc_empty_pages_and_pagevec(mmap_pages);
|
|
+
|
|
+	if (!pending_reqs || !pending_grant_handles || !pending_pages)
|
|
+		goto out_of_memory;
|
|
+
|
|
+	for (i = 0; i < mmap_pages; i++)
|
|
+		pending_grant_handles[i] = SCSIBACK_INVALID_HANDLE;
|
|
+
|
|
+	/*
|
|
+	 * If the cache could not be created there is nothing to destroy, so
|
|
+	 * skip scsiback_interface_exit() on this path.
|
|
+	 */
|
|
+	if (scsiback_interface_init() < 0)
|
|
+		goto out_of_memory;
|
|
+
|
|
+	/*
|
|
+	 * Clear the whole array: sizeof(pending_reqs) is only the size of
|
|
+	 * the pointer and would leave the requests uninitialised.
|
|
+	 */
|
|
+	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * vscsiif_reqs);
|
|
+	INIT_LIST_HEAD(&pending_free);
|
|
+
|
|
+	for (i = 0; i < vscsiif_reqs; i++)
|
|
+		list_add_tail(&pending_reqs[i].free_list, &pending_free);
|
|
+
|
|
+	if (scsiback_xenbus_init())
|
|
+		goto out_of_xenbus;
|
|
+
|
|
+	scsiback_emulation_init();
|
|
+
|
|
+	return 0;
|
|
+
|
|
+out_of_xenbus:
|
|
+	scsiback_xenbus_unregister();
|
|
+	scsiback_interface_exit();
|
|
+out_of_memory:
|
|
+	kfree(pending_reqs);
|
|
+	kfree(pending_grant_handles);
|
|
+	free_empty_pages_and_pagevec(pending_pages, mmap_pages);
|
|
+	printk(KERN_ERR "scsiback: %s: out of memory\n", __FUNCTION__);
|
|
+	return -ENOMEM;
|
|
+}
|
|
+
|
|
+#if 0
|
|
+/* Compiled out: would undo scsiback_init() (xenbus, cache, request pool). */
|
|
+static void __exit scsiback_exit(void)
|
|
+{
|
|
+ scsiback_xenbus_unregister();
|
|
+ scsiback_interface_exit();
|
|
+ kfree(pending_reqs);
|
|
+ kfree(pending_grant_handles);
|
|
+ free_empty_pages_and_pagevec(pending_pages, (vscsiif_reqs * VSCSIIF_SG_TABLESIZE));
|
|
+
|
|
+}
|
|
+#endif
|
|
+
|
|
+module_init(scsiback_init);
|
|
+
|
|
+#if 0
|
|
+module_exit(scsiback_exit);
|
|
+#endif
|
|
+
|
|
+MODULE_DESCRIPTION("Xen SCSI backend driver");
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsiback/translate.c 2008-07-21 11:00:33.000000000 +0200
|
|
@@ -0,0 +1,168 @@
|
|
+/*
|
|
+ * Xen SCSI backend driver
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/list.h>
|
|
+#include <linux/gfp.h>
|
|
+
|
|
+#include "common.h"
|
|
+
|
|
+/*
|
|
+ Initialize the translation entry list
|
|
+*/
|
|
+void scsiback_init_translation_table(struct vscsibk_info *info)
|
|
+{
|
|
+	/* Lock first, then the (initially empty) list it protects. */
|
|
+	spin_lock_init(&info->v2p_lock);
|
|
+	INIT_LIST_HEAD(&info->v2p_entry_lists);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ Add a new translation entry
|
|
+*/
|
|
+int scsiback_add_translation_entry(struct vscsibk_info *info,
|
|
+			struct scsi_device *sdev, struct ids_tuple *v)
|
|
+{
|
|
+	struct list_head *table = &info->v2p_entry_lists;
|
|
+	struct v2p_entry *cur, *fresh;
|
|
+	unsigned long irq_flags;
|
|
+	int rc = 0;
|
|
+
|
|
+	spin_lock_irqsave(&info->v2p_lock, irq_flags);
|
|
+
|
|
+	/* Refuse a virtual ID that already has a mapping. */
|
|
+	list_for_each_entry(cur, table, l) {
|
|
+		if (cur->v.chn != v->chn ||
|
|
+		    cur->v.tgt != v->tgt ||
|
|
+		    cur->v.lun != v->lun)
|
|
+			continue;
|
|
+
|
|
+		printk(KERN_WARNING "scsiback: Virtual ID is already used. "
|
|
+		       "Assignment was not performed.\n");
|
|
+		rc = -EEXIST;
|
|
+		goto unlock;
|
|
+	}
|
|
+
|
|
+	/* Atomic allocation: the spinlock is held with interrupts off. */
|
|
+	fresh = kmalloc(sizeof(struct v2p_entry), GFP_ATOMIC);
|
|
+	if (fresh == NULL) {
|
|
+		printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__);
|
|
+		rc = -ENOMEM;
|
|
+		goto unlock;
|
|
+	}
|
|
+
|
|
+	fresh->sdev = sdev;
|
|
+	fresh->v = *v;
|
|
+	list_add_tail(&fresh->l, table);
|
|
+
|
|
+unlock:
|
|
+	spin_unlock_irqrestore(&info->v2p_lock, irq_flags);
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ Delete the translation entry specified
|
|
+*/
|
|
+int scsiback_del_translation_entry(struct vscsibk_info *info,
|
|
+			struct ids_tuple *v)
|
|
+{
|
|
+	struct list_head *table = &info->v2p_entry_lists;
|
|
+	struct v2p_entry *cur;
|
|
+	unsigned long irq_flags;
|
|
+	int rc = 1;	/* 1 means no matching entry was found */
|
|
+
|
|
+	spin_lock_irqsave(&info->v2p_lock, irq_flags);
|
|
+
|
|
+	list_for_each_entry(cur, table, l) {
|
|
+		if (cur->v.chn != v->chn ||
|
|
+		    cur->v.tgt != v->tgt ||
|
|
+		    cur->v.lun != v->lun)
|
|
+			continue;
|
|
+
|
|
+		/* Match: drop the device reference and discard the entry. */
|
|
+		scsi_device_put(cur->sdev);
|
|
+		list_del(&cur->l);
|
|
+		kfree(cur);
|
|
+		rc = 0;
|
|
+		break;
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&info->v2p_lock, irq_flags);
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ Perform virtual to physical translation
|
|
+*/
|
|
+struct scsi_device *scsiback_do_translation(struct vscsibk_info *info,
|
|
+			struct ids_tuple *v)
|
|
+{
|
|
+	struct list_head *table = &info->v2p_entry_lists;
|
|
+	struct scsi_device *hit = NULL;
|
|
+	struct v2p_entry *cur;
|
|
+	unsigned long irq_flags;
|
|
+
|
|
+	spin_lock_irqsave(&info->v2p_lock, irq_flags);
|
|
+
|
|
+	/* First entry whose channel, target and lun all match wins. */
|
|
+	list_for_each_entry(cur, table, l) {
|
|
+		if (cur->v.chn == v->chn &&
|
|
+		    cur->v.tgt == v->tgt &&
|
|
+		    cur->v.lun == v->lun) {
|
|
+			hit = cur->sdev;
|
|
+			break;
|
|
+		}
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&info->v2p_lock, irq_flags);
|
|
+
|
|
+	/* NULL when no entry matched. */
|
|
+	return hit;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ Release all translation entries
|
|
+*/
|
|
+void scsiback_release_translation_entry(struct vscsibk_info *info)
|
|
+{
|
|
+	struct list_head *table = &info->v2p_entry_lists;
|
|
+	struct v2p_entry *cur;
|
|
+	unsigned long irq_flags;
|
|
+
|
|
+	spin_lock_irqsave(&info->v2p_lock, irq_flags);
|
|
+
|
|
+	/* Pop entries off the head until the table is empty. */
|
|
+	while (!list_empty(table)) {
|
|
+		cur = list_entry(table->next, struct v2p_entry, l);
|
|
+		scsi_device_put(cur->sdev);
|
|
+		list_del(&cur->l);
|
|
+		kfree(cur);
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&info->v2p_lock, irq_flags);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsiback/xenbus.c 2009-03-18 10:39:32.000000000 +0100
|
|
@@ -0,0 +1,378 @@
|
|
+/*
|
|
+ * Xen SCSI backend driver
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * Based on the blkback driver code.
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <stdarg.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_host.h>
|
|
+#include <scsi/scsi_device.h>
|
|
+
|
|
+#include "common.h"
|
|
+
|
|
+struct backend_info /* per-device backend state, stored in dev->dev.driver_data by scsiback_probe() */
|
|
+{
|
|
+ struct xenbus_device *dev; /* xenbus device this state belongs to */
|
|
+ struct vscsibk_info *info; /* interface from vscsibk_info_alloc(); NULL if not allocated or already freed */
|
|
+};
|
|
+
|
|
+/* Format the worker-thread name "vscsi.<domid>.<id>" into buf (at least TASK_COMM_LEN bytes); returns 0, or -EINVAL if nodename cannot be parsed. */
|
|
+static int __vscsiif_name(struct backend_info *be, char *buf)
|
|
+{
|
|
+ struct xenbus_device *dev = be->dev;
|
|
+ unsigned int domid, id;
|
|
+
|
|
+ if (sscanf(dev->nodename, "backend/vscsi/%u/%u", &domid, &id) != 2)
|
|
+ return -EINVAL; /* otherwise id would be used uninitialised below */
|
|
+ snprintf(buf, TASK_COMM_LEN, "vscsi.%u.%u", be->info->domid, id);
|
|
+ return 0;
|
|
+}
|
|
+/* Connect to the frontend: read its ring-ref/event-channel, set up the shared ring, then start the per-interface worker thread. Returns 0 or a negative errno. */
|
|
+static int scsiback_map(struct backend_info *be)
|
|
+{
|
|
+ struct xenbus_device *dev = be->dev;
|
|
+ unsigned long ring_ref;
|
|
+ unsigned int evtchn;
|
|
+ int err;
|
|
+ char name[TASK_COMM_LEN];
|
|
+ /* ring-ref and event-channel are read from the frontend's xenstore directory (dev->otherend) */
|
|
+ err = xenbus_gather(XBT_NIL, dev->otherend,
|
|
+ "ring-ref", "%lu", &ring_ref,
|
|
+ "event-channel", "%u", &evtchn, NULL);
|
|
+ if (err) {
|
|
+ xenbus_dev_fatal(dev, err, "reading %s ring", dev->otherend);
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ err = scsiback_init_sring(be->info, ring_ref, evtchn);
|
|
+ if (err)
|
|
+ return err;
|
|
+ /* NOTE(review): the error returns below leave the ring from scsiback_init_sring() in place - confirm a later teardown covers it */
|
|
+ err = __vscsiif_name(be, name);
|
|
+ if (err) {
|
|
+ xenbus_dev_error(dev, err, "get scsiback dev name");
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ be->info->kthread = kthread_run(scsiback_schedule, be->info, name);
|
|
+ if (IS_ERR(be->info->kthread)) {
|
|
+ err = PTR_ERR(be->info->kthread);
|
|
+ be->info->kthread = NULL; /* do not leave an ERR_PTR value in the kthread field */
|
|
+ xenbus_dev_error(be->dev, err, "start vscsiif");
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Look up the physical device phy->hst:chn:tgt:lun; returns the scsi_device, or NULL (after logging) if the host or the device is missing. */
|
|
+struct scsi_device *scsiback_get_scsi_device(struct ids_tuple *phy)
|
|
+{
|
|
+ struct Scsi_Host *shost;
|
|
+ struct scsi_device *sdev = NULL;
|
|
+
|
|
+ shost = scsi_host_lookup(phy->hst);
|
|
+ if (IS_ERR(shost)) { /* NOTE(review): assumes scsi_host_lookup() signals failure with ERR_PTR, not NULL - confirm for the target kernel */
|
|
+ printk(KERN_ERR "scsiback: host%d doesn't exist.\n",
|
|
+ phy->hst);
|
|
+ return NULL;
|
|
+ }
|
|
+ sdev = scsi_device_lookup(shost, phy->chn, phy->tgt, phy->lun);
|
|
+ if (!sdev) {
|
|
+ printk(KERN_ERR "scsiback: %d:%d:%d:%d doesn't exist.\n",
|
|
+ phy->hst, phy->chn, phy->tgt, phy->lun);
|
|
+ scsi_host_put(shost);
|
|
+ return NULL;
|
|
+ }
|
|
+ /* the host reference is only needed for the lookup; callers in this file release sdev with scsi_device_put() */
|
|
+ scsi_host_put(shost);
|
|
+ return (sdev);
|
|
+}
|
|
+
|
|
+#define VSCSIBACK_OP_ADD_OR_DEL_LUN 1
|
|
+#define VSCSIBACK_OP_UPDATEDEV_STATE 2
|
|
+
|
|
+/* Walk <nodename>/vscsi-devs in xenstore and, depending on op, add/remove LUN translation entries or advance each LUN's state node. */
|
|
+static void scsiback_do_lun_hotplug(struct backend_info *be, int op)
|
|
+{
|
|
+ int i, err = 0;
|
|
+ struct ids_tuple phy, vir;
|
|
+ unsigned int device_state; /* unsigned to match the "%u" scan below */
|
|
+ char str[64], state_str[64];
|
|
+ char **dir;
|
|
+ unsigned int dir_n = 0;
|
|
+ struct xenbus_device *dev = be->dev;
|
|
+ struct scsi_device *sdev;
|
|
+
|
|
+ dir = xenbus_directory(XBT_NIL, dev->nodename, "vscsi-devs", &dir_n);
|
|
+ if (IS_ERR(dir))
|
|
+ return;
|
|
+
|
|
+ for (i = 0; i < dir_n; i++) {
|
|
+
|
|
+ /* read status; any read failure skips the entry so device_state is never used uninitialised */
|
|
+ snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
|
|
+ err = xenbus_scanf(XBT_NIL, dev->nodename, state_str, "%u",
|
|
+ &device_state);
|
|
+ if (err < 0)
|
|
+ continue;
|
|
+
|
|
+ /* physical SCSI device; on any read failure the LUN is marked Closed instead of using an unset phy */
|
|
+ snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", dir[i]);
|
|
+ err = xenbus_scanf(XBT_NIL, dev->nodename, str,
|
|
+ "%u:%u:%u:%u", &phy.hst, &phy.chn, &phy.tgt, &phy.lun);
|
|
+ if (err < 0) {
|
|
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateClosed);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ /* virtual SCSI device; same failure handling as for p-dev */
|
|
+ snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
|
|
+ err = xenbus_scanf(XBT_NIL, dev->nodename, str,
|
|
+ "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
|
|
+ if (err < 0) {
|
|
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateClosed);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ switch (op) {
|
|
+ case VSCSIBACK_OP_ADD_OR_DEL_LUN:
|
|
+ if (device_state == XenbusStateInitialising) {
|
|
+ sdev = scsiback_get_scsi_device(&phy);
|
|
+ if (!sdev)
|
|
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateClosed);
|
|
+ else {
|
|
+ err = scsiback_add_translation_entry(be->info, sdev, &vir);
|
|
+ if (!err) {
|
|
+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateInitialised)) {
|
|
+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
|
|
+ scsiback_del_translation_entry(be->info, &vir);
|
|
+ }
|
|
+ } else {
|
|
+ scsi_device_put(sdev); /* entry was not added: drop the lookup reference */
|
|
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateClosed);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (device_state == XenbusStateClosing) {
|
|
+ if (!scsiback_del_translation_entry(be->info, &vir)) {
|
|
+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateClosed))
|
|
+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case VSCSIBACK_OP_UPDATEDEV_STATE:
|
|
+ if (device_state == XenbusStateInitialised) {
|
|
+ /* modify vscsi-devs/dev-x/state */
|
|
+ if (xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateConnected)) {
|
|
+ printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str);
|
|
+ scsiback_del_translation_entry(be->info, &vir);
|
|
+ xenbus_printf(XBT_NIL, dev->nodename, state_str,
|
|
+ "%d", XenbusStateClosed);
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+ /* Further operations can be handled here when needed. */
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ kfree(dir);
|
|
+ return ;
|
|
+}
|
|
+
|
|
+/* xenbus otherend_changed callback: reacts to each state the frontend moves to. */
|
|
+static void scsiback_frontend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state frontend_state)
|
|
+{
|
|
+ struct backend_info *be = dev->dev.driver_data;
|
|
+ int err;
|
|
+
|
|
+ switch (frontend_state) {
|
|
+ case XenbusStateInitialising:
|
|
+ break;
|
|
+ case XenbusStateInitialised:
|
|
+ err = scsiback_map(be); /* map the ring and start the worker thread */
|
|
+ if (err)
|
|
+ break; /* mapping failed: no LUN scan, no state switch */
|
|
+
|
|
+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN);
|
|
+ xenbus_switch_state(dev, XenbusStateConnected);
|
|
+
|
|
+ break;
|
|
+ case XenbusStateConnected:
|
|
+
|
|
+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_UPDATEDEV_STATE);
|
|
+
|
|
+ if (dev->state == XenbusStateConnected) /* backend is already Connected: nothing to switch */
|
|
+ break;
|
|
+
|
|
+ xenbus_switch_state(dev, XenbusStateConnected);
|
|
+
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ scsiback_disconnect(be->info);
|
|
+ xenbus_switch_state(dev, XenbusStateClosing);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosed:
|
|
+ xenbus_switch_state(dev, XenbusStateClosed);
|
|
+ if (xenbus_dev_is_online(dev))
|
|
+ break;
|
|
+ /* fall through if not online */
|
|
+ case XenbusStateUnknown:
|
|
+ device_unregister(&dev->dev);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateReconfiguring:
|
|
+ scsiback_do_lun_hotplug(be, VSCSIBACK_OP_ADD_OR_DEL_LUN); /* re-scan vscsi-devs for LUNs to add or remove */
|
|
+
|
|
+ xenbus_switch_state(dev, XenbusStateReconfigured);
|
|
+
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
|
|
+ frontend_state);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Tear down a backend device: disconnect, drop all translation entries, free the interface and the backend_info. Also called from scsiback_probe()'s failure path. */
|
|
+static int scsiback_remove(struct xenbus_device *dev)
|
|
+{
|
|
+ struct backend_info *be = dev->dev.driver_data;
|
|
+ /* NOTE(review): be is dereferenced without a NULL check - confirm driver_data is always set when this runs */
|
|
+ if (be->info) {
|
|
+ scsiback_disconnect(be->info);
|
|
+ scsiback_release_translation_entry(be->info);
|
|
+ scsiback_free(be->info);
|
|
+ be->info = NULL;
|
|
+ }
|
|
+
|
|
+ kfree(be);
|
|
+ dev->dev.driver_data = NULL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Probe a new vscsi backend device: allocate backend state and the interface, read "feature-host", then switch to InitWait. Returns 0 or a negative errno. */
|
|
+static int scsiback_probe(struct xenbus_device *dev,
|
|
+ const struct xenbus_device_id *id)
|
|
+{
|
|
+ int err;
|
|
+ unsigned int val = 0;
|
|
+
|
|
+ struct backend_info *be = kzalloc(sizeof(*be),
|
|
+ GFP_KERNEL);
|
|
+
|
|
+ DPRINTK("%p %d\n", dev, dev->otherend_id);
|
|
+
|
|
+ if (!be) {
|
|
+ xenbus_dev_fatal(dev, -ENOMEM,
|
|
+ "allocating backend structure");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+ be->dev = dev;
|
|
+ dev->dev.driver_data = be; /* from here on scsiback_remove() can clean up */
|
|
+
|
|
+ be->info = vscsibk_info_alloc(dev->otherend_id);
|
|
+ if (IS_ERR(be->info)) {
|
|
+ err = PTR_ERR(be->info);
|
|
+ be->info = NULL; /* scsiback_remove() tests be->info for NULL */
|
|
+ xenbus_dev_fatal(dev, err, "creating scsihost interface");
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ be->info->dev = dev;
|
|
+ be->info->irq = 0;
|
|
+ be->info->feature = 0; /*default not HOSTMODE.*/
|
|
+
|
|
+ scsiback_init_translation_table(be->info);
|
|
+ /* "feature-host" is optional; val is unsigned, so it is scanned with "%u" */
|
|
+ err = xenbus_scanf(XBT_NIL, dev->nodename,
|
|
+ "feature-host", "%u", &val);
|
|
+ if (XENBUS_EXIST_ERR(err))
|
|
+ val = 0;
|
|
+
|
|
+ if (val)
|
|
+ be->info->feature = VSCSI_TYPE_HOST;
|
|
+
|
|
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+ if (err)
|
|
+ goto fail;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+
|
|
+fail:
|
|
+ printk(KERN_WARNING "scsiback: %s failed\n",__FUNCTION__);
|
|
+ scsiback_remove(dev); /* frees be and be->info, clears driver_data */
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/* xenbus device types handled by this backend. */
|
|
+static struct xenbus_device_id scsiback_ids[] = {
|
|
+ { "vscsi" },
|
|
+ { "" } /* empty name - presumably the list terminator */
|
|
+};
|
|
+/* xenbus driver description for the "vscsi" backend; registered by scsiback_xenbus_init(). */
|
|
+static struct xenbus_driver scsiback = {
|
|
+ .name = "vscsi",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = scsiback_ids,
|
|
+ .probe = scsiback_probe,
|
|
+ .remove = scsiback_remove,
|
|
+ .otherend_changed = scsiback_frontend_changed
|
|
+};
|
|
+/* Register the vscsi backend driver with xenbus; returns xenbus_register_backend()'s result. */
|
|
+int scsiback_xenbus_init(void)
|
|
+{
|
|
+ return xenbus_register_backend(&scsiback);
|
|
+}
|
|
+/* Unregister the vscsi backend driver from xenbus. */
|
|
+void scsiback_xenbus_unregister(void)
|
|
+{
|
|
+ xenbus_unregister_driver(&scsiback);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsifront/Makefile 2008-07-21 11:00:33.000000000 +0200
|
|
@@ -0,0 +1,3 @@
|
|
+
|
|
+obj-$(CONFIG_XEN_SCSI_FRONTEND) := xenscsi.o
|
|
+xenscsi-objs := scsifront.o xenbus.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsifront/common.h 2010-02-24 13:13:46.000000000 +0100
|
|
@@ -0,0 +1,135 @@
|
|
+/*
|
|
+ * Xen SCSI frontend driver
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __XEN_DRIVERS_SCSIFRONT_H__
|
|
+#define __XEN_DRIVERS_SCSIFRONT_H__
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/device.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/wait.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/blkdev.h>
|
|
+#include <scsi/scsi_cmnd.h>
|
|
+#include <scsi/scsi_device.h>
|
|
+#include <scsi/scsi.h>
|
|
+#include <scsi/scsi_host.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/io/ring.h>
|
|
+#include <xen/interface/io/vscsiif.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+#include <xen/interface/io/protocols.h>
|
|
+#include <asm/delay.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <asm/maddr.h>
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+#define GRANT_INVALID_REF 0
|
|
+#define VSCSI_IN_ABORT 1
|
|
+#define VSCSI_IN_RESET 2
|
|
+
|
|
+/* tuning point*/
|
|
+#define VSCSIIF_DEFAULT_CMD_PER_LUN 10
|
|
+#define VSCSIIF_MAX_TARGET 64
|
|
+#define VSCSIIF_MAX_LUN 255
|
|
+
|
|
+#define VSCSIIF_RING_SIZE __CONST_RING_SIZE(vscsiif, PAGE_SIZE)
|
|
+#define VSCSIIF_MAX_REQS VSCSIIF_RING_SIZE
|
|
+/* Frontend-private bookkeeping for one ring request; slots live in vscsifrnt_info.shadow[] and are indexed by request id (rqid). */
|
|
+struct vscsifrnt_shadow {
|
|
+ uint16_t next_free; /* free-list link: index of the next free slot */
|
|
+
|
|
+ /* command between backend and frontend
|
|
+ * VSCSIIF_ACT_SCSI_CDB or VSCSIIF_ACT_SCSI_RESET */
|
|
+ unsigned char act;
|
|
+
|
|
+ /* do reset function */
|
|
+ wait_queue_head_t wq_reset; /* reset wait queue */
|
|
+ int wait_reset; /* reset wait queue condition */
|
|
+ int32_t rslt_reset; /* reset response status */
|
|
+ /* (SUCCESS or FAILED) */
|
|
+
|
|
+ /* for DMA_TO_DEVICE(1), DMA_FROM_DEVICE(2), DMA_NONE(3)
|
|
+ requests */
|
|
+ unsigned int sc_data_direction;
|
|
+
|
|
+ /* Number of pieces of scatter-gather */
|
|
+ unsigned int nr_segments;
|
|
+
|
|
+ /* struct scsi_cmnd pointer of the request, stored as an integer; 0 while the slot is free */
|
|
+ unsigned long req_scsi_cmnd;
|
|
+ int gref[VSCSIIF_SG_TABLESIZE]; /* grant reference of each data segment */
|
|
+};
|
|
+/* Per-host frontend state; the frontend code reaches it through the Scsi_Host hostdata pointer. */
|
|
+struct vscsifrnt_info {
|
|
+ struct xenbus_device *dev;
|
|
+
|
|
+ struct Scsi_Host *host;
|
|
+
|
|
+ spinlock_t io_lock; /* held by scsifront_cmd_done() while consuming responses */
|
|
+ spinlock_t shadow_lock; /* protects the shadow free list and the reset status fields */
|
|
+ unsigned int evtchn;
|
|
+ unsigned int irq; /* used by scsifront_do_request() to notify the backend */
|
|
+
|
|
+ grant_ref_t ring_ref;
|
|
+ struct vscsiif_front_ring ring;
|
|
+ struct vscsiif_response ring_res;
|
|
+
|
|
+ struct vscsifrnt_shadow shadow[VSCSIIF_MAX_REQS];
|
|
+ uint32_t shadow_free; /* head of the free list threaded through shadow[].next_free */
|
|
+
|
|
+ struct task_struct *kthread; /* presumably the thread running scsifront_schedule() - created outside this view */
|
|
+ wait_queue_head_t wq; /* the worker sleeps here until waiting_resp is set */
|
|
+ unsigned int waiting_resp; /* set by scsifront_notify_work(), cleared by the worker */
|
|
+
|
|
+};
|
|
+
|
|
+#define DPRINTK(_f, _a...) \
|
|
+ pr_debug("(file=%s, line=%d) " _f, \
|
|
+ __FILE__ , __LINE__ , ## _a )
|
|
+
|
|
+int scsifront_xenbus_init(void);
|
|
+void scsifront_xenbus_unregister(void);
|
|
+int scsifront_schedule(void *data);
|
|
+irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+int scsifront_cmd_done(struct vscsifrnt_info *info);
|
|
+
|
|
+
|
|
+#endif /* __XEN_DRIVERS_SCSIFRONT_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsifront/scsifront.c 2011-02-02 12:19:11.000000000 +0100
|
|
@@ -0,0 +1,516 @@
|
|
+/*
|
|
+ * Xen SCSI frontend driver
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include "common.h"
|
|
+/* Pop the head of the free shadow-slot list under shadow_lock and return its index; BUGs if the list head is not a valid shadow[] index. */
|
|
+static int get_id_from_freelist(struct vscsifrnt_info *info)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ uint32_t free;
|
|
+
|
|
+ spin_lock_irqsave(&info->shadow_lock, flags);
|
|
+
|
|
+ free = info->shadow_free;
|
|
+ BUG_ON(free >= VSCSIIF_MAX_REQS); /* shadow[] has VSCSIIF_MAX_REQS entries, so free == VSCSIIF_MAX_REQS is already out of bounds */
|
|
+ info->shadow_free = info->shadow[free].next_free;
|
|
+ info->shadow[free].next_free = 0x0fff; /* marker value for a slot that is in use */
|
|
+
|
|
+ info->shadow[free].wait_reset = 0; /* re-arm the reset wait condition for the new owner */
|
|
+
|
|
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
|
|
+
|
|
+ return free;
|
|
+}
|
|
+/* Push shadow slot id back onto the free list and clear its stored scsi_cmnd pointer. */
|
|
+static void add_id_to_freelist(struct vscsifrnt_info *info, uint32_t id)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&info->shadow_lock, flags);
|
|
+
|
|
+ info->shadow[id].next_free = info->shadow_free;
|
|
+ info->shadow[id].req_scsi_cmnd = 0;
|
|
+ info->shadow_free = id;
|
|
+
|
|
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
|
|
+}
|
|
+
|
|
+/* Claim the next request slot on the ring plus a free shadow id, store the id in the request's rqid and return the request. */
|
|
+struct vscsiif_request * scsifront_pre_request(struct vscsifrnt_info *info)
|
|
+{
|
|
+ struct vscsiif_front_ring *ring = &(info->ring);
|
|
+ vscsiif_request_t *ring_req;
|
|
+ uint32_t id;
|
|
+ /* NOTE(review): no RING_FULL check here; scsifront_queuecommand() checks before calling, scsifront_dev_reset_handler() does not */
|
|
+ ring_req = RING_GET_REQUEST(&(info->ring), ring->req_prod_pvt);
|
|
+
|
|
+ ring->req_prod_pvt++;
|
|
+
|
|
+ id = get_id_from_freelist(info); /* the response carries this id back in its rqid field */
|
|
+ ring_req->rqid = (uint16_t)id;
|
|
+
|
|
+ return ring_req;
|
|
+}
|
|
+
|
|
+/* Flag that responses may be pending and wake the worker thread sleeping on info->wq. */
|
|
+static void scsifront_notify_work(struct vscsifrnt_info *info)
|
|
+{
|
|
+ info->waiting_resp = 1;
|
|
+ wake_up(&info->wq);
|
|
+}
|
|
+
|
|
+/* Publish the privately queued requests on the shared ring and signal the backend through info->irq when the ring macro reports a notification is needed. */
|
|
+static void scsifront_do_request(struct vscsifrnt_info *info)
|
|
+{
|
|
+ struct vscsiif_front_ring *ring = &(info->ring);
|
|
+ unsigned int irq = info->irq;
|
|
+ int notify;
|
|
+
|
|
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(irq);
|
|
+}
|
|
+/* Interrupt handler: does no ring processing itself, it only wakes the worker thread; dev_id is the vscsifrnt_info. */
|
|
+irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+{
|
|
+ scsifront_notify_work((struct vscsifrnt_info *)dev_id);
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+/* End foreign access on every grant recorded in shadow slot *s; BUGs if a grant is still in use. The id argument is not used. */
|
|
+static void scsifront_gnttab_done(struct vscsifrnt_shadow *s, uint32_t id)
|
|
+{
|
|
+ unsigned int seg;
|
|
+
|
|
+ /* Nothing is released for requests without a data phase. */
|
|
+ if (s->sc_data_direction == DMA_NONE)
|
|
+ return;
|
|
+
|
|
+ for (seg = 0; seg < s->nr_segments; seg++) {
|
|
+ int gref = s->gref[seg];
|
|
+
|
|
+ /* A grant the backend still uses at completion time is fatal. */
|
|
+ if (unlikely(gnttab_query_foreign_access(gref) != 0)) {
|
|
+ printk(KERN_ALERT "scsifront: "
|
|
+ "grant still in use by backend.\n");
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ gnttab_end_foreign_access(gref, 0UL);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Complete a SCSI CDB request: release its grants and shadow slot, copy result/residual/sense data into the scsi_cmnd and call its scsi_done. */
|
|
+static void scsifront_cdb_cmd_done(struct vscsifrnt_info *info,
|
|
+ vscsiif_response_t *ring_res)
|
|
+{
|
|
+ struct scsi_cmnd *sc;
|
|
+ uint32_t id;
|
|
+ uint8_t sense_len;
|
|
+ /* NOTE(review): rqid is read from the shared ring and indexes shadow[] without a range check - confirm the backend is trusted */
|
|
+ id = ring_res->rqid;
|
|
+ sc = (struct scsi_cmnd *)info->shadow[id].req_scsi_cmnd;
|
|
+
|
|
+ if (sc == NULL) /* a response for a slot with no command stored */
|
|
+ BUG();
|
|
+
|
|
+ scsifront_gnttab_done(&info->shadow[id], id);
|
|
+ add_id_to_freelist(info, id);
|
|
+
|
|
+ sc->result = ring_res->rslt;
|
|
+ sc->resid = ring_res->residual_len;
|
|
+ /* clamp the reported sense length to VSCSIIF_SENSE_BUFFERSIZE before copying */
|
|
+ if (ring_res->sense_len > VSCSIIF_SENSE_BUFFERSIZE)
|
|
+ sense_len = VSCSIIF_SENSE_BUFFERSIZE;
|
|
+ else
|
|
+ sense_len = ring_res->sense_len;
|
|
+
|
|
+ if (sense_len)
|
|
+ memcpy(sc->sense_buffer, ring_res->sense_buffer, sense_len);
|
|
+
|
|
+ sc->scsi_done(sc);
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+/* Complete a reset request: record the result in the shadow slot and wake the waiter in scsifront_dev_reset_handler(). */
|
|
+static void scsifront_sync_cmd_done(struct vscsifrnt_info *info,
|
|
+ vscsiif_response_t *ring_res)
|
|
+{
|
|
+ uint16_t id = ring_res->rqid;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&info->shadow_lock, flags);
|
|
+ info->shadow[id].wait_reset = 1; /* the condition the reset handler sleeps on */
|
|
+ info->shadow[id].rslt_reset = ring_res->rslt;
|
|
+ spin_unlock_irqrestore(&info->shadow_lock, flags);
|
|
+
|
|
+ wake_up(&(info->shadow[id].wq_reset));
|
|
+}
|
|
+
|
|
+/* Consume the responses currently on the ring under io_lock; returns nonzero when the final ring check reports more responses. */
|
|
+int scsifront_cmd_done(struct vscsifrnt_info *info)
|
|
+{
|
|
+ vscsiif_response_t *ring_res;
|
|
+
|
|
+ RING_IDX i, rp;
|
|
+ int more_to_do = 0;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&info->io_lock, flags);
|
|
+
|
|
+ rp = info->ring.sring->rsp_prod;
|
|
+ rmb(); /* read rsp_prod before the responses it covers */
|
|
+ for (i = info->ring.rsp_cons; i != rp; i++) {
|
|
+
|
|
+ ring_res = RING_GET_RESPONSE(&info->ring, i);
|
|
+ /* dispatch on the action recorded in the shadow slot when the request was submitted */
|
|
+ if (info->shadow[ring_res->rqid].act == VSCSIIF_ACT_SCSI_CDB)
|
|
+ scsifront_cdb_cmd_done(info, ring_res);
|
|
+ else
|
|
+ scsifront_sync_cmd_done(info, ring_res);
|
|
+ }
|
|
+
|
|
+ info->ring.rsp_cons = i;
|
|
+
|
|
+ if (i != info->ring.req_prod_pvt) { /* requests still outstanding: re-check for late responses */
|
|
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
|
|
+ } else { /* nothing outstanding: request an event for the next response */
|
|
+ info->ring.sring->rsp_event = i + 1;
|
|
+ }
|
|
+
|
|
+ spin_unlock_irqrestore(&info->io_lock, flags);
|
|
+
|
|
+
|
|
+ /* Yield point for this unbounded loop. */
|
|
+ cond_resched();
|
|
+
|
|
+ return more_to_do;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/* Worker thread body: sleep until notified (or asked to stop), then process ring responses; data is the vscsifrnt_info. */
|
|
+int scsifront_schedule(void *data)
|
|
+{
|
|
+ struct vscsifrnt_info *info = (struct vscsifrnt_info *)data;
|
|
+
|
|
+ while (!kthread_should_stop()) {
|
|
+ wait_event_interruptible(
|
|
+ info->wq,
|
|
+ info->waiting_resp || kthread_should_stop());
|
|
+
|
|
+ info->waiting_resp = 0;
|
|
+ smp_mb(); /* clear the flag before looking at the ring */
|
|
+
|
|
+ if (scsifront_cmd_done(info))
|
|
+ info->waiting_resp = 1; /* more responses pending: loop again without sleeping */
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/* Grant the backend access to the command's data pages and fill ring_req->seg[] and shadow[id].gref[]; returns the segment count (0 for DMA_NONE), -ENOMEM or -E2BIG. */
|
|
+static int map_data_for_request(struct vscsifrnt_info *info,
|
|
+ struct scsi_cmnd *sc, vscsiif_request_t *ring_req, uint32_t id)
|
|
+{
|
|
+ grant_ref_t gref_head;
|
|
+ struct page *page;
|
|
+ int err, i, ref, ref_cnt = 0;
|
|
+ int write = (sc->sc_data_direction == DMA_TO_DEVICE);
|
|
+ int nr_pages, off, len, bytes;
|
|
+ unsigned long buffer_pfn;
|
|
+ unsigned int data_len = 0; /* NOTE(review): accumulated below but never read in this function */
|
|
+
|
|
+ if (sc->sc_data_direction == DMA_NONE)
|
|
+ return 0;
|
|
+
|
|
+ err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head);
|
|
+ if (err) {
|
|
+ printk(KERN_ERR "scsifront: gnttab_alloc_grant_references() error\n");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ if (sc->use_sg) {
|
|
+ /* quoted scsi_lib.c/scsi_req_map_sg . */
|
|
+ struct scatterlist *sg = (struct scatterlist *)sc->request_buffer;
|
|
+ nr_pages = (sc->request_bufflen + sg[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+
|
|
+ if (nr_pages > VSCSIIF_SG_TABLESIZE) {
|
|
+ printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n");
|
|
+ ref_cnt = (-E2BIG);
|
|
+ goto big_to_sg;
|
|
+ }
|
|
+ /* NOTE(review): ref_cnt is bounded only through the nr_pages estimate above - confirm it cannot exceed VSCSIIF_SG_TABLESIZE for unaligned sg entries */
|
|
+ for (i = 0; i < sc->use_sg; i++) {
|
|
+ page = sg[i].page;
|
|
+ off = sg[i].offset;
|
|
+ len = sg[i].length;
|
|
+ data_len += len;
|
|
+
|
|
+ buffer_pfn = page_to_phys(page) >> PAGE_SHIFT;
|
|
+ /* one grant and one ring segment per page touched by this sg entry */
|
|
+ while (len > 0) {
|
|
+ bytes = min_t(unsigned int, len, PAGE_SIZE - off);
|
|
+
|
|
+ ref = gnttab_claim_grant_reference(&gref_head);
|
|
+ BUG_ON(ref == -ENOSPC);
|
|
+
|
|
+ gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
|
|
+ buffer_pfn, write);
|
|
+
|
|
+ info->shadow[id].gref[ref_cnt] = ref;
|
|
+ ring_req->seg[ref_cnt].gref = ref;
|
|
+ ring_req->seg[ref_cnt].offset = (uint16_t)off;
|
|
+ ring_req->seg[ref_cnt].length = (uint16_t)bytes;
|
|
+
|
|
+ buffer_pfn++;
|
|
+ len -= bytes;
|
|
+ off = 0;
|
|
+ ref_cnt++;
|
|
+ }
|
|
+ }
|
|
+ } else if (sc->request_bufflen) {
|
|
+ unsigned long end = ((unsigned long)sc->request_buffer
|
|
+ + sc->request_bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+ unsigned long start = (unsigned long)sc->request_buffer >> PAGE_SHIFT;
|
|
+
|
|
+ page = virt_to_page(sc->request_buffer);
|
|
+ nr_pages = end - start;
|
|
+ len = sc->request_bufflen;
|
|
+
|
|
+ if (nr_pages > VSCSIIF_SG_TABLESIZE) {
|
|
+ ref_cnt = (-E2BIG);
|
|
+ goto big_to_sg;
|
|
+ }
|
|
+
|
|
+ buffer_pfn = page_to_phys(page) >> PAGE_SHIFT;
|
|
+ /* NOTE(review): buffer_pfn++ below assumes the linear buffer is physically contiguous - confirm */
|
|
+ off = offset_in_page((unsigned long)sc->request_buffer);
|
|
+ for (i = 0; i < nr_pages; i++) {
|
|
+ bytes = PAGE_SIZE - off;
|
|
+
|
|
+ if (bytes > len)
|
|
+ bytes = len;
|
|
+
|
|
+ ref = gnttab_claim_grant_reference(&gref_head);
|
|
+ BUG_ON(ref == -ENOSPC);
|
|
+
|
|
+ gnttab_grant_foreign_access_ref(ref, info->dev->otherend_id,
|
|
+ buffer_pfn, write);
|
|
+
|
|
+ info->shadow[id].gref[i] = ref;
|
|
+ ring_req->seg[i].gref = ref;
|
|
+ ring_req->seg[i].offset = (uint16_t)off;
|
|
+ ring_req->seg[i].length = (uint16_t)bytes;
|
|
+
|
|
+ buffer_pfn++;
|
|
+ len -= bytes;
|
|
+ off = 0;
|
|
+ ref_cnt++;
|
|
+ }
|
|
+ }
|
|
+
|
|
+big_to_sg:
|
|
+ /* reached on success too; presumably only the references not claimed above are handed back - confirm */
|
|
+ gnttab_free_grant_references(gref_head);
|
|
+
|
|
+ return ref_cnt;
|
|
+}
|
|
+/* queuecommand hook: build a VSCSIIF_ACT_SCSI_CDB ring request for sc, grant its data pages and push it to the backend. Returns 0 or SCSI_MLQUEUE_HOST_BUSY. */
|
|
+static int scsifront_queuecommand(struct scsi_cmnd *sc,
|
|
+ void (*done)(struct scsi_cmnd *))
|
|
+{
|
|
+ struct vscsifrnt_info *info =
|
|
+ (struct vscsifrnt_info *) sc->device->host->hostdata;
|
|
+ vscsiif_request_t *ring_req;
|
|
+ int ref_cnt;
|
|
+ uint16_t rqid;
|
|
+
|
|
+/* debug printk to identify more missing scsi commands
|
|
+ printk(KERN_INFO "scsicmd: len=%i, 0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x,0x%x",sc->cmd_len,
|
|
+ sc->cmnd[0],sc->cmnd[1],sc->cmnd[2],sc->cmnd[3],sc->cmnd[4],
|
|
+ sc->cmnd[5],sc->cmnd[6],sc->cmnd[7],sc->cmnd[8],sc->cmnd[9]);
|
|
+*/
|
|
+ if (RING_FULL(&info->ring)) {
|
|
+ goto out_host_busy;
|
|
+ }
|
|
+
|
|
+ sc->scsi_done = done;
|
|
+ sc->result = 0;
|
|
+
|
|
+ ring_req = scsifront_pre_request(info);
|
|
+ rqid = ring_req->rqid;
|
|
+ ring_req->act = VSCSIIF_ACT_SCSI_CDB;
|
|
+
|
|
+ ring_req->id = sc->device->id;
|
|
+ ring_req->lun = sc->device->lun;
|
|
+ ring_req->channel = sc->device->channel;
|
|
+ ring_req->cmd_len = sc->cmd_len;
|
|
+ /* guards the memcpy into ring_req->cmnd below */
|
|
+ BUG_ON(sc->cmd_len > VSCSIIF_MAX_COMMAND_SIZE);
|
|
+
|
|
+ if ( sc->cmd_len )
|
|
+ memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
|
|
+ else
|
|
+ memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
|
|
+
|
|
+ ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
|
|
+ ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
|
|
+ /* remember what scsifront_cdb_cmd_done() needs to complete this request */
|
|
+ info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc;
|
|
+ info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
|
|
+ info->shadow[rqid].act = ring_req->act;
|
|
+
|
|
+ ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
|
|
+ if (ref_cnt < 0) { /* NOTE(review): the ring slot taken by scsifront_pre_request() (req_prod_pvt++) is not given back on this path - confirm */
|
|
+ add_id_to_freelist(info, rqid);
|
|
+ if (ref_cnt == (-ENOMEM))
|
|
+ goto out_host_busy;
|
|
+ else {
|
|
+ sc->result = (DID_ERROR << 16);
|
|
+ goto out_fail_command;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ ring_req->nr_segments = (uint8_t)ref_cnt;
|
|
+ info->shadow[rqid].nr_segments = ref_cnt;
|
|
+
|
|
+ scsifront_do_request(info);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+out_host_busy:
|
|
+ return SCSI_MLQUEUE_HOST_BUSY;
|
|
+
|
|
+out_fail_command:
|
|
+ done(sc); /* complete the command immediately with the DID_ERROR result set above */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Abort handler: no abort request is sent to the backend; it always reports FAILED. */
|
|
+static int scsifront_eh_abort_handler(struct scsi_cmnd *sc)
|
|
+{
|
|
+ return (FAILED);
|
|
+}
|
|
+
|
|
+/* Device (LUN) reset handler: sends a VSCSIIF_ACT_SCSI_RESET request, sleeps until the response is recorded, and returns the backend's result. */
|
|
+static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
|
|
+{
|
|
+ struct Scsi_Host *host = sc->device->host;
|
|
+ struct vscsifrnt_info *info =
|
|
+ (struct vscsifrnt_info *) sc->device->host->hostdata;
|
|
+
|
|
+ vscsiif_request_t *ring_req;
|
|
+ uint16_t rqid;
|
|
+ int err;
|
|
+
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
|
|
+ spin_lock_irq(host->host_lock);
|
|
+#endif
|
|
+ /* NOTE(review): unlike scsifront_queuecommand(), RING_FULL is not checked before taking a slot */
|
|
+ ring_req = scsifront_pre_request(info);
|
|
+ ring_req->act = VSCSIIF_ACT_SCSI_RESET;
|
|
+
|
|
+ rqid = ring_req->rqid;
|
|
+ info->shadow[rqid].act = VSCSIIF_ACT_SCSI_RESET;
|
|
+
|
|
+ ring_req->channel = sc->device->channel;
|
|
+ ring_req->id = sc->device->id;
|
|
+ ring_req->lun = sc->device->lun;
|
|
+ ring_req->cmd_len = sc->cmd_len;
|
|
+
|
|
+ if ( sc->cmd_len )
|
|
+ memcpy(ring_req->cmnd, sc->cmnd, sc->cmd_len);
|
|
+ else
|
|
+ memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);
|
|
+
|
|
+ ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
|
|
+ ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
|
|
+ ring_req->nr_segments = 0;
|
|
+
|
|
+ scsifront_do_request(info);
|
|
+ /* NOTE(review): this unlock/lock pair is not under the version #if used for the outer pair */
|
|
+ spin_unlock_irq(host->host_lock);
|
|
+ wait_event_interruptible(info->shadow[rqid].wq_reset,
|
|
+ info->shadow[rqid].wait_reset);
|
|
+ spin_lock_irq(host->host_lock);
|
|
+ /* NOTE(review): the wait's return value is ignored, so an interrupted wait still reads rslt_reset and frees rqid - confirm */
|
|
+ err = info->shadow[rqid].rslt_reset;
|
|
+
|
|
+ add_id_to_freelist(info, rqid);
|
|
+
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
|
|
+ spin_unlock_irq(host->host_lock);
|
|
+#endif
|
|
+ return (err);
|
|
+}
|
|
+
|
|
+/* SCSI host template of the frontend; non-static, and declared extern by the frontend xenbus.c. */
|
|
+struct scsi_host_template scsifront_sht = {
|
|
+ .module = THIS_MODULE,
|
|
+ .name = "Xen SCSI frontend driver",
|
|
+ .queuecommand = scsifront_queuecommand,
|
|
+ .eh_abort_handler = scsifront_eh_abort_handler,
|
|
+ .eh_device_reset_handler= scsifront_dev_reset_handler,
|
|
+ .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN,
|
|
+ .can_queue = VSCSIIF_MAX_REQS, /* one shadow slot exists per ring request */
|
|
+ .this_id = -1,
|
|
+ .sg_tablesize = VSCSIIF_SG_TABLESIZE, /* same limit map_data_for_request() enforces */
|
|
+ .use_clustering = DISABLE_CLUSTERING,
|
|
+ .proc_name = "scsifront",
|
|
+};
|
|
+
|
|
+/* Module init: returns -ENODEV when not running on Xen, otherwise the result of scsifront_xenbus_init(). */
|
|
+static int __init scsifront_init(void)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ if (!is_running_on_xen())
|
|
+ return -ENODEV;
|
|
+
|
|
+ err = scsifront_xenbus_init();
|
|
+
|
|
+ return err;
|
|
+}
|
|
+/* Module exit: undoes scsifront_init() by calling scsifront_xenbus_unregister(). */
|
|
+static void __exit scsifront_exit(void)
|
|
+{
|
|
+ scsifront_xenbus_unregister();
|
|
+}
|
|
+
|
|
+module_init(scsifront_init);
|
|
+module_exit(scsifront_exit);
|
|
+
|
|
+MODULE_DESCRIPTION("Xen SCSI frontend driver");
|
|
+MODULE_LICENSE("GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/scsifront/xenbus.c 2011-02-02 12:19:11.000000000 +0100
|
|
@@ -0,0 +1,426 @@
|
|
+/*
|
|
+ * Xen SCSI frontend driver
|
|
+ *
|
|
+ * Copyright (c) 2008, FUJITSU Limited
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+/*
|
|
+* Patched to support >2TB drives
|
|
+* 2010, Samuel Kvasnica, IMS Nanofabrication AG
|
|
+*/
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include "common.h"
|
|
+
|
|
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
|
|
+ #define DEFAULT_TASK_COMM_LEN 16
|
|
+#else
|
|
+ #define DEFAULT_TASK_COMM_LEN TASK_COMM_LEN
|
|
+#endif
|
|
+
|
|
+extern struct scsi_host_template scsifront_sht;
|
|
+
|
|
+/*
|
|
+ * Tear down one frontend instance: remove the SCSI host unless it is
|
|
+ * already in the SHOST_DEL state, end the grant on the shared ring,
|
|
+ * unbind the event-channel irq and finally drop one reference on the
|
|
+ * Scsi_Host with scsi_host_put().
|
|
+ *
|
|
+ * NOTE(review): info is the host's hostdata (see scsifront_probe), so it
|
|
+ * presumably must not be used after the last host reference is dropped.
|
|
+ */
|
|
+static void scsifront_free(struct vscsifrnt_info *info)
|
|
+{
|
|
+ struct Scsi_Host *host = info->host;
|
|
+
|
|
+ /* Both branches open the same block; only the state test differs. */
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
|
|
+ if (host->shost_state != SHOST_DEL) {
|
|
+#else
|
|
+ if (!test_bit(SHOST_DEL, &host->shost_state)) {
|
|
+#endif
|
|
+ scsi_remove_host(info->host);
|
|
+ }
|
|
+
|
|
+ /* The ring page address is passed along with the grant reference. */
|
|
+ if (info->ring_ref != GRANT_INVALID_REF) {
|
|
+ gnttab_end_foreign_access(info->ring_ref,
|
|
+ (unsigned long)info->ring.sring);
|
|
+ info->ring_ref = GRANT_INVALID_REF;
|
|
+ info->ring.sring = NULL;
|
|
+ }
|
|
+
|
|
+ /* irq == 0 is treated as "nothing bound". */
|
|
+ if (info->irq)
|
|
+ unbind_from_irqhandler(info->irq, info);
|
|
+ info->irq = 0;
|
|
+
|
|
+ scsi_host_put(info->host);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Allocate the shared request ring and make it reachable for the backend.
|
|
+ *
|
|
+ * On success the ring page has been granted (info->ring_ref), an event
|
|
+ * channel has been bound to scsifront_intr (info->irq) and 0 is returned.
|
|
+ * On failure a negative errno is returned and everything acquired here has
|
|
+ * been released again.  The Scsi_Host reference is deliberately left
|
|
+ * alone: scsifront_probe drops it itself when ring setup fails, so
|
|
+ * dropping it here as well would release it twice.
|
|
+ */
|
|
+static int scsifront_alloc_ring(struct vscsifrnt_info *info)
|
|
+{
|
|
+	struct xenbus_device *dev = info->dev;
|
|
+	struct vscsiif_sring *sring;
|
|
+	int err = -ENOMEM;
|
|
+
|
|
+	info->ring_ref = GRANT_INVALID_REF;
|
|
+
|
|
+	/***** Frontend to Backend ring start *****/
|
|
+	sring = (struct vscsiif_sring *) __get_free_page(GFP_KERNEL);
|
|
+	if (!sring) {
|
|
+		xenbus_dev_fatal(dev, err, "fail to allocate shared ring (Front to Back)");
|
|
+		return err;
|
|
+	}
|
|
+	SHARED_RING_INIT(sring);
|
|
+	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
|
|
+
|
|
+	err = xenbus_grant_ring(dev, virt_to_mfn(sring));
|
|
+	if (err < 0) {
|
|
+		xenbus_dev_fatal(dev, err, "fail to grant shared ring (Front to Back)");
|
|
+		/* The page was never granted, so it can be freed directly. */
|
|
+		free_page((unsigned long) sring);
|
|
+		info->ring.sring = NULL;
|
|
+		return err;
|
|
+	}
|
|
+	info->ring_ref = err;
|
|
+
|
|
+	err = bind_listening_port_to_irqhandler(
|
|
+			dev->otherend_id, scsifront_intr,
|
|
+			SA_SAMPLE_RANDOM, "scsifront", info);
|
|
+	if (err <= 0) {
|
|
+		/* irq 0 means "unbound" in this driver; never report success. */
|
|
+		if (err == 0)
|
|
+			err = -EINVAL;
|
|
+		xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler");
|
|
+		goto free_gnttab;
|
|
+	}
|
|
+	info->irq = err;
|
|
+
|
|
+	return 0;
|
|
+
|
|
+free_gnttab:
|
|
+	/*
|
|
+	 * Same call scsifront_free() uses: the page address is passed in,
|
|
+	 * so the grant-table code is expected to release the page too.
|
|
+	 */
|
|
+	gnttab_end_foreign_access(info->ring_ref, (unsigned long) sring);
|
|
+	info->ring_ref = GRANT_INVALID_REF;
|
|
+	info->ring.sring = NULL;
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Create the shared ring and advertise it to the backend through xenstore.
|
|
+ *
|
|
+ * "ring-ref" and "event-channel" are written under dev->nodename inside a
|
|
+ * single xenbus transaction, which is restarted when it ends with -EAGAIN.
|
|
+ *
|
|
+ * Returns 0 on success or a negative errno.  On failure only the ring
|
|
+ * resources (grant reference, irq binding) are released; the Scsi_Host
|
|
+ * reference is left to scsifront_probe, which drops it on error.
|
|
+ */
|
|
+static int scsifront_init_ring(struct vscsifrnt_info *info)
|
|
+{
|
|
+	struct xenbus_device *dev = info->dev;
|
|
+	struct xenbus_transaction xbt;
|
|
+	int err;
|
|
+
|
|
+	DPRINTK("%s\n",__FUNCTION__);
|
|
+
|
|
+	err = scsifront_alloc_ring(info);
|
|
+	if (err)
|
|
+		return err;
|
|
+	DPRINTK("%u %u\n", info->ring_ref, info->evtchn);
|
|
+
|
|
+again:
|
|
+	err = xenbus_transaction_start(&xbt);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(dev, err, "starting transaction");
|
|
+		/* xbt is not valid here: skip the writes and the abort. */
|
|
+		goto free_sring;
|
|
+	}
|
|
+
|
|
+	err = xenbus_printf(xbt, dev->nodename, "ring-ref", "%u",
|
|
+				info->ring_ref);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(dev, err, "%s", "writing ring-ref");
|
|
+		goto fail;
|
|
+	}
|
|
+
|
|
+	err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
|
|
+				irq_to_evtchn_port(info->irq));
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(dev, err, "%s", "writing event-channel");
|
|
+		goto fail;
|
|
+	}
|
|
+
|
|
+	err = xenbus_transaction_end(xbt, 0);
|
|
+	if (err) {
|
|
+		if (err == -EAGAIN)
|
|
+			goto again;
|
|
+		xenbus_dev_fatal(dev, err, "completing transaction");
|
|
+		goto free_sring;
|
|
+	}
|
|
+
|
|
+	return 0;
|
|
+
|
|
+fail:
|
|
+	xenbus_transaction_end(xbt, 1);
|
|
+free_sring:
|
|
+	/* Release ring resources only, in the order scsifront_free() uses. */
|
|
+	if (info->ring_ref != GRANT_INVALID_REF) {
|
|
+		gnttab_end_foreign_access(info->ring_ref,
|
|
+					(unsigned long)info->ring.sring);
|
|
+		info->ring_ref = GRANT_INVALID_REF;
|
|
+		info->ring.sring = NULL;
|
|
+	}
|
|
+
|
|
+	if (info->irq)
|
|
+		unbind_from_irqhandler(info->irq, info);
|
|
+	info->irq = 0;
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * xenbus probe callback: create one frontend instance for a vscsi device.
|
|
+ *
|
|
+ * Allocates the Scsi_Host (the per-instance info lives in its hostdata),
|
|
+ * initialises the shadow free list, sets up the shared ring, starts the
|
|
+ * "vscsiif.<host_no>" worker thread and registers the host with the SCSI
|
|
+ * layer before switching to XenbusStateInitialised.
|
|
+ *
|
|
+ * Returns 0 on success or a negative errno.
|
|
+ */
|
|
+static int scsifront_probe(struct xenbus_device *dev,
|
|
+				const struct xenbus_device_id *id)
|
|
+{
|
|
+	struct vscsifrnt_info *info;
|
|
+	struct Scsi_Host *host;
|
|
+	int i, err = -ENOMEM;
|
|
+	char name[DEFAULT_TASK_COMM_LEN];
|
|
+
|
|
+	host = scsi_host_alloc(&scsifront_sht, sizeof(*info));
|
|
+	if (!host) {
|
|
+		xenbus_dev_fatal(dev, err, "fail to allocate scsi host");
|
|
+		return err;
|
|
+	}
|
|
+	info = (struct vscsifrnt_info *) host->hostdata;
|
|
+	info->host = host;
|
|
+
|
|
+	dev->dev.driver_data = info;
|
|
+	info->dev = dev;
|
|
+
|
|
+	/* Chain all shadow entries into the free list. */
|
|
+	for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
|
|
+		info->shadow[i].next_free = i + 1;
|
|
+		init_waitqueue_head(&(info->shadow[i].wq_reset));
|
|
+		info->shadow[i].wait_reset = 0;
|
|
+	}
|
|
+	info->shadow[VSCSIIF_MAX_REQS - 1].next_free = 0x0fff;
|
|
+
|
|
+	/*
|
|
+	 * Set these up before scsifront_init_ring(), which binds the
|
|
+	 * interrupt handler; nothing below depends on the ring existing.
|
|
+	 */
|
|
+	init_waitqueue_head(&info->wq);
|
|
+	spin_lock_init(&info->io_lock);
|
|
+	spin_lock_init(&info->shadow_lock);
|
|
+
|
|
+	err = scsifront_init_ring(info);
|
|
+	if (err) {
|
|
+		scsi_host_put(host);
|
|
+		return err;
|
|
+	}
|
|
+
|
|
+	snprintf(name, DEFAULT_TASK_COMM_LEN, "vscsiif.%d", info->host->host_no);
|
|
+
|
|
+	info->kthread = kthread_run(scsifront_schedule, info, name);
|
|
+	if (IS_ERR(info->kthread)) {
|
|
+		err = PTR_ERR(info->kthread);
|
|
+		info->kthread = NULL;
|
|
+		printk(KERN_ERR "scsifront: kthread start err %d\n", err);
|
|
+		goto free_sring;
|
|
+	}
|
|
+
|
|
+	host->max_id = VSCSIIF_MAX_TARGET;
|
|
+	host->max_channel = 0;
|
|
+	host->max_lun = VSCSIIF_MAX_LUN;
|
|
+	host->max_sectors = (VSCSIIF_SG_TABLESIZE - 1) * PAGE_SIZE / 512;
|
|
+	host->max_cmd_len = VSCSIIF_MAX_COMMAND_SIZE;
|
|
+
|
|
+	err = scsi_add_host(host, &dev->dev);
|
|
+	if (err) {
|
|
+		printk(KERN_ERR "scsifront: fail to add scsi host %d\n", err);
|
|
+		goto stop_kthread;
|
|
+	}
|
|
+
|
|
+	xenbus_switch_state(dev, XenbusStateInitialised);
|
|
+
|
|
+	return 0;
|
|
+
|
|
+stop_kthread:
|
|
+	/* The worker uses info; stop it before info can be freed. */
|
|
+	kthread_stop(info->kthread);
|
|
+	info->kthread = NULL;
|
|
+free_sring:
|
|
+	/* free resource */
|
|
+	scsifront_free(info);
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * xenbus remove callback: stop the worker thread if one is running, then
|
|
+ * release the instance with scsifront_free().  Always returns 0.
|
|
+ */
|
|
+static int scsifront_remove(struct xenbus_device *dev)
|
|
+{
|
|
+ struct vscsifrnt_info *info = dev->dev.driver_data;
|
|
+
|
|
+ DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename);
|
|
+
|
|
+ /* Stop the thread first: it was started with info as its argument. */
|
|
+ if (info->kthread) {
|
|
+ kthread_stop(info->kthread);
|
|
+ info->kthread = NULL;
|
|
+ }
|
|
+
|
|
+ scsifront_free(info);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Handle the backend entering Closing: remove the SCSI host and report
|
|
+ * this frontend as closed on xenbus.  Always returns 0.
|
|
+ */
|
|
+static int scsifront_disconnect(struct vscsifrnt_info *info)
|
|
+{
|
|
+ struct xenbus_device *dev = info->dev;
|
|
+ struct Scsi_Host *host = info->host;
|
|
+
|
|
+ DPRINTK("%s: %s disconnect\n",__FUNCTION__ ,dev->nodename);
|
|
+
|
|
+ /*
|
|
+ By the time this function runs, all devices of the
|
|
+ frontend have already been deleted, so there is no
|
|
+ need to block I/O before removing the host.
|
|
+ */
|
|
+
|
|
+ scsi_remove_host(host);
|
|
+ xenbus_frontend_closed(dev);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#define VSCSIFRONT_OP_ADD_LUN 1
|
|
+#define VSCSIFRONT_OP_DEL_LUN 2
|
|
+
|
|
+/*
|
|
+ * Add or remove LUNs according to the backend's "vscsi-devs" directory.
|
|
+ *
|
|
+ * info: frontend instance
|
|
+ * op:   VSCSIFRONT_OP_ADD_LUN or VSCSIFRONT_OP_DEL_LUN; other values
|
|
+ *       are ignored
|
|
+ *
|
|
+ * For every entry the backend's per-device state and its "v-dev" address
|
|
+ * (host:channel:target:lun) are read.  Entries whose values cannot be
|
|
+ * parsed completely are skipped.
|
|
+ *  - ADD: entries in XenbusStateInitialised are added to the host; the
|
|
+ *    frontend's per-device state becomes Connected on success and Closed
|
|
+ *    if the address is already in use or the add fails.
|
|
+ *  - DEL: entries in XenbusStateClosing that exist on the host are removed
|
|
+ *    and the frontend's per-device state becomes Closed.
|
|
+ */
|
|
+static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
|
|
+{
|
|
+	struct xenbus_device *dev = info->dev;
|
|
+	unsigned int i;
|
|
+	int err = 0;
|
|
+	char str[64], state_str[64];
|
|
+	char **dir;
|
|
+	unsigned int dir_n = 0;
|
|
+	unsigned int device_state;
|
|
+	unsigned int hst, chn, tgt, lun;
|
|
+	struct scsi_device *sdev;
|
|
+
|
|
+	dir = xenbus_directory(XBT_NIL, dev->otherend, "vscsi-devs", &dir_n);
|
|
+	if (IS_ERR(dir))
|
|
+		return;
|
|
+
|
|
+	for (i = 0; i < dir_n; i++) {
|
|
+		/* read status; anything but one parsed field is unusable */
|
|
+		snprintf(str, sizeof(str), "vscsi-devs/%s/state", dir[i]);
|
|
+		err = xenbus_scanf(XBT_NIL, dev->otherend, str, "%u",
|
|
+					&device_state);
|
|
+		if (err != 1)
|
|
+			continue;
|
|
+
|
|
+		/* virtual SCSI device; all four fields must be present */
|
|
+		snprintf(str, sizeof(str), "vscsi-devs/%s/v-dev", dir[i]);
|
|
+		err = xenbus_scanf(XBT_NIL, dev->otherend, str,
|
|
+					"%u:%u:%u:%u", &hst, &chn, &tgt, &lun);
|
|
+		if (err != 4)
|
|
+			continue;
|
|
+
|
|
+		/* front device state path */
|
|
+		snprintf(state_str, sizeof(state_str), "vscsi-devs/%s/state", dir[i]);
|
|
+
|
|
+		switch (op) {
|
|
+		case VSCSIFRONT_OP_ADD_LUN:
|
|
+			if (device_state != XenbusStateInitialised)
|
|
+				break;
|
|
+
|
|
+			sdev = scsi_device_lookup(info->host, chn, tgt, lun);
|
|
+			if (sdev) {
|
|
+				printk(KERN_ERR "scsifront: Device already in use.\n");
|
|
+				scsi_device_put(sdev);
|
|
+				xenbus_printf(XBT_NIL, dev->nodename,
|
|
+					state_str, "%d", XenbusStateClosed);
|
|
+			} else if (scsi_add_device(info->host, chn, tgt, lun)) {
|
|
+				/* Do not claim Connected for a LUN that was not added. */
|
|
+				printk(KERN_ERR "scsifront: scsi_add_device failed.\n");
|
|
+				xenbus_printf(XBT_NIL, dev->nodename,
|
|
+					state_str, "%d", XenbusStateClosed);
|
|
+			} else {
|
|
+				xenbus_printf(XBT_NIL, dev->nodename,
|
|
+					state_str, "%d", XenbusStateConnected);
|
|
+			}
|
|
+			break;
|
|
+		case VSCSIFRONT_OP_DEL_LUN:
|
|
+			if (device_state != XenbusStateClosing)
|
|
+				break;
|
|
+
|
|
+			sdev = scsi_device_lookup(info->host, chn, tgt, lun);
|
|
+			if (sdev) {
|
|
+				scsi_remove_device(sdev);
|
|
+				scsi_device_put(sdev);
|
|
+				xenbus_printf(XBT_NIL, dev->nodename,
|
|
+					state_str, "%d", XenbusStateClosed);
|
|
+			}
|
|
+			break;
|
|
+		default:
|
|
+			break;
|
|
+		}
|
|
+	}
|
|
+
|
|
+	kfree(dir);
|
|
+	return;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+
|
|
+/*
|
|
+ * xenbus callback run when the backend's state changes.
|
|
+ *
|
|
+ * dev:           the frontend's xenbus device
|
|
+ * backend_state: the state the backend has just entered
|
|
+ */
|
|
+static void scsifront_backend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state backend_state)
|
|
+{
|
|
+ struct vscsifrnt_info *info = dev->dev.driver_data;
|
|
+
|
|
+ DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
|
|
+
|
|
+ switch (backend_state) {
|
|
+ /* Nothing to do for these states. */
|
|
+ case XenbusStateUnknown:
|
|
+ case XenbusStateInitialising:
|
|
+ case XenbusStateInitWait:
|
|
+ case XenbusStateClosed:
|
|
+ break;
|
|
+
|
|
+ case XenbusStateInitialised:
|
|
+ break;
|
|
+
|
|
+ case XenbusStateConnected:
|
|
+ /* Scan for LUNs only while our own xenstore state is still Initialised. */
|
|
+ if (xenbus_read_driver_state(dev->nodename) ==
|
|
+ XenbusStateInitialised) {
|
|
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
|
|
+ }
|
|
+
|
|
+ /* Already connected: no state switch needed. */
|
|
+ if (dev->state == XenbusStateConnected)
|
|
+ break;
|
|
+
|
|
+ xenbus_switch_state(dev, XenbusStateConnected);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ scsifront_disconnect(info);
|
|
+ break;
|
|
+
|
|
+ /* Remove the LUNs the backend marked Closing, then follow it into Reconfiguring. */
|
|
+ case XenbusStateReconfiguring:
|
|
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_DEL_LUN);
|
|
+ xenbus_switch_state(dev, XenbusStateReconfiguring);
|
|
+ break;
|
|
+
|
|
+ /* Add the LUNs the backend marked Initialised, then return to Connected. */
|
|
+ case XenbusStateReconfigured:
|
|
+ scsifront_do_lun_hotplug(info, VSCSIFRONT_OP_ADD_LUN);
|
|
+ xenbus_switch_state(dev, XenbusStateConnected);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+static struct xenbus_device_id scsifront_ids[] = {
|
|
+ { "vscsi" },
|
|
+ { "" }
|
|
+};
|
|
+MODULE_ALIAS("xen:vscsi");
|
|
+
|
|
+static struct xenbus_driver scsifront_driver = {
|
|
+ .name = "vscsi",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = scsifront_ids,
|
|
+ .probe = scsifront_probe,
|
|
+ .remove = scsifront_remove,
|
|
+/* .resume = scsifront_resume, */
|
|
+ .otherend_changed = scsifront_backend_changed,
|
|
+};
|
|
+
|
|
+int scsifront_xenbus_init(void)
|
|
+{
|
|
+ return xenbus_register_frontend(&scsifront_driver);
|
|
+}
|
|
+
|
|
+void scsifront_xenbus_unregister(void)
|
|
+{
|
|
+ xenbus_unregister_driver(&scsifront_driver);
|
|
+}
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/Makefile 2010-01-18 15:23:12.000000000 +0100
|
|
@@ -0,0 +1,12 @@
|
|
+EXTRA_CFLAGS += -Idrivers/xen/sfc_netback -Idrivers/xen/sfc_netutil -Idrivers/xen/netback -Idrivers/net/sfc -Idrivers/net/sfc/sfc_resource
|
|
+EXTRA_CFLAGS += -D__ci_driver__
|
|
+EXTRA_CFLAGS += -DEFX_USE_KCOMPAT
|
|
+EXTRA_CFLAGS += -Werror
|
|
+
|
|
+ifdef GCOV
|
|
+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
|
|
+endif
|
|
+
|
|
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) := sfc_netback.o
|
|
+
|
|
+sfc_netback-objs := accel.o accel_fwd.o accel_msg.o accel_solarflare.o accel_xenbus.o accel_debugfs.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,147 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_solarflare.h"
|
|
+
|
|
+#include <linux/notifier.h>
|
|
+
|
|
+#ifdef EFX_GCOV
|
|
+#include "gcov.h"
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Netdevice notifier: propagate link state to accelerated backends.
|
|
+ *
|
|
+ * Only NETDEV_UP, NETDEV_DOWN and NETDEV_CHANGE are handled.  Every bend
|
|
+ * on bend_list whose net_dev has the same ifindex as the notifying device
|
|
+ * is told whether the interface is usable.  Bends whose shared pages are
|
|
+ * already unmapped (shared_page == NULL) are skipped.
|
|
+ *
|
|
+ * Always returns NOTIFY_DONE.
|
|
+ */
|
|
+static int netback_accel_netdev_event(struct notifier_block *nb,
|
|
+				      unsigned long event, void *ptr)
|
|
+{
|
|
+	struct net_device *net_dev = (struct net_device *)ptr;
|
|
+	struct netback_accel *bend;
|
|
+	int link_ok;
|
|
+
|
|
+	switch (event) {
|
|
+	case NETDEV_UP:
|
|
+	case NETDEV_DOWN:
|
|
+	case NETDEV_CHANGE:
|
|
+		break;
|
|
+	default:
|
|
+		return NOTIFY_DONE;
|
|
+	}
|
|
+
|
|
+	mutex_lock(&bend_list_mutex);
|
|
+	for (bend = bend_list; bend != NULL; bend = bend->next_bend) {
|
|
+		mutex_lock(&bend->bend_mutex);
|
|
+		/*
|
|
+		 * shared_page is NULL once the shared pages have been
|
|
+		 * unmapped but the bend is still on the list.
|
|
+		 */
|
|
+		if (bend->shared_page != NULL &&
|
|
+		    bend->net_dev->ifindex == net_dev->ifindex) {
|
|
+			if (event == NETDEV_CHANGE)
|
|
+				link_ok = (netif_carrier_ok(net_dev) &&
|
|
+					   (net_dev->flags & IFF_UP));
|
|
+			else
|
|
+				link_ok = (netif_carrier_ok(net_dev) &&
|
|
+					   (event == NETDEV_UP));
|
|
+			netback_accel_set_interface_state(bend, link_ok);
|
|
+		}
|
|
+		mutex_unlock(&bend->bend_mutex);
|
|
+	}
|
|
+	mutex_unlock(&bend_list_mutex);
|
|
+
|
|
+	return NOTIFY_DONE;
|
|
+}
|
|
+
|
|
+
|
|
+static struct notifier_block netback_accel_netdev_notifier = {
|
|
+ .notifier_call = netback_accel_netdev_event,
|
|
+};
|
|
+
|
|
+
|
|
+unsigned sfc_netback_max_pages = NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES;
|
|
+module_param_named(max_pages, sfc_netback_max_pages, uint, 0644);
|
|
+MODULE_PARM_DESC(max_pages,
|
|
+ "The number of buffer pages to enforce on each guest");
|
|
+
|
|
+/*
|
|
+ * Initialise the subsystems needed for the accelerated fast path.
|
|
+ *
|
|
+ * Brings up, in order: the forwarding layer, debugfs, the Solarflare
|
|
+ * support and the netdevice notifier.  Returns 0 on success; on failure
|
|
+ * the steps already completed are undone in reverse order and the failing
|
|
+ * call's return code is passed back.
|
|
+ */
|
|
+static int __init netback_accel_init(void)
|
|
+{
|
|
+ int rc = 0;
|
|
+
|
|
+#ifdef EFX_GCOV
|
|
+ gcov_provider_init(THIS_MODULE);
|
|
+#endif
|
|
+
|
|
+ rc = netback_accel_init_fwd();
|
|
+ if (rc != 0)
|
|
+ goto fail0;
|
|
+
|
|
+ /* Returns void, so it has no failure path here. */
|
|
+ netback_accel_debugfs_init();
|
|
+
|
|
+ rc = netback_accel_sf_init();
|
|
+ if (rc != 0)
|
|
+ goto fail1;
|
|
+
|
|
+ rc = register_netdevice_notifier
|
|
+ (&netback_accel_netdev_notifier);
|
|
+ if (rc != 0)
|
|
+ goto fail2;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+ /* Unwind: each label undoes what succeeded before the failing step. */
|
|
+ fail2:
|
|
+ netback_accel_sf_shutdown();
|
|
+ fail1:
|
|
+ netback_accel_debugfs_fini();
|
|
+ netback_accel_shutdown_fwd();
|
|
+ fail0:
|
|
+#ifdef EFX_GCOV
|
|
+ gcov_provider_fini(THIS_MODULE);
|
|
+#endif
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+module_init(netback_accel_init);
|
|
+
|
|
+/*
|
|
+ * Module exit: unregister the netdevice notifier first, then shut down
|
|
+ * the Solarflare support, all backends, debugfs and the forwarding layer.
|
|
+ */
|
|
+static void __exit netback_accel_exit(void)
|
|
+{
|
|
+ unregister_netdevice_notifier(&netback_accel_netdev_notifier);
|
|
+
|
|
+ netback_accel_sf_shutdown();
|
|
+
|
|
+ netback_accel_shutdown_bends();
|
|
+
|
|
+ netback_accel_debugfs_fini();
|
|
+
|
|
+ netback_accel_shutdown_fwd();
|
|
+
|
|
+#ifdef EFX_GCOV
|
|
+ gcov_provider_fini(THIS_MODULE);
|
|
+#endif
|
|
+}
|
|
+
|
|
+module_exit(netback_accel_exit);
|
|
+
|
|
+MODULE_LICENSE("GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel.h 2010-01-18 15:23:12.000000000 +0100
|
|
@@ -0,0 +1,391 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NETBACK_ACCEL_H
|
|
+#define NETBACK_ACCEL_H
|
|
+
|
|
+#include <linux/slab.h>
|
|
+#include <linux/ip.h>
|
|
+#include <linux/tcp.h>
|
|
+#include <linux/udp.h>
|
|
+#include <linux/in.h>
|
|
+#include <linux/netdevice.h>
|
|
+#include <linux/etherdevice.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <linux/wait.h>
|
|
+
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+#include "accel_shared_fifo.h"
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_util.h"
|
|
+
|
|
+/**************************************************************************
|
|
+ * Datatypes
|
|
+ **************************************************************************/
|
|
+
|
|
+#define NETBACK_ACCEL_DEFAULT_MAX_FILTERS (8)
|
|
+#define NETBACK_ACCEL_DEFAULT_MAX_MCASTS (8)
|
|
+#define NETBACK_ACCEL_DEFAULT_MAX_BUF_PAGES (384)
|
|
+/* Variable to store module parameter for max_buf_pages */
|
|
+extern unsigned sfc_netback_max_pages;
|
|
+
|
|
+#define NETBACK_ACCEL_STATS 1
|
|
+
|
|
+#if NETBACK_ACCEL_STATS
|
|
+#define NETBACK_ACCEL_STATS_OP(x) x
|
|
+#else
|
|
+#define NETBACK_ACCEL_STATS_OP(x)
|
|
+#endif
|
|
+
|
|
+/*! Statistics for a given backend */
|
|
+struct netback_accel_stats {
|
|
+ /*! Number of eventq wakeup events */
|
|
+ u64 evq_wakeups;
|
|
+ /*! Number of eventq timeout events */
|
|
+ u64 evq_timeouts;
|
|
+ /*! Number of filters used */
|
|
+ u32 num_filters;
|
|
+ /*! Number of buffer pages registered */
|
|
+ u32 num_buffer_pages;
|
|
+};
|
|
+
|
|
+
|
|
+/* Debug fs nodes for each of the above stats */
|
|
+struct netback_accel_dbfs {
|
|
+ struct dentry *evq_wakeups;
|
|
+ struct dentry *evq_timeouts;
|
|
+ struct dentry *num_filters;
|
|
+ struct dentry *num_buffer_pages;
|
|
+};
|
|
+
|
|
+
|
|
+/*! Resource limits for a given NIC */
|
|
+struct netback_accel_limits {
|
|
+ int max_filters; /*!< Max. number of filters to use. */
|
|
+ int max_mcasts; /*!< Max. number of mcast subscriptions */
|
|
+ int max_buf_pages; /*!< Max. number of pages of NIC buffers */
|
|
+};
|
|
+
|
|
+
|
|
+/*! The state for an instance of the back end driver. */
|
|
+struct netback_accel {
|
|
+ /*! mutex to protect this state */
|
|
+ struct mutex bend_mutex;
|
|
+
|
|
+ /*! Watches on xenstore */
|
|
+ struct xenbus_watch domu_accel_watch;
|
|
+ struct xenbus_watch config_accel_watch;
|
|
+
|
|
+ /*! Pointer to whatever device cookie ties us in to the hypervisor */
|
|
+ void *hdev_data;
|
|
+
|
|
+ /*! FIFO indices. Next page is msg FIFOs */
|
|
+ struct net_accel_shared_page *shared_page;
|
|
+
|
|
+ /*! Defer control message processing */
|
|
+ struct work_struct handle_msg;
|
|
+
|
|
+ /*! Identifies other end VM and interface.*/
|
|
+ int far_end;
|
|
+ int vif_num;
|
|
+
|
|
+ /*! To unmap the shared pages */
|
|
+ void *sh_pages_unmap;
|
|
+
|
|
+ /* Resource tracking */
|
|
+ /*! Limits on H/W & Dom0 resources */
|
|
+ struct netback_accel_limits quotas;
|
|
+
|
|
+ /* Hardware resources */
|
|
+ /*! The H/W type of associated NIC */
|
|
+ enum net_accel_hw_type hw_type;
|
|
+ /*! State of allocation */
|
|
+ int hw_state;
|
|
+ /*! How to set up the acceleration for this hardware */
|
|
+ int (*accel_setup)(struct netback_accel *);
|
|
+ /*! And how to stop it. */
|
|
+ void (*accel_shutdown)(struct netback_accel *);
|
|
+
|
|
+ /*! The physical/real net_dev for this interface */
|
|
+ struct net_device *net_dev;
|
|
+
|
|
+ /*! Magic pointer to locate state in forwarding table */
|
|
+ void *fwd_priv;
|
|
+
|
|
+ /*! Message FIFO */
|
|
+ sh_msg_fifo2 to_domU;
|
|
+ /*! Message FIFO */
|
|
+ sh_msg_fifo2 from_domU;
|
|
+
|
|
+ /*! General notification channel id */
|
|
+ int msg_channel;
|
|
+ /*! General notification channel irq */
|
|
+ int msg_channel_irq;
|
|
+
|
|
+ /*! Event channel id dedicated to network packet interrupts. */
|
|
+ int net_channel;
|
|
+ /*! Event channel irq dedicated to network packets interrupts */
|
|
+ int net_channel_irq;
|
|
+
|
|
+ /*! The MAC address the frontend goes by. */
|
|
+ u8 mac[ETH_ALEN];
|
|
+ /*! Driver name of associated NIC */
|
|
+ char *nicname;
|
|
+
|
|
+ /*! Array of pointers to buffer pages mapped */
|
|
+ grant_handle_t *buffer_maps;
|
|
+ u64 *buffer_addrs;
|
|
+ /*! Index into buffer_maps */
|
|
+ int buffer_maps_index;
|
|
+ /*! Max number of pages that domU is allowed/will request to map */
|
|
+ int max_pages;
|
|
+
|
|
+ /*! Pointer to hardware specific private area */
|
|
+ void *accel_hw_priv;
|
|
+
|
|
+ /*! Wait queue for changes in accelstate. */
|
|
+ wait_queue_head_t state_wait_queue;
|
|
+
|
|
+ /*! Current state of the frontend according to the xenbus
|
|
+ * watch. */
|
|
+ XenbusState frontend_state;
|
|
+
|
|
+ /*! Current state of this backend. */
|
|
+ XenbusState backend_state;
|
|
+
|
|
+ /*! Non-zero if the backend is being removed. */
|
|
+ int removing;
|
|
+
|
|
+ /*! Non-zero if the setup_vnic has been called. */
|
|
+ int vnic_is_setup;
|
|
+
|
|
+#if NETBACK_ACCEL_STATS
|
|
+ struct netback_accel_stats stats;
|
|
+#endif
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+ char *dbfs_dir_name;
|
|
+ struct dentry *dbfs_dir;
|
|
+ struct netback_accel_dbfs dbfs;
|
|
+#endif
|
|
+
|
|
+ /*! List */
|
|
+ struct netback_accel *next_bend;
|
|
+};
|
|
+
|
|
+
|
|
+/*
|
|
+ * Values for netback_accel.hw_state. States of resource allocation
|
|
+ * we can go through
|
|
+ */
|
|
+/*! No hardware has yet been allocated. */
|
|
+#define NETBACK_ACCEL_RES_NONE (0)
|
|
+/*! Hardware has been allocated. */
|
|
+#define NETBACK_ACCEL_RES_ALLOC (1)
|
|
+#define NETBACK_ACCEL_RES_FILTER (2)
|
|
+#define NETBACK_ACCEL_RES_HWINFO (3)
|
|
+
|
|
+/*! Filtering specification. This assumes that for VNIC support we
|
|
+ * will always want wildcard entries, so only specifies the
|
|
+ * destination IP/port
|
|
+ */
|
|
+struct netback_accel_filter_spec {
|
|
+ /*! Internal, used to access efx_vi API */
|
|
+ void *filter_handle;
|
|
+
|
|
+ /*! Destination IP in network order */
|
|
+ u32 destip_be;
|
|
+ /*! Destination port in network order */
|
|
+ u16 destport_be;
|
|
+ /*! Mac address */
|
|
+ u8 mac[ETH_ALEN];
|
|
+ /*! TCP or UDP */
|
|
+ u8 proto;
|
|
+};
|
|
+
|
|
+
|
|
+/**************************************************************************
|
|
+ * From accel.c
|
|
+ **************************************************************************/
|
|
+
|
|
+/*! \brief Start up all the acceleration plugins
|
|
+ *
|
|
+ * \return 0 on success, an errno on failure
|
|
+ */
|
|
+extern int netback_accel_init_accel(void);
|
|
+
|
|
+/*! \brief Shut down all the acceleration plugins
|
|
+ */
|
|
+extern void netback_accel_shutdown_accel(void);
|
|
+
|
|
+
|
|
+/**************************************************************************
|
|
+ * From accel_fwd.c
|
|
+ **************************************************************************/
|
|
+
|
|
+/*! \brief Init the forwarding infrastructure
|
|
+ * \return 0 on success, or -ENOMEM if it couldn't get memory for the
|
|
+ * forward table
|
|
+ */
|
|
+extern int netback_accel_init_fwd(void);
|
|
+
|
|
+/*! \brief Shut down the forwarding and free memory. */
|
|
+extern void netback_accel_shutdown_fwd(void);
|
|
+
|
|
+/*! Initialise each nic port's forwarding table */
|
|
+extern void *netback_accel_init_fwd_port(void);
|
|
+extern void netback_accel_shutdown_fwd_port(void *fwd_priv);
|
|
+
|
|
+/*! \brief Add an entry to the forwarding table.
|
|
+ * \param mac : MAC address, used as hash key
|
|
+ * \param context : value to associate with key (can be NULL, see
|
|
+ * netback_accel_fwd_set_context)
|
|
+ * \return 0 on success, -ENOMEM if table was full and could not be grown
|
|
+ */
|
|
+extern int netback_accel_fwd_add(const __u8 *mac, void *context,
|
|
+ void *fwd_priv);
|
|
+
|
|
+/*! \brief Remove an entry from the forwarding table.
|
|
+ * \param mac : the MAC address to remove
|
|
+ * \return nothing: it is not an error if the mac was not in the table
|
|
+ */
|
|
+extern void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv);
|
|
+
|
|
+/*! \brief Set the context pointer for an existing fwd table entry.
|
|
+ * \param mac : key that is already present in the table
|
|
+ * \param context : new value to associate with key
|
|
+ * \return 0 on success, -ENOENT if mac not present in table.
|
|
+ */
|
|
+extern int netback_accel_fwd_set_context(const __u8 *mac, void *context,
|
|
+ void *fwd_priv);
|
|
+
|
|
+/**************************************************************************
|
|
+ * From accel_msg.c
|
|
+ **************************************************************************/
|
|
+
|
|
+
|
|
+/*! \brief Send the start-of-day message that handshakes with the VNIC
|
|
+ * and tells it its MAC address.
|
|
+ *
|
|
+ * \param bend The back end driver data structure
|
|
+ * \param version The version of communication to use, e.g. NET_ACCEL_MSG_VERSION
|
|
+ */
|
|
+extern void netback_accel_msg_tx_hello(struct netback_accel *bend,
|
|
+ unsigned version);
|
|
+
|
|
+/*! \brief Send a "there's a new local mac address" message
|
|
+ *
|
|
+ * \param bend The back end driver data structure for the vnic to send
|
|
+ * the message to
|
|
+ * \param mac Pointer to the new mac address
|
|
+ */
|
|
+extern void netback_accel_msg_tx_new_localmac(struct netback_accel *bend,
|
|
+ const void *mac);
|
|
+
|
|
+/*! \brief Send a "a mac address that was local has gone away" message
|
|
+ *
|
|
+ * \param bend The back end driver data structure for the vnic to send
|
|
+ * the message to
|
|
+ * \param mac Pointer to the old mac address
|
|
+ */
|
|
+extern void netback_accel_msg_tx_old_localmac(struct netback_accel *bend,
|
|
+ const void *mac);
|
|
+
|
|
+extern void netback_accel_set_interface_state(struct netback_accel *bend,
|
|
+ int up);
|
|
+
|
|
+/*! \brief Process the message queue for a bend that has just
|
|
+ * interrupted.
|
|
+ *
|
|
+ * Demultiplexes an interrupt from the front end driver, taking
|
|
+ * messages from the fifo and taking appropriate action.
|
|
+ *
|
|
+ * \param bend The back end driver data structure
|
|
+ */
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
+extern void netback_accel_msg_rx_handler(struct work_struct *arg);
|
|
+#else
|
|
+extern void netback_accel_msg_rx_handler(void *bend_void);
|
|
+#endif
|
|
+
|
|
+/**************************************************************************
|
|
+ * From accel_xenbus.c
|
|
+ **************************************************************************/
|
|
+/*! List of all the bends currently in existence. */
|
|
+extern struct netback_accel *bend_list;
|
|
+extern struct mutex bend_list_mutex;
|
|
+
|
|
+/*! \brief Probe a new network interface. */
|
|
+extern int netback_accel_probe(struct xenbus_device *dev);
|
|
+
|
|
+/*! \brief Remove a network interface. */
|
|
+extern int netback_accel_remove(struct xenbus_device *dev);
|
|
+
|
|
+/*! \brief Shutdown all accelerator backends */
|
|
+extern void netback_accel_shutdown_bends(void);
|
|
+
|
|
+/*! \brief Initiate the xenbus state teardown handshake */
|
|
+extern void netback_accel_set_closing(struct netback_accel *bend);
|
|
+
|
|
+/**************************************************************************
|
|
+ * From accel_debugfs.c
|
|
+ **************************************************************************/
|
|
+/*! Global statistics */
|
|
+struct netback_accel_global_stats {
|
|
+ /*! Number of TX packets seen through driverlink */
|
|
+ u64 dl_tx_packets;
|
|
+ /*! Number of TX packets seen through driverlink we didn't like */
|
|
+ u64 dl_tx_bad_packets;
|
|
+ /*! Number of RX packets seen through driverlink */
|
|
+ u64 dl_rx_packets;
|
|
+ /*! Number of mac addresses we are forwarding to */
|
|
+ u32 num_fwds;
|
|
+};
|
|
+
|
|
+/*! Debug fs entries for each of the above stats */
|
|
+struct netback_accel_global_dbfs {
|
|
+ struct dentry *dl_tx_packets;
|
|
+ struct dentry *dl_tx_bad_packets;
|
|
+ struct dentry *dl_rx_packets;
|
|
+ struct dentry *num_fwds;
|
|
+};
|
|
+
|
|
+#if NETBACK_ACCEL_STATS
|
|
+extern struct netback_accel_global_stats global_stats;
|
|
+#endif
|
|
+
|
|
+/*! \brief Initialise the debugfs root and populate with global stats */
|
|
+extern void netback_accel_debugfs_init(void);
|
|
+
|
|
+/*! \brief Remove our debugfs root directory */
|
|
+extern void netback_accel_debugfs_fini(void);
|
|
+
|
|
+/*! \brief Add per-bend statistics to debug fs */
|
|
+extern int netback_accel_debugfs_create(struct netback_accel *bend);
|
|
+/*! \brief Remove per-bend statistics from debug fs */
|
|
+extern int netback_accel_debugfs_remove(struct netback_accel *bend);
|
|
+
|
|
+#endif /* NETBACK_ACCEL_H */
|
|
+
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel_debugfs.c 2008-02-26 10:54:11.000000000 +0100
|
|
@@ -0,0 +1,148 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/fs.h>
|
|
+#include <linux/debugfs.h>
|
|
+
|
|
+#include "accel.h"
|
|
+
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+static struct dentry *sfc_debugfs_root = NULL;
|
|
+#endif
|
|
+
|
|
+#if NETBACK_ACCEL_STATS
|
|
+struct netback_accel_global_stats global_stats;
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+static struct netback_accel_global_dbfs global_dbfs;
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Create the "sfc_netback" debugfs directory and, when statistics are
|
|
+ * compiled in, one read-only file per global counter.  Does nothing if
|
|
+ * CONFIG_DEBUG_FS is not set; if the directory cannot be created the
|
|
+ * root stays NULL and no files are added.
|
|
+ */
|
|
+void netback_accel_debugfs_init(void)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+	sfc_debugfs_root = debugfs_create_dir("sfc_netback", NULL);
|
|
+	if (sfc_debugfs_root == NULL)
|
|
+		return;
|
|
+
|
|
+#if NETBACK_ACCEL_STATS
|
|
+	/*
|
|
+	 * global_stats and global_dbfs only exist when
|
|
+	 * NETBACK_ACCEL_STATS is non-zero, so the files that refer to
|
|
+	 * them must be guarded the same way.
|
|
+	 */
|
|
+	global_dbfs.num_fwds = debugfs_create_u32
|
|
+		("num_fwds", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 sfc_debugfs_root, &global_stats.num_fwds);
|
|
+	global_dbfs.dl_tx_packets = debugfs_create_u64
|
|
+		("dl_tx_packets", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 sfc_debugfs_root, &global_stats.dl_tx_packets);
|
|
+	global_dbfs.dl_rx_packets = debugfs_create_u64
|
|
+		("dl_rx_packets", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 sfc_debugfs_root, &global_stats.dl_rx_packets);
|
|
+	global_dbfs.dl_tx_bad_packets = debugfs_create_u64
|
|
+		("dl_tx_bad_packets", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 sfc_debugfs_root, &global_stats.dl_tx_bad_packets);
|
|
+#endif
|
|
+#endif
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Remove the global statistics files (when they were compiled in) and
|
|
+ * then the "sfc_netback" debugfs directory itself.  Does nothing if
|
|
+ * CONFIG_DEBUG_FS is not set.
|
|
+ */
|
|
+void netback_accel_debugfs_fini(void)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+#if NETBACK_ACCEL_STATS
|
|
+	/* global_dbfs is only defined when statistics are enabled */
|
|
+	debugfs_remove(global_dbfs.num_fwds);
|
|
+	debugfs_remove(global_dbfs.dl_tx_packets);
|
|
+	debugfs_remove(global_dbfs.dl_rx_packets);
|
|
+	debugfs_remove(global_dbfs.dl_tx_bad_packets);
|
|
+#endif
|
|
+
|
|
+	debugfs_remove(sfc_debugfs_root);
|
|
+#endif
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Create the per-backend debugfs directory "vif<far_end>.<vif_num>"
|
|
+ * under the driver root and, when statistics are compiled in, one
|
|
+ * read-only file per per-backend counter.
|
|
+ *
|
|
+ * Returns 0 on success (and always when CONFIG_DEBUG_FS is not set),
|
|
+ * -ENOENT if the driver root directory does not exist, or -ENOMEM if
|
|
+ * the name buffer or the directory could not be created.  On failure
|
|
+ * bend->dbfs_dir_name is left NULL.
|
|
+ */
|
|
+int netback_accel_debugfs_create(struct netback_accel *bend)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+	/* Smallest length is 7 ("vif0.0" plus the terminating NUL) */
|
|
+	int length = 7, temp;
|
|
+
|
|
+	if (sfc_debugfs_root == NULL)
|
|
+		return -ENOENT;
|
|
+
|
|
+	/*
|
|
+	 * Work out length of string representation of far_end and
|
|
+	 * vif_num: one extra byte for every decimal digit after the
|
|
+	 * first.  NOTE(review): this assumes both are non-negative.
|
|
+	 */
|
|
+	temp = bend->far_end;
|
|
+	while (temp > 9) {
|
|
+		length++;
|
|
+		temp = temp / 10;
|
|
+	}
|
|
+	temp = bend->vif_num;
|
|
+	while (temp > 9) {
|
|
+		length++;
|
|
+		temp = temp / 10;
|
|
+	}
|
|
+
|
|
+	bend->dbfs_dir_name = kmalloc(length, GFP_KERNEL);
|
|
+	if (bend->dbfs_dir_name == NULL)
|
|
+		return -ENOMEM;
|
|
+	/*
|
|
+	 * Bounded write: should either number be negative the name is
|
|
+	 * truncated rather than written past the end of the buffer.
|
|
+	 */
|
|
+	snprintf(bend->dbfs_dir_name, length, "vif%d.%d",
|
|
+		 bend->far_end, bend->vif_num);
|
|
+
|
|
+	bend->dbfs_dir = debugfs_create_dir(bend->dbfs_dir_name,
|
|
+					    sfc_debugfs_root);
|
|
+	if (bend->dbfs_dir == NULL) {
|
|
+		kfree(bend->dbfs_dir_name);
|
|
+		/*
|
|
+		 * Do not leave a dangling pointer behind:
|
|
+		 * netback_accel_debugfs_remove() frees dbfs_dir_name
|
|
+		 * whenever it is non-NULL.
|
|
+		 */
|
|
+		bend->dbfs_dir_name = NULL;
|
|
+		return -ENOMEM;
|
|
+	}
|
|
+
|
|
+#if NETBACK_ACCEL_STATS
|
|
+	bend->dbfs.evq_wakeups = debugfs_create_u64
|
|
+		("evq_wakeups", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 bend->dbfs_dir, &bend->stats.evq_wakeups);
|
|
+	bend->dbfs.evq_timeouts = debugfs_create_u64
|
|
+		("evq_timeouts", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 bend->dbfs_dir, &bend->stats.evq_timeouts);
|
|
+	bend->dbfs.num_filters = debugfs_create_u32
|
|
+		("num_filters", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 bend->dbfs_dir, &bend->stats.num_filters);
|
|
+	bend->dbfs.num_buffer_pages = debugfs_create_u32
|
|
+		("num_buffer_pages", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+		 bend->dbfs_dir, &bend->stats.num_buffer_pages);
|
|
+#endif
|
|
+#endif
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Tear down everything netback_accel_debugfs_create() set up for this
|
|
+ * backend: the per-backend statistics files (when compiled in), the
|
|
+ * directory, and the directory name buffer.  The pointers are cleared
|
|
+ * afterwards so that calling this again is harmless.  Always returns 0.
|
|
+ */
|
|
+int netback_accel_debugfs_remove(struct netback_accel *bend)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+	if (bend->dbfs_dir != NULL) {
|
|
+#if NETBACK_ACCEL_STATS
|
|
+		debugfs_remove(bend->dbfs.evq_wakeups);
|
|
+		debugfs_remove(bend->dbfs.evq_timeouts);
|
|
+		debugfs_remove(bend->dbfs.num_filters);
|
|
+		debugfs_remove(bend->dbfs.num_buffer_pages);
|
|
+#endif
|
|
+		debugfs_remove(bend->dbfs_dir);
|
|
+		bend->dbfs_dir = NULL;
|
|
+	}
|
|
+
|
|
+	/* kfree(NULL) is a no-op, so no guard is needed */
|
|
+	kfree(bend->dbfs_dir_name);
|
|
+	bend->dbfs_dir_name = NULL;
|
|
+#endif
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel_fwd.c 2008-04-02 12:34:02.000000000 +0200
|
|
@@ -0,0 +1,420 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_cuckoo_hash.h"
|
|
+#include "accel_util.h"
|
|
+#include "accel_solarflare.h"
|
|
+
|
|
+#include "driverlink_api.h"
|
|
+
|
|
+#include <linux/if_arp.h>
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/list.h>
|
|
+
|
|
+/* State stored in the forward table: one slot of port_fwd.fwd_array */
|
|
+struct fwd_struct {
|
|
+ struct list_head link; /* Links in-use entries onto port_fwd.fwd_list */
|
|
+ void * context; /* Handed back as a struct netback_accel * by for_a_vnic(); may be NULL */
|
|
+ __u8 valid; /* Non-zero while this slot is in use */
|
|
+ __u8 mac[ETH_ALEN]; /* MAC address this entry was added for */
|
|
+};
|
|
+
|
|
+/* Max value we support */
|
|
+#define NUM_FWDS_BITS 8
|
|
+#define NUM_FWDS (1 << NUM_FWDS_BITS)
|
|
+#define FWD_MASK (NUM_FWDS - 1)
|
|
+
|
|
+struct port_fwd {
|
|
+ /* Links this table onto the file-scope port_fwds list */
|
|
+ struct list_head link;
|
|
+ /* Hash table mapping a MAC key to an index into fwd_array */
|
|
+ cuckoo_hash_table fwd_hash_table;
|
|
+ /* The array of fwd_structs (NUM_FWDS entries, zero-initialised) */
|
|
+ struct fwd_struct *fwd_array;
|
|
+ /* Linked list of entries in use. */
|
|
+ struct list_head fwd_list;
|
|
+ /* Taken around every table lookup/update in this file. Could do something clever with a reader/writer lock. */
|
|
+ spinlock_t fwd_lock;
|
|
+ /* Make find_free_entry() a bit faster by caching this */
|
|
+ int last_free_index;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * This is unlocked as it's only called from dl probe and remove,
|
|
+ * which are themselves synchronised. Could get rid of it entirely as
|
|
+ * it's never iterated, but useful for debug
|
|
+ */
|
|
+static struct list_head port_fwds;
|
|
+
|
|
+
|
|
+/*
|
|
+ * Find a slot in fwd_array whose valid flag is clear.  The scan starts
|
|
+ * at the cached last_free_index and wraps round once.  Returns the
|
|
+ * slot index (which is cached for the next call) or -ENOMEM if the
|
|
+ * scan comes back to where it started without finding one.
|
|
+ */
|
|
+static int fwd_find_free_entry(struct port_fwd *fwd_set)
|
|
+{
|
|
+	const int start = fwd_set->last_free_index;
|
|
+	int slot = start;
|
|
+
|
|
+	for (;;) {
|
|
+		if (!fwd_set->fwd_array[slot].valid) {
|
|
+			fwd_set->last_free_index = slot;
|
|
+			return slot;
|
|
+		}
|
|
+
|
|
+		slot++;
|
|
+		if (slot >= NUM_FWDS)
|
|
+			slot = 0;
|
|
+
|
|
+		/* Back at the starting point: every slot is taken */
|
|
+		if (slot == start)
|
|
+			break;
|
|
+	}
|
|
+
|
|
+	return -ENOMEM;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Map a MAC address to its fwd_struct via the hash table, or return
|
|
+ * NULL when the address is not present.  Caller should hold table lock.
|
|
+ */
|
|
+static inline struct fwd_struct *fwd_find_entry(const __u8 *mac,
|
|
+						struct port_fwd *fwd_set)
|
|
+{
|
|
+	cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
|
|
+	cuckoo_hash_value slot;
|
|
+	struct fwd_struct *fwd;
|
|
+
|
|
+	if (!cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
|
|
+				(cuckoo_hash_key *)(&key), &slot))
|
|
+		return NULL;
|
|
+
|
|
+	/* The hash value is the entry's index in fwd_array */
|
|
+	fwd = &fwd_set->fwd_array[slot];
|
|
+	DPRINTK_ON(memcmp(fwd->mac, mac, ETH_ALEN) != 0);
|
|
+	return fwd;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Initialise one nic port's forwarding table: allocate the port_fwd,
|
|
+ * its array of NUM_FWDS entries and its hash table, then link it onto
|
|
+ * port_fwds.  Returns the new table as an opaque pointer, or NULL if
|
|
+ * any allocation fails (nothing is left allocated in that case).
|
|
+ */
|
|
+void *netback_accel_init_fwd_port(void)
|
|
+{
|
|
+	struct port_fwd *port;
|
|
+
|
|
+	port = kzalloc(sizeof(*port), GFP_KERNEL);
|
|
+	if (port == NULL)
|
|
+		goto fail;
|
|
+
|
|
+	spin_lock_init(&port->fwd_lock);
|
|
+
|
|
+	port->fwd_array = kzalloc(NUM_FWDS * sizeof(*port->fwd_array),
|
|
+				  GFP_KERNEL);
|
|
+	if (port->fwd_array == NULL)
|
|
+		goto fail_free_port;
|
|
+
|
|
+	if (cuckoo_hash_init(&port->fwd_hash_table, NUM_FWDS_BITS, 8) != 0)
|
|
+		goto fail_free_array;
|
|
+
|
|
+	INIT_LIST_HEAD(&port->fwd_list);
|
|
+
|
|
+	list_add(&port->link, &port_fwds);
|
|
+
|
|
+	return port;
|
|
+
|
|
+ fail_free_array:
|
|
+	kfree(port->fwd_array);
|
|
+ fail_free_port:
|
|
+	kfree(port);
|
|
+ fail:
|
|
+	return NULL;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Undo netback_accel_init_fwd_port(): unlink the table from port_fwds
|
|
+ * and release its hash table, entry array and the table itself.  It is
|
|
+ * a BUG to call this with a NULL table or while entries are still on
|
|
+ * its in-use list.
|
|
+ */
|
|
+void netback_accel_shutdown_fwd_port(void *fwd_priv)
|
|
+{
|
|
+	struct port_fwd *port = fwd_priv;
|
|
+
|
|
+	BUG_ON(fwd_priv == NULL);
|
|
+
|
|
+	/* We are on the global list, so it cannot be empty */
|
|
+	BUG_ON(list_empty(&port_fwds));
|
|
+	list_del(&port->link);
|
|
+
|
|
+	/* All forwarding entries must have been removed by now */
|
|
+	BUG_ON(!list_empty(&port->fwd_list));
|
|
+
|
|
+	cuckoo_hash_destroy(&port->fwd_hash_table);
|
|
+	kfree(port->fwd_array);
|
|
+	kfree(port);
|
|
+}
|
|
+
|
|
+
|
|
+/* Initialise the (initially empty) list of per-port forwarding tables. Always returns 0. */
|
|
+int netback_accel_init_fwd(void)
|
|
+{
|
|
+	INIT_LIST_HEAD(&port_fwds);
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/* Final sanity check at shutdown: every per-port table must already have been removed from port_fwds. */
|
|
+void netback_accel_shutdown_fwd(void)
|
|
+{
|
|
+	BUG_ON(!list_empty(&port_fwds));
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Add a MAC address to a port's forwarding table, associating it with
|
|
+ * the given context.
|
|
+ *
|
|
+ * Returns 0 on success, -ENOMEM if no array slot is free, -EEXIST if
|
|
+ * the address is already in the hash table, or whatever non-zero
|
|
+ * value cuckoo_hash_add() reports.
|
|
+ */
|
|
+int netback_accel_fwd_add(const __u8 *mac, void *context, void *fwd_priv)
|
|
+{
|
|
+	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
|
|
+	cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
|
|
+	unsigned long flags;
|
|
+	int slot, rc;
|
|
+
|
|
+	BUG_ON(fwd_priv == NULL);
|
|
+
|
|
+	DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac));
|
|
+
|
|
+	spin_lock_irqsave(&fwd_set->fwd_lock, flags);
|
|
+
|
|
+	slot = fwd_find_free_entry(fwd_set);
|
|
+	if (slot < 0) {
|
|
+		spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
|
|
+		return slot;
|
|
+	}
|
|
+
|
|
+	/*
|
|
+	 * Shouldn't already be in the table.  rc is only used here as
|
|
+	 * somewhere for the lookup to store a value we do not need.
|
|
+	 */
|
|
+	rc = slot;
|
|
+	if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
|
|
+			       (cuckoo_hash_key *)(&key), &rc) != 0) {
|
|
+		spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
|
|
+		EPRINTK("MAC address " MAC_FMT " already accelerated.\n",
|
|
+			MAC_ARG(mac));
|
|
+		return -EEXIST;
|
|
+	}
|
|
+
|
|
+	rc = cuckoo_hash_add(&fwd_set->fwd_hash_table,
|
|
+			     (cuckoo_hash_key *)(&key), slot, 1);
|
|
+	if (rc == 0) {
|
|
+		/* Hash insert worked, so claim and fill in the slot */
|
|
+		struct fwd_struct *fwd = &fwd_set->fwd_array[slot];
|
|
+
|
|
+		fwd->valid = 1;
|
|
+		fwd->context = context;
|
|
+		memcpy(fwd->mac, mac, ETH_ALEN);
|
|
+		list_add(&fwd->link, &fwd_set->fwd_list);
|
|
+		NETBACK_ACCEL_STATS_OP(global_stats.num_fwds++);
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
|
|
+
|
|
+	/*
|
|
+	 * No need to tell frontend that this mac address is local -
|
|
+	 * it should auto-discover through packets on fastpath what is
|
|
+	 * local and what is not, and just being on same server
|
|
+	 * doesn't make it local (it could be on a different
|
|
+	 * bridge)
|
|
+	 */
|
|
+
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Remove a MAC address from a port's forwarding table.  An address
|
|
+ * that is not in the table is silently ignored.
|
|
+ */
|
|
+void netback_accel_fwd_remove(const __u8 *mac, void *fwd_priv)
|
|
+{
|
|
+	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
|
|
+	cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
|
|
+	struct fwd_struct *fwd;
|
|
+	unsigned long flags;
|
|
+
|
|
+	DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac));
|
|
+
|
|
+	BUG_ON(fwd_priv == NULL);
|
|
+
|
|
+	spin_lock_irqsave(&fwd_set->fwd_lock, flags);
|
|
+
|
|
+	fwd = fwd_find_entry(mac, fwd_set);
|
|
+	if (fwd == NULL)
|
|
+		goto out_unlock;
|
|
+
|
|
+	/* Take the entry off the in-use list and release its slot */
|
|
+	BUG_ON(list_empty(&fwd_set->fwd_list));
|
|
+	list_del(&fwd->link);
|
|
+
|
|
+	fwd->valid = 0;
|
|
+	cuckoo_hash_remove(&fwd_set->fwd_hash_table,
|
|
+			   (cuckoo_hash_key *)(&key));
|
|
+	NETBACK_ACCEL_STATS_OP(global_stats.num_fwds--);
|
|
+
|
|
+ out_unlock:
|
|
+	spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
|
|
+
|
|
+	/*
|
|
+	 * No need to tell frontend that this is no longer present -
|
|
+	 * the frontend is currently only interested in remote
|
|
+	 * addresses and it works these out (mostly) by itself
|
|
+	 */
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Replace the context pointer stored against a MAC address.  Returns 0
|
|
+ * if the address was found, -ENOENT otherwise.
|
|
+ */
|
|
+int netback_accel_fwd_set_context(const __u8 *mac, void *context,
|
|
+				  void *fwd_priv)
|
|
+{
|
|
+	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
|
|
+	struct fwd_struct *fwd;
|
|
+	unsigned long flags;
|
|
+
|
|
+	BUG_ON(fwd_priv == NULL);
|
|
+
|
|
+	spin_lock_irqsave(&fwd_set->fwd_lock, flags);
|
|
+	fwd = fwd_find_entry(mac, fwd_set);
|
|
+	if (fwd != NULL)
|
|
+		fwd->context = context;
|
|
+	spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
|
|
+
|
|
+	/* Only the pointer value is tested here, it is not dereferenced */
|
|
+	return (fwd != NULL) ? 0 : -ENOENT;
|
|
+}
|
|
+
|
|
+
|
|
+/**************************************************************************
|
|
+ * Process a received packet
|
|
+ **************************************************************************/
|
|
+
|
|
+/*
|
|
+ * Look up the MAC address at the start of this skb's link-layer
|
|
+ * header in our forward table.  Returns the context stored for it, or
|
|
+ * NULL if there is no match.  Must be called with appropriate
|
|
+ * fwd_lock already held
|
|
+ */
|
|
+static struct netback_accel *for_a_vnic(struct netback_pkt_buf *skb,
|
|
+					struct port_fwd *fwd_set)
|
|
+{
|
|
+	struct fwd_struct *fwd = fwd_find_entry(skb->mac.raw, fwd_set);
|
|
+
|
|
+	return (fwd != NULL) ? fwd->context : NULL;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Non-zero if the skb's protocol is ARP and the ARP opcode is REPLY.
|
|
+ * The constants are converted with htons() because they are in host
|
|
+ * order and are compared with fields held in network byte order.
|
|
+ */
|
|
+static inline int packet_is_arp_reply(struct sk_buff *skb)
|
|
+{
|
|
+	return skb->protocol == htons(ETH_P_ARP)
|
|
+		&& skb->nh.arph->ar_op == htons(ARPOP_REPLY);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Fill in a filter spec from a packet's headers: IP protocol and
|
|
+ * destination address, the Ethernet source MAC, and the destination
|
|
+ * port of the TCP or UDP header that follows the IP header.  Any
|
|
+ * protocol other than TCP is read as UDP (and reported if it is not).
|
|
+ */
|
|
+static inline void hdr_to_filt(struct ethhdr *ethhdr, struct iphdr *ip,
|
|
+			       struct netback_accel_filter_spec *spec)
|
|
+{
|
|
+	/* ihl counts 32-bit words, so the L4 header is 4 * ihl bytes in */
|
|
+	char *l4_hdr = (char *)ip + 4 * ip->ihl;
|
|
+
|
|
+	spec->proto = ip->protocol;
|
|
+	spec->destip_be = ip->daddr;
|
|
+	memcpy(spec->mac, ethhdr->h_source, ETH_ALEN);
|
|
+
|
|
+	if (ip->protocol == IPPROTO_TCP) {
|
|
+		spec->destport_be = ((struct tcphdr *)l4_hdr)->dest;
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	EPRINTK_ON(ip->protocol != IPPROTO_UDP);
|
|
+	spec->destport_be = ((struct udphdr *)l4_hdr)->dest;
|
|
+}
|
|
+
|
|
+
|
|
+/* Non-zero only for IP packets whose IP protocol is TCP or UDP. */
|
|
+static inline int netback_accel_can_filter(struct netback_pkt_buf *skb)
|
|
+{
|
|
+	if (skb->protocol != htons(ETH_P_IP))
|
|
+		return 0;
|
|
+
|
|
+	return (skb->nh.iph->protocol == IPPROTO_TCP) ||
|
|
+		(skb->nh.iph->protocol == IPPROTO_UDP);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Build a filter spec from the packet's Ethernet and IP headers and
|
|
+ * hand it to netback_accel_filter_check_add() for this backend.
|
|
+ */
|
|
+static inline void netback_accel_filter_packet(struct netback_accel *bend,
|
|
+					       struct netback_pkt_buf *skb)
|
|
+{
|
|
+	struct netback_accel_filter_spec spec;
|
|
+
|
|
+	hdr_to_filt((struct ethhdr *)(skb->mac.raw), skb->nh.iph, &spec);
|
|
+	netback_accel_filter_check_add(bend, &spec);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Inspect a received packet.  Broadcast and multicast packets are
|
|
+ * left alone (they go through the slow path).  For a unicast packet
|
|
+ * whose destination MAC is in this port's forward table, and which is
|
|
+ * TCP or UDP over IP, a filter is checked/added for the owning
|
|
+ * backend.  Nothing is returned and ownership of the packet is never
|
|
+ * taken here.  This is verging on solarflare specific
|
|
+ */
|
|
+void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv)
|
|
+{
|
|
+	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
|
|
+	struct netback_accel *bend;
|
|
+	unsigned long flags;
|
|
+
|
|
+	BUG_ON(fwd_priv == NULL);
|
|
+
|
|
+	/* pass through the slow path by not claiming ownership */
|
|
+	if (is_broadcast_ether_addr(skb->mac.raw) ||
|
|
+	    is_multicast_ether_addr(skb->mac.raw))
|
|
+		return;
|
|
+
|
|
+	/* It is unicast */
|
|
+	spin_lock_irqsave(&fwd_set->fwd_lock, flags);
|
|
+	/* We insert filter to pass it off to a VNIC */
|
|
+	bend = for_a_vnic(skb, fwd_set);
|
|
+	if (bend != NULL && netback_accel_can_filter(skb))
|
|
+		netback_accel_filter_packet(bend, skb);
|
|
+	spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Inspect a transmitted packet.  Only a broadcast ARP reply (a
|
|
+ * gratuitous ARP) is of interest: its source MAC is announced as a
|
|
+ * new local MAC to every backend that has an entry with a non-NULL
|
|
+ * context in this port's forward table.
|
|
+ */
|
|
+void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv)
|
|
+{
|
|
+	struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
|
|
+	struct fwd_struct *fwd;
|
|
+	unsigned long flags;
|
|
+	__u8 *mac;
|
|
+
|
|
+	BUG_ON(fwd_priv == NULL);
|
|
+
|
|
+	if (!is_broadcast_ether_addr(skb->mac.raw) ||
|
|
+	    !packet_is_arp_reply(skb))
|
|
+		return;
|
|
+
|
|
+	/*
|
|
+	 * update our fast path forwarding to reflect this
|
|
+	 * gratuitous ARP; the source MAC follows the destination MAC
|
|
+	 */
|
|
+	mac = skb->mac.raw+ETH_ALEN;
|
|
+
|
|
+	DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n",
|
|
+		__FUNCTION__, MAC_ARG(mac));
|
|
+
|
|
+	spin_lock_irqsave(&fwd_set->fwd_lock, flags);
|
|
+	/*
|
|
+	 * Might not be local, but let's tell them all it is,
|
|
+	 * and they can restore the fastpath if they continue
|
|
+	 * to get packets that way
|
|
+	 */
|
|
+	list_for_each_entry(fwd, &fwd_set->fwd_list, link) {
|
|
+		struct netback_accel *bend = fwd->context;
|
|
+
|
|
+		if (bend == NULL)
|
|
+			continue;
|
|
+		netback_accel_msg_tx_new_localmac(bend, mac);
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel_msg.c 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,392 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <xen/evtchn.h>
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_util.h"
|
|
+#include "accel_solarflare.h"
|
|
+
|
|
+/*
|
|
+ * Send a HELLO to front end to start things off.  The message carries
|
|
+ * the given protocol version and this backend's buffer page quota.
|
|
+ * If no message slot can be obtained the problem is reported and
|
|
+ * nothing is sent.
|
|
+ */
|
|
+void netback_accel_msg_tx_hello(struct netback_accel *bend, unsigned version)
|
|
+{
|
|
+	unsigned long lock_state;
|
|
+	struct net_accel_msg *msg;
|
|
+
|
|
+	msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
|
|
+				       &lock_state);
|
|
+
|
|
+	/* The queue _cannot_ be full, we're the first users. */
|
|
+	EPRINTK_ON(msg == NULL);
|
|
+	if (msg == NULL)
|
|
+		return;
|
|
+
|
|
+	net_accel_msg_init(msg, NET_ACCEL_MSG_HELLO);
|
|
+	msg->u.hello.version = version;
|
|
+	msg->u.hello.max_pages = bend->quotas.max_buf_pages;
|
|
+	VPRINTK("Sending hello to channel %d\n", bend->msg_channel);
|
|
+	net_accel_msg_complete_send_notify(bend->shared_page,
|
|
+					   &bend->to_domU,
|
|
+					   &lock_state,
|
|
+					   bend->msg_channel_irq);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Send a local mac message to vnic.  'type' is stored in the
|
|
+ * message's flags field and 'mac' points at ETH_ALEN bytes of
|
|
+ * address.  If the queue is full the message is dropped with an
|
|
+ * error print.
|
|
+ */
|
|
+static void netback_accel_msg_tx_localmac(struct netback_accel *bend,
|
|
+					  int type, const void *mac)
|
|
+{
|
|
+	unsigned long lock_state;
|
|
+	struct net_accel_msg *msg;
|
|
+
|
|
+	BUG_ON(bend == NULL || mac == NULL);
|
|
+
|
|
+	VPRINTK("Sending local mac message: " MAC_FMT "\n",
|
|
+		MAC_ARG((const char *)mac));
|
|
+
|
|
+	msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
|
|
+				       &lock_state);
|
|
+
|
|
+	if (msg == NULL) {
|
|
+		/*
|
|
+		 * TODO if this happens we may leave a domU
|
|
+		 * fastpathing packets when they should be delivered
|
|
+		 * locally.  Solution is get domU to timeout entries
|
|
+		 * in its fastpath lookup table when it receives no RX
|
|
+		 * traffic
|
|
+		 */
|
|
+		EPRINTK("%s: saw full queue, may need ARP timer to recover\n",
|
|
+			__FUNCTION__);
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	net_accel_msg_init(msg, NET_ACCEL_MSG_LOCALMAC);
|
|
+	msg->u.localmac.flags = type;
|
|
+	memcpy(msg->u.localmac.mac, mac, ETH_ALEN);
|
|
+	net_accel_msg_complete_send_notify(bend->shared_page,
|
|
+					   &bend->to_domU,
|
|
+					   &lock_state,
|
|
+					   bend->msg_channel_irq);
|
|
+}
|
|
+
|
|
+/* Send an add local mac message to vnic: a LOCALMAC message flagged NET_ACCEL_MSG_ADD for the ETH_ALEN-byte address at 'mac' */
|
|
+void netback_accel_msg_tx_new_localmac(struct netback_accel *bend,
|
|
+ const void *mac)
|
|
+{
|
|
+ netback_accel_msg_tx_localmac(bend, NET_ACCEL_MSG_ADD, mac);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Handle a frontend request to map buffer pages.  The page count must
|
|
+ * be a power of two and no larger than NET_ACCEL_MSG_MAX_PAGE_REQ.
|
|
+ *
|
|
+ * The message is turned into a reply in place: on success the REPLY
|
|
+ * bit is set in msg->id and 0 is returned; on any failure both the
|
|
+ * ERROR and REPLY bits are set and a negative error code is returned.
|
|
+ */
|
|
+static int netback_accel_msg_rx_buffer_map(struct netback_accel *bend,
|
|
+					   struct net_accel_msg *msg)
|
|
+{
|
|
+	/* Can only allocate in power of two */
|
|
+	int log2_pages = log2_ge(msg->u.mapbufs.pages, 0);
|
|
+	int rc;
|
|
+
|
|
+	if (msg->u.mapbufs.pages != pow2(log2_pages)) {
|
|
+		EPRINTK("%s: Can only alloc bufs in power of 2 sizes (%d)\n",
|
|
+			__FUNCTION__, msg->u.mapbufs.pages);
|
|
+		rc = -EINVAL;
|
|
+	} else if (msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ) {
|
|
+		/*
|
|
+		 * Sanity.  Assumes NET_ACCEL_MSG_MAX_PAGE_REQ is same for
|
|
+		 * both directions/domains
|
|
+		 */
|
|
+		EPRINTK("%s: too many pages in a single message: %d %d\n",
|
|
+			__FUNCTION__, msg->u.mapbufs.pages,
|
|
+			NET_ACCEL_MSG_MAX_PAGE_REQ);
|
|
+		rc = -EINVAL;
|
|
+	} else {
|
|
+		rc = netback_accel_add_buffers(bend, msg->u.mapbufs.pages,
|
|
+					       log2_pages,
|
|
+					       msg->u.mapbufs.grants,
|
|
+					       &msg->u.mapbufs.buf);
|
|
+		if (rc >= 0) {
|
|
+			msg->id |= NET_ACCEL_MSG_REPLY;
|
|
+			return 0;
|
|
+		}
|
|
+	}
|
|
+
|
|
+	/* Common failure exit: flag the reply as an error */
|
|
+	EPRINTK("%s: err_out\n", __FUNCTION__);
|
|
+	msg->id |= NET_ACCEL_MSG_ERROR | NET_ACCEL_MSG_REPLY;
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Hint from frontend that one of our filters is out of date.  Only
|
|
+ * messages with the REMOVE flag are acted on: the filter described by
|
|
+ * the message is removed.  Always returns 0.
|
|
+ */
|
|
+static int netback_accel_process_fastpath(struct netback_accel *bend,
|
|
+					  struct net_accel_msg *msg)
|
|
+{
|
|
+	struct netback_accel_filter_spec spec;
|
|
+
|
|
+	if (!(msg->u.fastpath.flags & NET_ACCEL_MSG_REMOVE))
|
|
+		return 0;
|
|
+
|
|
+	/*
|
|
+	 * Would be nice to BUG() this but would leave us
|
|
+	 * vulnerable to naughty frontend
|
|
+	 */
|
|
+	EPRINTK_ON(msg->u.fastpath.flags & NET_ACCEL_MSG_ADD);
|
|
+
|
|
+	memcpy(spec.mac, msg->u.fastpath.mac, ETH_ALEN);
|
|
+	spec.destport_be = msg->u.fastpath.port;
|
|
+	spec.destip_be = msg->u.fastpath.ip;
|
|
+	spec.proto = msg->u.fastpath.proto;
|
|
+
|
|
+	netback_accel_filter_remove_spec(bend, &spec);
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Flow control for message queues: set the QUEUEUNOTFULL flag in the
|
|
+ * shared page and, only if it was not already set, signal the remote
|
|
+ * end through the message channel irq.
|
|
+ */
|
|
+inline void set_queue_not_full(struct netback_accel *bend)
|
|
+{
|
|
+	unsigned long *aflags = (unsigned long *)&bend->shared_page->aflags;
|
|
+
|
|
+	if (test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B, aflags)) {
|
|
+		VPRINTK("queue not full bit already set, not signalling\n");
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	notify_remote_via_irq(bend->msg_channel_irq);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Flow control for message queues: set the QUEUE0FULL flag in the
|
|
+ * shared page and, only if it was not already set, signal the remote
|
|
+ * end through the message channel irq.
|
|
+ */
|
|
+inline void set_queue_full(struct netback_accel *bend)
|
|
+{
|
|
+	unsigned long *aflags = (unsigned long *)&bend->shared_page->aflags;
|
|
+
|
|
+	if (test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B, aflags)) {
|
|
+		VPRINTK("queue full bit already set, not signalling\n");
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	notify_remote_via_irq(bend->msg_channel_irq);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Publish the net device up/down state in the shared page, then set
|
|
+ * the NETUPDOWN flag and, only if it was not already set, signal the
|
|
+ * remote end through the message channel irq.
|
|
+ */
|
|
+void netback_accel_set_interface_state(struct netback_accel *bend, int up)
|
|
+{
|
|
+	unsigned long *aflags;
|
|
+
|
|
+	/* The state is written before the flag that announces it */
|
|
+	bend->shared_page->net_dev_up = up;
|
|
+
|
|
+	aflags = (unsigned long *)&bend->shared_page->aflags;
|
|
+	if (test_and_set_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B, aflags)) {
|
|
+		VPRINTK("interface up/down bit already set, not signalling\n");
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	notify_remote_via_irq(bend->msg_channel_irq);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Decide whether a peer's differing protocol version is acceptable.
|
|
+ * Returns 0 if another HELLO should be sent using that version; at
|
|
+ * present every mismatching version is refused with -EPROTO.
|
|
+ */
|
|
+static int check_rx_hello_version(unsigned version)
|
|
+{
|
|
+	/* Should only happen if there's been a version mismatch */
|
|
+	BUG_ON(version == NET_ACCEL_MSG_VERSION);
|
|
+
|
|
+	/*
|
|
+	 * A newer protocol we must refuse.  For an older one we have
|
|
+	 * discretion to accept if we wish; for now however, just
|
|
+	 * reject that as well.
|
|
+	 */
|
|
+	if (version != NET_ACCEL_MSG_VERSION)
|
|
+		return -EPROTO;
|
|
+
|
|
+	return -EINVAL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Dispatch one message received from the frontend, keyed on msg->id:
|
|
+ * a HELLO reply, a HELLO error reply, a MAPBUF request or a FASTPATH
|
|
+ * request.  Returns 0 on success or a negative error code; -EPROTO is
|
|
+ * returned for unknown message ids and for messages that arrive while
|
|
+ * bend->hw_state is in the wrong state for them.
|
|
+ */
|
|
+static int process_rx_msg(struct netback_accel *bend,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ int err = 0;
|
|
+
|
|
+ switch (msg->id) {
|
|
+ case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO:
|
|
+ /* Reply to a HELLO; mark ourselves as connected */
|
|
+ DPRINTK("got Hello reply, version %.8x\n",
|
|
+ msg->u.hello.version);
|
|
+
|
|
+ /*
|
|
+ * Check that we've not successfully done this
|
|
+ * already. NB no check at the moment that this reply
|
|
+ * comes after we've actually sent a HELLO as that's
|
|
+ * not possible with the current code structure
|
|
+ */
|
|
+ if (bend->hw_state != NETBACK_ACCEL_RES_NONE)
|
|
+ return -EPROTO;
|
|
+
|
|
+ /* Store max_pages for accel_setup, clamped to our quota */
|
|
+ if (msg->u.hello.max_pages > bend->quotas.max_buf_pages) {
|
|
+ EPRINTK("More pages than quota allows (%d > %d)\n",
|
|
+ msg->u.hello.max_pages,
|
|
+ bend->quotas.max_buf_pages);
|
|
+ /* Force it down to the quota */
|
|
+ msg->u.hello.max_pages = bend->quotas.max_buf_pages;
|
|
+ }
|
|
+ bend->max_pages = msg->u.hello.max_pages;
|
|
+
|
|
+ /* Set up the hardware visible to the other end */
|
|
+ err = bend->accel_setup(bend);
|
|
+ if (err) {
|
|
+ /* This is fatal; err is also returned to the caller */
|
|
+ DPRINTK("Hello gave accel_setup error %d\n", err);
|
|
+ netback_accel_set_closing(bend);
|
|
+ } else {
|
|
+ /*
|
|
+ * Now add the context so that packet
|
|
+ * forwarding will commence
|
|
+ */
|
|
+ netback_accel_fwd_set_context(bend->mac, bend,
|
|
+ bend->fwd_priv);
|
|
+ }
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_ERROR:
|
|
+ EPRINTK("got Hello error, versions us:%.8x them:%.8x\n",
|
|
+ NET_ACCEL_MSG_VERSION, msg->u.hello.version);
|
|
+
|
|
+ if (bend->hw_state != NETBACK_ACCEL_RES_NONE)
|
|
+ return -EPROTO;
|
|
+
|
|
+ if (msg->u.hello.version != NET_ACCEL_MSG_VERSION) {
|
|
+ /* Error is due to version mismatch */
|
|
+ err = check_rx_hello_version(msg->u.hello.version);
|
|
+ if (err == 0) {
|
|
+ /*
|
|
+ * It's OK to be compatible, send
|
|
+ * another hello with compatible version
|
|
+ */
|
|
+ netback_accel_msg_tx_hello
|
|
+ (bend, msg->u.hello.version);
|
|
+ } else {
|
|
+ /*
|
|
+ * Tell frontend that we're not going to
|
|
+ * send another HELLO by going to Closing.
|
|
+ */
|
|
+ netback_accel_set_closing(bend);
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_MAPBUF:
|
|
+ VPRINTK("Got mapped buffers request %d\n",
|
|
+ msg->u.mapbufs.reqid);
|
|
+
|
|
+ if (bend->hw_state == NETBACK_ACCEL_RES_NONE)
|
|
+ return -EPROTO;
|
|
+
|
|
+ /*
|
|
+ * Frontend wants a buffer table entry for the
|
|
+ * supplied pages. The helper rewrites msg into the
|
|
+ * reply (success or error), which is then sent back
|
|
+ */
|
|
+ err = netback_accel_msg_rx_buffer_map(bend, msg);
|
|
+ if (net_accel_msg_reply_notify(bend->shared_page,
|
|
+ bend->msg_channel_irq,
|
|
+ &bend->to_domU, msg)) {
|
|
+ /*
|
|
+ * This is fatal as we can't tell the frontend
|
|
+ * about the problem through the message
|
|
+ * queue, and so would otherwise stalemate
|
|
+ */
|
|
+ netback_accel_set_closing(bend);
|
|
+ }
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_FASTPATH:
|
|
+ DPRINTK("Got fastpath request\n");
|
|
+
|
|
+ if (bend->hw_state == NETBACK_ACCEL_RES_NONE)
|
|
+ return -EPROTO;
|
|
+
|
|
+ err = netback_accel_process_fastpath(bend, msg);
|
|
+ break;
|
|
+ default:
|
|
+ EPRINTK("Huh? Message code is %x\n", msg->id);
|
|
+ err = -EPROTO;
|
|
+ break;
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Demultiplex an IRQ from the frontend driver.
|
|
+ *
|
|
+ * Work-queue handler (its argument type depends on the kernel
|
|
+ * version).  Under bend_mutex it acknowledges the flow-control flags
|
|
+ * addressed to dom0 in the shared page, then receives and processes
|
|
+ * messages until the receive call reports non-zero or processing one
|
|
+ * fails.  If the frontend had flagged its queue as full, it is told
|
|
+ * afterwards that the queue is no longer full.
|
|
+ */
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
+void netback_accel_msg_rx_handler(struct work_struct *arg)
|
|
+#else
|
|
+void netback_accel_msg_rx_handler(void *bend_void)
|
|
+#endif
|
|
+{
|
|
+ struct net_accel_msg msg;
|
|
+ int err, queue_was_full = 0;
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
+ struct netback_accel *bend =
|
|
+ container_of(arg, struct netback_accel, handle_msg);
|
|
+#else
|
|
+ struct netback_accel *bend = (struct netback_accel *)bend_void;
|
|
+#endif
|
|
+
|
|
+ mutex_lock(&bend->bend_mutex);
|
|
+
|
|
+ /*
|
|
+ * This happens when the shared pages have been unmapped, but
|
|
+ * the workqueue not flushed yet
|
|
+ */
|
|
+ if (bend->shared_page == NULL)
|
|
+ goto done;
|
|
+
|
|
+ if ((bend->shared_page->aflags &
|
|
+ NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK) != 0) {
|
|
+ if (bend->shared_page->aflags &
|
|
+ NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL) {
|
|
+ /* We've been told there may now be space. */
|
|
+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
|
|
+ (unsigned long *)&bend->shared_page->aflags);
|
|
+ }
|
|
+
|
|
+ if (bend->shared_page->aflags &
|
|
+ NET_ACCEL_MSG_AFLAGS_QUEUEUFULL) {
|
|
+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
|
|
+ (unsigned long *)&bend->shared_page->aflags);
|
|
+ queue_was_full = 1; /* remembered so we can signal "not full" once drained */
|
|
+ }
|
|
+ }
|
|
+
|
|
+ while ((err = net_accel_msg_recv(bend->shared_page, &bend->from_domU,
|
|
+ &msg)) == 0) {
|
|
+ err = process_rx_msg(bend, &msg);
|
|
+
|
|
+ if (err != 0) {
|
|
+ EPRINTK("%s: Error %d\n", __FUNCTION__, err);
|
|
+ goto err; /* stop at the first message that fails */
|
|
+ }
|
|
+ }
|
|
+
|
|
+ err: /* label; distinct from the int variable of the same name */
|
|
+ /* There will be space now if we can make any. */
|
|
+ if (queue_was_full)
|
|
+ set_queue_not_full(bend);
|
|
+ done:
|
|
+ mutex_unlock(&bend->bend_mutex);
|
|
+
|
|
+ return;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel_solarflare.c 2010-01-18 15:23:12.000000000 +0100
|
|
@@ -0,0 +1,1293 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_solarflare.h"
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_util.h"
|
|
+
|
|
+#include "accel_cuckoo_hash.h"
|
|
+
|
|
+#include "ci/driver/resource/efx_vi.h"
|
|
+
|
|
+#include "ci/efrm/nic_table.h"
|
|
+#include "ci/efhw/public.h"
|
|
+
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/mutex.h>
|
|
+
|
|
+#include "driverlink_api.h"
|
|
+
|
|
+#define SF_XEN_RX_USR_BUF_SIZE 2048
|
|
+
|
|
+/*
+ * Per-backend hardware state for this accelerator.  Allocated by
+ * alloc_page_state() and stored in netback_accel::accel_hw_priv.
+ */
+struct falcon_bend_accel_priv {
+	/*! Virtual interface handle passed to the efx_vi_*() calls */
+	struct efx_vi_state *efx_vih;
+
+	/*! Array of pointers to dma_map state, used so VNIC can
+	 *  request their removal in a single message
+	 */
+	struct efx_vi_dma_map_state **dma_maps;
+	/*! Index into dma_maps */
+	int dma_maps_index;
+
+	/*! Serialises access to filters */
+	spinlock_t filter_lock;
+	/*! Bitmap of which filters are free */
+	unsigned long free_filters;
+	/*! Used for index normalisation */
+	u32 filter_idx_mask;
+	/*! Array of filter specs, one per filter slot */
+	struct netback_accel_filter_spec *fspecs;
+	/*! Hash table of the filters currently installed */
+	cuckoo_hash_table filter_hash_table;
+
+	/*! Grant references for the pages shared with the frontend */
+	u32 txdmaq_gnt;
+	u32 rxdmaq_gnt;
+	u32 doorbell_gnt;
+	u32 evq_rptr_gnt;
+	u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES];
+	/*! Number of entries of evq_mem_gnts that are in use */
+	u32 evq_npages;
+};
|
|
+
|
|
+/* Forward declaration */
|
|
+static int netback_accel_filter_init(struct netback_accel *);
|
|
+static void netback_accel_filter_shutdown(struct netback_accel *);
|
|
+
|
|
+/**************************************************************************
|
|
+ *
|
|
+ * Driverlink stuff
|
|
+ *
|
|
+ **************************************************************************/
|
|
+
|
|
+/* One entry per NIC port that driverlink has probed us with */
+struct driverlink_port {
+	/* Linkage on the global dl_ports list */
+	struct list_head link;
+	/* Hardware type derived from the silicon revision string */
+	enum net_accel_hw_type type;
+	/* Net device of the port; its name identifies the accelerator */
+	struct net_device *net_dev;
+	/* Driverlink device this port was probed with */
+	struct efx_dl_device *efx_dl_dev;
+	/* Forwarding state returned by netback_accel_init_fwd_port() */
+	void *fwd_priv;
+};
|
|
+
|
|
+/* All probed driverlink ports; initialised in netback_accel_sf_init() */
+static struct list_head dl_ports;
+
+/* This mutex protects global state, such as the dl_ports list */
+DEFINE_MUTEX(accel_mutex);
+
+/* Non-zero once the driverlink driver has been registered successfully */
+static int init_done = 0;
|
|
+
|
|
+/* The DL callbacks */
|
|
+
|
|
+
|
|
+/*
+ * Driverlink TX callback: hand each transmitted skb that carries a MAC
+ * header to the forwarding code.  The packet is never vetoed.
+ */
+#if defined(EFX_USE_FASTCALL)
+static enum efx_veto fastcall
+#else
+static enum efx_veto
+#endif
+bend_dl_tx_packet(struct efx_dl_device *efx_dl_dev,
+		  struct sk_buff *skb)
+{
+	struct driverlink_port *dl_port = efx_dl_dev->priv;
+
+	BUG_ON(dl_port == NULL);
+
+	NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++);
+
+	/* Without a MAC header there is nothing to learn from the packet */
+	if (skb->mac.raw == NULL) {
+		DPRINTK("Ignoring packet with missing mac address\n");
+		NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_bad_packets++);
+		return EFX_ALLOW_PACKET;
+	}
+
+	netback_accel_tx_packet(skb, dl_port->fwd_priv);
+	return EFX_ALLOW_PACKET;
+}
|
|
+
|
|
+/*
+ * Driverlink RX callback: describe the raw receive buffer as a
+ * netback_pkt_buf and pass it to the forwarding code.  The packet is
+ * never vetoed.
+ *
+ * NOTE(review): pkt_len is not consulted, so the buffer is assumed to
+ * hold at least a complete Ethernet header - confirm with the caller.
+ */
+#if defined(EFX_USE_FASTCALL)
+static enum efx_veto fastcall
+#else
+static enum efx_veto
+#endif
+bend_dl_rx_packet(struct efx_dl_device *efx_dl_dev,
+		  const char *pkt_buf, int pkt_len)
+{
+	struct driverlink_port *dl_port = efx_dl_dev->priv;
+	struct ethhdr *eth_hdr = (struct ethhdr *)pkt_buf;
+	struct netback_pkt_buf pkt;
+
+	BUG_ON(dl_port == NULL);
+
+	pkt.mac.raw = (char *)pkt_buf;
+	pkt.nh.raw = (char *)pkt_buf + ETH_HLEN;
+	pkt.protocol = eth_hdr->h_proto;
+
+	NETBACK_ACCEL_STATS_OP(global_stats.dl_rx_packets++);
+	netback_accel_rx_packet(&pkt, dl_port->fwd_priv);
+
+	return EFX_ALLOW_PACKET;
+}
|
|
+
|
|
+
|
|
+/*
+ * Callbacks we'd like to get from the netdriver through driverlink.
+ * Registered per device in bend_dl_probe() and unregistered again in
+ * bend_dl_remove().
+ */
+struct efx_dl_callbacks bend_dl_callbacks =
+	{
+		.tx_packet = bend_dl_tx_packet,
+		.rx_packet = bend_dl_rx_packet,
+	};
|
|
+
|
|
+
|
|
+/*
+ * Hooks handed to netback_connect_accelerator().  The initialiser is
+ * positional, so the order must match struct netback_accel_hooks:
+ * presumably owner module, probe hook, remove hook - verify against
+ * the structure definition.
+ */
+static struct netback_accel_hooks accel_hooks = {
+	THIS_MODULE,
+	&netback_accel_probe,
+	&netback_accel_remove
+};
|
|
+
|
|
+
|
|
+/*
+ * Driver link probe - register our callbacks.
+ *
+ * Maps the silicon revision string to a hardware type, allocates a
+ * driverlink_port for the device, sets up forwarding, registers the
+ * packet callbacks, adds the port to dl_ports and finally connects the
+ * accelerator to netback under the net device's name.
+ *
+ * Returns 0 on success.  On failure everything done so far is undone
+ * and the failing step's error code is returned.
+ */
+static int bend_dl_probe(struct efx_dl_device *efx_dl_dev,
+			 const struct net_device *net_dev,
+			 const struct efx_dl_device_info *dev_info,
+			 const char* silicon_rev)
+{
+	struct driverlink_port *port;
+	enum net_accel_hw_type type;
+	int rc;
+
+	DPRINTK("%s: %s\n", __FUNCTION__, silicon_rev);
+
+	if (!strcmp(silicon_rev, "falcon/a1")) {
+		type = NET_ACCEL_MSG_HWTYPE_FALCON_A;
+	} else if (!strcmp(silicon_rev, "falcon/b0")) {
+		type = NET_ACCEL_MSG_HWTYPE_FALCON_B;
+	} else if (!strcmp(silicon_rev, "siena/a0")) {
+		type = NET_ACCEL_MSG_HWTYPE_SIENA_A;
+	} else {
+		EPRINTK("%s: unsupported silicon %s\n", __FUNCTION__,
+			silicon_rev);
+		return -EINVAL;
+	}
+
+	port = kmalloc(sizeof(struct driverlink_port), GFP_KERNEL);
+	if (port == NULL) {
+		EPRINTK("%s: no memory for dl probe\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	/* Tie the port and the driverlink device to each other */
+	port->efx_dl_dev = efx_dl_dev;
+	efx_dl_dev->priv = port;
+
+	port->fwd_priv = netback_accel_init_fwd_port();
+	if (port->fwd_priv == NULL) {
+		EPRINTK("%s: failed to set up forwarding for port\n",
+			__FUNCTION__);
+		rc = -ENOMEM;
+		goto out_free_port;
+	}
+
+	rc = efx_dl_register_callbacks(efx_dl_dev, &bend_dl_callbacks);
+	if (rc != 0) {
+		EPRINTK("%s: register_callbacks failed\n", __FUNCTION__);
+		goto out_shutdown_fwd;
+	}
+
+	port->type = type;
+	port->net_dev = (struct net_device *)net_dev;
+
+	mutex_lock(&accel_mutex);
+	list_add(&port->link, &dl_ports);
+	mutex_unlock(&accel_mutex);
+
+	rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 0,
+					 port->net_dev->name, &accel_hooks);
+
+	/*
+	 * Negative and positive results are both treated as a version
+	 * mismatch.  In future may want to add backwards compatibility
+	 * and accept certain subsets of previous versions (rc > 0).
+	 */
+	if (rc != 0) {
+		EPRINTK("Xen netback accelerator version mismatch\n");
+		goto out_unlink;
+	}
+
+	return 0;
+
+ out_unlink:
+	mutex_lock(&accel_mutex);
+	list_del(&port->link);
+	mutex_unlock(&accel_mutex);
+
+	efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks);
+ out_shutdown_fwd:
+	netback_accel_shutdown_fwd_port(port->fwd_priv);
+ out_free_port:
+	efx_dl_dev->priv = NULL;
+	kfree(port);
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Driverlink remove: disconnect the accelerator from netback, take the
+ * port off dl_ports, then unregister the packet callbacks and release
+ * the forwarding state and the port itself.
+ */
+static void bend_dl_remove(struct efx_dl_device *efx_dl_dev)
+{
+	struct driverlink_port *dl_port;
+
+	DPRINTK("Unregistering driverlink callbacks.\n");
+
+	mutex_lock(&accel_mutex);
+
+	dl_port = efx_dl_dev->priv;
+
+	/* A device being removed must have been probed successfully */
+	BUG_ON(list_empty(&dl_ports));
+	BUG_ON(dl_port == NULL);
+	BUG_ON(dl_port->efx_dl_dev != efx_dl_dev);
+
+	netback_disconnect_accelerator(0, dl_port->net_dev->name);
+	list_del(&dl_port->link);
+
+	mutex_unlock(&accel_mutex);
+
+	efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks);
+	netback_accel_shutdown_fwd_port(dl_port->fwd_priv);
+
+	efx_dl_dev->priv = NULL;
+	kfree(dl_port);
+}
|
|
+
|
|
+
|
|
+/*
+ * Driverlink reset-suspend: disconnect the accelerator from netback
+ * while the device is being reset.  The port stays on dl_ports.
+ */
+static void bend_dl_reset_suspend(struct efx_dl_device *efx_dl_dev)
+{
+	struct driverlink_port *dl_port;
+
+	DPRINTK("Driverlink reset suspend.\n");
+
+	mutex_lock(&accel_mutex);
+
+	dl_port = efx_dl_dev->priv;
+
+	BUG_ON(list_empty(&dl_ports));
+	BUG_ON(dl_port == NULL);
+	BUG_ON(dl_port->efx_dl_dev != efx_dl_dev);
+
+	netback_disconnect_accelerator(0, dl_port->net_dev->name);
+
+	mutex_unlock(&accel_mutex);
+}
|
|
+
|
|
+
|
|
+/*
+ * Driverlink reset-resume: if the reset succeeded (ok != 0), reconnect
+ * the accelerator to netback.  Should the reconnect fail, the port is
+ * torn down in the same way as in bend_dl_remove().
+ */
+static void bend_dl_reset_resume(struct efx_dl_device *efx_dl_dev, int ok)
+{
+	struct driverlink_port *dl_port;
+	int rc;
+
+	DPRINTK("Driverlink reset resume.\n");
+
+	if (!ok)
+		return;
+
+	dl_port = efx_dl_dev->priv;
+
+	BUG_ON(list_empty(&dl_ports));
+	BUG_ON(dl_port == NULL);
+	BUG_ON(dl_port->efx_dl_dev != efx_dl_dev);
+
+	rc = netback_connect_accelerator(NETBACK_ACCEL_VERSION, 0,
+					 dl_port->net_dev->name,
+					 &accel_hooks);
+	if (rc == 0)
+		return;
+
+	EPRINTK("Xen netback accelerator version mismatch\n");
+
+	/* Reconnect failed: give up on this port altogether */
+	mutex_lock(&accel_mutex);
+	list_del(&dl_port->link);
+	mutex_unlock(&accel_mutex);
+
+	efx_dl_unregister_callbacks(efx_dl_dev, &bend_dl_callbacks);
+	netback_accel_shutdown_fwd_port(dl_port->fwd_priv);
+
+	efx_dl_dev->priv = NULL;
+	kfree(dl_port);
+}
|
|
+
|
|
+
|
|
+/*
+ * Driverlink driver description, registered in netback_accel_sf_init()
+ * and unregistered in netback_accel_sf_shutdown().
+ */
+static struct efx_dl_driver bend_dl_driver =
+	{
+		.name = "SFC Xen backend",
+		.probe = bend_dl_probe,
+		.remove = bend_dl_remove,
+		.reset_suspend = bend_dl_reset_suspend,
+		.reset_resume = bend_dl_reset_resume
+	};
|
|
+
|
|
+
|
|
+/*
+ * Register with driverlink and, on success, set the RX user buffer
+ * size on every NIC.  Returns the result of efx_dl_register_driver().
+ */
+int netback_accel_sf_init(void)
+{
+	struct efhw_nic *nic;
+	int nic_i;
+	int rc;
+
+	INIT_LIST_HEAD(&dl_ports);
+
+	rc = efx_dl_register_driver(&bend_dl_driver);
+
+	/* If we couldn't find the NET driver, give up */
+	if (rc == -ENOENT)
+		return rc;
+
+	/* Any other failure: record that there is nothing to undo */
+	if (rc != 0) {
+		init_done = 0;
+		return rc;
+	}
+
+	EFRM_FOR_EACH_NIC(nic_i, nic)
+		falcon_nic_set_rx_usr_buf_size(nic,
+					       SF_XEN_RX_USR_BUF_SIZE);
+
+	init_done = 1;
+	return rc;
+}
|
|
+
|
|
+
|
|
+/* Undo netback_accel_sf_init(); does nothing if that did not succeed. */
+void netback_accel_sf_shutdown(void)
+{
+	if (init_done) {
+		DPRINTK("Unregistering driverlink driver\n");
+
+		/*
+		 * This will trigger removal callbacks for all the
+		 * devices, which will unregister their callbacks,
+		 * disconnect from netfront, etc.
+		 */
+		efx_dl_unregister_driver(&bend_dl_driver);
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * Look up the driverlink port whose net device is named bend->nicname
+ * and copy its hardware type, setup/shutdown hooks, forwarding state
+ * and net device into bend.
+ *
+ * Returns 0 if a port matched, -ENOENT otherwise.
+ */
+int netback_accel_sf_hwtype(struct netback_accel *bend)
+{
+	struct driverlink_port *port;
+	int rc = -ENOENT;
+
+	mutex_lock(&accel_mutex);
+
+	list_for_each_entry(port, &dl_ports, link) {
+		if (strcmp(bend->nicname, port->net_dev->name) != 0)
+			continue;
+
+		bend->hw_type = port->type;
+		bend->accel_setup = netback_accel_setup_vnic_hw;
+		bend->accel_shutdown = netback_accel_shutdown_vnic_hw;
+		bend->fwd_priv = port->fwd_priv;
+		bend->net_dev = port->net_dev;
+		rc = 0;
+		break;
+	}
+
+	mutex_unlock(&accel_mutex);
+
+	if (rc != 0)
+		EPRINTK("Failed to identify backend device '%s' with a NIC\n",
+			bend->nicname);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/****************************************************************************
|
|
+ * Resource management code
|
|
+ ***************************************************************************/
|
|
+
|
|
+/*
+ * Allocate the per-backend hardware state (bend->accel_hw_priv) and the
+ * arrays that track mapped buffer pages.
+ *
+ * max_pages must lie in 0..bend->quotas.max_buf_pages.  dma_maps gets
+ * one slot per NET_ACCEL_MSG_MAX_PAGE_REQ pages; buffer_maps and
+ * buffer_addrs get one slot per page.
+ *
+ * Returns 0 on success, -EINVAL for a bad max_pages or -ENOMEM.  On
+ * failure nothing stays allocated and bend holds no stale pointers.
+ */
+static int alloc_page_state(struct netback_accel *bend, int max_pages)
+{
+	struct falcon_bend_accel_priv *accel_hw_priv;
+
+	if (max_pages < 0 || max_pages > bend->quotas.max_buf_pages) {
+		EPRINTK("%s: invalid max_pages: %d\n", __FUNCTION__, max_pages);
+		return -EINVAL;
+	}
+
+	accel_hw_priv = kzalloc(sizeof(*accel_hw_priv), GFP_KERNEL);
+	if (accel_hw_priv == NULL) {
+		EPRINTK("%s: no memory for accel_hw_priv\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	/* kcalloc() checks the count * size multiplication for overflow */
+	accel_hw_priv->dma_maps =
+		kcalloc(max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ,
+			sizeof(*accel_hw_priv->dma_maps), GFP_KERNEL);
+	if (accel_hw_priv->dma_maps == NULL) {
+		EPRINTK("%s: no memory for dma_maps\n", __FUNCTION__);
+		goto fail_priv;
+	}
+
+	bend->buffer_maps = kcalloc(max_pages, sizeof(struct vm_struct *),
+				    GFP_KERNEL);
+	if (bend->buffer_maps == NULL) {
+		EPRINTK("%s: no memory for buffer_maps\n", __FUNCTION__);
+		goto fail_dma_maps;
+	}
+
+	bend->buffer_addrs = kcalloc(max_pages, sizeof(u64), GFP_KERNEL);
+	if (bend->buffer_addrs == NULL) {
+		EPRINTK("%s: no memory for buffer_addrs\n", __FUNCTION__);
+		goto fail_buffer_maps;
+	}
+
+	bend->accel_hw_priv = accel_hw_priv;
+
+	return 0;
+
+ fail_buffer_maps:
+	kfree(bend->buffer_maps);
+	/* Don't leave a dangling pointer behind in bend */
+	bend->buffer_maps = NULL;
+ fail_dma_maps:
+	kfree(accel_hw_priv->dma_maps);
+ fail_priv:
+	kfree(accel_hw_priv);
+	return -ENOMEM;
+}
|
|
+
|
|
+
|
|
+/*
+ * Free everything alloc_page_state() allocated.  Does nothing if
+ * bend->accel_hw_priv is NULL.  All freed pointers in bend are reset to
+ * NULL so that no stale reference survives.  Always returns 0.
+ */
+static int free_page_state(struct netback_accel *bend)
+{
+	struct falcon_bend_accel_priv *accel_hw_priv;
+
+	DPRINTK("%s: %p\n", __FUNCTION__, bend);
+
+	accel_hw_priv = bend->accel_hw_priv;
+	if (accel_hw_priv == NULL)
+		return 0;
+
+	kfree(accel_hw_priv->dma_maps);
+
+	kfree(bend->buffer_maps);
+	bend->buffer_maps = NULL;
+
+	kfree(bend->buffer_addrs);
+	bend->buffer_addrs = NULL;
+
+	kfree(accel_hw_priv);
+	bend->accel_hw_priv = NULL;
+	bend->max_pages = 0;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * The timeout event callback for the event q.  Timeouts and wakeups
+ * are counted separately, but either way the frontend is notified
+ * through the net channel IRQ.
+ */
+static void bend_evq_timeout(void *context, int is_timeout)
+{
+	struct netback_accel *bend = context;
+
+	if (is_timeout) {
+		/* Pass event to vnic front end driver */
+		VPRINTK("timeout event to %d\n", bend->net_channel);
+		NETBACK_ACCEL_STATS_OP(bend->stats.evq_timeouts++);
+	} else {
+		/* It's a wakeup event, used by Falcon */
+		VPRINTK("wakeup to %d\n", bend->net_channel);
+		NETBACK_ACCEL_STATS_OP(bend->stats.evq_wakeups++);
+	}
+
+	notify_remote_via_irq(bend->net_channel_irq);
+}
|
|
+
|
|
+
|
|
+/*
+ * Create the eventq and associated gubbins for communication with the
+ * front end vnic driver.
+ *
+ * Moves bend->hw_state from NETBACK_ACCEL_RES_NONE to
+ * NETBACK_ACCEL_RES_ALLOC on success.  On failure everything acquired
+ * so far is released and the error code is returned.
+ */
+static int ef_get_vnic(struct netback_accel *bend)
+{
+	struct falcon_bend_accel_priv *priv;
+	int rc;
+
+	BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_NONE);
+
+	/* Allocate page related state and accel_hw_priv */
+	rc = alloc_page_state(bend, bend->max_pages);
+	if (rc != 0) {
+		EPRINTK("Failed to allocate page state: %d\n", rc);
+		return rc;
+	}
+
+	priv = bend->accel_hw_priv;
+
+	rc = efx_vi_alloc(&priv->efx_vih, bend->net_dev->ifindex);
+	if (rc != 0) {
+		EPRINTK("%s: efx_vi_alloc failed %d\n", __FUNCTION__, rc);
+		goto out_free_state;
+	}
+
+	rc = efx_vi_eventq_register_callback(priv->efx_vih,
+					     bend_evq_timeout, bend);
+	if (rc != 0) {
+		EPRINTK("%s: register_callback failed %d\n", __FUNCTION__, rc);
+		goto out_free_vi;
+	}
+
+	bend->hw_state = NETBACK_ACCEL_RES_ALLOC;
+
+	return 0;
+
+ out_free_vi:
+	efx_vi_free(priv->efx_vih);
+ out_free_state:
+	free_page_state(bend);
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Undo ef_get_vnic(): kill the eventq callback, free the virtual
+ * interface and the page state, and return bend->hw_state to
+ * NETBACK_ACCEL_RES_NONE.
+ */
+static void ef_free_vnic(struct netback_accel *bend)
+{
+	struct falcon_bend_accel_priv *priv = bend->accel_hw_priv;
+
+	BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC);
+
+	efx_vi_eventq_kill_callback(priv->efx_vih);
+
+	DPRINTK("Hardware is freeable. Will proceed.\n");
+
+	efx_vi_free(priv->efx_vih);
+	priv->efx_vih = NULL;
+
+	/* This also frees priv itself */
+	VPRINTK("Free page state...\n");
+	free_page_state(bend);
+
+	bend->hw_state = NETBACK_ACCEL_RES_NONE;
+}
|
|
+
|
|
+
|
|
+/*
+ * Revoke a grant.  If it cannot be revoked because it is still in use
+ * (-EBUSY), shut the remote domain down instead.
+ */
+static inline void ungrant_or_crash(grant_ref_t gntref, int domain)
+{
+	int rc = net_accel_ungrant_page(gntref);
+
+	if (rc == -EBUSY)
+		net_accel_shutdown_remote(domain);
+}
|
|
+
|
|
+
|
|
+/*
+ * Revoke every grant recorded in accel_hw_priv (DMA queues, doorbell,
+ * the event queue read pointer on Falcon A, and the event queue pages)
+ * and drop bend->hw_state back to NETBACK_ACCEL_RES_FILTER.
+ */
+static void netback_accel_release_hwinfo(struct netback_accel *bend)
+{
+	struct falcon_bend_accel_priv *priv = bend->accel_hw_priv;
+	u32 page;
+
+	DPRINTK("Remove dma q grants %d %d\n", priv->txdmaq_gnt,
+		priv->rxdmaq_gnt);
+	ungrant_or_crash(priv->txdmaq_gnt, bend->far_end);
+	ungrant_or_crash(priv->rxdmaq_gnt, bend->far_end);
+
+	DPRINTK("Remove doorbell grant %d\n", priv->doorbell_gnt);
+	ungrant_or_crash(priv->doorbell_gnt, bend->far_end);
+
+	/* Only Falcon A has a separately granted rptr page */
+	if (bend->hw_type == NET_ACCEL_MSG_HWTYPE_FALCON_A) {
+		DPRINTK("Remove rptr grant %d\n", priv->evq_rptr_gnt);
+		ungrant_or_crash(priv->evq_rptr_gnt, bend->far_end);
+	}
+
+	for (page = 0; page < priv->evq_npages; page++) {
+		DPRINTK("Remove evq grant %d\n", priv->evq_mem_gnts[page]);
+		ungrant_or_crash(priv->evq_mem_gnts[page], bend->far_end);
+	}
+
+	bend->hw_state = NETBACK_ACCEL_RES_FILTER;
+}
|
|
+
|
|
+
|
|
+/*
+ * Fill in the hardware description shared by all supported NIC types
+ * and grant the frontend access to the TX/RX DMA queue pages, the
+ * doorbell page and the event queue pages.
+ *
+ * The grant references are stored both in hwinfo (for the frontend)
+ * and in accel_hw_priv (so they can be revoked later).
+ *
+ * Returns 0 on success.  On failure every grant made so far is revoked
+ * and a negative error code is returned.
+ */
+static int ef_bend_hwinfo_falcon_common(struct netback_accel *bend,
+					struct net_accel_hw_falcon_b *hwinfo)
+{
+	struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
+	struct efx_vi_hw_resource_metadata res_mdata;
+	struct efx_vi_hw_resource res_array[EFX_VI_HW_RESOURCE_MAXSIZE];
+	int rc, len = EFX_VI_HW_RESOURCE_MAXSIZE, i, pfn = 0;
+	unsigned long txdmaq_pfn = 0, rxdmaq_pfn = 0;
+
+	rc = efx_vi_hw_resource_get_phys(accel_hw_priv->efx_vih, &res_mdata,
+					 res_array, &len);
+	if (rc != 0) {
+		DPRINTK("%s: resource_get_phys returned %d\n",
+			__FUNCTION__, rc);
+		return rc;
+	}
+
+	hwinfo->nic_arch = res_mdata.nic_arch;
+	hwinfo->nic_variant = res_mdata.nic_variant;
+	hwinfo->nic_revision = res_mdata.nic_revision;
+
+	hwinfo->evq_order = res_mdata.evq_order;
+	hwinfo->evq_offs = res_mdata.evq_offs;
+	hwinfo->evq_capacity = res_mdata.evq_capacity;
+	hwinfo->instance = res_mdata.instance;
+	hwinfo->rx_capacity = res_mdata.rx_capacity;
+	hwinfo->tx_capacity = res_mdata.tx_capacity;
+
+	VPRINTK("evq_order %d evq_offs %d evq_cap %d inst %d rx_cap %d tx_cap %d\n",
+		hwinfo->evq_order, hwinfo->evq_offs, hwinfo->evq_capacity,
+		hwinfo->instance, hwinfo->rx_capacity, hwinfo->tx_capacity);
+
+	/* Pick the pages we need to share out of the resource list */
+	for (i = 0; i < len; i++) {
+		struct efx_vi_hw_resource *res = &(res_array[i]);
+		switch (res->type) {
+		case EFX_VI_HW_RESOURCE_TXDMAQ:
+			txdmaq_pfn = page_to_pfn(virt_to_page(res->address));
+			break;
+		case EFX_VI_HW_RESOURCE_RXDMAQ:
+			rxdmaq_pfn = page_to_pfn(virt_to_page(res->address));
+			break;
+		case EFX_VI_HW_RESOURCE_EVQTIMER:
+			break;
+		case EFX_VI_HW_RESOURCE_EVQRPTR:
+		case EFX_VI_HW_RESOURCE_EVQRPTR_OFFSET:
+			hwinfo->evq_rptr = res->address;
+			break;
+		case EFX_VI_HW_RESOURCE_EVQMEMKVA:
+			accel_hw_priv->evq_npages = 1 << res_mdata.evq_order;
+			pfn = page_to_pfn(virt_to_page(res->address));
+			break;
+		case EFX_VI_HW_RESOURCE_BELLPAGE:
+			hwinfo->doorbell_mfn  = res->address;
+			break;
+		default:
+			EPRINTK("%s: Unknown hardware resource type %d\n",
+				__FUNCTION__, res->type);
+			break;
+		}
+	}
+
+	/*
+	 * evq_mem_gnts[] has a fixed size; refuse an event queue that
+	 * would not fit rather than write past the end of the array.
+	 */
+	if (accel_hw_priv->evq_npages > EF_HW_FALCON_EVQ_PAGES) {
+		EPRINTK("%s: event queue of %u pages is too large\n",
+			__FUNCTION__, accel_hw_priv->evq_npages);
+		accel_hw_priv->evq_npages = 0;
+		return -EINVAL;
+	}
+
+	VPRINTK("Passing txdmaq page pfn %lx\n", txdmaq_pfn);
+	rc = net_accel_grant_page(bend->hdev_data, pfn_to_mfn(txdmaq_pfn), 0);
+	if (rc < 0)
+		goto fail0;
+	accel_hw_priv->txdmaq_gnt = hwinfo->txdmaq_gnt = rc;
+
+	VPRINTK("Passing rxdmaq page pfn %lx\n", rxdmaq_pfn);
+	rc = net_accel_grant_page(bend->hdev_data, pfn_to_mfn(rxdmaq_pfn), 0);
+	if (rc < 0)
+		goto fail1;
+	accel_hw_priv->rxdmaq_gnt = hwinfo->rxdmaq_gnt = rc;
+
+	VPRINTK("Passing doorbell page mfn %x\n", hwinfo->doorbell_mfn);
+	/* Make the relevant H/W pages mappable by the far end */
+	rc = net_accel_grant_page(bend->hdev_data, hwinfo->doorbell_mfn, 1);
+	if (rc < 0)
+		goto fail2;
+	accel_hw_priv->doorbell_gnt = hwinfo->doorbell_gnt = rc;
+
+	/* Now do the same for the memory pages */
+	/* Convert the page + length we got back for the evq to grants. */
+	for (i = 0; i < accel_hw_priv->evq_npages; i++) {
+		rc = net_accel_grant_page(bend->hdev_data, pfn_to_mfn(pfn), 0);
+		if (rc < 0)
+			goto fail3;
+		accel_hw_priv->evq_mem_gnts[i] = hwinfo->evq_mem_gnts[i] = rc;
+
+		VPRINTK("Got grant %u for evq pfn %x\n", hwinfo->evq_mem_gnts[i],
+			pfn);
+		pfn++;
+	}
+
+	return 0;
+
+	/* Unwind in reverse order; i is the index of the failed evq page */
+ fail3:
+	for (i = i - 1; i >= 0; i--) {
+		ungrant_or_crash(accel_hw_priv->evq_mem_gnts[i], bend->far_end);
+	}
+	ungrant_or_crash(accel_hw_priv->doorbell_gnt, bend->far_end);
+ fail2:
+	ungrant_or_crash(accel_hw_priv->rxdmaq_gnt, bend->far_end);
+ fail1:
+	ungrant_or_crash(accel_hw_priv->txdmaq_gnt, bend->far_end);
+ fail0:
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Falcon A hardware description: the common part plus a grant for the
+ * page that holds the event queue read pointer.
+ *
+ * Returns 0 on success.  If the extra grant fails, the grants made by
+ * ef_bend_hwinfo_falcon_common() are revoked and the error returned.
+ */
+static int ef_bend_hwinfo_falcon_a(struct netback_accel *bend,
+				   struct net_accel_hw_falcon_a *hwinfo)
+{
+	struct falcon_bend_accel_priv *priv = bend->accel_hw_priv;
+	int rc, page;
+
+	rc = ef_bend_hwinfo_falcon_common(bend, &hwinfo->common);
+	if (rc != 0)
+		return rc;
+
+	/*
+	 * Note that unlike the above, where the message field is the
+	 * page number, here evq_rptr is the entire address because
+	 * it is currently a pointer into the densely mapped timer page.
+	 */
+	VPRINTK("Passing evq_rptr pfn %x for rptr %x\n",
+		hwinfo->common.evq_rptr >> PAGE_SHIFT,
+		hwinfo->common.evq_rptr);
+	rc = net_accel_grant_page(bend->hdev_data,
+				  hwinfo->common.evq_rptr >> PAGE_SHIFT, 0);
+	if (rc >= 0) {
+		priv->evq_rptr_gnt = hwinfo->evq_rptr_gnt = rc;
+		VPRINTK("evq_rptr_gnt got %d\n", hwinfo->evq_rptr_gnt);
+		return 0;
+	}
+
+	/* Undo ef_bend_hwinfo_falcon_common() */
+	ungrant_or_crash(priv->txdmaq_gnt, bend->far_end);
+	ungrant_or_crash(priv->rxdmaq_gnt, bend->far_end);
+	ungrant_or_crash(priv->doorbell_gnt, bend->far_end);
+	for (page = 0; page < priv->evq_npages; page++)
+		ungrant_or_crash(priv->evq_mem_gnts[page], bend->far_end);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/* Falcon B hardware description: nothing beyond the common part. */
+static int ef_bend_hwinfo_falcon_b(struct netback_accel *bend,
+				   struct net_accel_hw_falcon_b *hwinfo)
+{
+	int rc = ef_bend_hwinfo_falcon_common(bend, hwinfo);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Fill in the message with a description of the hardware resources,
+ * based on the H/W type.
+ *
+ * Must be called in state NETBACK_ACCEL_RES_FILTER; on success the
+ * state advances to NETBACK_ACCEL_RES_HWINFO.  Returns 0 or the error
+ * from the type-specific helper.
+ */
+static int netback_accel_hwinfo(struct netback_accel *bend,
+				struct net_accel_msg_hw *msgvi)
+{
+	int rc = 0;
+
+	BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER);
+
+	msgvi->type = bend->hw_type;
+
+	if (bend->hw_type == NET_ACCEL_MSG_HWTYPE_FALCON_A) {
+		rc = ef_bend_hwinfo_falcon_a(bend, &msgvi->resources.falcon_a);
+	} else if (bend->hw_type == NET_ACCEL_MSG_HWTYPE_FALCON_B ||
+		   bend->hw_type == NET_ACCEL_MSG_HWTYPE_SIENA_A) {
+		/* Siena A uses the Falcon B description */
+		rc = ef_bend_hwinfo_falcon_b(bend, &msgvi->resources.falcon_b);
+	}
+	/*
+	 * NET_ACCEL_MSG_HWTYPE_NONE: nothing to do.  The slow path
+	 * should just work.
+	 */
+
+	if (rc != 0)
+		return rc;
+
+	bend->hw_state = NETBACK_ACCEL_RES_HWINFO;
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Allocate hardware resources and make them available to the client
+ * domain.
+ *
+ * Allocates the VI and event queue, sets up filter management, grants
+ * the hardware pages and sends the NET_ACCEL_MSG_SETHW message that
+ * describes them to the frontend.
+ *
+ * Returns 0 on success.  On failure everything set up so far is torn
+ * down again, in reverse order, and the error code is returned.
+ */
+int netback_accel_setup_vnic_hw(struct netback_accel *bend)
+{
+	struct net_accel_msg msg;
+	int err;
+
+	/* Allocate the event queue, VI and so on. */
+	err = ef_get_vnic(bend);
+	if (err) {
+		EPRINTK("Failed to allocate hardware resource for bend: "
+			"error %d\n", err);
+		return err;
+	}
+
+	/* Set up the filter management */
+	err = netback_accel_filter_init(bend);
+	if (err) {
+		EPRINTK("Filter setup failed, error %d\n", err);
+		goto fail_vnic;
+	}
+
+	net_accel_msg_init(&msg, NET_ACCEL_MSG_SETHW);
+
+	/*
+	 * Extract the low-level hardware info we will actually pass to the
+	 * other end, and set up the grants/ioremap permissions needed
+	 */
+	err = netback_accel_hwinfo(bend, &msg.u.hw);
+	if (err != 0)
+		goto fail_filter;
+
+	/* Send the message, this is a reply to a hello-reply */
+	err = net_accel_msg_reply_notify(bend->shared_page,
+					 bend->msg_channel_irq,
+					 &bend->to_domU, &msg);
+
+	/*
+	 * The message should succeed as it's logically a reply and we
+	 * guarantee space for replies, but a misbehaving frontend
+	 * could make it fail, so be tolerant
+	 */
+	if (err != 0)
+		goto fail_hwinfo;
+
+	return 0;
+
+ fail_hwinfo:
+	netback_accel_release_hwinfo(bend);
+ fail_filter:
+	netback_accel_filter_shutdown(bend);
+ fail_vnic:
+	ef_free_vnic(bend);
+	return err;
+}
|
|
+
|
|
+
|
|
+/*
+ * Free hardware resources.
+ *
+ * Tears down whatever netback_accel_setup_vnic_hw() managed to set up,
+ * starting from the stage recorded in bend->hw_state and working back
+ * to NETBACK_ACCEL_RES_NONE.  An unknown state is a BUG().
+ */
+void netback_accel_shutdown_vnic_hw(struct netback_accel *bend)
+{
+	const int state = bend->hw_state;
+
+	/*
+	 * Only try and release resources if accel_hw_priv was setup,
+	 * otherwise there is nothing to do as we're on "null-op"
+	 * acceleration
+	 */
+	if (state == NETBACK_ACCEL_RES_NONE)
+		return;
+
+	if (state != NETBACK_ACCEL_RES_HWINFO &&
+	    state != NETBACK_ACCEL_RES_FILTER &&
+	    state != NETBACK_ACCEL_RES_ALLOC) {
+		BUG();
+		return;
+	}
+
+	/* Each stage below also undoes all the stages before it */
+	if (state == NETBACK_ACCEL_RES_HWINFO) {
+		VPRINTK("Release hardware resources\n");
+		netback_accel_release_hwinfo(bend);
+	}
+
+	if (state != NETBACK_ACCEL_RES_ALLOC) {
+		VPRINTK("Free filters...\n");
+		netback_accel_filter_shutdown(bend);
+	}
+
+	VPRINTK("Free vnic...\n");
+	ef_free_vnic(bend);
+}
|
|
+
|
|
+/**************************************************************************
|
|
+ *
|
|
+ * Buffer table stuff
|
|
+ *
|
|
+ **************************************************************************/
|
|
+
|
|
+/*
+ * Undo any allocation that netback_accel_msg_rx_buffer_map() has made
+ * if it fails half way through: unmap the last i device pages, most
+ * recently mapped first, lowering buffer_maps_index as we go.
+ */
+static inline void buffer_map_cleanup(struct netback_accel *bend, int i)
+{
+	for (; i > 0; i--) {
+		bend->buffer_maps_index--;
+		net_accel_unmap_device_page
+			(bend->hdev_data,
+			 bend->buffer_maps[bend->buffer_maps_index],
+			 bend->buffer_addrs[bend->buffer_maps_index]);
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * Map a batch of granted buffer pages from the frontend and enter them
+ * into the NIC's buffer table.
+ *
+ * @bend:         backend the buffers belong to
+ * @pages:        number of entries in grants; must not exceed
+ *                NET_ACCEL_MSG_MAX_PAGE_REQ
+ * @log2_pages:   not used by this function
+ * @grants:       grant references of the pages to map
+ * @buf_addr_out: receives the address that efx_vi_dma_get_map_addr()
+ *                reports for the new mapping
+ *
+ * Returns 0 on success, -EINVAL if a limit would be exceeded, or the
+ * error from the failing mapping call.  On failure all pages mapped by
+ * this call are unmapped again.
+ */
+int netback_accel_add_buffers(struct netback_accel *bend, int pages, int log2_pages,
+			      u32 *grants, u32 *buf_addr_out)
+{
+	struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
+	unsigned long long addr_array[NET_ACCEL_MSG_MAX_PAGE_REQ];
+	int rc, i, index;
+	u64 dev_bus_addr;
+
+	/* Make sure we can't overflow the on-stack addr_array */
+	if (pages < 0 || pages > NET_ACCEL_MSG_MAX_PAGE_REQ) {
+		EPRINTK("%s: invalid page count: %d\n", __FUNCTION__, pages);
+		return -EINVAL;
+	}
+
+	/* Make sure we can't overflow the dma_maps array */
+	if (accel_hw_priv->dma_maps_index >=
+	    bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ) {
+		EPRINTK("%s: too many buffer table allocations: %d %d\n",
+			__FUNCTION__, accel_hw_priv->dma_maps_index,
+			bend->max_pages / NET_ACCEL_MSG_MAX_PAGE_REQ);
+		return -EINVAL;
+	}
+
+	/* Make sure we can't overflow the buffer_maps array */
+	if (bend->buffer_maps_index + pages > bend->max_pages) {
+		EPRINTK("%s: too many pages mapped: %d + %d > %d\n",
+			__FUNCTION__, bend->buffer_maps_index,
+			pages, bend->max_pages);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < pages; i++) {
+		VPRINTK("%s: mapping page %d\n", __FUNCTION__, i);
+		rc = net_accel_map_device_page
+			(bend->hdev_data, grants[i],
+			 &bend->buffer_maps[bend->buffer_maps_index],
+			 &dev_bus_addr);
+
+		if (rc != 0) {
+			EPRINTK("error in net_accel_map_device_page\n");
+			/* Pages 0..i-1 were mapped; undo those */
+			buffer_map_cleanup(bend, i);
+			return rc;
+		}
+
+		bend->buffer_addrs[bend->buffer_maps_index] = dev_bus_addr;
+
+		bend->buffer_maps_index++;
+
+		addr_array[i] = dev_bus_addr;
+	}
+
+	VPRINTK("%s: mapping dma addresses to vih %p\n", __FUNCTION__,
+		accel_hw_priv->efx_vih);
+
+	index = accel_hw_priv->dma_maps_index;
+	if ((rc = efx_vi_dma_map_addrs(accel_hw_priv->efx_vih, addr_array, pages,
+				       &(accel_hw_priv->dma_maps[index]))) < 0) {
+		EPRINTK("error in dma_map_pages\n");
+		/* i == pages here: undo the whole batch */
+		buffer_map_cleanup(bend, i);
+		return rc;
+	}
+
+	accel_hw_priv->dma_maps_index++;
+	NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages += pages);
+
+	*buf_addr_out = efx_vi_dma_get_map_addr(accel_hw_priv->efx_vih,
+						accel_hw_priv->dma_maps[index]);
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Reset the virtual interface, then release every DMA mapping and
+ * every mapped buffer page, newest first.  Does nothing when no
+ * hardware state has been set up.  Always returns 0.
+ */
+int netback_accel_remove_buffers(struct netback_accel *bend)
+{
+	struct falcon_bend_accel_priv *priv;
+	int idx;
+
+	/* Only try to free buffers if accel_hw_priv was setup */
+	if (bend->hw_state == NETBACK_ACCEL_RES_NONE)
+		return 0;
+
+	priv = bend->accel_hw_priv;
+
+	efx_vi_reset(priv->efx_vih);
+
+	while (priv->dma_maps_index > 0) {
+		idx = --priv->dma_maps_index;
+		efx_vi_dma_unmap_addrs(priv->efx_vih, priv->dma_maps[idx]);
+	}
+
+	while (bend->buffer_maps_index > 0) {
+		VPRINTK("Unmapping granted buffer %d\n",
+			bend->buffer_maps_index);
+		idx = --bend->buffer_maps_index;
+		net_accel_unmap_device_page(bend->hdev_data,
+					    bend->buffer_maps[idx],
+					    bend->buffer_addrs[idx]);
+	}
+
+	NETBACK_ACCEL_STATS_OP(bend->stats.num_buffer_pages = 0);
+
+	return 0;
+}
|
|
+
|
|
+/**************************************************************************
|
|
+ *
|
|
+ * Filter stuff
|
|
+ *
|
|
+ **************************************************************************/
|
|
+
|
|
+/*
+ * Set up filter management for a backend: the filter lock, the hash
+ * table, the array of filter specs and the bitmap of free filters.
+ *
+ * Must be called in state NETBACK_ACCEL_RES_ALLOC; on success the
+ * state advances to NETBACK_ACCEL_RES_FILTER.  Returns 0 on success,
+ * the cuckoo_hash_init() error, or -ENOMEM.
+ */
+static int netback_accel_filter_init(struct netback_accel *bend)
+{
+	struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
+	int i, rc;
+
+	BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_ALLOC);
+
+	spin_lock_init(&accel_hw_priv->filter_lock);
+
+	if ((rc = cuckoo_hash_init(&accel_hw_priv->filter_hash_table,
+				   5 /* space for 32 filters */, 8)) != 0) {
+		EPRINTK("Failed to initialise filter hash table\n");
+		return rc;
+	}
+
+	accel_hw_priv->fspecs = kzalloc(sizeof(struct netback_accel_filter_spec) *
+					bend->quotas.max_filters,
+					GFP_KERNEL);
+
+	if (accel_hw_priv->fspecs == NULL) {
+		EPRINTK("No memory for filter specs.\n");
+		cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Mark every filter as free.  The shift must be done in unsigned
+	 * long: an int "1 << 31" overflows and would be sign-extended
+	 * into the upper half of the bitmap on 64-bit builds.
+	 */
+	for (i = 0; i < bend->quotas.max_filters; i++) {
+		accel_hw_priv->free_filters |= (1UL << i);
+	}
+
+	/* Base mask on highest set bit in max_filters  */
+	accel_hw_priv->filter_idx_mask = (1 << fls(bend->quotas.max_filters)) - 1;
+	VPRINTK("filter setup: max is %x mask is %x\n",
+		bend->quotas.max_filters, accel_hw_priv->filter_idx_mask);
+
+	bend->hw_state = NETBACK_ACCEL_RES_FILTER;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+static inline void make_filter_key(cuckoo_hash_ip_key *key,
|
|
+ struct netback_accel_filter_spec *filt)
|
|
+
|
|
+{
|
|
+ key->local_ip = filt->destip_be;
|
|
+ key->local_port = filt->destport_be;
|
|
+ key->proto = filt->proto;
|
|
+}
|
|
+
|
|
+
|
|
+static inline
|
|
+void netback_accel_free_filter(struct falcon_bend_accel_priv *accel_hw_priv,
|
|
+ int filter)
|
|
+{
|
|
+ cuckoo_hash_ip_key filter_key;
|
|
+
|
|
+ if (!(accel_hw_priv->free_filters & (1 << filter))) {
|
|
+ efx_vi_filter_stop(accel_hw_priv->efx_vih,
|
|
+ accel_hw_priv->fspecs[filter].filter_handle);
|
|
+ make_filter_key(&filter_key, &(accel_hw_priv->fspecs[filter]));
|
|
+ if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
|
|
+ (cuckoo_hash_key *)&filter_key)) {
|
|
+ EPRINTK("%s: Couldn't find filter to remove from table\n",
|
|
+ __FUNCTION__);
|
|
+ BUG();
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+static void netback_accel_filter_shutdown(struct netback_accel *bend)
|
|
+{
|
|
+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
|
|
+ int i;
|
|
+ unsigned long flags;
|
|
+
|
|
+ BUG_ON(bend->hw_state != NETBACK_ACCEL_RES_FILTER);
|
|
+
|
|
+ spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
|
|
+
|
|
+ BUG_ON(accel_hw_priv->fspecs == NULL);
|
|
+
|
|
+ for (i = 0; i < bend->quotas.max_filters; i++) {
|
|
+ netback_accel_free_filter(accel_hw_priv, i);
|
|
+ }
|
|
+
|
|
+ kfree(accel_hw_priv->fspecs);
|
|
+ accel_hw_priv->fspecs = NULL;
|
|
+ accel_hw_priv->free_filters = 0;
|
|
+
|
|
+ cuckoo_hash_destroy(&accel_hw_priv->filter_hash_table);
|
|
+
|
|
+ spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
|
|
+
|
|
+ bend->hw_state = NETBACK_ACCEL_RES_ALLOC;
|
|
+}
|
|
+
|
|
+
|
|
+/*! Suggest a filter to replace when we want to insert a new one and have
|
|
+ * none free.
|
|
+ *
|
|
+ * \param bend The backend; its accel_hw_priv->filter_idx_mask and
|
|
+ * quotas.max_filters are read to bound the chosen index
|
|
+ * \return The index of the filter slot suggested for eviction. This
|
|
+ * function only picks and logs the index; it does not stop or unhash
|
|
+ * the filter currently occupying that slot.
|
|
+ */
|
|
+static unsigned get_victim_filter(struct netback_accel *bend)
|
|
+{
|
|
+ /*
|
|
+ * We could attempt to get really clever, and may do at some
|
|
+ * point, but random replacement is v. cheap and low on
|
|
+ * pathological worst cases.
|
|
+ *
|
|
+ * The value filled in by rdtscl() below is what serves as the
|
|
+ * source of randomness.
|
|
+ */
|
|
+ unsigned index, cycles;
|
|
+
|
|
+ rdtscl(cycles);
|
|
+
|
|
+ /*
|
|
+ * Some doubt about the quality of the bottom few bits, so
|
|
+ * throw 'em away (hence the shift right by 4 before masking)
|
|
+ */
|
|
+ index = (cycles >> 4) & ((struct falcon_bend_accel_priv *)
|
|
+ bend->accel_hw_priv)->filter_idx_mask;
|
|
+ /*
|
|
+ * We don't enforce that the number of filters is a power of
|
|
+ * two, but the masking gets us to within one subtraction of a
|
|
+ * valid index
|
|
+ *
|
|
+ * (netback_accel_filter_init() sets filter_idx_mask to
|
|
+ * (1 << fls(max_filters)) - 1.)
|
|
+ */
|
|
+ if (index >= bend->quotas.max_filters)
|
|
+ index -= bend->quotas.max_filters;
|
|
+ DPRINTK("backend %s->%d has no free filters. Filter %d will be evicted\n",
|
|
+ bend->nicname, bend->far_end, index);
|
|
+ return index;
|
|
+}
|
|
+
|
|
+
|
|
+/* Add a filter for the specified IP/port to the backend */
|
|
+int
|
|
+netback_accel_filter_check_add(struct netback_accel *bend,
|
|
+ struct netback_accel_filter_spec *filt)
|
|
+{
|
|
+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
|
|
+ struct netback_accel_filter_spec *fs;
|
|
+ unsigned filter_index;
|
|
+ unsigned long flags;
|
|
+ int rc, recycling = 0;
|
|
+ cuckoo_hash_ip_key filter_key, evict_key;
|
|
+
|
|
+ BUG_ON(filt->proto != IPPROTO_TCP && filt->proto != IPPROTO_UDP);
|
|
+
|
|
+ DPRINTK("Will add %s filter for dst ip %08x and dst port %d\n",
|
|
+ (filt->proto == IPPROTO_TCP) ? "TCP" : "UDP",
|
|
+ be32_to_cpu(filt->destip_be), be16_to_cpu(filt->destport_be));
|
|
+
|
|
+ spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
|
|
+ /*
|
|
+ * Check to see if we're already filtering this IP address and
|
|
+ * port. Happens if you insert a filter mid-stream as there
|
|
+ * are many packets backed up to be delivered to dom0 already
|
|
+ */
|
|
+ make_filter_key(&filter_key, filt);
|
|
+ if (cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table,
|
|
+ (cuckoo_hash_key *)(&filter_key),
|
|
+ &filter_index)) {
|
|
+ DPRINTK("Found matching filter %d already in table\n",
|
|
+ filter_index);
|
|
+ rc = -1;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ if (accel_hw_priv->free_filters == 0) {
|
|
+ filter_index = get_victim_filter(bend);
|
|
+ recycling = 1;
|
|
+ } else {
|
|
+ filter_index = __ffs(accel_hw_priv->free_filters);
|
|
+ clear_bit(filter_index, &accel_hw_priv->free_filters);
|
|
+ }
|
|
+
|
|
+ fs = &accel_hw_priv->fspecs[filter_index];
|
|
+
|
|
+ if (recycling) {
|
|
+ DPRINTK("Removing filter index %d handle %p\n", filter_index,
|
|
+ fs->filter_handle);
|
|
+
|
|
+ if ((rc = efx_vi_filter_stop(accel_hw_priv->efx_vih,
|
|
+ fs->filter_handle)) != 0) {
|
|
+ EPRINTK("Couldn't clear NIC filter table entry %d\n", rc);
|
|
+ }
|
|
+
|
|
+ make_filter_key(&evict_key, fs);
|
|
+ if (cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
|
|
+ (cuckoo_hash_key *)&evict_key)) {
|
|
+ EPRINTK("Couldn't find filter to remove from table\n");
|
|
+ BUG();
|
|
+ }
|
|
+ NETBACK_ACCEL_STATS_OP(bend->stats.num_filters--);
|
|
+ }
|
|
+
|
|
+ /* Update the filter spec with new details */
|
|
+ *fs = *filt;
|
|
+
|
|
+ if ((rc = cuckoo_hash_add(&accel_hw_priv->filter_hash_table,
|
|
+ (cuckoo_hash_key *)&filter_key, filter_index,
|
|
+ 1)) != 0) {
|
|
+ EPRINTK("Error (%d) adding filter to table\n", rc);
|
|
+ accel_hw_priv->free_filters |= (1 << filter_index);
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ rc = efx_vi_filter(accel_hw_priv->efx_vih, filt->proto, filt->destip_be,
|
|
+ filt->destport_be,
|
|
+ (struct filter_resource_t **)&fs->filter_handle);
|
|
+
|
|
+ if (rc != 0) {
|
|
+ EPRINTK("Hardware filter insertion failed. Error %d\n", rc);
|
|
+ accel_hw_priv->free_filters |= (1 << filter_index);
|
|
+ cuckoo_hash_remove(&accel_hw_priv->filter_hash_table,
|
|
+ (cuckoo_hash_key *)&filter_key);
|
|
+ rc = -1;
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ NETBACK_ACCEL_STATS_OP(bend->stats.num_filters++);
|
|
+
|
|
+ VPRINTK("%s: success index %d handle %p\n", __FUNCTION__, filter_index,
|
|
+ fs->filter_handle);
|
|
+
|
|
+ rc = filter_index;
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Remove the filter entry at the given index and mark the index free.
|
|
+ *
|
|
+ * The filter must currently be in use (BUG otherwise).  The hardware
|
|
+ * filter is stopped and its key is removed from the filter hash table
|
|
+ * via netback_accel_free_filter(), then the index is returned to the
|
|
+ * free_filters bitmap and the num_filters statistic is decremented so
|
|
+ * that it stays in step with the increment done on insertion in
|
|
+ * netback_accel_filter_check_add().
|
|
+ *
|
|
+ * The caller visible in this file, netback_accel_filter_remove_spec(),
|
|
+ * calls this with filter_lock held.
|
|
+ *
|
|
+ * bend:         backend that owns the filter
|
|
+ * filter_index: index of the filter slot to release
|
|
+ */
|
|
+static void netback_accel_filter_remove(struct netback_accel *bend,
|
|
+					int filter_index)
|
|
+{
|
|
+	struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
|
|
+
|
|
+	BUG_ON(accel_hw_priv->free_filters & (1 << filter_index));
|
|
+	netback_accel_free_filter(accel_hw_priv, filter_index);
|
|
+	accel_hw_priv->free_filters |= (1 << filter_index);
|
|
+	/* Balance the num_filters++ done when the filter was added */
|
|
+	NETBACK_ACCEL_STATS_OP(bend->stats.num_filters--);
|
|
+}
|
|
+
|
|
+
|
|
+/* Remove a filter entry for the specific device and IP/port */
|
|
+void netback_accel_filter_remove_spec(struct netback_accel *bend,
|
|
+ struct netback_accel_filter_spec *filt)
|
|
+{
|
|
+ struct falcon_bend_accel_priv *accel_hw_priv = bend->accel_hw_priv;
|
|
+ unsigned filter_found;
|
|
+ unsigned long flags;
|
|
+ cuckoo_hash_ip_key filter_key;
|
|
+ struct netback_accel_filter_spec *fs;
|
|
+
|
|
+ if (filt->proto == IPPROTO_TCP) {
|
|
+ DPRINTK("Remove TCP filter for dst ip %08x and dst port %d\n",
|
|
+ be32_to_cpu(filt->destip_be),
|
|
+ be16_to_cpu(filt->destport_be));
|
|
+ } else if (filt->proto == IPPROTO_UDP) {
|
|
+ DPRINTK("Remove UDP filter for dst ip %08x and dst port %d\n",
|
|
+ be32_to_cpu(filt->destip_be),
|
|
+ be16_to_cpu(filt->destport_be));
|
|
+ } else {
|
|
+ /*
|
|
+ * This could be provoked by an evil frontend, so can't
|
|
+ * BUG(), but harmless as it should fail tests below
|
|
+ */
|
|
+ DPRINTK("Non-TCP/UDP filter dst ip %08x and dst port %d\n",
|
|
+ be32_to_cpu(filt->destip_be),
|
|
+ be16_to_cpu(filt->destport_be));
|
|
+ }
|
|
+
|
|
+ spin_lock_irqsave(&accel_hw_priv->filter_lock, flags);
|
|
+
|
|
+ make_filter_key(&filter_key, filt);
|
|
+ if (!cuckoo_hash_lookup(&accel_hw_priv->filter_hash_table,
|
|
+ (cuckoo_hash_key *)(&filter_key),
|
|
+ &filter_found)) {
|
|
+ EPRINTK("Couldn't find matching filter already in table\n");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ /* Do a full check to make sure we've not had a hash collision */
|
|
+ fs = &accel_hw_priv->fspecs[filter_found];
|
|
+ if (fs->destip_be == filt->destip_be &&
|
|
+ fs->destport_be == filt->destport_be &&
|
|
+ fs->proto == filt->proto &&
|
|
+ !memcmp(fs->mac, filt->mac, ETH_ALEN)) {
|
|
+ netback_accel_filter_remove(bend, filter_found);
|
|
+ } else {
|
|
+ EPRINTK("Entry in hash table does not match filter spec\n");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ out:
|
|
+ spin_unlock_irqrestore(&accel_hw_priv->filter_lock, flags);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel_solarflare.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,88 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NETBACK_ACCEL_SOLARFLARE_H
|
|
+#define NETBACK_ACCEL_SOLARFLARE_H
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_msg_iface.h"
|
|
+
|
|
+#include "driverlink_api.h"
|
|
+
|
|
+#define MAX_NICS 5
|
|
+#define MAX_PORTS 2
|
|
+
|
|
+
|
|
+extern int netback_accel_sf_init(void);
|
|
+extern void netback_accel_sf_shutdown(void);
|
|
+extern int netback_accel_sf_hwtype(struct netback_accel *bend);
|
|
+
|
|
+extern int netback_accel_sf_char_init(void);
|
|
+extern void netback_accel_sf_char_shutdown(void);
|
|
+
|
|
+extern int netback_accel_setup_vnic_hw(struct netback_accel *bend);
|
|
+extern void netback_accel_shutdown_vnic_hw(struct netback_accel *bend);
|
|
+
|
|
+extern int netback_accel_add_buffers(struct netback_accel *bend, int pages,
|
|
+ int log2_pages, u32 *grants,
|
|
+ u32 *buf_addr_out);
|
|
+extern int netback_accel_remove_buffers(struct netback_accel *bend);
|
|
+
|
|
+
|
|
+/* Add a filter for the specified IP/port to the backend */
|
|
+extern int
|
|
+netback_accel_filter_check_add(struct netback_accel *bend,
|
|
+ struct netback_accel_filter_spec *filt);
|
|
+/* Remove a filter entry, identified either by its index or by its filter spec */
|
|
+extern
|
|
+void netback_accel_filter_remove_index(struct netback_accel *bend,
|
|
+ int filter_index);
|
|
+extern
|
|
+void netback_accel_filter_remove_spec(struct netback_accel *bend,
|
|
+ struct netback_accel_filter_spec *filt);
|
|
+
|
|
+/* This is designed to look a bit like a skb */
|
|
+struct netback_pkt_buf {
|
|
+ union {
|
|
+ unsigned char *raw;
|
|
+ } mac;
|
|
+ union {
|
|
+ struct iphdr *iph;
|
|
+ struct arphdr *arph;
|
|
+ unsigned char *raw;
|
|
+ } nh;
|
|
+ int protocol;
|
|
+};
|
|
+
|
|
+/*! \brief Handle a received packet: insert fast path filters as necessary
|
|
+ * \param skb The packet buffer
|
|
+ */
|
|
+extern void netback_accel_rx_packet(struct netback_pkt_buf *skb, void *fwd_priv);
|
|
+
|
|
+/*! \brief Handle a transmitted packet: update fast path filters as necessary
|
|
+ * \param skb The packet buffer
|
|
+ */
|
|
+extern void netback_accel_tx_packet(struct sk_buff *skb, void *fwd_priv);
|
|
+
|
|
+#endif /* NETBACK_ACCEL_SOLARFLARE_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/accel_xenbus.c 2010-01-04 11:56:34.000000000 +0100
|
|
@@ -0,0 +1,833 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <xen/evtchn.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <linux/delay.h>
|
|
+
|
|
+/* drivers/xen/netback/common.h */
|
|
+#include "common.h"
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_solarflare.h"
|
|
+#include "accel_util.h"
|
|
+
|
|
+#define NODENAME_PATH_FMT "backend/vif/%d/%d"
|
|
+
|
|
+#define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \
|
|
+ ((struct backend_info *)(_dev)->dev.driver_data)->netback_accel_priv
|
|
+
|
|
+/* List of all the bends currently in existence. */
|
|
+struct netback_accel *bend_list = NULL;
|
|
+DEFINE_MUTEX(bend_list_mutex);
|
|
+
|
|
+/* Put in bend_list. Must hold bend_list_mutex */
|
|
+static void link_bend(struct netback_accel *bend)
|
|
+{
|
|
+ bend->next_bend = bend_list;
|
|
+ bend_list = bend;
|
|
+}
|
|
+
|
|
+/* Remove from bend_list. Must hold bend_list_mutex */
|
|
+static void unlink_bend(struct netback_accel *bend)
|
|
+{
|
|
+ struct netback_accel *tmp = bend_list;
|
|
+ struct netback_accel *prev = NULL;
|
|
+ while (tmp != NULL) {
|
|
+ if (tmp == bend) {
|
|
+ if (prev != NULL)
|
|
+ prev->next_bend = bend->next_bend;
|
|
+ else
|
|
+ bend_list = bend->next_bend;
|
|
+ return;
|
|
+ }
|
|
+ prev = tmp;
|
|
+ tmp = tmp->next_bend;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/* Demultiplex a message IRQ from the frontend driver. */
|
|
+static irqreturn_t msgirq_from_frontend(int irq, void *context,
|
|
+ struct pt_regs *unused)
|
|
+{
|
|
+ struct xenbus_device *dev = context;
|
|
+ struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
|
|
+ VPRINTK("irq %d from device %s\n", irq, dev->nodename);
|
|
+ schedule_work(&bend->handle_msg);
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Demultiplex an IRQ from the frontend driver. This is never used
|
|
+ * functionally, but we need it to pass to the bind function, and may
|
|
+ * get called spuriously
|
|
+ */
|
|
+static irqreturn_t netirq_from_frontend(int irq, void *context,
|
|
+ struct pt_regs *unused)
|
|
+{
|
|
+ VPRINTK("netirq %d from device %s\n", irq,
|
|
+ ((struct xenbus_device *)context)->nodename);
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+
|
|
+/* Read the quota limits from the device's xenbus node, falling back to defaults. */
|
|
+static
|
|
+void cfg_hw_quotas(struct xenbus_device *dev, struct netback_accel *bend)
|
|
+{
|
|
+ int err = xenbus_gather
|
|
+ (XBT_NIL, dev->nodename,
|
|
+ "limits/max-filters", "%d", &bend->quotas.max_filters,
|
|
+ "limits/max-buf-pages", "%d", &bend->quotas.max_buf_pages,
|
|
+ "limits/max-mcasts", "%d", &bend->quotas.max_mcasts,
|
|
+ NULL);
|
|
+ if (err) {
|
|
+ /*
|
|
+ * TODO what if they have previously been set by the
|
|
+ * user? This will overwrite with defaults. Maybe
|
|
+ * not what we want to do, but useful in startup
|
|
+ * case
|
|
+ */
|
|
+ DPRINTK("Failed to read quotas from xenbus, using defaults\n");
|
|
+ bend->quotas.max_filters = NETBACK_ACCEL_DEFAULT_MAX_FILTERS;
|
|
+ bend->quotas.max_buf_pages = sfc_netback_max_pages;
|
|
+ bend->quotas.max_mcasts = NETBACK_ACCEL_DEFAULT_MAX_MCASTS;
|
|
+ }
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+
|
|
+static void bend_config_accel_change(struct xenbus_watch *watch,
|
|
+ const char **vec, unsigned int len)
|
|
+{
|
|
+ struct netback_accel *bend;
|
|
+
|
|
+ bend = container_of(watch, struct netback_accel, config_accel_watch);
|
|
+
|
|
+ mutex_lock(&bend->bend_mutex);
|
|
+ if (bend->config_accel_watch.node != NULL) {
|
|
+ struct xenbus_device *dev =
|
|
+ (struct xenbus_device *)bend->hdev_data;
|
|
+ DPRINTK("Watch matched, got dev %p otherend %p\n",
|
|
+ dev, dev->otherend);
|
|
+ if(!xenbus_exists(XBT_NIL, watch->node, "")) {
|
|
+ DPRINTK("Ignoring watch as otherend seems invalid\n");
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ cfg_hw_quotas(dev, bend);
|
|
+ }
|
|
+ out:
|
|
+ mutex_unlock(&bend->bend_mutex);
|
|
+ return;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Setup watch on "limits" in the backend vif info to know when
|
|
+ * configuration has been set
|
|
+ */
|
|
+static int setup_config_accel_watch(struct xenbus_device *dev,
|
|
+ struct netback_accel *bend)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ VPRINTK("Setting watch on %s/%s\n", dev->nodename, "limits");
|
|
+
|
|
+ err = xenbus_watch_path2(dev, dev->nodename, "limits",
|
|
+ &bend->config_accel_watch,
|
|
+ bend_config_accel_change);
|
|
+
|
|
+ if (err) {
|
|
+ EPRINTK("%s: Failed to register xenbus watch: %d\n",
|
|
+ __FUNCTION__, err);
|
|
+ bend->config_accel_watch.node = NULL;
|
|
+ return err;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static int
|
|
+cfg_frontend_info(struct xenbus_device *dev, struct netback_accel *bend,
|
|
+ int *grants)
|
|
+{
|
|
+ /* Get some info from xenbus on the event channel and shmem grant */
|
|
+ int err = xenbus_gather(XBT_NIL, dev->otherend,
|
|
+ "accel-msg-channel", "%u", &bend->msg_channel,
|
|
+ "accel-ctrl-page", "%d", &(grants[0]),
|
|
+ "accel-msg-page", "%d", &(grants[1]),
|
|
+ "accel-net-channel", "%u", &bend->net_channel,
|
|
+ NULL);
|
|
+ if (err)
|
|
+ EPRINTK("failed to read event channels or shmem grant: %d\n",
|
|
+ err);
|
|
+ else
|
|
+ DPRINTK("got event chan %d and net chan %d from frontend\n",
|
|
+ bend->msg_channel, bend->net_channel);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/* Setup all the comms needed to chat with the front end driver */
|
|
+static int setup_vnic(struct xenbus_device *dev)
|
|
+{
|
|
+ struct netback_accel *bend;
|
|
+ int grants[2], err, msgs_per_queue;
|
|
+
|
|
+ bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
|
|
+
|
|
+ err = cfg_frontend_info(dev, bend, grants);
|
|
+ if (err)
|
|
+ goto fail1;
|
|
+
|
|
+ /*
|
|
+ * If we get here, both frontend Connected and configuration
|
|
+ * options available. All is well.
|
|
+ */
|
|
+
|
|
+ /* Get the hardware quotas for the VNIC in question. */
|
|
+ cfg_hw_quotas(dev, bend);
|
|
+
|
|
+ /* Set up the deferred work handlers */
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
+ INIT_WORK(&bend->handle_msg,
|
|
+ netback_accel_msg_rx_handler);
|
|
+#else
|
|
+ INIT_WORK(&bend->handle_msg,
|
|
+ netback_accel_msg_rx_handler,
|
|
+ (void*)bend);
|
|
+#endif
|
|
+
|
|
+ /* Request the frontend mac */
|
|
+ err = net_accel_xen_net_read_mac(dev, bend->mac);
|
|
+ if (err)
|
|
+ goto fail2;
|
|
+
|
|
+ /* Set up the shared page. */
|
|
+ bend->shared_page = net_accel_map_grants_contig(dev, grants, 2,
|
|
+ &bend->sh_pages_unmap);
|
|
+
|
|
+ if (bend->shared_page == NULL) {
|
|
+ EPRINTK("failed to map shared page for %s\n", dev->otherend);
|
|
+ err = -ENOMEM;
|
|
+ goto fail2;
|
|
+ }
|
|
+
|
|
+ /* Initialise the shared page(s) used for comms */
|
|
+ net_accel_msg_init_page(bend->shared_page, PAGE_SIZE,
|
|
+ (bend->net_dev->flags & IFF_UP) &&
|
|
+ (netif_carrier_ok(bend->net_dev)));
|
|
+
|
|
+ msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
|
|
+
|
|
+ net_accel_msg_init_queue
|
|
+ (&bend->to_domU, &bend->shared_page->queue0,
|
|
+ (struct net_accel_msg *)((__u8*)bend->shared_page + PAGE_SIZE),
|
|
+ msgs_per_queue);
|
|
+
|
|
+ net_accel_msg_init_queue
|
|
+ (&bend->from_domU, &bend->shared_page->queue1,
|
|
+ (struct net_accel_msg *)((__u8*)bend->shared_page +
|
|
+ (3 * PAGE_SIZE / 2)),
|
|
+ msgs_per_queue);
|
|
+
|
|
+ /* Bind the message event channel to a handler
|
|
+ *
|
|
+ * Note that we will probably get a spurious interrupt when we
|
|
+ * do this, so it must not be done until we have set up
|
|
+ * everything we need to handle it.
|
|
+ */
|
|
+ err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
|
|
+ bend->msg_channel,
|
|
+ msgirq_from_frontend,
|
|
+ 0,
|
|
+ "netback_accel",
|
|
+ dev);
|
|
+ if (err < 0) {
|
|
+ EPRINTK("failed to bind event channel: %d\n", err);
|
|
+ goto fail3;
|
|
+ }
|
|
+ else
|
|
+ bend->msg_channel_irq = err;
|
|
+
|
|
+ /* TODO: No need to bind this evtchn to an irq. */
|
|
+ err = bind_interdomain_evtchn_to_irqhandler(dev->otherend_id,
|
|
+ bend->net_channel,
|
|
+ netirq_from_frontend,
|
|
+ 0,
|
|
+ "netback_accel",
|
|
+ dev);
|
|
+ if (err < 0) {
|
|
+ EPRINTK("failed to bind net channel: %d\n", err);
|
|
+ goto fail4;
|
|
+ }
|
|
+ else
|
|
+ bend->net_channel_irq = err;
|
|
+
|
|
+ /*
|
|
+ * Grab ourselves an entry in the forwarding hash table. We do
|
|
+ * this now so we don't have the embarrassment of sorting out
|
|
+ * an allocation failure while at IRQ. Because we pass NULL as
|
|
+ * the context, the actual hash lookup will succeed for this
|
|
+ * NIC, but the check for somewhere to forward to will
|
|
+ * fail. This is necessary to prevent forwarding before
|
|
+ * hardware resources are set up
|
|
+ */
|
|
+ err = netback_accel_fwd_add(bend->mac, NULL, bend->fwd_priv);
|
|
+ if (err) {
|
|
+ EPRINTK("failed to add to fwd hash table\n");
|
|
+ goto fail5;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Say hello to frontend. Important to do this straight after
|
|
+ * obtaining the message queue as otherwise we are vulnerable
|
|
+ * to an evil frontend sending a HELLO-REPLY before we've sent
|
|
+ * the HELLO and confusing us
|
|
+ */
|
|
+ netback_accel_msg_tx_hello(bend, NET_ACCEL_MSG_VERSION);
|
|
+ return 0;
|
|
+
|
|
+ fail5:
|
|
+ unbind_from_irqhandler(bend->net_channel_irq, dev);
|
|
+ fail4:
|
|
+ unbind_from_irqhandler(bend->msg_channel_irq, dev);
|
|
+ fail3:
|
|
+ net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
|
|
+ bend->shared_page = NULL;
|
|
+ bend->sh_pages_unmap = NULL;
|
|
+ fail2:
|
|
+ fail1:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+static int read_nicname(struct xenbus_device *dev, struct netback_accel *bend)
|
|
+{
|
|
+ int len;
|
|
+
|
|
+ /* nic name used to select interface used for acceleration */
|
|
+ bend->nicname = xenbus_read(XBT_NIL, dev->nodename, "accel", &len);
|
|
+ if (IS_ERR(bend->nicname))
|
|
+ return PTR_ERR(bend->nicname);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static const char *frontend_name = "sfc_netfront";
|
|
+
|
|
+static int publish_frontend_name(struct xenbus_device *dev)
|
|
+{
|
|
+ struct xenbus_transaction tr;
|
|
+ int err;
|
|
+
|
|
+ /* Publish the name of the frontend driver */
|
|
+ do {
|
|
+ err = xenbus_transaction_start(&tr);
|
|
+ if (err != 0) {
|
|
+ EPRINTK("%s: transaction start failed\n", __FUNCTION__);
|
|
+ return err;
|
|
+ }
|
|
+ err = xenbus_printf(tr, dev->nodename, "accel-frontend",
|
|
+ "%s", frontend_name);
|
|
+ if (err != 0) {
|
|
+ EPRINTK("%s: xenbus_printf failed\n", __FUNCTION__);
|
|
+ xenbus_transaction_end(tr, 1);
|
|
+ return err;
|
|
+ }
|
|
+ err = xenbus_transaction_end(tr, 0);
|
|
+ } while (err == -EAGAIN);
|
|
+
|
|
+ if (err != 0) {
|
|
+ EPRINTK("failed to end frontend name transaction\n");
|
|
+ return err;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static int unpublish_frontend_name(struct xenbus_device *dev)
|
|
+{
|
|
+ struct xenbus_transaction tr;
|
|
+ int err;
|
|
+
|
|
+ do {
|
|
+ err = xenbus_transaction_start(&tr);
|
|
+ if (err != 0)
|
|
+ break;
|
|
+ err = xenbus_rm(tr, dev->nodename, "accel-frontend");
|
|
+ if (err != 0) {
|
|
+ xenbus_transaction_end(tr, 1);
|
|
+ break;
|
|
+ }
|
|
+ err = xenbus_transaction_end(tr, 0);
|
|
+ } while (err == -EAGAIN);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+static void cleanup_vnic(struct netback_accel *bend)
|
|
+{
|
|
+ struct xenbus_device *dev;
|
|
+
|
|
+ dev = (struct xenbus_device *)bend->hdev_data;
|
|
+
|
|
+ DPRINTK("%s: bend %p dev %p\n", __FUNCTION__, bend, dev);
|
|
+
|
|
+ DPRINTK("%s: Remove %p's mac from fwd table...\n",
|
|
+ __FUNCTION__, bend);
|
|
+ netback_accel_fwd_remove(bend->mac, bend->fwd_priv);
|
|
+
|
|
+ /* Free buffer table allocations */
|
|
+ netback_accel_remove_buffers(bend);
|
|
+
|
|
+ DPRINTK("%s: Release hardware resources...\n", __FUNCTION__);
|
|
+ if (bend->accel_shutdown)
|
|
+ bend->accel_shutdown(bend);
|
|
+
|
|
+ if (bend->net_channel_irq) {
|
|
+ unbind_from_irqhandler(bend->net_channel_irq, dev);
|
|
+ bend->net_channel_irq = 0;
|
|
+ }
|
|
+
|
|
+ if (bend->msg_channel_irq) {
|
|
+ unbind_from_irqhandler(bend->msg_channel_irq, dev);
|
|
+ bend->msg_channel_irq = 0;
|
|
+ }
|
|
+
|
|
+ if (bend->sh_pages_unmap) {
|
|
+ DPRINTK("%s: Unmap grants %p\n", __FUNCTION__,
|
|
+ bend->sh_pages_unmap);
|
|
+ net_accel_unmap_grants_contig(dev, bend->sh_pages_unmap);
|
|
+ bend->sh_pages_unmap = NULL;
|
|
+ bend->shared_page = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/*************************************************************************/
|
|
+
|
|
+/*
|
|
+ * The following code handles accelstate changes between the frontend
|
|
+ * and the backend. It calls setup_vnic and cleanup_vnic in matching
|
|
+ * pairs in response to transitions.
|
|
+ *
|
|
+ * Valid state transitions for Dom0 are as follows:
|
|
+ *
|
|
+ * Closed->Init on probe or in response to Init from domU
|
|
+ * Closed->Closing on error/remove
|
|
+ *
|
|
+ * Init->Connected in response to Connected from domU
|
|
+ * Init->Closing on error/remove or in response to Closing from domU
|
|
+ *
|
|
+ * Connected->Closing on error/remove or in response to Closing from domU
|
|
+ *
|
|
+ * Closing->Closed in response to Closed from domU
|
|
+ *
|
|
+ */
|
|
+
|
|
+
|
|
+static void netback_accel_frontend_changed(struct xenbus_device *dev,
|
|
+ XenbusState frontend_state)
|
|
+{
|
|
+ struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
|
|
+ XenbusState backend_state;
|
|
+
|
|
+ DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
|
|
+ __FUNCTION__, xenbus_strstate(bend->frontend_state),
|
|
+ xenbus_strstate(frontend_state),dev->nodename, dev->otherend);
|
|
+
|
|
+ /*
|
|
+ * Ignore duplicate state changes. This can happen if the
|
|
+ * frontend changes state twice in quick succession and the
|
|
+ * first watch fires in the backend after the second
|
|
+ * transition has completed.
|
|
+ */
|
|
+ if (bend->frontend_state == frontend_state)
|
|
+ return;
|
|
+
|
|
+ bend->frontend_state = frontend_state;
|
|
+ backend_state = bend->backend_state;
|
|
+
|
|
+ switch (frontend_state) {
|
|
+ case XenbusStateInitialising:
|
|
+ if (backend_state == XenbusStateClosed &&
|
|
+ !bend->removing)
|
|
+ backend_state = XenbusStateInitialising;
|
|
+ break;
|
|
+
|
|
+ case XenbusStateConnected:
|
|
+ if (backend_state == XenbusStateInitialising) {
|
|
+ if (!bend->vnic_is_setup &&
|
|
+ setup_vnic(dev) == 0) {
|
|
+ bend->vnic_is_setup = 1;
|
|
+ backend_state = XenbusStateConnected;
|
|
+ } else {
|
|
+ backend_state = XenbusStateClosing;
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case XenbusStateInitWait:
|
|
+ case XenbusStateInitialised:
|
|
+ default:
|
|
+ DPRINTK("Unknown state %s (%d) from frontend.\n",
|
|
+ xenbus_strstate(frontend_state), frontend_state);
|
|
+ /* Unknown state. Fall through. */
|
|
+ case XenbusStateClosing:
|
|
+ if (backend_state != XenbusStateClosed)
|
|
+ backend_state = XenbusStateClosing;
|
|
+
|
|
+ /*
|
|
+ * The bend will now persist (with watches active) in
|
|
+ * case the frontend comes back again, eg. after
|
|
+ * frontend module reload or suspend/resume
|
|
+ */
|
|
+
|
|
+ break;
|
|
+
|
|
+ case XenbusStateUnknown:
|
|
+ case XenbusStateClosed:
|
|
+ if (bend->vnic_is_setup) {
|
|
+ bend->vnic_is_setup = 0;
|
|
+ cleanup_vnic(bend);
|
|
+ }
|
|
+
|
|
+ if (backend_state == XenbusStateClosing)
|
|
+ backend_state = XenbusStateClosed;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (backend_state != bend->backend_state) {
|
|
+ DPRINTK("Switching from state %s (%d) to %s (%d)\n",
|
|
+ xenbus_strstate(bend->backend_state),
|
|
+ bend->backend_state,
|
|
+ xenbus_strstate(backend_state), backend_state);
|
|
+ bend->backend_state = backend_state;
|
|
+ net_accel_update_state(dev, backend_state);
|
|
+ }
|
|
+
|
|
+ wake_up(&bend->state_wait_queue);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Watch callback: accelstate on the frontend's xenbus node has changed.
|
|
+ *
|
|
+ * Reads the frontend's "accelstate" value and hands it to
|
|
+ * netback_accel_frontend_changed() with bend_mutex held.  Nothing is
|
|
+ * done if the watch has no node set, or if the firing looks stale (no
|
|
+ * otherend, the watched node no longer exists, or the reported path
|
|
+ * does not start with otherend).
|
|
+ *
|
|
+ * If "accelstate" cannot be read, the state is reported as
|
|
+ * XenbusStateUnknown rather than being left uninitialised.
|
|
+ *
|
|
+ * watch: the domu_accel_watch member embedded in a struct netback_accel
|
|
+ * vec:   watch event vector; vec[XS_WATCH_PATH] is the path that fired
|
|
+ * len:   not used here
|
|
+ */
|
|
+static void bend_domu_accel_change(struct xenbus_watch *watch,
|
|
+				   const char **vec, unsigned int len)
|
|
+{
|
|
+	int state;
|
|
+	struct netback_accel *bend;
|
|
+
|
|
+	bend = container_of(watch, struct netback_accel, domu_accel_watch);
|
|
+	if (bend->domu_accel_watch.node != NULL) {
|
|
+		struct xenbus_device *dev =
|
|
+			(struct xenbus_device *)bend->hdev_data;
|
|
+		VPRINTK("Watch matched, got dev %p otherend %p\n",
|
|
+			dev, dev->otherend);
|
|
+		/*
|
|
+		 * dev->otherend != NULL check to protect against
|
|
+		 * watch firing when domain goes away and we haven't
|
|
+		 * yet cleaned up
|
|
+		 */
|
|
+		if (!dev->otherend ||
|
|
+		    !xenbus_exists(XBT_NIL, watch->node, "") ||
|
|
+		    strncmp(dev->otherend, vec[XS_WATCH_PATH],
|
|
+			    strlen(dev->otherend))) {
|
|
+			DPRINTK("Ignoring watch as otherend seems invalid\n");
|
|
+			return;
|
|
+		}
|
|
+
|
|
+		mutex_lock(&bend->bend_mutex);
|
|
+
|
|
+		/*
|
|
+		 * state is not guaranteed to be written when the read
|
|
+		 * fails, so substitute a defined value instead of
|
|
+		 * feeding stack garbage into the state machine.
|
|
+		 */
|
|
+		if (xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
|
|
+				 &state) != 1)
|
|
+			state = XenbusStateUnknown;
|
|
+		netback_accel_frontend_changed(dev, state);
|
|
+
|
|
+		mutex_unlock(&bend->bend_mutex);
|
|
+	}
|
|
+}
|
|
+
|
|
+/* Setup watch on frontend's accelstate */
|
|
+static int setup_domu_accel_watch(struct xenbus_device *dev,
|
|
+ struct netback_accel *bend)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ VPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
|
|
+
|
|
+ err = xenbus_watch_path2(dev, dev->otherend, "accelstate",
|
|
+ &bend->domu_accel_watch,
|
|
+ bend_domu_accel_change);
|
|
+ if (err) {
|
|
+ EPRINTK("%s: Failed to register xenbus watch: %d\n",
|
|
+ __FUNCTION__, err);
|
|
+ goto fail;
|
|
+ }
|
|
+ return 0;
|
|
+ fail:
|
|
+ bend->domu_accel_watch.node = NULL;
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/*
+ * Create the acceleration state ("bend") for a newly probed backend device.
+ *
+ * The bend is allocated, cross-linked with the device's backend_info,
+ * given its NIC name and hardware type, published, and hooked up to the
+ * config and frontend accelstate watches before being put on bend_list.
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error of
+ * the first failing step, after undoing the steps that had succeeded.
+ */
+int netback_accel_probe(struct xenbus_device *dev)
+{
+	struct backend_info *binfo = dev->dev.driver_data;
+	struct netback_accel *bend;
+	int rc;
+
+	DPRINTK("%s: passed device %s\n", __FUNCTION__, dev->nodename);
+
+	bend = kzalloc(sizeof(*bend), GFP_KERNEL);
+	if (!bend) {
+		DPRINTK("%s: no memory for bend\n", __FUNCTION__);
+		return -ENOMEM;
+	}
+
+	/* bend_mutex stays held from here until the debugfs entry exists. */
+	mutex_init(&bend->bend_mutex);
+	mutex_lock(&bend->bend_mutex);
+
+	/* Cross-link the device's backend_info and the new bend. */
+	binfo->netback_accel_priv = bend;
+	bend->hdev_data = dev;
+
+	DPRINTK("%s: Adding bend %p to list\n", __FUNCTION__, bend);
+
+	init_waitqueue_head(&bend->state_wait_queue);
+	bend->removing = 0;
+	bend->vnic_is_setup = 0;
+	bend->backend_state = XenbusStateClosed;
+	bend->frontend_state = XenbusStateUnknown;
+
+	sscanf(dev->nodename, NODENAME_PATH_FMT, &bend->far_end,
+	       &bend->vif_num);
+
+	/* A failure here only means this device is not to be accelerated. */
+	rc = read_nicname(dev, bend);
+	if (rc) {
+		DPRINTK("failed to get device name\n");
+		goto undo_link;
+	}
+
+	/*
+	 * Match the name against the NICs known through driverlink to find
+	 * the hardware type.  No match is not really an error either: the
+	 * device probably belongs to some other backend.
+	 */
+	rc = netback_accel_sf_hwtype(bend);
+	if (rc) {
+		DPRINTK("failed to match device name\n");
+		goto undo_nicname;
+	}
+
+	rc = publish_frontend_name(dev);
+	if (rc)
+		goto undo_hwtype;
+
+	rc = netback_accel_debugfs_create(bend);
+	if (rc)
+		goto undo_publish;
+
+	mutex_unlock(&bend->bend_mutex);
+
+	/* The watches are registered without bend_mutex held. */
+	rc = setup_config_accel_watch(dev, bend);
+	if (rc)
+		goto undo_debugfs;
+
+	rc = setup_domu_accel_watch(dev, bend);
+	if (rc)
+		goto undo_config_watch;
+
+	/*
+	 * Announce that this end is ready to start, unless a watch has
+	 * fired in the meantime and already moved the state on.
+	 */
+	mutex_lock(&bend->bend_mutex);
+	if (bend->backend_state == XenbusStateClosed) {
+		bend->backend_state = XenbusStateInitialising;
+		net_accel_update_state(dev, XenbusStateInitialising);
+	}
+	mutex_unlock(&bend->bend_mutex);
+
+	mutex_lock(&bend_list_mutex);
+	link_bend(bend);
+	mutex_unlock(&bend_list_mutex);
+
+	return 0;
+
+undo_config_watch:
+	unregister_xenbus_watch(&bend->config_accel_watch);
+	kfree(bend->config_accel_watch.node);
+
+undo_debugfs:
+	/*
+	 * Drain the scheduled work queue first so that no pending
+	 * netback_accel_msg_rx_handler() outlives bend.
+	 */
+	flush_scheduled_work();
+
+	/* Retake the lock: the labels below are entered with it held. */
+	mutex_lock(&bend->bend_mutex);
+	net_accel_update_state(dev, XenbusStateUnknown);
+	netback_accel_debugfs_remove(bend);
+
+undo_publish:
+	unpublish_frontend_name(dev);
+
+undo_hwtype:
+	/* netback_accel_sf_hwtype() has nothing to reverse. */
+undo_nicname:
+	kfree(bend->nicname);
+
+undo_link:
+	binfo->netback_accel_priv = NULL;
+	mutex_unlock(&bend->bend_mutex);
+	kfree(bend);
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Tear down the acceleration state of a backend device that is going away.
+ *
+ * The bend leaves bend_list and is moved to Closing.  Unless the frontend
+ * reads as Closed or Unknown, wait until the backend state becomes Closed.
+ * Then both watches are dropped and the bend is released.
+ * Always returns 0.
+ */
+int netback_accel_remove(struct xenbus_device *dev)
+{
+	struct backend_info *binfo = dev->dev.driver_data;
+	struct netback_accel *bend = binfo->netback_accel_priv;
+	int fe_state = (int)XenbusStateUnknown;
+
+	DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend);
+	BUG_ON(bend == NULL);
+
+	mutex_lock(&bend_list_mutex);
+	unlink_bend(bend);
+	mutex_unlock(&bend_list_mutex);
+
+	mutex_lock(&bend->bend_mutex);
+
+	/* From now on requests to connect are to be refused. */
+	bend->removing = 1;
+
+	/* Let the other end know we are leaving by going to Closing. */
+	if (bend->backend_state != XenbusStateClosing) {
+		bend->backend_state = XenbusStateClosing;
+		net_accel_update_state(dev, XenbusStateClosing);
+	}
+
+	/* NOTE(review): presumably left at Unknown if the read fails. */
+	xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d",
+		     &fe_state);
+
+	mutex_unlock(&bend->bend_mutex);
+
+	/*
+	 * This end reaches Closed in response to the other end closing, so
+	 * wait for that -- except when the other end is already closed (or
+	 * unknown), in which case there is nothing left to wait for.
+	 */
+	switch (fe_state) {
+	case (int)XenbusStateClosed:
+	case (int)XenbusStateUnknown:
+		break;
+	default:
+		wait_event(bend->state_wait_queue,
+			   bend->backend_state == XenbusStateClosed);
+		break;
+	}
+
+	unregister_xenbus_watch(&bend->domu_accel_watch);
+	kfree(bend->domu_accel_watch.node);
+	unregister_xenbus_watch(&bend->config_accel_watch);
+	kfree(bend->config_accel_watch.node);
+
+	/*
+	 * Drain the scheduled work queue before bend is freed so that no
+	 * netback_accel_msg_rx_handler() is left pending.
+	 */
+	flush_scheduled_work();
+
+	mutex_lock(&bend->bend_mutex);
+
+	/* Dismantle the vnic, but only if one was set up. */
+	if (bend->vnic_is_setup) {
+		bend->vnic_is_setup = 0;
+		cleanup_vnic(bend);
+	}
+
+	bend->backend_state = XenbusStateUnknown;
+	net_accel_update_state(dev, XenbusStateUnknown);
+
+	netback_accel_debugfs_remove(bend);
+	unpublish_frontend_name(dev);
+	kfree(bend->nicname);
+
+	binfo->netback_accel_priv = NULL;
+	mutex_unlock(&bend->bend_mutex);
+	kfree(bend);
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Sanity check that bend_list is empty.  The original author's expectation
+ * is that every interface got its remove callback before module unload.
+ */
+void netback_accel_shutdown_bends(void)
+{
+	mutex_lock(&bend_list_mutex);
+	BUG_ON(bend_list);
+	mutex_unlock(&bend_list_mutex);
+}
|
|
+
|
|
+/* Set bend to Closing and pass that state on for the bend's device. */
+void netback_accel_set_closing(struct netback_accel *bend)
+{
+	struct xenbus_device *dev = bend->hdev_data;
+
+	bend->backend_state = XenbusStateClosing;
+	net_accel_update_state(dev, XenbusStateClosing);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,53 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Compatibility layer. Provides definitions of fundamental
|
|
+ * types and definitions that are used throughout CI source
|
|
+ * code. It does not introduce any link time dependencies,
|
|
+ * or include any unnecessary system headers.
|
|
+ */
|
|
+/*! \cidoxg_include_ci */
|
|
+
|
|
+#ifndef __CI_COMPAT_H__
|
|
+#define __CI_COMPAT_H__
|
|
+
|
|
+#ifdef __cplusplus
|
|
+extern "C" {
|
|
+#endif
|
|
+
|
|
+#include <ci/compat/primitive.h>
|
|
+#include <ci/compat/sysdep.h>
|
|
+#include <ci/compat/utils.h>
|
|
+
|
|
+
|
|
+#ifdef __cplusplus
|
|
+}
|
|
+#endif
|
|
+
|
|
+#endif /* __CI_COMPAT_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat/gcc.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,158 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_compat */
|
|
+
|
|
+#ifndef __CI_COMPAT_GCC_H__
|
|
+#define __CI_COMPAT_GCC_H__
|
|
+
|
|
+
|
|
+#define CI_HAVE_INT64
|
|
+
|
|
+
|
|
+#if defined(__linux__) && defined(__KERNEL__)
|
|
+
|
|
+# include <linux/types.h>
|
|
+
|
|
+typedef __u64 ci_uint64;
|
|
+typedef __s64 ci_int64;
|
|
+# if BITS_PER_LONG == 32
|
|
+typedef __s32 ci_ptr_arith_t;
|
|
+typedef __u32 ci_uintptr_t;
|
|
+# else
|
|
+typedef __s64 ci_ptr_arith_t;
|
|
+typedef __u64 ci_uintptr_t;
|
|
+# endif
|
|
+
|
|
+
|
|
+/* it's not obvious to me why the below is wrong for x86_64, but
|
|
+ * gcc seems to complain on this platform
|
|
+ */
|
|
+# if defined(__ia64__)
|
|
+# define CI_PRId64 "ld"
|
|
+# define CI_PRIi64 "li"
|
|
+# define CI_PRIo64 "lo"
|
|
+# define CI_PRIu64 "lu"
|
|
+# define CI_PRIx64 "lx"
|
|
+# define CI_PRIX64 "lX"
|
|
+# else
|
|
+# define CI_PRId64 "lld"
|
|
+# define CI_PRIi64 "lli"
|
|
+# define CI_PRIo64 "llo"
|
|
+# define CI_PRIu64 "llu"
|
|
+# define CI_PRIx64 "llx"
|
|
+# define CI_PRIX64 "llX"
|
|
+# endif
|
|
+
|
|
+# define CI_PRId32 "d"
|
|
+# define CI_PRIi32 "i"
|
|
+# define CI_PRIo32 "o"
|
|
+# define CI_PRIu32 "u"
|
|
+# define CI_PRIx32 "x"
|
|
+# define CI_PRIX32 "X"
|
|
+
|
|
+#else
|
|
+
|
|
+# include <stdint.h>
|
|
+# include <inttypes.h>
|
|
+
|
|
+typedef uint64_t ci_uint64;
|
|
+typedef int64_t ci_int64;
|
|
+typedef intptr_t ci_ptr_arith_t;
|
|
+typedef uintptr_t ci_uintptr_t;
|
|
+
|
|
+# define CI_PRId64 PRId64
|
|
+# define CI_PRIi64 PRIi64
|
|
+# define CI_PRIo64 PRIo64
|
|
+# define CI_PRIu64 PRIu64
|
|
+# define CI_PRIx64 PRIx64
|
|
+# define CI_PRIX64 PRIX64
|
|
+
|
|
+# define CI_PRId32 PRId32
|
|
+# define CI_PRIi32 PRIi32
|
|
+# define CI_PRIo32 PRIo32
|
|
+# define CI_PRIu32 PRIu32
|
|
+# define CI_PRIx32 PRIx32
|
|
+# define CI_PRIX32 PRIX32
|
|
+
|
|
+#endif
|
|
+
|
|
+
|
|
+typedef ci_uint64 ci_fixed_descriptor_t;
|
|
+
|
|
+#define from_fixed_descriptor(desc) ((ci_uintptr_t)(desc))
|
|
+#define to_fixed_descriptor(desc) ((ci_fixed_descriptor_t)(ci_uintptr_t)(desc))
|
|
+
|
|
+
|
|
+#if __GNUC__ >= 3 && !defined(__cplusplus)
|
|
+/*
|
|
+** Checks that [p_mbr] has the same type as [&c_type::mbr_name].
|
|
+*/
|
|
+# define CI_CONTAINER(c_type, mbr_name, p_mbr) \
|
|
+ __builtin_choose_expr( \
|
|
+ __builtin_types_compatible_p(__typeof__(&((c_type*)0)->mbr_name), \
|
|
+ __typeof__(p_mbr)), \
|
|
+ __CI_CONTAINER(c_type, mbr_name, p_mbr), (void)0)
|
|
+
|
|
+# define ci_restrict __restrict__
|
|
+#endif
|
|
+
|
|
+
|
|
+#if !defined(__KERNEL__) || defined(__unix__)
|
|
+#define CI_HAVE_NPRINTF 1
|
|
+#endif
|
|
+
|
|
+
|
|
+/* At what version was this introduced? */
|
|
+#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
|
|
+# define CI_LIKELY(t) __builtin_expect((t), 1)
|
|
+# define CI_UNLIKELY(t) __builtin_expect((t), 0)
|
|
+#endif
|
|
+
|
|
+/**********************************************************************
|
|
+ * Attributes
|
|
+ */
|
|
+#if __GNUC__ >= 3 && defined(NDEBUG)
|
|
+# define CI_HF __attribute__((visibility("hidden")))
|
|
+# define CI_HV __attribute__((visibility("hidden")))
|
|
+#else
|
|
+# define CI_HF
|
|
+# define CI_HV
|
|
+#endif
|
|
+
|
|
+#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
|
|
+# define ci_noinline static __attribute__((__noinline__))
|
|
+/* (Linux 2.6 defines its own "noinline", so we use the "__noinline__" form) */
|
|
+#else
|
|
+# define ci_noinline static
|
|
+#endif
|
|
+
|
|
+#define CI_ALIGN(x) __attribute__ ((aligned (x)))
|
|
+
|
|
+#define CI_PRINTF_LIKE(a,b) __attribute__((format(printf,a,b)))
|
|
+
|
|
+#endif /* __CI_COMPAT_GCC_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat/gcc_x86.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,115 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_compat */
|
|
+
|
|
+#ifndef __CI_COMPAT_GCC_X86_H__
|
|
+#define __CI_COMPAT_GCC_X86_H__
|
|
+
|
|
+/*
|
|
+** The facts:
|
|
+**
|
|
+** SSE sfence
|
|
+** SSE2 lfence, mfence, pause
|
|
+*/
|
|
+
|
|
+/*
|
|
+ Barriers to enforce ordering with respect to:
|
|
+
|
|
+ normal memory use: ci_wmb, ci_rmb, ci_mb
|
|
+ IO bus access use: ci_wiob, ci_riob, ci_iob
|
|
+*/
|
|
+#if defined(__x86_64__)
|
|
+# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%rsp)":::"memory")
|
|
+#else
|
|
+# define ci_x86_mb() __asm__ __volatile__ ("lock; addl $0,0(%%esp)":::"memory")
|
|
+#endif
|
|
+
|
|
+/* ?? measure the impact of latency of sfence on a modern processor before we
|
|
+ take a decision on how to integrate with respect to writecombining */
|
|
+
|
|
+/* The "memory" clobber is needed.  Without it GCC is free to move ordinary
+** loads and stores across the asm (__volatile__ only stops the asm itself
+** from being dropped or reordered against other volatile operations), so
+** the fence would not constrain the compiler at all.
+*/
+#define ci_gcc_fence()    __asm__ __volatile__ ("":::"memory")
+
+#if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
+# define ci_x86_sfence()  __asm__ __volatile__ ("sfence":::"memory")
+# define ci_x86_lfence()  __asm__ __volatile__ ("lfence":::"memory")
+# define ci_x86_mfence()  __asm__ __volatile__ ("mfence":::"memory")
+#else
+# define ci_x86_sfence()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8":::"memory")
+# define ci_x86_lfence()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xE8":::"memory")
+# define ci_x86_mfence()  __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF0":::"memory")
+#endif
|
|
+
|
|
+
|
|
+/* x86 processors to P4 Xeon store in-order unless executing streaming
|
|
+ extensions or when using writecombining
|
|
+
|
|
+ Hence we do not define ci_wmb to use sfence by default. Requirement is that
|
|
+ we do not use writecombining to memory and any code which uses SSE
|
|
+ extensions must call sfence directly
|
|
+
|
|
+ We need to track non intel clones which may support out of order store.
|
|
+
|
|
+*/
|
|
+
|
|
+#if CI_CPU_OOS
|
|
+# if CI_CPU_HAS_SSE
|
|
+# define ci_wmb() ci_x86_sfence()
|
|
+# else
|
|
+# define ci_wmb() ci_x86_mb()
|
|
+# endif
|
|
+#else
|
|
+# define ci_wmb() ci_gcc_fence()
|
|
+#endif
|
|
+
|
|
+#if CI_CPU_HAS_SSE2
|
|
+# define ci_rmb() ci_x86_lfence()
|
|
+# define ci_mb() ci_x86_mfence()
|
|
+# define ci_riob() ci_x86_lfence()
|
|
+# define ci_wiob() ci_x86_sfence()
|
|
+# define ci_iob() ci_x86_mfence()
|
|
+#else
|
|
+# if CI_CPU_HAS_SSE
|
|
+# define ci_wiob() ci_x86_sfence()
|
|
+# else
|
|
+# define ci_wiob() ci_x86_mb()
|
|
+# endif
|
|
+# define ci_rmb() ci_x86_mb()
|
|
+# define ci_mb() ci_x86_mb()
|
|
+# define ci_riob() ci_x86_mb()
|
|
+# define ci_iob() ci_x86_mb()
|
|
+#endif
|
|
+
|
|
+typedef unsigned long ci_phys_addr_t;
|
|
+#define ci_phys_addr_fmt "%lx"
|
|
+
|
|
+#endif /* __CI_COMPAT_GCC_X86_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat/primitive.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,77 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+/*! \cidoxg_include_ci_compat */
|
|
+
|
|
+#ifndef __CI_COMPAT_PRIMITIVE_H__
|
|
+#define __CI_COMPAT_PRIMITIVE_H__
|
|
+
|
|
+
|
|
+/**********************************************************************
+ * Primitive types.
+ */
+
+typedef unsigned char   ci_uint8;
+typedef signed char     ci_int8;   /* plain char may be unsigned */
+
+typedef unsigned short  ci_uint16;
+typedef short           ci_int16;
+
+typedef unsigned int    ci_uint32;
+typedef int             ci_int32;
|
|
+
|
|
+/* 64-bit support is platform dependent. */
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Other fancy types.
|
|
+ */
|
|
+
|
|
+typedef ci_uint8 ci_octet;
|
|
+
|
|
+typedef enum {
|
|
+ CI_FALSE = 0,
|
|
+ CI_TRUE
|
|
+} ci_boolean_t;
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Some nice types you'd always assumed were standards.
|
|
+ * (Really, they are SYSV "standards".)
|
|
+ */
|
|
+
|
|
+#ifdef _WIN32
|
|
+typedef unsigned long ulong;
|
|
+typedef unsigned int uint;
|
|
+typedef char* caddr_t;
|
|
+#elif defined(__linux__) && defined(__KERNEL__)
|
|
+#include <linux/types.h>
|
|
+#elif defined(__linux__)
|
|
+#include <sys/types.h>
|
|
+#endif
|
|
+
|
|
+
|
|
+#endif /* __CI_COMPAT_PRIMITIVE_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat/sysdep.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,166 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_compat */
|
|
+
|
|
+#ifndef __CI_COMPAT_SYSDEP_H__
|
|
+#define __CI_COMPAT_SYSDEP_H__
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Platform definition fixups.
|
|
+ */
|
|
+
|
|
+#if defined(__ci_ul_driver__) && !defined(__ci_driver__)
|
|
+# define __ci_driver__
|
|
+#endif
|
|
+
|
|
+#if defined(__ci_driver__) && !defined(__ci_ul_driver__) && \
|
|
+ !defined(__KERNEL__)
|
|
+# define __KERNEL__
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Sanity checks (no cheating!)
|
|
+ */
|
|
+
|
|
+#if defined(__KERNEL__) && !defined(__ci_driver__)
|
|
+# error Insane.
|
|
+#endif
|
|
+
|
|
+#if defined(__KERNEL__) && defined(__ci_ul_driver__)
|
|
+# error Madness.
|
|
+#endif
|
|
+
|
|
+#if defined(__unix__) && defined(_WIN32)
|
|
+# error Strange.
|
|
+#endif
|
|
+
|
|
+#if defined(__GNUC__) && defined(_MSC_VER)
|
|
+# error Crazy.
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Compiler and processor dependencies.
|
|
+ */
|
|
+
|
|
+#if defined(__GNUC__)
|
|
+
|
|
+# include <ci/compat/gcc.h>
|
|
+
|
|
+# if defined(__i386__)
|
|
+# include <ci/compat/x86.h>
|
|
+# include <ci/compat/gcc_x86.h>
|
|
+# elif defined(__x86_64__)
|
|
+# include <ci/compat/x86_64.h>
|
|
+# include <ci/compat/gcc_x86.h>
|
|
+# elif defined(__PPC__)
|
|
+# include <ci/compat/ppc.h>
|
|
+# include <ci/compat/gcc_ppc.h>
|
|
+# elif defined(__ia64__)
|
|
+# include <ci/compat/ia64.h>
|
|
+# include <ci/compat/gcc_ia64.h>
|
|
+# else
|
|
+# error Unknown processor - GNU C
|
|
+# endif
|
|
+
|
|
+#elif defined(_MSC_VER)
|
|
+
|
|
+# include <ci/compat/msvc.h>
|
|
+
|
|
+# if defined(__i386__)
|
|
+# include <ci/compat/x86.h>
|
|
+# include <ci/compat/msvc_x86.h>
|
|
+# elif defined(__x86_64__)
|
|
+# include <ci/compat/x86_64.h>
|
|
+# include <ci/compat/msvc_x86_64.h>
|
|
+# else
|
|
+# error Unknown processor MSC
|
|
+# endif
|
|
+
|
|
+#elif defined(__PGI)
|
|
+
|
|
+# include <ci/compat/x86.h>
|
|
+# include <ci/compat/pg_x86.h>
|
|
+
|
|
+#elif defined(__INTEL_COMPILER)
|
|
+
|
|
+/* Intel compilers v7 claim to be very gcc compatible. */
|
|
+# if __INTEL_COMPILER >= 700
|
|
+# include <ci/compat/gcc.h>
|
|
+# include <ci/compat/x86.h>
|
|
+# include <ci/compat/gcc_x86.h>
|
|
+# else
|
|
+# error Old Intel compiler not supported. Yet.
|
|
+# endif
|
|
+
|
|
+#else
|
|
+# error Unknown compiler.
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Misc stuff (that probably shouldn't be here).
|
|
+ */
|
|
+
|
|
+#ifdef __sun
|
|
+# ifdef __KERNEL__
|
|
+# define _KERNEL
|
|
+# define _SYSCALL32
|
|
+# ifdef _LP64
|
|
+# define _SYSCALL32_IMPL
|
|
+# endif
|
|
+# else
|
|
+# define _REENTRANT
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Defaults for anything left undefined.
|
|
+ */
|
|
+
|
|
+#ifndef CI_LIKELY
|
|
+# define CI_LIKELY(t) (t)
|
|
+# define CI_UNLIKELY(t) (t)
|
|
+#endif
|
|
+
|
|
+#ifndef ci_restrict
|
|
+# define ci_restrict
|
|
+#endif
|
|
+
|
|
+#ifndef ci_inline
|
|
+# define ci_inline static inline
|
|
+#endif
|
|
+
|
|
+#ifndef ci_noinline
|
|
+# define ci_noinline static
|
|
+#endif
|
|
+
|
|
+#endif /* __CI_COMPAT_SYSDEP_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat/utils.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,269 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Handy utility macros.
|
|
+ * \date 2003/01/17
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_compat */
|
|
+
|
|
+#ifndef __CI_COMPAT_UTILS_H__
|
|
+#define __CI_COMPAT_UTILS_H__
|
|
+
|
|
+
|
|
+/**********************************************************************
+ * Alignment -- [align] must be a power of 2.
+ **********************************************************************/
+
+  /*! Offset mask in the common type of [p] and [align]-1u, so that its
+   *  complement keeps the high bits of a [p] wider than [align].  [p] is
+   *  in the unselected arm of ?: and is therefore never evaluated. */
+#define __CI_ALIGN_MASK(p, align)  (0 ? (p) : ((align)-1u))
+
+  /*! Align forward onto next boundary. */
+
+#define CI_ALIGN_FWD(p, align) \
+  (((p)+(align)-1u) & ~__CI_ALIGN_MASK((p), (align)))
+
+  /*! Align back onto prev boundary. */
+
+#define CI_ALIGN_BACK(p, align)   ((p) & ~__CI_ALIGN_MASK((p), (align)))
+
+  /*! How far to next boundary? */
+
+#define CI_ALIGN_NEEDED(p, align, signed_t)  (-(signed_t)(p) & ((align)-1u))
+
+  /*! How far beyond prev boundary? */
+
+#define CI_OFFSET(p, align)                  ((p) & ((align)-1u))
+
+
+  /*! Does object fit in gap before next boundary? */
+
+#define CI_FITS(p, size, align, signed_t)                       \
+  (CI_ALIGN_NEEDED((p) + 1, (align), signed_t) + 1 >= (size))
+
+
+  /*! Align forward onto next boundary. */
+
+#define CI_PTR_ALIGN_FWD(p, align)					   \
+  ((char*) CI_ALIGN_FWD(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))
+
+  /*! Align back onto prev boundary. */
+
+#define CI_PTR_ALIGN_BACK(p, align)					    \
+  ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))
+
+  /*! How far to next boundary? */
+
+#define CI_PTR_ALIGN_NEEDED(p, align)					\
+  CI_ALIGN_NEEDED(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)),	\
+		  ci_ptr_arith_t)
+
+  /*! How far to next boundary? NZ = not zero i.e. give align if on boundary */
+
+#define CI_PTR_ALIGN_NEEDED_NZ(p, align)					\
+  ((align) - (((char*)(p)) -                                                    \
+  ((char*) CI_ALIGN_BACK(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align))))))
+
+  /*! How far beyond prev boundary? */
+
+#define CI_PTR_OFFSET(p, align)					\
+  CI_OFFSET(((ci_ptr_arith_t)(p)), ((ci_ptr_arith_t)(align)))
+
+  /* Same as CI_ALIGN_FWD and CI_ALIGN_BACK. */
+#define CI_ROUND_UP(i, align)   (((i)+(align)-1u) & ~__CI_ALIGN_MASK((i), (align)))
+#define CI_ROUND_DOWN(i, align) ((i) & ~__CI_ALIGN_MASK((i), (align)))
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Byte-order
|
|
+ **********************************************************************/
|
|
+
|
|
+/* These are not flags. They are enumeration values for use with
|
|
+ * CI_MY_BYTE_ORDER. */
|
|
+#define CI_BIG_ENDIAN 1
|
|
+#define CI_LITTLE_ENDIAN 0
|
|
+
|
|
+/*
|
|
+** Note that these byte-swapping primitives may leave junk in bits above
|
|
+** the range they operate on.
|
|
+**
|
|
+** The CI_BSWAP_nn() routines require that bits above [nn] are zero. Use
|
|
+** CI_BSWAPM_nn(x) if this cannot be guaranteed.
|
|
+*/
|
|
+
|
|
+/* ?? May be able to improve on some of these with inline assembler on some
|
|
+** platforms.
|
|
+*/
|
|
+
|
|
+#define CI_BSWAP_16(v) ((((v) & 0xff) << 8) | ((v) >> 8))
|
|
+#define CI_BSWAPM_16(v) ((((v) & 0xff) << 8) | (((v) & 0xff00) >> 8))
|
|
+
|
|
+#define CI_BSWAP_32(v) (((v) >> 24) | \
|
|
+ (((v) & 0x00ff0000) >> 8) | \
|
|
+ (((v) & 0x0000ff00) << 8) | \
|
|
+ ((v) << 24))
|
|
+#define CI_BSWAPM_32(v) ((((v) & 0xff000000) >> 24) | \
|
|
+ (((v) & 0x00ff0000) >> 8) | \
|
|
+ (((v) & 0x0000ff00) << 8) | \
|
|
+ ((v) << 24))
|
|
+
|
|
+#define CI_BSWAP_64(v) (((v) >> 56) | \
|
|
+ (((v) & 0x00ff000000000000) >> 40) | \
|
|
+ (((v) & 0x0000ff0000000000) >> 24) | \
|
|
+ (((v) & 0x000000ff00000000) >> 8) | \
|
|
+ (((v) & 0x00000000ff000000) << 8) | \
|
|
+ (((v) & 0x0000000000ff0000) << 24) | \
|
|
+ (((v) & 0x000000000000ff00) << 40) | \
|
|
+ ((v) << 56))
|
|
+
|
|
+# define CI_BSWAPPED_16_IF(c,v) ((c) ? CI_BSWAP_16(v) : (v))
|
|
+# define CI_BSWAPPED_32_IF(c,v) ((c) ? CI_BSWAP_32(v) : (v))
|
|
+# define CI_BSWAPPED_64_IF(c,v) ((c) ? CI_BSWAP_64(v) : (v))
|
|
+# define CI_BSWAP_16_IF(c,v) do{ if((c)) (v) = CI_BSWAP_16(v); }while(0)
|
|
+# define CI_BSWAP_32_IF(c,v) do{ if((c)) (v) = CI_BSWAP_32(v); }while(0)
|
|
+# define CI_BSWAP_64_IF(c,v) do{ if((c)) (v) = CI_BSWAP_64(v); }while(0)
|
|
+
|
|
+#if (CI_MY_BYTE_ORDER == CI_LITTLE_ENDIAN)
|
|
+# define CI_BSWAP_LE16(v) (v)
|
|
+# define CI_BSWAP_LE32(v) (v)
|
|
+# define CI_BSWAP_LE64(v) (v)
|
|
+# define CI_BSWAP_BE16(v) CI_BSWAP_16(v)
|
|
+# define CI_BSWAP_BE32(v) CI_BSWAP_32(v)
|
|
+# define CI_BSWAP_BE64(v) CI_BSWAP_64(v)
|
|
+# define CI_BSWAPM_LE16(v) (v)
|
|
+# define CI_BSWAPM_LE32(v) (v)
|
|
+# define CI_BSWAPM_LE64(v) (v)
|
|
+# define CI_BSWAPM_BE16(v) CI_BSWAPM_16(v)
|
|
+# define CI_BSWAPM_BE32(v) CI_BSWAPM_32(v)
|
|
+#elif (CI_MY_BYTE_ORDER == CI_BIG_ENDIAN)
|
|
+# define CI_BSWAP_BE16(v) (v)
|
|
+# define CI_BSWAP_BE32(v) (v)
|
|
+# define CI_BSWAP_BE64(v) (v)
|
|
+# define CI_BSWAP_LE16(v) CI_BSWAP_16(v)
|
|
+# define CI_BSWAP_LE32(v) CI_BSWAP_32(v)
|
|
+# define CI_BSWAP_LE64(v) CI_BSWAP_64(v)
|
|
+# define CI_BSWAPM_BE16(v) (v)
|
|
+# define CI_BSWAPM_BE32(v) (v)
|
|
+# define CI_BSWAPM_BE64(v) (v)
|
|
+# define CI_BSWAPM_LE16(v) CI_BSWAPM_16(v)
|
|
+# define CI_BSWAPM_LE32(v) CI_BSWAPM_32(v)
|
|
+#else
|
|
+# error Bad endian.
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Get pointer to struct from pointer to member
|
|
+ **********************************************************************/
|
|
+
|
|
+#define CI_MEMBER_OFFSET(c_type, mbr_name) \
|
|
+ ((ci_uint32) (ci_uintptr_t)(&((c_type*)0)->mbr_name))
|
|
+
|
|
+#define CI_MEMBER_SIZE(c_type, mbr_name) \
|
|
+ sizeof(((c_type*)0)->mbr_name)
|
|
+
|
|
+#define __CI_CONTAINER(c_type, mbr_name, p_mbr) \
|
|
+ ( (c_type*) ((char*)(p_mbr) - CI_MEMBER_OFFSET(c_type, mbr_name)) )
|
|
+
|
|
+#ifndef CI_CONTAINER
|
|
+# define CI_CONTAINER(t,m,p) __CI_CONTAINER(t,m,p)
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Structure member initialiser.
|
|
+ **********************************************************************/
|
|
+
|
|
+#ifndef CI_STRUCT_MBR
|
|
+# define CI_STRUCT_MBR(name, val) .name = val
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * min / max
|
|
+ **********************************************************************/
|
|
+
|
|
+#define CI_MIN(x,y) (((x) < (y)) ? (x) : (y))
|
|
+#define CI_MAX(x,y) (((x) > (y)) ? (x) : (y))
|
|
+
|
|
+/**********************************************************************
|
|
+ * abs
|
|
+ **********************************************************************/
|
|
+
|
|
+#define CI_ABS(x) (((x) < 0) ? -(x) : (x))
|
|
+
|
|
+/**********************************************************************
|
|
+ * Conditional debugging
|
|
+ **********************************************************************/
|
|
+
|
|
+#ifdef NDEBUG
|
|
+# define CI_DEBUG(x)
|
|
+# define CI_NDEBUG(x) x
|
|
+# define CI_IF_DEBUG(y,n) (n)
|
|
+# define CI_DEBUG_ARG(x)
|
|
+#else
|
|
+# define CI_DEBUG(x) x
|
|
+# define CI_NDEBUG(x)
|
|
+# define CI_IF_DEBUG(y,n) (y)
|
|
+# define CI_DEBUG_ARG(x) ,x
|
|
+#endif
|
|
+
|
|
+#ifdef __KERNEL__
|
|
+#define CI_KERNEL_ARG(x) ,x
|
|
+#else
|
|
+#define CI_KERNEL_ARG(x)
|
|
+#endif
|
|
+
|
|
+#ifdef _WIN32
|
|
+# define CI_KERNEL_ARG_WIN(x) CI_KERNEL_ARG(x)
|
|
+# define CI_ARG_WIN(x) ,x
|
|
+#else
|
|
+# define CI_KERNEL_ARG_WIN(x)
|
|
+# define CI_ARG_WIN(x)
|
|
+#endif
|
|
+
|
|
+#ifdef __unix__
|
|
+# define CI_KERNEL_ARG_UNIX(x) CI_KERNEL_ARG(x)
|
|
+# define CI_ARG_UNIX(x) ,x
|
|
+#else
|
|
+# define CI_KERNEL_ARG_UNIX(x)
|
|
+# define CI_ARG_UNIX(x)
|
|
+#endif
|
|
+
|
|
+#ifdef __linux__
|
|
+# define CI_KERNEL_ARG_LINUX(x) CI_KERNEL_ARG(x)
|
|
+# define CI_ARG_LINUX(x) ,x
|
|
+#else
|
|
+# define CI_KERNEL_ARG_LINUX(x)
|
|
+# define CI_ARG_LINUX(x)
|
|
+#endif
|
|
+
|
|
+
|
|
+#endif /* __CI_COMPAT_UTILS_H__ */
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat/x86.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,48 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_compat */
|
|
+
|
|
+#ifndef __CI_COMPAT_X86_H__
|
|
+#define __CI_COMPAT_X86_H__
|
|
+
|
|
+
|
|
+#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN
|
|
+
|
|
+#define CI_WORD_SIZE 4
|
|
+#define CI_PTR_SIZE 4
|
|
+
|
|
+#define CI_PAGE_SIZE 4096
|
|
+#define CI_PAGE_SHIFT 12
|
|
+#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1))
|
|
+
|
|
+#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */
|
|
+#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */
|
|
+#define CI_CPU_OOS 0 /* CPU does out of order stores */
|
|
+
|
|
+
|
|
+#endif /* __CI_COMPAT_X86_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/compat/x86_64.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,54 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Arch stuff for AMD x86_64.
|
|
+ * \date 2004/08/17
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_compat */
|
|
+#ifndef __CI_COMPAT_X86_64_H__
|
|
+#define __CI_COMPAT_X86_64_H__
|
|
+
|
|
+
|
|
+#define CI_MY_BYTE_ORDER CI_LITTLE_ENDIAN
|
|
+
|
|
+#define CI_WORD_SIZE 8
|
|
+#define CI_PTR_SIZE 8
|
|
+
|
|
+#define CI_PAGE_SIZE 4096
|
|
+#define CI_PAGE_SHIFT 12
|
|
+#define CI_PAGE_MASK (~(CI_PAGE_SIZE - 1))
|
|
+
|
|
+#define CI_CPU_HAS_SSE 1 /* SSE extensions supported */
|
|
+
|
|
+/* SSE2 disabled while investigating BUG1060 */
|
|
+#define CI_CPU_HAS_SSE2 0 /* SSE2 extensions supported */
|
|
+#define CI_CPU_OOS 0 /* CPU does out of order stores */
|
|
+
|
|
+
|
|
+#endif /* __CI_COMPAT_X86_64_H__ */
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/tools/config.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,49 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_tools */
|
|
+
|
|
+#ifndef __CI_TOOLS_CONFIG_H__
|
|
+#define __CI_TOOLS_CONFIG_H__
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Debugging.
|
|
+ */
|
|
+
|
|
+#define CI_INCLUDE_ASSERT_VALID 0
|
|
+
|
|
+/* Set non-zero to allow info about who has allocated what to appear in
|
|
+ * /proc/drivers/level5/mem.
|
|
+ * However - Note that doing so can lead to segfault when you unload the
|
|
+ * driver, and other weirdness. i.e. I don't think the code for it is quite
|
|
+ * right (written by Oktet, hacked by gel), but it does work well enough to be
|
|
+ * useful.
|
|
+ */
|
|
+#define CI_MEMLEAK_DEBUG_ALLOC_TABLE 0
|
|
+
|
|
+
|
|
+#endif /* __CI_TOOLS_CONFIG_H__ */
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/tools/debug.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,336 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_tools */
|
|
+
|
|
+#ifndef __CI_TOOLS_DEBUG_H__
|
|
+#define __CI_TOOLS_DEBUG_H__
|
|
+
|
|
+#define CI_LOG_E(x) x /* errors */
|
|
+#define CI_LOG_W(x) x /* warnings */
|
|
+#define CI_LOG_I(x) x /* information */
|
|
+#define CI_LOG_V(x) x /* verbose */
|
|
+
|
|
+/* Build time asserts. We paste the line number into the type name
|
|
+ * so that the macro can be used more than once per file even if the
|
|
+ * compiler objects to multiple identical typedefs. Collisions
|
|
+ * between uses in different header files are still possible. */
|
|
+#ifndef CI_BUILD_ASSERT
|
|
+#define __CI_BUILD_ASSERT_NAME(_x) __CI_BUILD_ASSERT_ILOATHECPP(_x)
|
|
+#define __CI_BUILD_ASSERT_ILOATHECPP(_x) __CI_BUILD_ASSERT__ ##_x
|
|
+#define CI_BUILD_ASSERT(e)\
|
|
+ typedef char __CI_BUILD_ASSERT_NAME(__LINE__)[(e)?1:-1]
|
|
+#endif
|
|
+
|
|
+
|
|
+#ifdef NDEBUG
|
|
+
|
|
+# define _ci_check(exp, file, line)
|
|
+# define _ci_assert2(e, x, y, file, line)
|
|
+# define _ci_assert(exp, file, line)
|
|
+# define _ci_assert_equal(exp1, exp2, file, line)
|
|
+# define _ci_assert_equiv(exp1, exp2, file, line)
|
|
+# define _ci_assert_nequal(exp1, exp2, file, line)
|
|
+# define _ci_assert_le(exp1, exp2, file, line)
|
|
+# define _ci_assert_lt(exp1, exp2, file, line)
|
|
+# define _ci_assert_ge(exp1, exp2, file, line)
|
|
+# define _ci_assert_gt(exp1, exp2, file, line)
|
|
+# define _ci_assert_impl(exp1, exp2, file, line)
|
|
+
|
|
+# define _ci_verify(exp, file, line) \
|
|
+ do { \
|
|
+ (void)(exp); \
|
|
+ } while (0)
|
|
+
|
|
+# define CI_DEBUG_TRY(exp) \
|
|
+ do { \
|
|
+ (void)(exp); \
|
|
+ } while (0)
|
|
+
|
|
+#define CI_TRACE(exp,fmt)
|
|
+#define CI_TRACE_INT(integer)
|
|
+#define CI_TRACE_INT32(integer)
|
|
+#define CI_TRACE_INT64(integer)
|
|
+#define CI_TRACE_UINT(integer)
|
|
+#define CI_TRACE_UINT32(integer)
|
|
+#define CI_TRACE_UINT64(integer)
|
|
+#define CI_TRACE_HEX(integer)
|
|
+#define CI_TRACE_HEX32(integer)
|
|
+#define CI_TRACE_HEX64(integer)
|
|
+#define CI_TRACE_PTR(pointer)
|
|
+#define CI_TRACE_STRING(string)
|
|
+#define CI_TRACE_MAC(mac)
|
|
+#define CI_TRACE_IP(ip_be32)
|
|
+#define CI_TRACE_ARP(arp_pkt)
|
|
+
|
|
+#else
|
|
+
|
|
+# define _CI_ASSERT_FMT "\nfrom %s:%d"
|
|
+
|
|
+# define _ci_check(exp, file, line) \
|
|
+ do { \
|
|
+ if (CI_UNLIKELY(!(exp))) \
|
|
+ ci_warn(("ci_check(%s)"_CI_ASSERT_FMT, #exp, \
|
|
+ (file), (line))); \
|
|
+ } while (0)
|
|
+
|
|
+/*
|
|
+ * NOTE: ci_fail() emits the file and line where the assert is actually
|
|
+ * coded.
|
|
+ */
|
|
+
|
|
+# define _ci_assert(exp, file, line) \
|
|
+ do { \
|
|
+ if (CI_UNLIKELY(!(exp))) \
|
|
+ ci_fail(("ci_assert(%s)"_CI_ASSERT_FMT, #exp, \
|
|
+ (file), (line))); \
|
|
+ } while (0)
|
|
+
|
|
+# define _ci_assert2(e, x, y, file, line) do { \
|
|
+ if(CI_UNLIKELY( ! (e) )) \
|
|
+ ci_fail(("ci_assert(%s)\nwhere [%s=%"CI_PRIx64"] " \
|
|
+ "[%s=%"CI_PRIx64"]\nat %s:%d\nfrom %s:%d", #e \
|
|
+ , #x, (ci_uint64)(ci_uintptr_t)(x) \
|
|
+ , #y, (ci_uint64)(ci_uintptr_t)(y), \
|
|
+ __FILE__, __LINE__, (file), (line))); \
|
|
+ } while (0)
|
|
+
|
|
+# define _ci_verify(exp, file, line) \
|
|
+ do { \
|
|
+ if (CI_UNLIKELY(!(exp))) \
|
|
+ ci_fail(("ci_verify(%s)"_CI_ASSERT_FMT, #exp, \
|
|
+ (file), (line))); \
|
|
+ } while (0)
|
|
+
|
|
+# define _ci_assert_equal(x, y, f, l) _ci_assert2((x)==(y), x, y, (f), (l))
|
|
+# define _ci_assert_nequal(x, y, f, l) _ci_assert2((x)!=(y), x, y, (f), (l))
|
|
+# define _ci_assert_le(x, y, f, l) _ci_assert2((x)<=(y), x, y, (f), (l))
|
|
+# define _ci_assert_lt(x, y, f, l) _ci_assert2((x)< (y), x, y, (f), (l))
|
|
+# define _ci_assert_ge(x, y, f, l) _ci_assert2((x)>=(y), x, y, (f), (l))
|
|
+# define _ci_assert_gt(x, y, f, l) _ci_assert2((x)> (y), x, y, (f), (l))
|
|
+# define _ci_assert_or(x, y, f, l) _ci_assert2((x)||(y), x, y, (f), (l))
|
|
+# define _ci_assert_impl(x, y, f, l) _ci_assert2(!(x) || (y), x, y, (f), (l))
|
|
+# define _ci_assert_equiv(x, y, f, l) _ci_assert2(!(x)== !(y), x, y, (f), (l))
|
|
+
|
|
+#define _ci_assert_equal_msg(exp1, exp2, msg, file, line) /* fail unless exp1 == exp2; msg is a 32-bit tag printed as four characters, most significant byte first */ \
|
|
+ do { \
|
|
+ if (CI_UNLIKELY((exp1)!=(exp2))) \
|
|
+ ci_fail(("ci_assert_equal_msg(%s == %s) were " \
|
|
+ "(%"CI_PRIx64":%"CI_PRIx64") with msg[%c%c%c%c]" \
|
|
+ _CI_ASSERT_FMT, #exp1, #exp2, \
|
|
+ (ci_uint64)(ci_uintptr_t)(exp1), \
|
|
+ (ci_uint64)(ci_uintptr_t)(exp2), \
|
|
+ (int)((((ci_uint32)(msg)) >> 24) & 0xff), /* bitwise '&' extracts the byte; logical '&&' only yielded 0 or 1 */ \
|
|
+ (int)((((ci_uint32)(msg)) >> 16) & 0xff), \
|
|
+ (int)((((ci_uint32)(msg)) >> 8 ) & 0xff), \
|
|
+ (int)((((ci_uint32)(msg)) ) & 0xff), \
|
|
+ (file), (line))); \
|
|
+ } while (0)
|
|
+
|
|
+# define CI_DEBUG_TRY(exp) CI_TRY(exp)
|
|
+
|
|
+#define CI_TRACE(exp,fmt) \
|
|
+ ci_log("%s:%d:%s] " #exp "=" fmt, \
|
|
+ __FILE__, __LINE__, __FUNCTION__, (exp))
|
|
+
|
|
+
|
|
+#define CI_TRACE_INT(integer) \
|
|
+ ci_log("%s:%d:%s] " #integer "=%d", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, (integer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_INT32(integer) \
|
|
+ ci_log("%s:%d:%s] " #integer "=%d", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, ((ci_int32)integer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_INT64(integer) \
|
|
+ ci_log("%s:%d:%s] " #integer "=%lld", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, ((ci_int64)integer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_UINT(integer) /* log "file:line:function] <expr>=<value>" with the value as unsigned decimal */ \
|
|
+ ci_log("%s:%d:%s] " #integer "=%u", /* "%u", not "%ud": the old format printed a stray 'd' */ \
|
|
+ __FILE__, __LINE__, __FUNCTION__, (integer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_UINT32(integer) /* log "file:line:function] <expr>=<value>" with the value cast to ci_uint32, as unsigned decimal */ \
|
|
+ ci_log("%s:%d:%s] " #integer "=%u", /* "%u", not "%ud": the old format printed a stray 'd' */ \
|
|
+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)(integer)))
|
|
+
|
|
+
|
|
+#define CI_TRACE_UINT64(integer) /* log "file:line:function] <expr>=<value>" with the value cast to ci_uint64, as unsigned decimal */ \
|
|
+ ci_log("%s:%d:%s] " #integer "=%llu", /* "%llu", not "%ulld": the old format was "%u" followed by literal "lld" */ \
|
|
+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)(integer)))
|
|
+
|
|
+
|
|
+#define CI_TRACE_HEX(integer) \
|
|
+ ci_log("%s:%d:%s] " #integer "=0x%x", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, (integer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_HEX32(integer) \
|
|
+ ci_log("%s:%d:%s] " #integer "=0x%x", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint32)integer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_HEX64(integer) \
|
|
+ ci_log("%s:%d:%s] " #integer "=0x%llx", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, ((ci_uint64)integer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_PTR(pointer) \
|
|
+ ci_log("%s:%d:%s] " #pointer "=0x%p", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, (pointer))
|
|
+
|
|
+
|
|
+#define CI_TRACE_STRING(string) \
|
|
+ ci_log("%s:%d:%s] " #string "=%s", \
|
|
+ __FILE__, __LINE__, __FUNCTION__, (string))
|
|
+
|
|
+
|
|
+#define CI_TRACE_MAC(mac) \
|
|
+ ci_log("%s:%d:%s] " #mac "=" CI_MAC_PRINTF_FORMAT, \
|
|
+ __FILE__, __LINE__, __FUNCTION__, CI_MAC_PRINTF_ARGS(mac))
|
|
+
|
|
+
|
|
+#define CI_TRACE_IP(ip_be32) \
|
|
+ ci_log("%s:%d:%s] " #ip_be32 "=" CI_IP_PRINTF_FORMAT, __FILE__, \
|
|
+ __LINE__, __FUNCTION__, CI_IP_PRINTF_ARGS(&(ip_be32)))
|
|
+
|
|
+
|
|
+#define CI_TRACE_ARP(arp_pkt) \
|
|
+ ci_log("%s:%d:%s]\n"CI_ARP_PRINTF_FORMAT, \
|
|
+ __FILE__, __LINE__, __FUNCTION__, CI_ARP_PRINTF_ARGS(arp_pkt))
|
|
+
|
|
+#endif /* NDEBUG */
|
|
+
|
|
+#define ci_check(exp) \
|
|
+ _ci_check(exp, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert(exp) \
|
|
+ _ci_assert(exp, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_verify(exp) \
|
|
+ _ci_verify(exp, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_equal(exp1, exp2) \
|
|
+ _ci_assert_equal(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_equal_msg(exp1, exp2, msg) \
|
|
+ _ci_assert_equal_msg(exp1, exp2, msg, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_nequal(exp1, exp2) \
|
|
+ _ci_assert_nequal(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_le(exp1, exp2) \
|
|
+ _ci_assert_le(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_lt(exp1, exp2) \
|
|
+ _ci_assert_lt(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_ge(exp1, exp2) \
|
|
+ _ci_assert_ge(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_gt(exp1, exp2) \
|
|
+ _ci_assert_gt(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_impl(exp1, exp2) \
|
|
+ _ci_assert_impl(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+#define ci_assert_equiv(exp1, exp2) \
|
|
+ _ci_assert_equiv(exp1, exp2, __FILE__, __LINE__)
|
|
+
|
|
+
|
|
+#define CI_TEST(exp) \
|
|
+ do{ \
|
|
+ if( CI_UNLIKELY(!(exp)) ) \
|
|
+ ci_fail(("CI_TEST(%s)", #exp)); \
|
|
+ }while(0)
|
|
+
|
|
+
|
|
+#define CI_TRY(exp) \
|
|
+ do{ \
|
|
+ int _trc; \
|
|
+ _trc=(exp); \
|
|
+ if( CI_UNLIKELY(_trc < 0) ) \
|
|
+ ci_sys_fail(#exp, _trc); \
|
|
+ }while(0)
|
|
+
|
|
+
|
|
+#define CI_TRY_RET(exp) /* evaluate exp once; if the result is negative, log it and return it from the enclosing function */ \
|
|
+ do{ \
|
|
+ int _trc; \
|
|
+ _trc=(exp); \
|
|
+ if( CI_UNLIKELY(_trc < 0) ) { \
|
|
+ ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__); \
|
|
+ return _trc; /* NB: leaves the function that used the macro */ \
|
|
+ } \
|
|
+ }while(0)
|
|
+
|
|
+#define CI_LOGLEVEL_TRY_RET(logfn, exp) \
|
|
+ do{ \
|
|
+ int _trc; \
|
|
+ _trc=(exp); \
|
|
+ if( CI_UNLIKELY(_trc < 0) ) { \
|
|
+ logfn (ci_log("%s returned %d at %s:%d", #exp, _trc, __FILE__, __LINE__)); \
|
|
+ return _trc; \
|
|
+ } \
|
|
+ }while(0)
|
|
+
|
|
+
|
|
+#define CI_SOCK_TRY(exp) \
|
|
+ do{ \
|
|
+ ci_sock_err_t _trc; \
|
|
+ _trc=(exp); \
|
|
+ if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) \
|
|
+ ci_sys_fail(#exp, _trc.val); \
|
|
+ }while(0)
|
|
+
|
|
+
|
|
+#define CI_SOCK_TRY_RET(exp) \
|
|
+ do{ \
|
|
+ ci_sock_err_t _trc; \
|
|
+ _trc=(exp); \
|
|
+ if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \
|
|
+ ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \
|
|
+ return ci_sock_errcode(_trc); \
|
|
+ } \
|
|
+ }while(0)
|
|
+
|
|
+
|
|
+#define CI_SOCK_TRY_SOCK_RET(exp) \
|
|
+ do{ \
|
|
+ ci_sock_err_t _trc; \
|
|
+ _trc=(exp); \
|
|
+ if( CI_UNLIKELY(!ci_sock_errok(_trc)) ) { \
|
|
+ ci_log("%s returned %d at %s:%d", #exp, _trc.val, __FILE__, __LINE__); \
|
|
+ return _trc; \
|
|
+ } \
|
|
+ }while(0)
|
|
+
|
|
+#endif /* __CI_TOOLS_DEBUG_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/tools/log.h 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,269 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Functions for logging and pretty-printing.
|
|
+ * \date 2002/08/07
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_tools */
|
|
+
|
|
+#ifndef __CI_TOOLS_LOG_H__
|
|
+#define __CI_TOOLS_LOG_H__
|
|
+
|
|
+#include <stdarg.h>
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Logging.
|
|
+ */
|
|
+
|
|
+/* size of internal log buffer */
|
|
+#define CI_LOG_MAX_LINE 512
|
|
+/* uses of ci_log must ensure that all trace messages are shorter than this */
|
|
+#define CI_LOG_MAX_MSG_LENGTH (CI_LOG_MAX_LINE-50)
|
|
+
|
|
+extern void ci_vlog(const char* fmt, va_list args) CI_HF;
|
|
+extern void ci_log(const char* fmt, ...) CI_PRINTF_LIKE(1,2) CI_HF;
|
|
+
|
|
+ /*! Set the prefix for log messages.
|
|
+ **
|
|
+ ** Uses the storage pointed to by \em prefix. Therefore \em prefix must
|
|
+ ** be allocated on the heap, or statically.
|
|
+ */
|
|
+extern void ci_set_log_prefix(const char* prefix) CI_HF;
|
|
+
|
|
+typedef void (*ci_log_fn_t)(const char* msg);
|
|
+extern ci_log_fn_t ci_log_fn CI_HV;
|
|
+
|
|
+/* Log functions. */
|
|
+extern void ci_log_null(const char* msg) CI_HF;
|
|
+extern void ci_log_stderr(const char* msg) CI_HF;
|
|
+extern void ci_log_stdout(const char* msg) CI_HF;
|
|
+extern void ci_log_syslog(const char* msg) CI_HF;
|
|
+
|
|
+/*! Call the following to install special logging behaviours. */
|
|
+extern void ci_log_buffer_till_fail(void) CI_HF;
|
|
+extern void ci_log_buffer_till_exit(void) CI_HF;
|
|
+
|
|
+extern void __ci_log_unique(const char* msg) CI_HF;
|
|
+extern ci_log_fn_t __ci_log_unique_fn CI_HV;
|
|
+ci_inline void ci_log_uniquify(void) { /* make __ci_log_unique the active log function */
|
|
+ if( ci_log_fn == __ci_log_unique )
|
|
+ return; /* already installed: leave the saved function untouched */
|
|
+ __ci_log_unique_fn = ci_log_fn; /* remember the log function being replaced */
|
|
+ ci_log_fn = __ci_log_unique;
|
|
+}
|
|
+
|
|
+extern void ci_log_file(const char* msg) CI_HF;
|
|
+extern int ci_log_file_fd CI_HV;
|
|
+
|
|
+extern void __ci_log_nth(const char* msg) CI_HF;
|
|
+extern ci_log_fn_t __ci_log_nth_fn CI_HV;
|
|
+extern int ci_log_nth_n CI_HV; /* default 100 */
|
|
+ci_inline void ci_log_nth(void) { /* make __ci_log_nth the active log function */
|
|
+ if( ci_log_fn == __ci_log_nth )
|
|
+ return; /* already installed: leave the saved function untouched */
|
|
+ __ci_log_nth_fn = ci_log_fn; /* remember the log function being replaced */
|
|
+ ci_log_fn = __ci_log_nth;
|
|
+}
|
|
+
|
|
+extern int ci_log_level CI_HV;
|
|
+
|
|
+extern int ci_log_options CI_HV;
|
|
+#define CI_LOG_PID 0x1
|
|
+#define CI_LOG_TID 0x2
|
|
+#define CI_LOG_TIME 0x4
|
|
+#define CI_LOG_DELTA 0x8
|
|
+
|
|
+/**********************************************************************
|
|
+ * Used to define which mode we are in
|
|
+ */
|
|
+#if (defined(_WIN32) && !defined(__KERNEL__))
|
|
+typedef enum {
|
|
+ ci_log_md_NULL=0,
|
|
+ ci_log_md_ioctl,
|
|
+ ci_log_md_stderr,
|
|
+ ci_log_md_stdout,
|
|
+ ci_log_md_file,
|
|
+ ci_log_md_serial,
|
|
+ ci_log_md_syslog,
|
|
+ ci_log_md_pidfile
|
|
+} ci_log_mode_t;
|
|
+extern ci_log_mode_t ci_log_mode;
|
|
+#endif
|
|
+
|
|
+/**********************************************************************
|
|
+ * Pretty-printing.
|
|
+ */
|
|
+
|
|
+extern char ci_printable_char(char c) CI_HF;
|
|
+
|
|
+extern void (*ci_hex_dump_formatter)(char* buf, const ci_octet* s,
|
|
+ int i, int off, int len) CI_HV;
|
|
+extern void ci_hex_dump_format_octets(char*,const ci_octet*,int,int,int) CI_HF;
|
|
+extern void ci_hex_dump_format_dwords(char*,const ci_octet*,int,int,int) CI_HF;
|
|
+
|
|
+extern void ci_hex_dump_row(char* buf, volatile const void* s, int len,
|
|
+ ci_ptr_arith_t address) CI_HF;
|
|
+ /*!< A row contains up to 16 bytes. Row starts at [address & 15u], so
|
|
+ ** therefore [len + (address & 15u)] must be <= 16.
|
|
+ */
|
|
+
|
|
+extern void ci_hex_dump(ci_log_fn_t, volatile const void*,
|
|
+ int len, ci_ptr_arith_t address) CI_HF;
|
|
+
|
|
+extern int ci_hex_dump_to_raw(const char* src_hex, void* buf,
|
|
+ unsigned* addr_out_opt, int* skip) CI_HF;
|
|
+ /*!< Recovers raw data from a single line of a hex dump. [buf] must be at
|
|
+ ** least 16 bytes long. Returns the number of bytes written to [buf] (in
|
|
+ ** range 1 -> 16), or -1 if [src_hex] doesn't contain hex data. Does not
|
|
+ ** cope with missing bytes at the start of a line.
|
|
+ */
|
|
+
|
|
+extern int ci_format_eth_addr(char* buf, const void* eth_mac_addr,
|
|
+ char sep) CI_HF;
|
|
+ /*!< This will write 18 characters to <buf> including terminating null.
|
|
+ ** Returns number of bytes written excluding null. If [sep] is zero, ':'
|
|
+ ** is used.
|
|
+ */
|
|
+
|
|
+extern int ci_parse_eth_addr(void* eth_mac_addr,
|
|
+ const char* str, char sep) CI_HF;
|
|
+ /*!< If [sep] is zero, absolutely any separator is accepted (even
|
|
+ ** inconsistent separators). Returns 0 on success, -1 on error.
|
|
+ */
|
|
+
|
|
+extern int ci_format_ip4_addr(char* buf, unsigned addr_be32) CI_HF;
|
|
+ /*!< Formats the IP address (in network endian) in dotted-quad. Returns
|
|
+ ** the number of bytes written (up to 15), excluding the null. [buf]
|
|
+ ** must be at least 16 bytes long.
|
|
+ */
|
|
+
|
|
+#if defined(__unix__) && ! defined(__KERNEL__)
|
|
+extern int ci_format_select_set(char* s, int len_s, int nfds, const fd_set*);
|
|
+extern int ci_format_select(char* s, int len_s,
|
|
+ int nfds, const fd_set* rds, const fd_set* wrs,
|
|
+ const fd_set* exs, struct timeval* timeout);
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Error checking.
|
|
+ */
|
|
+
|
|
+extern void (*ci_fail_stop_fn)(void) CI_HV;
|
|
+
|
|
+extern void ci_fail_stop(void) CI_HF;
|
|
+extern void ci_fail_hang(void) CI_HF;
|
|
+extern void ci_fail_bomb(void) CI_HF;
|
|
+extern void ci_backtrace(void) CI_HF;
|
|
+
|
|
+#if defined __linux__ && !defined __KERNEL__
|
|
+extern void ci_fail_abort (void) CI_HF;
|
|
+#endif
|
|
+
|
|
+#ifdef __GNUC__
|
|
+extern void
|
|
+__ci_fail(const char*, ...) CI_PRINTF_LIKE(1,2) CI_HF;
|
|
+#else
|
|
+# if _PREFAST_
|
|
+ extern void _declspec(noreturn) __ci_fail(const char* fmt, ...);
|
|
+# else
|
|
+ extern void __ci_fail(const char* fmt, ...);
|
|
+# endif
|
|
+
|
|
+#endif
|
|
+
|
|
+#define ci_warn(x) /* logs only the location of the ci_warn() expansion; the argument x is not used */ \
|
|
+ do{ ci_log("WARN at %s:%d", __FILE__, __LINE__); }while(0)
|
|
+
|
|
+#define ci_fail(x) /* logs the location, then calls __ci_fail with x, which must be a parenthesised printf-style argument list */ \
|
|
+ do{ ci_log("FAIL at %s:%d", __FILE__, __LINE__); __ci_fail x; }while(0)
|
|
+
|
|
+extern void __ci_sys_fail(const char* fn, int rc,
|
|
+ const char* file, int line) CI_HF;
|
|
+#define ci_sys_fail(fn, rc) __ci_sys_fail(fn, rc, __FILE__, __LINE__)
|
|
+
|
|
+/**********************************************************************
|
|
+ * Logging to buffer (src/citools/log_buffer.c)
|
|
+ */
|
|
+
|
|
+/*! Divert ci_log() messages to the log buffer
|
|
+ * normally they go to the system console */
|
|
+extern void ci_log_buffer_till_fail(void) CI_HF;
|
|
+
|
|
+/*! Dump the contents of the log buffer to the system console */
|
|
+extern void ci_log_buffer_dump(void) CI_HF;
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Some useful pretty-printing.
|
|
+ */
|
|
+
|
|
+#ifdef __linux__
|
|
+# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s%s%s"
|
|
+
|
|
+# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
|
|
+ (((x) & MSG_OOB ) ? "OOB " :""), \
|
|
+ (((x) & MSG_PEEK ) ? "PEEK " :""), \
|
|
+ (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \
|
|
+ (((x) & MSG_EOR ) ? "EOR " :""), \
|
|
+ (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \
|
|
+ (((x) & MSG_TRUNC ) ? "TRUNC " :""), \
|
|
+ (((x) & MSG_WAITALL ) ? "WAITALL " :""), \
|
|
+ (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \
|
|
+ (((x) & MSG_NOSIGNAL ) ? "NOSIGNAL " :""), \
|
|
+ (((x) & MSG_ERRQUEUE ) ? "ERRQUEUE " :""), \
|
|
+ (((x) & MSG_CONFIRM ) ? "CONFIRM " :"")
|
|
+#endif
|
|
+
|
|
+#ifdef _WIN32
|
|
+# define CI_SOCKCALL_FLAGS_FMT "%s%s%s"
|
|
+
|
|
+# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
|
|
+ (((x) & MSG_OOB ) ? "OOB " :""), \
|
|
+ (((x) & MSG_PEEK ) ? "PEEK " :""), \
|
|
+ (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :"")
|
|
+#endif
|
|
+
|
|
+#ifdef __sun__
|
|
+# define CI_SOCKCALL_FLAGS_FMT "%s%s%s%s%s%s%s%s%s"
|
|
+
|
|
+# define CI_SOCKCALL_FLAGS_PRI_ARG(x) \
|
|
+ (((x) & MSG_OOB ) ? "OOB " :""), \
|
|
+ (((x) & MSG_PEEK ) ? "PEEK " :""), \
|
|
+ (((x) & MSG_DONTROUTE ) ? "DONTROUTE " :""), \
|
|
+ (((x) & MSG_EOR ) ? "EOR " :""), \
|
|
+ (((x) & MSG_CTRUNC ) ? "CTRUNC " :""), \
|
|
+ (((x) & MSG_TRUNC ) ? "TRUNC " :""), \
|
|
+ (((x) & MSG_WAITALL ) ? "WAITALL " :""), \
|
|
+ (((x) & MSG_DONTWAIT ) ? "DONTWAIT " :""), \
|
|
+ (((x) & MSG_NOTIFICATION) ? "NOTIFICATION" :"")
|
|
+#endif
|
|
+
|
|
+#endif /* __CI_TOOLS_LOG_H__ */
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/tools/platform/gcc_x86.h 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,370 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_tools_platform */
|
|
+
|
|
+#ifndef __CI_TOOLS_GCC_X86_H__
|
|
+#define __CI_TOOLS_GCC_X86_H__
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Free-running cycle counters.
|
|
+ */
|
|
+
|
|
+#define CI_HAVE_FRC64
|
|
+#define CI_HAVE_FRC32
|
|
+
|
|
+#define ci_frc32(pval) __asm__ __volatile__("rdtsc" : "=a" (*(pval)) : : "edx") /* stores eax (low 32 bits of the TSC) in *pval; edx is listed as clobbered */
|
|
+
|
|
+#if defined(__x86_64__)
|
|
+ci_inline void ci_frc64(ci_uint64* pval) {
|
|
+ /* temp fix until we figure how to get this out in one bite */
|
|
+ ci_uint64 low, high;
|
|
+ __asm__ __volatile__("rdtsc" : "=a" (low) , "=d" (high));
|
|
+ *pval = (high << 32) | low;
|
|
+}
|
|
+
|
|
+#else
|
|
+#define ci_frc64(pval) __asm__ __volatile__("rdtsc" : "=A" (*(pval))) /* 32-bit build: "=A" takes the 64-bit result from the edx:eax pair */
|
|
+#endif
|
|
+
|
|
+#define ci_frc_flush() /* ?? Need a pipeline barrier. */
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Atomic integer.
|
|
+ */
|
|
+
|
|
+/*
|
|
+** int ci_atomic_read(a) { return a->n; }
|
|
+** void ci_atomic_set(a, v) { a->n = v; }
|
|
+** void ci_atomic_inc(a) { ++a->n; }
|
|
+** void ci_atomic_dec(a) { --a->n; }
|
|
+** int ci_atomic_inc_and_test(a) { return ++a->n == 0; }
|
|
+** int ci_atomic_dec_and_test(a) { return --a->n == 0; }
|
|
+** void ci_atomic_and(a, v) { a->n &= v; }
|
|
+** void ci_atomic_or(a, v) { a->n |= v; }
|
|
+*/
|
|
+
|
|
+typedef struct { volatile ci_int32 n; } ci_atomic_t;
|
|
+
|
|
+#define CI_ATOMIC_INITIALISER(i) {(i)}
|
|
+
|
|
+static inline ci_int32 ci_atomic_read(const ci_atomic_t* a) { return a->n; }
|
|
+static inline void ci_atomic_set(ci_atomic_t* a, int v) { a->n = v; ci_wmb(); }
|
|
+
|
|
+static inline void ci_atomic_inc(ci_atomic_t* a)
|
|
+{ __asm__ __volatile__("lock; incl %0" : "+m" (a->n)); }
|
|
+
|
|
+
|
|
+static inline void ci_atomic_dec(ci_atomic_t* a)
|
|
+{ __asm__ __volatile__("lock; decl %0" : "+m" (a->n)); }
|
|
+
|
|
+static inline int ci_atomic_inc_and_test(ci_atomic_t* a) {
|
|
+ char r;
|
|
+ __asm__ __volatile__("lock; incl %0; sete %1"
|
|
+ : "+m" (a->n), "=qm" (r));
|
|
+ return r;
|
|
+}
|
|
+
|
|
+static inline int ci_atomic_dec_and_test(ci_atomic_t* a) {
|
|
+ char r;
|
|
+ __asm__ __volatile__("lock; decl %0; sete %1"
|
|
+ : "+m" (a->n), "=qm" (r));
|
|
+ return r;
|
|
+}
|
|
+
|
|
+ci_inline int
|
|
+ci_atomic_xadd (ci_atomic_t *a, int v) {
|
|
+ __asm__ ("lock xadd %0, %1" : "=r" (v), "+m" (a->n) : "0" (v));
|
|
+ return v;
|
|
+}
|
|
+ci_inline int
|
|
+ci_atomic_xchg (ci_atomic_t *a, int v) {
|
|
+ __asm__ ("lock xchg %0, %1" : "=r" (v), "+m" (a->n) : "0" (v));
|
|
+ return v;
|
|
+}
|
|
+
|
|
+ci_inline void ci_atomic32_or(volatile ci_uint32* p, ci_uint32 mask)
|
|
+{ __asm__ __volatile__("lock; orl %1, %0" : "+m" (*p) : "ir" (mask)); }
|
|
+
|
|
+ci_inline void ci_atomic32_and(volatile ci_uint32* p, ci_uint32 mask)
|
|
+{ __asm__ __volatile__("lock; andl %1, %0" : "+m" (*p) : "ir" (mask)); }
|
|
+
|
|
+ci_inline void ci_atomic32_add(volatile ci_uint32* p, ci_uint32 v)
|
|
+{ __asm__ __volatile__("lock; addl %1, %0" : "+m" (*p) : "ir" (v)); }
|
|
+
|
|
+ci_inline void ci_atomic32_inc(volatile ci_uint32* p)
|
|
+{ __asm__ __volatile__("lock; incl %0" : "+m" (*p)); }
|
|
+
|
|
+ci_inline int ci_atomic32_dec_and_test(volatile ci_uint32* p) {
|
|
+ char r;
|
|
+ __asm__ __volatile__("lock; decl %0; sete %1" : "+m" (*p), "=qm" (r));
|
|
+ return r;
|
|
+}
|
|
+
|
|
+#define ci_atomic_or(a, v) ci_atomic32_or ((ci_uint32*) &(a)->n, (v))
|
|
+#define ci_atomic_and(a, v) ci_atomic32_and((ci_uint32*) &(a)->n, (v))
|
|
+#define ci_atomic_add(a, v) ci_atomic32_add((ci_uint32*) &(a)->n, (v))
|
|
+
|
|
+extern int ci_glibc_uses_nptl (void) CI_HF;
|
|
+extern int ci_glibc_nptl_broken(void) CI_HF;
|
|
+extern int ci_glibc_gs_get_is_multihreaded_offset (void) CI_HF;
|
|
+extern int ci_glibc_gs_is_multihreaded_offset CI_HV;
|
|
+
|
|
+#if !defined(__x86_64__)
|
|
+#ifdef __GLIBC__
|
|
+/* Returns non-zero if the calling process might be multithreaded, returns 0 if
|
|
+ * it definitely isn't (i.e. if reimplementing this function for other
|
|
+ * architectures and platforms, you can safely just return 1).
|
|
+ */
|
|
+static inline int ci_is_multithreaded (void) {
|
|
+
|
|
+ while (1) {
|
|
+ if (ci_glibc_gs_is_multihreaded_offset >= 0) {
|
|
+ /* NPTL keeps a variable that tells us this hanging off gs (i.e. in thread-
|
|
+ * local storage); just return this
|
|
+ */
|
|
+ int r;
|
|
+ __asm__ __volatile__ ("movl %%gs:(%1), %0"
|
|
+ : "=r" (r)
|
|
+ : "r" (ci_glibc_gs_is_multihreaded_offset));
|
|
+ return r;
|
|
+ }
|
|
+
|
|
+ if (ci_glibc_gs_is_multihreaded_offset == -2) {
|
|
+ /* This means we've already determined that the libc version is NOT good
|
|
+ * for our funky "is multithreaded" hack
|
|
+ */
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ /* If we get here, it means this is the first time the function has been
|
|
+ * called -- detect the libc version and go around again.
|
|
+ */
|
|
+ ci_glibc_gs_is_multihreaded_offset = ci_glibc_gs_get_is_multihreaded_offset ();
|
|
+
|
|
+ /* Go around again. We do the test here rather than at the top so that we go
|
|
+ * quicker in the common case
|
|
+ */
|
|
+ }
|
|
+}
|
|
+
|
|
+#else /* def __GLIBC__ */
|
|
+
|
|
+#define ci_is_multithreaded() 1 /* ?? Is there a POSIX way of finding out */
|
|
+ /* whether the application is single */
|
|
+ /* threaded? */
|
|
+
|
|
+#endif /* def __GLIBC__ */
|
|
+
|
|
+#else /* defined __x86_64__ */
|
|
+
|
|
+static inline int ci_is_multithreaded (void) {
|
|
+ /* No easy way to tell on x86_64; so assume we're multithreaded */
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+#endif /* defined __x86_64__ */
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Compare and swap.
|
|
+ */
|
|
+
|
|
+#define CI_HAVE_COMPARE_AND_SWAP
|
|
+
|
|
+ci_inline int ci_cas32_succeed(volatile ci_int32* p, ci_int32 oldval,
|
|
+ ci_int32 newval) {
|
|
+ char ret;
|
|
+ ci_int32 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+ci_inline int ci_cas32_fail(volatile ci_int32* p, ci_int32 oldval,
|
|
+ ci_int32 newval) {
|
|
+ char ret;
|
|
+ ci_int32 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+#ifdef __x86_64__
|
|
+ci_inline int ci_cas64_succeed(volatile ci_int64* p, ci_int64 oldval,
|
|
+ ci_int64 newval) {
|
|
+ char ret;
|
|
+ ci_int64 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+ci_inline int ci_cas64_fail(volatile ci_int64* p, ci_int64 oldval,
|
|
+ ci_int64 newval) {
|
|
+ char ret;
|
|
+ ci_int64 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+#endif
|
|
+
|
|
+ci_inline int ci_cas32u_succeed(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) {
|
|
+ char ret;
|
|
+ ci_uint32 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgl %3, %1; sete %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+ci_inline int ci_cas32u_fail(volatile ci_uint32* p, ci_uint32 oldval, ci_uint32 newval) {
|
|
+ char ret;
|
|
+ ci_uint32 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgl %3, %1; setne %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+ci_inline int ci_cas64u_succeed(volatile ci_uint64* p, ci_uint64 oldval,
|
|
+ ci_uint64 newval) {
|
|
+ char ret;
|
|
+ ci_uint64 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgq %3, %1; sete %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+ci_inline int ci_cas64u_fail(volatile ci_uint64* p, ci_uint64 oldval,
|
|
+ ci_uint64 newval) {
|
|
+ char ret;
|
|
+ ci_uint64 prevval;
|
|
+ __asm__ __volatile__("lock; cmpxchgq %3, %1; setne %0"
|
|
+ : "=q"(ret), "+m"(*p), "=a"(prevval)
|
|
+ : "r"(newval), "a"(oldval));
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+#ifdef __x86_64__
|
|
+
|
|
+# define ci_cas_uintptr_succeed(p,o,n) \
|
|
+ ci_cas64u_succeed((volatile ci_uint64*) (p), (o), (n))
|
|
+# define ci_cas_uintptr_fail(p,o,n) \
|
|
+ ci_cas64u_fail((volatile ci_uint64*) (p), (o), (n))
|
|
+
|
|
+#else
|
|
+
|
|
+# define ci_cas_uintptr_succeed(p,o,n) \
|
|
+ ci_cas32u_succeed((volatile ci_uint32*) (p), (o), (n))
|
|
+# define ci_cas_uintptr_fail(p,o,n) \
|
|
+ ci_cas32u_fail((volatile ci_uint32*) (p), (o), (n))
|
|
+
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Atomic bit field.
|
|
+ */
|
|
+
|
|
+typedef ci_uint32 ci_bits;
|
|
+#define CI_BITS_N 32u
|
|
+
|
|
+#define CI_BITS_DECLARE(name, n) \
|
|
+ ci_bits name[((n) + CI_BITS_N - 1u) / CI_BITS_N]
|
|
+
|
|
+ci_inline void ci_bits_clear_all(volatile ci_bits* b, int n_bits)
|
|
+{ memset((void*) b, 0, (n_bits+CI_BITS_N-1u) / CI_BITS_N * sizeof(ci_bits)); }
|
|
+
|
|
+ci_inline void ci_bit_set(volatile ci_bits* b, int i) { /* atomically set bit i, counted from the word at b */
|
|
+ __asm__ __volatile__("lock; btsl %1, %0"
|
|
+ : "+m" (*b) /* bts reads and writes the word, so it is an in/out operand */
|
|
+ : "Ir" (i) : "memory"); /* a register index may address a word beyond *b */
|
|
+}
|
|
+
|
|
+ci_inline void ci_bit_clear(volatile ci_bits* b, int i) { /* atomically clear bit i, counted from the word at b */
|
|
+ __asm__ __volatile__("lock; btrl %1, %0"
|
|
+ : "+m" (*b) /* btr reads and writes the word, so it is an in/out operand */
|
|
+ : "Ir" (i) : "memory"); /* a register index may address a word beyond *b */
|
|
+}
|
|
+
|
|
+ci_inline int ci_bit_test(volatile ci_bits* b, int i) { /* returns 1 if bit i (counted from the word at b) is set, else 0 */
|
|
+ char rc; /* setc needs a byte operand: "q" register or memory */
|
|
+ __asm__ __volatile__("btl %2, %1; setc %0" /* volatile: re-read the bit on every call */
|
|
+ : "=qm" (rc)
|
|
+ : "m" (*b), "Ir" (i) : "memory"); /* a register index may read a word beyond *b */
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+ci_inline int ci_bit_test_and_set(volatile ci_bits* b, int i) { /* atomically set bit i; returns its previous value (0 or 1) */
|
|
+ char rc; /* setc needs a byte operand: "q" register or memory */
|
|
+ __asm__ __volatile__("lock; btsl %2, %1; setc %0"
|
|
+ : "=qm" (rc), "+m" (*b)
|
|
+ : "Ir" (i) : "memory"); /* a register index may address a word beyond *b */
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+ci_inline int ci_bit_test_and_clear(volatile ci_bits* b, int i) { /* atomically clear bit i; returns its previous value (0 or 1) */
|
|
+ char rc; /* setc needs a byte operand: "q" register or memory */
|
|
+ __asm__ __volatile__("lock; btrl %2, %1; setc %0"
|
|
+ : "=qm" (rc), "+m" (*b)
|
|
+ : "Ir" (i) : "memory"); /* a register index may address a word beyond *b */
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+/* These mask ops only work within a single ci_bits word. */
|
|
+#define ci_bit_mask_set(b,m) ci_atomic32_or((b), (m))
|
|
+#define ci_bit_mask_clear(b,m) ci_atomic32_and((b), ~(m))
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Misc.
|
|
+ */
|
|
+
|
|
+#if __GNUC__ >= 3
|
|
+# define ci_spinloop_pause() __asm__("pause")
|
|
+#else
|
|
+# define ci_spinloop_pause() __asm__(".byte 0xf3, 0x90")
|
|
+#endif
|
|
+
|
|
+
|
|
+#define CI_HAVE_ADDC32
|
|
+#define ci_add_carry32(sum, v) __asm__("addl %1, %0 ;" /* sum += v */ \
|
|
+ "adcl $0, %0 ;" /* add the carry-out back into sum */ \
|
|
+ : "=r" (sum) \
|
|
+ : "g" ((ci_uint32) (v)), "0" (sum)) /* (v) parenthesized so the cast covers the whole argument */
|
|
+
|
|
+
|
|
+#endif /* __CI_TOOLS_GCC_X86_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,362 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+
|
|
+/*! \cidoxg_include_ci_tools_platform */
|
|
+
|
|
+#ifndef __CI_TOOLS_LINUX_KERNEL_H__
|
|
+#define __CI_TOOLS_LINUX_KERNEL_H__
|
|
+
|
|
+/**********************************************************************
|
|
+ * Need to know the kernel version.
|
|
+ */
|
|
+
|
|
+#ifndef LINUX_VERSION_CODE
|
|
+# include <linux/version.h>
|
|
+# ifndef UTS_RELEASE
|
|
+ /* 2.6.18 onwards defines UTS_RELEASE in a separate header */
|
|
+# include <linux/utsrelease.h>
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) || \
|
|
+ LINUX_VERSION_CODE >= KERNEL_VERSION(2,7,0)
|
|
+# error "Linux 2.6 required"
|
|
+#endif
|
|
+
|
|
+
|
|
+#include <linux/slab.h> /* kmalloc / kfree */
|
|
+#include <linux/vmalloc.h> /* vmalloc / vfree */
|
|
+#include <linux/interrupt.h>/* in_interrupt() */
|
|
+#include <linux/in.h>
|
|
+#include <linux/in6.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/highmem.h>
|
|
+#include <linux/smp_lock.h>
|
|
+#include <linux/ctype.h>
|
|
+#include <linux/uio.h>
|
|
+#include <asm/current.h>
|
|
+#include <asm/errno.h>
|
|
+#include <asm/kmap_types.h>
|
|
+#include <asm/semaphore.h>
|
|
+
|
|
+#include <ci/tools/config.h>
|
|
+
|
|
+#define ci_in_irq in_irq
|
|
+#define ci_in_interrupt in_interrupt
|
|
+#define ci_in_atomic in_atomic
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Misc stuff.
|
|
+ */
|
|
+
|
|
+#ifdef BUG
|
|
+# define CI_BOMB BUG
|
|
+#endif
|
|
+
|
|
+ci_inline void* __ci_alloc(size_t n)
|
|
+{ return kmalloc(n, (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)); }
|
|
+
|
|
+ci_inline void* __ci_atomic_alloc(size_t n)
|
|
+{ return kmalloc(n, GFP_ATOMIC ); }
|
|
+
|
|
+ci_inline void __ci_free(void* p) { kfree(p); } /* forwards p to kfree(); no value is returned */
|
|
+ci_inline void* __ci_vmalloc(size_t n) { return vmalloc(n); }
|
|
+ci_inline void __ci_vfree(void* p) { vfree(p); } /* forwards p to vfree(); no value is returned */
|
|
+
|
|
+
|
|
+#if CI_MEMLEAK_DEBUG_ALLOC_TABLE
|
|
+ #define ci_alloc(s) ci_alloc_memleak_debug (s, __FILE__, __LINE__)
|
|
+ #define ci_atomic_alloc(s) ci_atomic_alloc_memleak_debug(s, __FILE__, __LINE__)
|
|
+ #define ci_free ci_free_memleak_debug
|
|
+ #define ci_vmalloc(s) ci_vmalloc_memleak_debug (s, __FILE__,__LINE__)
|
|
+ #define ci_vfree ci_vfree_memleak_debug
|
|
+ #define ci_alloc_fn ci_alloc_fn_memleak_debug
|
|
+ #define ci_vmalloc_fn ci_vmalloc_fn_memleak_debug
|
|
+#else /* !CI_MEMLEAK_DEBUG_ALLOC_TABLE */
|
|
+ #define ci_alloc_fn __ci_alloc
|
|
+ #define ci_vmalloc_fn __ci_vmalloc
|
|
+#endif
|
|
+
|
|
+#ifndef ci_alloc
|
|
+ #define ci_atomic_alloc __ci_atomic_alloc
|
|
+ #define ci_alloc __ci_alloc
|
|
+ #define ci_free __ci_free
|
|
+ #define ci_vmalloc __ci_vmalloc
|
|
+ #define ci_vmalloc_fn __ci_vmalloc
|
|
+ #define ci_vfree __ci_vfree
|
|
+#endif
|
|
+
|
|
+#define ci_sprintf sprintf
|
|
+#define ci_vsprintf vsprintf
|
|
+#define ci_snprintf snprintf
|
|
+#define ci_vsnprintf vsnprintf
|
|
+#define ci_sscanf sscanf
|
|
+
|
|
+
|
|
+#define CI_LOG_FN_DEFAULT ci_log_syslog
|
|
+
|
|
+
|
|
+/*--------------------------------------------------------------------
|
|
+ *
|
|
+ * irqs_disabled - needed for kmap helpers on some kernels
|
|
+ *
|
|
+ *--------------------------------------------------------------------*/
|
|
+#ifdef irqs_disabled
|
|
+# define ci_irqs_disabled irqs_disabled
|
|
+#else
|
|
+# if defined(__i386__) | defined(__x86_64__)
|
|
+# define ci_irqs_disabled(x) \
|
|
+ ({ \
|
|
+ unsigned long flags; \
|
|
+ local_save_flags(flags); \
|
|
+ !(flags & (1<<9)); \
|
|
+ })
|
|
+# else
|
|
+# error "Need to implement irqs_disabled() for your architecture"
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * kmap helpers.
|
|
+ *
|
|
+ * Use ci_k(un)map for code paths which are not in an atomic context.
|
|
+ * For atomic code you need to use ci_k(un)map_in_atomic. This will grab
|
|
+ * one of the per-CPU kmap slots.
|
|
+ *
|
|
+ * NB in_interrupt != in_irq. If you don't know the difference then
|
|
+ * don't use kmap_in_atomic
|
|
+ *
|
|
+ * 2.4 allocates kmap slots by function. We are going to re-use the
|
|
+ * skb module's slot - we also use the same interlock
|
|
+ *
|
|
+ * 2.6 allocates kmap slots by type as well as by function. We are
|
|
+ * going to use the currently (2.6.10) unused SOFTIRQ slot
|
|
+ *
|
|
+ */
|
|
+
|
|
+ci_inline void* ci_kmap(struct page *page) {
|
|
+ CI_DEBUG(if( ci_in_atomic() | ci_in_interrupt() | ci_in_irq() ) BUG());
|
|
+ return kmap(page);
|
|
+}
|
|
+
|
|
+ci_inline void ci_kunmap(struct page *page) {
|
|
+ kunmap(page);
|
|
+}
|
|
+
|
|
+#define CI_KM_SLOT KM_SOFTIRQ0
|
|
+
|
|
+
|
|
+typedef struct semaphore ci_semaphore_t;
|
|
+
|
|
+ci_inline void
|
|
+ci_sem_init (ci_semaphore_t *sem, int val) {
|
|
+ sema_init (sem, val);
|
|
+}
|
|
+
|
|
+ci_inline void
|
|
+ci_sem_down (ci_semaphore_t *sem) {
|
|
+ down (sem);
|
|
+}
|
|
+
|
|
+ci_inline int
|
|
+ci_sem_trydown (ci_semaphore_t *sem) {
|
|
+ return down_trylock (sem);
|
|
+}
|
|
+
|
|
+ci_inline void
|
|
+ci_sem_up (ci_semaphore_t *sem) {
|
|
+ up (sem);
|
|
+}
|
|
+
|
|
+ci_inline int
|
|
+ci_sem_get_count(ci_semaphore_t *sem) {
|
|
+ return sem->count.counter;
|
|
+}
|
|
+
|
|
+ci_inline void* ci_kmap_in_atomic(struct page *page)
|
|
+{
|
|
+ CI_DEBUG(if( ci_in_irq() ) BUG());
|
|
+
|
|
+ /* iSCSI can call without in_interrupt() but with irqs_disabled()
|
|
+ and in a context that can't sleep, so we need to check that
|
|
+ too */
|
|
+ if(ci_in_interrupt() || ci_irqs_disabled())
|
|
+ return kmap_atomic(page, CI_KM_SLOT);
|
|
+ else
|
|
+ return kmap(page);
|
|
+}
|
|
+
|
|
+ci_inline void ci_kunmap_in_atomic(struct page *page, void* kaddr)
|
|
+{
|
|
+ CI_DEBUG(if( ci_in_irq() ) BUG());
|
|
+
|
|
+ /* iSCSI can call without in_interrupt() but with irqs_disabled()
|
|
+ and in a context that can't sleep, so we need to check that
|
|
+ too */
|
|
+ if(ci_in_interrupt() || ci_irqs_disabled())
|
|
+ kunmap_atomic(kaddr, CI_KM_SLOT);
|
|
+ else
|
|
+ kunmap(page);
|
|
+}
|
|
+
|
|
+/**********************************************************************
|
|
+ * spinlock implementation: used by <ci/tools/spinlock.h>
|
|
+ */
|
|
+
|
|
+#define CI_HAVE_SPINLOCKS
|
|
+
|
|
+typedef ci_uintptr_t ci_lock_holder_t;
|
|
+#define ci_lock_thisthread (ci_lock_holder_t)current
|
|
+#define ci_lock_no_holder (ci_lock_holder_t)NULL
|
|
+
|
|
+typedef spinlock_t ci_lock_i;
|
|
+typedef spinlock_t ci_irqlock_i;
|
|
+typedef unsigned long ci_irqlock_state_t;
|
|
+
|
|
+#define IRQLOCK_CYCLES 500000
|
|
+
|
|
+#define ci_lock_ctor_i(l) spin_lock_init(l)
|
|
+#define ci_lock_dtor_i(l) do{}while(0)
|
|
+#define ci_lock_lock_i(l) spin_lock(l)
|
|
+#define ci_lock_trylock_i(l) spin_trylock(l)
|
|
+#define ci_lock_unlock_i(l) spin_unlock(l)
|
|
+
|
|
+#define ci_irqlock_ctor_i(l) spin_lock_init(l)
|
|
+#define ci_irqlock_dtor_i(l) do{}while(0)
|
|
+#define ci_irqlock_lock_i(l,s) spin_lock_irqsave(l,*(s))
|
|
+#define ci_irqlock_unlock_i(l,s) spin_unlock_irqrestore(l, *(s))
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * register access
|
|
+ */
|
|
+
|
|
+#include <asm/io.h>
|
|
+
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9)
|
|
+typedef volatile void __iomem* ioaddr_t;
|
|
+#else
|
|
+typedef unsigned long ioaddr_t;
|
|
+#endif
|
|
+
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * thread implementation -- kernel dependencies probably should be
|
|
+ * moved to driver/linux_kernel.h
|
|
+ */
|
|
+
|
|
+#define ci_linux_daemonize(name) daemonize(name)
|
|
+
|
|
+#include <linux/workqueue.h>
|
|
+
|
|
+
|
|
+typedef struct {
|
|
+ void* (*fn)(void* arg);
|
|
+ void* arg;
|
|
+ const char* name;
|
|
+ int thrd_id;
|
|
+ struct completion exit_event;
|
|
+ struct work_struct keventd_witem;
|
|
+} ci_kernel_thread_t;
|
|
+
|
|
+
|
|
+typedef ci_kernel_thread_t* cithread_t;
|
|
+
|
|
+
|
|
+extern int cithread_create(cithread_t* tid, void* (*fn)(void*), void* arg,
|
|
+ const char* name);
|
|
+extern int cithread_detach(cithread_t kt);
|
|
+extern int cithread_join(cithread_t kt);
|
|
+
|
|
+
|
|
+/* Kernel sysctl variables. */
|
|
+extern int sysctl_tcp_wmem[3];
|
|
+extern int sysctl_tcp_rmem[3];
|
|
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
|
|
+#define LINUX_HAS_SYSCTL_MEM_MAX
|
|
+extern ci_uint32 sysctl_wmem_max;
|
|
+extern ci_uint32 sysctl_rmem_max;
|
|
+#endif
|
|
+
|
|
+
|
|
+/*--------------------------------------------------------------------
|
|
+ *
|
|
+ * ci_bigbuf_t: An abstraction of a large buffer. Needed because in the
|
|
+ * Linux kernel, large buffers need to be allocated with vmalloc(), whereas
|
|
+ * smaller buffers should use kmalloc(). This abstraction chooses the
|
|
+ * appropriate mechanism.
|
|
+ *
|
|
+ *--------------------------------------------------------------------*/
|
|
+
|
|
+typedef struct {
|
|
+ char* p;
|
|
+ int is_vmalloc;
|
|
+} ci_bigbuf_t;
|
|
+
|
|
+
|
|
+ci_inline int ci_bigbuf_alloc(ci_bigbuf_t* bb, size_t bytes) { /* fills *bb; returns 0 on success, -ENOMEM on failure */
|
|
+ if( bytes >= CI_PAGE_SIZE && ! ci_in_atomic() ) { /* vmalloc only for page-sized-or-larger requests outside atomic context */
|
|
+ bb->is_vmalloc = 1;
|
|
+ if( (bb->p = vmalloc(bytes)) ) return 0;
|
|
+ }
|
|
+ bb->is_vmalloc = 0; /* also reached when vmalloc failed: fall back to kmalloc */
|
|
+ bb->p = kmalloc(bytes, ci_in_atomic() ? GFP_ATOMIC : GFP_KERNEL); /* same atomic test as above, so atomic callers never get GFP_KERNEL */
|
|
+ return bb->p ? 0 : -ENOMEM;
|
|
+}
|
|
+
|
|
+ci_inline void ci_bigbuf_free(ci_bigbuf_t* bb) {
|
|
+ if( bb->is_vmalloc ) vfree(bb->p);
|
|
+ else kfree(bb->p);
|
|
+}
|
|
+
|
|
+ci_inline char* ci_bigbuf_ptr(ci_bigbuf_t* bb)
|
|
+{ return bb->p; }
|
|
+
|
|
+/**********************************************************************
|
|
+ * struct iovec abstraction (for Windows port)
|
|
+ */
|
|
+
|
|
+typedef struct iovec ci_iovec;
|
|
+
|
|
+/* Accessors for buffer/length */
|
|
+#define CI_IOVEC_BASE(i) ((i)->iov_base)
|
|
+#define CI_IOVEC_LEN(i) ((i)->iov_len)
|
|
+
|
|
+/**********************************************************************
|
|
+ * Signals
|
|
+ */
|
|
+
|
|
+ci_inline void
|
|
+ci_send_sig(int signum)
|
|
+{
|
|
+ send_sig(signum, current, 0);
|
|
+}
|
|
+
|
|
+#endif /* __CI_TOOLS_LINUX_KERNEL_H__ */
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netback/ci/tools/sysdep.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,132 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_tools */
|
|
+
|
|
+#ifndef __CI_TOOLS_SYSDEP_H__
|
|
+#define __CI_TOOLS_SYSDEP_H__
|
|
+
|
|
+/* Make this header self-sufficient */
|
|
+#include <ci/compat.h>
|
|
+#include <ci/tools/log.h>
|
|
+#include <ci/tools/debug.h>
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Platform dependencies.
|
|
+ */
|
|
+
|
|
+#if defined(__KERNEL__)
|
|
+
|
|
+# if defined(__linux__)
|
|
+# include <ci/tools/platform/linux_kernel.h>
|
|
+# elif defined(_WIN32)
|
|
+# include <ci/tools/platform/win32_kernel.h>
|
|
+# elif defined(__sun__)
|
|
+# include <ci/tools/platform/sunos_kernel.h>
|
|
+# else
|
|
+# error Unknown platform.
|
|
+# endif
|
|
+
|
|
+#elif defined(_WIN32)
|
|
+
|
|
+# include <ci/tools/platform/win32.h>
|
|
+
|
|
+#elif defined(__unix__)
|
|
+
|
|
+# include <ci/tools/platform/unix.h>
|
|
+
|
|
+#else
|
|
+
|
|
+# error Unknown platform.
|
|
+
|
|
+#endif
|
|
+
|
|
+#if defined(__linux__)
|
|
+/*! Linux sendfile() support enable/disable. */
|
|
+# define CI_HAVE_SENDFILE /* provide sendfile i/f */
|
|
+
|
|
+# define CI_HAVE_OS_NOPAGE
|
|
+#endif
|
|
+
|
|
+#if defined(__sun__)
|
|
+# define CI_HAVE_SENDFILE /* provide sendfile i/f */
|
|
+# define CI_HAVE_SENDFILEV /* provide sendfilev i/f */
|
|
+
|
|
+# define CI_IOCTL_SENDFILE /* use efrm CI_SENDFILEV ioctl */
|
|
+#endif
|
|
+
|
|
+#if defined(_WIN32)
|
|
+typedef ci_uint32 ci_uerr_t; /* range of OS user-mode return codes */
|
|
+typedef ci_uint32 ci_kerr_t; /* range of OS kernel-mode return codes */
|
|
+#elif defined(__unix__)
|
|
+typedef ci_int32 ci_uerr_t; /* range of OS user-mode return codes */
|
|
+typedef ci_int32 ci_kerr_t; /* range of OS kernel-mode return codes */
|
|
+#endif
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Compiler and processor dependencies.
|
|
+ */
|
|
+
|
|
+#if defined(__GNUC__)
|
|
+
|
|
+#if defined(__i386__) || defined(__x86_64__)
|
|
+# include <ci/tools/platform/gcc_x86.h>
|
|
+#elif defined(__PPC__)
|
|
+# include <ci/tools/platform/gcc_ppc.h>
|
|
+#elif defined(__ia64__)
|
|
+# include <ci/tools/platform/gcc_ia64.h>
|
|
+#else
|
|
+# error Unknown processor.
|
|
+#endif
|
|
+
|
|
+#elif defined(_MSC_VER)
|
|
+
|
|
+#if defined(__i386__)
|
|
+# include <ci/tools/platform/msvc_x86.h>
|
|
+# elif defined(__x86_64__)
|
|
+# include <ci/tools/platform/msvc_x86_64.h>
|
|
+#else
|
|
+# error Unknown processor.
|
|
+#endif
|
|
+
|
|
+#elif defined(__PGI)
|
|
+
|
|
+# include <ci/tools/platform/pg_x86.h>
|
|
+
|
|
+#elif defined(__INTEL_COMPILER)
|
|
+
|
|
+/* Intel compilers v7 claim to be very gcc compatible. */
|
|
+# include <ci/tools/platform/gcc_x86.h>
|
|
+
|
|
+#else
|
|
+# error Unknown compiler.
|
|
+#endif
|
|
+
|
|
+
|
|
+#endif /* __CI_TOOLS_SYSDEP_H__ */
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/Makefile 2008-02-26 10:54:11.000000000 +0100
|
|
@@ -0,0 +1,11 @@
|
|
+EXTRA_CFLAGS += -Idrivers/xen/sfc_netfront -Idrivers/xen/sfc_netutil -Idrivers/xen/netfront
|
|
+EXTRA_CFLAGS += -D__ci_driver__
|
|
+EXTRA_CFLAGS += -Werror
|
|
+
|
|
+ifdef GCOV
|
|
+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
|
|
+endif
|
|
+
|
|
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) := sfc_netfront.o
|
|
+
|
|
+sfc_netfront-objs := accel_msg.o accel_bufs.o accel_netfront.o accel_vi.o accel_xenbus.o accel_tso.o accel_ssr.o accel_debugfs.o falcon_event.o falcon_vi.o pt_tx.o vi_init.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel.h 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,495 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NETFRONT_ACCEL_H
|
|
+#define NETFRONT_ACCEL_H
|
|
+
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_cuckoo_hash.h"
|
|
+#include "accel_bufs.h"
|
|
+
|
|
+#include "etherfabric/ef_vi.h"
|
|
+
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/evtchn.h>
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/list.h>
|
|
+
|
|
+enum netfront_accel_post_status {
|
|
+ NETFRONT_ACCEL_STATUS_GOOD,
|
|
+ NETFRONT_ACCEL_STATUS_BUSY,
|
|
+ NETFRONT_ACCEL_STATUS_CANT
|
|
+};
|
|
+
|
|
+#define NETFRONT_ACCEL_STATS 1
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+#define NETFRONT_ACCEL_STATS_OP(x) x
|
|
+#else
|
|
+#define NETFRONT_ACCEL_STATS_OP(x)
|
|
+#endif
|
|
+
|
|
+
|
|
+enum netfront_accel_msg_state {
|
|
+ NETFRONT_ACCEL_MSG_NONE = 0,
|
|
+ NETFRONT_ACCEL_MSG_HELLO = 1,
|
|
+ NETFRONT_ACCEL_MSG_HW = 2
|
|
+};
|
|
+
|
|
+
|
|
+typedef struct {
|
|
+ u32 in_progress;
|
|
+ u32 total_len;
|
|
+ struct sk_buff *skb;
|
|
+} netfront_accel_jumbo_state;
|
|
+
|
|
+
|
|
+struct netfront_accel_ssr_state {
|
|
+ /** List of tracked connections. */
|
|
+ struct list_head conns;
|
|
+
|
|
+ /** Free efx_ssr_conn instances. */
|
|
+ struct list_head free_conns;
|
|
+};
|
|
+
|
|
+
|
|
+struct netfront_accel_netdev_stats {
|
|
+ /* Fastpath stats. */
|
|
+ u32 fastpath_rx_pkts;
|
|
+ u32 fastpath_rx_bytes;
|
|
+ u32 fastpath_rx_errors;
|
|
+ u32 fastpath_tx_pkts;
|
|
+ u32 fastpath_tx_bytes;
|
|
+ u32 fastpath_tx_errors;
|
|
+};
|
|
+
|
|
+
|
|
+struct netfront_accel_netdev_dbfs {
|
|
+ struct dentry *fastpath_rx_pkts;
|
|
+ struct dentry *fastpath_rx_bytes;
|
|
+ struct dentry *fastpath_rx_errors;
|
|
+ struct dentry *fastpath_tx_pkts;
|
|
+ struct dentry *fastpath_tx_bytes;
|
|
+ struct dentry *fastpath_tx_errors;
|
|
+};
|
|
+
|
|
+
|
|
+struct netfront_accel_stats {
|
|
+ /** Fast path events */
|
|
+ u64 fastpath_tx_busy;
|
|
+
|
|
+ /** TX DMA queue status */
|
|
+ u64 fastpath_tx_completions;
|
|
+
|
|
+ /** The number of events processed. */
|
|
+ u64 event_count;
|
|
+
|
|
+ /** Number of frame trunc events seen on fastpath */
|
|
+ u64 fastpath_frm_trunc;
|
|
+
|
|
+ /** Number of rx discard (bad crc) events seen on fastpath */
|
|
+ u64 fastpath_crc_bad;
|
|
+
|
|
+ /** Number of rx discard (bad csum) events seen on fastpath */
|
|
+ u64 fastpath_csum_bad;
|
|
+
|
|
+ /** Number of rx discard (bad rights) events seen on fastpath */
|
|
+ u64 fastpath_rights_bad;
|
|
+
|
|
+ /** Number of rx discard ("other") events seen on fastpath */
|
|
+ u64 fastpath_discard_other;
|
|
+
|
|
+ /** Number of no rx descriptor trunc events seen on fastpath */
|
|
+ u64 rx_no_desc_trunc;
|
|
+
|
|
+ /** The number of misc bad events processed. */
|
|
+ u64 bad_event_count;
|
|
+
|
|
+ /** Number of events dealt with in poll loop */
|
|
+ u32 events_per_poll_max;
|
|
+ u32 events_per_poll_tx_max;
|
|
+ u32 events_per_poll_rx_max;
|
|
+
|
|
+ /** Largest number of concurrently outstanding tx descriptors */
|
|
+ u32 fastpath_tx_pending_max;
|
|
+
|
|
+ /** The number of events since the last interrupt. */
|
|
+ u32 event_count_since_irq;
|
|
+
|
|
+ /** The max number of events between interrupts. */
|
|
+ u32 events_per_irq_max;
|
|
+
|
|
+ /** The number of interrupts. */
|
|
+ u64 irq_count;
|
|
+
|
|
+ /** The number of useless interrupts. */
|
|
+ u64 useless_irq_count;
|
|
+
|
|
+ /** The number of polls scheduled. */
|
|
+ u64 poll_schedule_count;
|
|
+
|
|
+ /** The number of polls called. */
|
|
+ u64 poll_call_count;
|
|
+
|
|
+ /** The number of rechecks. */
|
|
+ u64 poll_reschedule_count;
|
|
+
|
|
+ /** Number of times we've called netif_stop_queue/netif_wake_queue */
|
|
+ u64 queue_stops;
|
|
+ u64 queue_wakes;
|
|
+
|
|
+ /** SSR stats */
|
|
+ u64 ssr_bursts;
|
|
+ u64 ssr_drop_stream;
|
|
+ u64 ssr_misorder;
|
|
+ u64 ssr_slow_start;
|
|
+ u64 ssr_merges;
|
|
+ u64 ssr_too_many;
|
|
+ u64 ssr_new_stream;
|
|
+};
|
|
+
|
|
+
|
|
+struct netfront_accel_dbfs {
|
|
+ struct dentry *fastpath_tx_busy;
|
|
+ struct dentry *fastpath_tx_completions;
|
|
+ struct dentry *fastpath_tx_pending_max;
|
|
+ struct dentry *fastpath_frm_trunc;
|
|
+ struct dentry *fastpath_crc_bad;
|
|
+ struct dentry *fastpath_csum_bad;
|
|
+ struct dentry *fastpath_rights_bad;
|
|
+ struct dentry *fastpath_discard_other;
|
|
+ struct dentry *rx_no_desc_trunc;
|
|
+ struct dentry *event_count;
|
|
+ struct dentry *bad_event_count;
|
|
+ struct dentry *events_per_poll_max;
|
|
+ struct dentry *events_per_poll_rx_max;
|
|
+ struct dentry *events_per_poll_tx_max;
|
|
+ struct dentry *event_count_since_irq;
|
|
+ struct dentry *events_per_irq_max;
|
|
+ struct dentry *irq_count;
|
|
+ struct dentry *useless_irq_count;
|
|
+ struct dentry *poll_schedule_count;
|
|
+ struct dentry *poll_call_count;
|
|
+ struct dentry *poll_reschedule_count;
|
|
+ struct dentry *queue_stops;
|
|
+ struct dentry *queue_wakes;
|
|
+ struct dentry *ssr_bursts;
|
|
+ struct dentry *ssr_drop_stream;
|
|
+ struct dentry *ssr_misorder;
|
|
+ struct dentry *ssr_slow_start;
|
|
+ struct dentry *ssr_merges;
|
|
+ struct dentry *ssr_too_many;
|
|
+ struct dentry *ssr_new_stream;
|
|
+};
|
|
+
|
|
+
|
|
+typedef struct netfront_accel_vnic {
|
|
+ struct netfront_accel_vnic *next;
|
|
+
|
|
+ struct mutex vnic_mutex;
|
|
+
|
|
+ spinlock_t tx_lock;
|
|
+
|
|
+ struct netfront_accel_bufpages bufpages;
|
|
+ struct netfront_accel_bufinfo *rx_bufs;
|
|
+ struct netfront_accel_bufinfo *tx_bufs;
|
|
+
|
|
+ /** Hardware & VI state */
|
|
+ ef_vi vi;
|
|
+
|
|
+ ef_vi_state *vi_state;
|
|
+
|
|
+ ef_eventq_state evq_state;
|
|
+
|
|
+ void *evq_mapping;
|
|
+
|
|
+ /** Hardware dependent state */
|
|
+ union {
|
|
+ struct {
|
|
+ /** Falcon A or B */
|
|
+ enum net_accel_hw_type type;
|
|
+ u32 *evq_rptr;
|
|
+ u32 *doorbell;
|
|
+ void *evq_rptr_mapping;
|
|
+ void *doorbell_mapping;
|
|
+ void *txdmaq_mapping;
|
|
+ void *rxdmaq_mapping;
|
|
+ } falcon;
|
|
+ } hw;
|
|
+
|
|
+ /** RX DMA queue status */
|
|
+ u32 rx_dma_level;
|
|
+
|
|
+ /** Number of RX descriptors waiting to be pushed to the card. */
|
|
+ u32 rx_dma_batched;
|
|
+#define NETFRONT_ACCEL_RX_DESC_BATCH 16
|
|
+
|
|
+ /**
|
|
+ * Hash table of remote mac addresses to decide whether to try
|
|
+ * fast path
|
|
+ */
|
|
+ cuckoo_hash_table fastpath_table;
|
|
+ spinlock_t table_lock;
|
|
+
|
|
+ /** the local mac address of virtual interface we're accelerating */
|
|
+ u8 mac[ETH_ALEN];
|
|
+
|
|
+ int rx_pkt_stride;
|
|
+ int rx_skb_stride;
|
|
+
|
|
+ /**
|
|
+ * Keep track of fragments of jumbo packets as events are
|
|
+ * delivered by NIC
|
|
+ */
|
|
+ netfront_accel_jumbo_state jumbo_state;
|
|
+
|
|
+ struct net_device *net_dev;
|
|
+
|
|
+ /** These two gate the enabling of fast path operations */
|
|
+ int frontend_ready;
|
|
+ int backend_netdev_up;
|
|
+
|
|
+ int irq_enabled;
|
|
+ spinlock_t irq_enabled_lock;
|
|
+
|
|
+ int tx_enabled;
|
|
+
|
|
+ int poll_enabled;
|
|
+
|
|
+ /** A spare slot for a TX packet. This is treated as an
|
|
+ * extension of the DMA queue. Reads require either
|
|
+ * netfront's tx_lock or the vnic tx_lock; writes require both
|
|
+ * locks */
|
|
+ struct sk_buff *tx_skb;
|
|
+
|
|
+ /** Keep track of fragments of SSR packets */
|
|
+ struct netfront_accel_ssr_state ssr_state;
|
|
+
|
|
+ struct xenbus_device *dev;
|
|
+
|
|
+ /** Event channel for messages */
|
|
+ int msg_channel;
|
|
+ int msg_channel_irq;
|
|
+
|
|
+ /** Event channel for network interrupts. */
|
|
+ int net_channel;
|
|
+ int net_channel_irq;
|
|
+
|
|
+ struct net_accel_shared_page *shared_page;
|
|
+
|
|
+ grant_ref_t ctrl_page_gnt;
|
|
+ grant_ref_t msg_page_gnt;
|
|
+
|
|
+ /** Message Qs, 1 each way. */
|
|
+ sh_msg_fifo2 to_dom0;
|
|
+ sh_msg_fifo2 from_dom0;
|
|
+
|
|
+ enum netfront_accel_msg_state msg_state;
|
|
+
|
|
+ /** Watch on accelstate */
|
|
+ struct xenbus_watch backend_accel_watch;
|
|
+ /** Watch on frontend's MAC address */
|
|
+ struct xenbus_watch mac_address_watch;
|
|
+
|
|
+ /** Work to process received irq/msg */
|
|
+ struct work_struct msg_from_bend;
|
|
+
|
|
+ /** Wait queue for changes in accelstate. */
|
|
+ wait_queue_head_t state_wait_queue;
|
|
+
|
|
+ /** The current accelstate of this driver. */
|
|
+ XenbusState frontend_state;
|
|
+
|
|
+ /** The most recent accelstate seen by the xenbus watch. */
|
|
+ XenbusState backend_state;
|
|
+
|
|
+ /** Non-zero if we should reject requests to connect. */
|
|
+ int removing;
|
|
+
|
|
+ /** Non-zero if the domU shared state has been initialised. */
|
|
+ int domU_state_is_setup;
|
|
+
|
|
+ /** Non-zero if the dom0 shared state has been initialised. */
|
|
+ int dom0_state_is_setup;
|
|
+
|
|
+ /* Those statistics that are added to the netdev stats */
|
|
+ struct netfront_accel_netdev_stats netdev_stats;
|
|
+ struct netfront_accel_netdev_stats stats_last_read;
|
|
+#ifdef CONFIG_DEBUG_FS
|
|
+ struct netfront_accel_netdev_dbfs netdev_dbfs;
|
|
+#endif
|
|
+
|
|
+ /* These statistics are internal and optional */
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+ struct netfront_accel_stats stats;
|
|
+#ifdef CONFIG_DEBUG_FS
|
|
+ struct netfront_accel_dbfs dbfs;
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+ /** Debugfs dir for this interface */
|
|
+ struct dentry *dbfs_dir;
|
|
+} netfront_accel_vnic;
|
|
+
|
|
+
|
|
+/* Module parameters */
|
|
+extern unsigned sfc_netfront_max_pages;
|
|
+extern unsigned sfc_netfront_buffer_split;
|
|
+
|
|
+extern const char *frontend_name;
|
|
+extern struct netfront_accel_hooks accel_hooks;
|
|
+extern struct workqueue_struct *netfront_accel_workqueue;
|
|
+
|
|
+
|
|
+extern
|
|
+void netfront_accel_vi_ctor(netfront_accel_vnic *vnic);
|
|
+
|
|
+extern
|
|
+int netfront_accel_vi_init(netfront_accel_vnic *vnic,
|
|
+ struct net_accel_msg_hw *hw_msg);
|
|
+
|
|
+extern
|
|
+void netfront_accel_vi_dtor(netfront_accel_vnic *vnic);
|
|
+
|
|
+
|
|
+/**
|
|
+ * Add new buffers which have been registered with the NIC.
|
|
+ *
|
|
+ * @v vnic The vnic instance to process the response.
|
|
+ *
|
|
+ * The buffers contained in the message are added to the buffer pool.
|
|
+ */
|
|
+extern
|
|
+void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx);
|
|
+
|
|
+/**
|
|
+ * Put a packet on the tx DMA queue.
|
|
+ *
|
|
+ * @v vnic The vnic instance to accept the packet.
|
|
+ * @v skb A sk_buff to send.
|
|
+ *
|
|
+ * Attempt to send a packet. On success, the skb is owned by the DMA
|
|
+ * queue and will be released when the completion event arrives.
|
|
+ */
|
|
+extern enum netfront_accel_post_status
|
|
+netfront_accel_vi_tx_post(netfront_accel_vnic *vnic,
|
|
+ struct sk_buff *skb);
|
|
+
|
|
+
|
|
+/**
|
|
+ * Process events in response to an interrupt.
|
|
+ *
|
|
+ * @v vnic The vnic instance to poll.
|
|
+ * @v rx_packets The maximum number of rx packets to process.
|
|
+ * @ret rx_done The number of rx packets processed.
|
|
+ *
|
|
+ * The vnic will process events until there are no more events
|
|
+ * remaining or the specified number of rx packets has been processed.
|
|
+ * The split from the interrupt call is to allow Linux NAPI
|
|
+ * polling.
|
|
+ */
|
|
+extern
|
|
+int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets);
|
|
+
|
|
+
|
|
+/**
|
|
+ * Iterate over the fragments of a packet buffer.
|
|
+ *
|
|
+ * @v skb The packet buffer to examine.
|
|
+ * @v frag_idx A variable name for the fragment index.
|
|
+ * @v frag_data A variable name for the address of the fragment data.
|
|
+ * @v frag_len A variable name for the fragment length.
|
|
+ * @v code A section of code to execute for each fragment.
|
|
+ *
|
|
+ * This macro iterates over the fragments in a packet buffer and
|
|
+ * executes the code for each of them.
|
|
+ */
|
|
+#define NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT(skb, frag_idx, \
|
|
+ frag_data, frag_len, \
|
|
+ code) \
|
|
+ do { \
|
|
+ int frag_idx; \
|
|
+ void *frag_data; \
|
|
+ unsigned int frag_len; \
|
|
+ \
|
|
+ frag_data = skb->data; \
|
|
+ frag_len = skb_headlen(skb); \
|
|
+ frag_idx = 0; \
|
|
+ while (1) { /* For each fragment */ \
|
|
+ code; \
|
|
+ if (frag_idx >= skb_shinfo(skb)->nr_frags) { \
|
|
+ break; \
|
|
+ } else { \
|
|
+ skb_frag_t *fragment; \
|
|
+ fragment = &skb_shinfo(skb)->frags[frag_idx]; \
|
|
+ frag_len = fragment->size; \
|
|
+ frag_data = ((void*)page_address(fragment->page) \
|
|
+ + fragment->page_offset); \
|
|
+ }; \
|
|
+ frag_idx++; \
|
|
+ } \
|
|
+ } while(0)
|
|
+
|
|
+static inline
|
|
+void netfront_accel_disable_net_interrupts(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ mask_evtchn(vnic->net_channel);
|
|
+}
|
|
+
|
|
+static inline
|
|
+void netfront_accel_enable_net_interrupts(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ unmask_evtchn(vnic->net_channel);
|
|
+}
|
|
+
|
|
+void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
|
|
+ u32 ip, u16 port, u8 protocol);
|
|
+
|
|
+/* Process an IRQ received from back end driver */
|
|
+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
|
|
+ struct pt_regs *unused);
|
|
+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
|
|
+ struct pt_regs *unused);
|
|
+
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
+extern void netfront_accel_msg_from_bend(struct work_struct *context);
|
|
+#else
|
|
+extern void netfront_accel_msg_from_bend(void *context);
|
|
+#endif
|
|
+
|
|
+extern void vnic_stop_fastpath(netfront_accel_vnic *vnic);
|
|
+
|
|
+extern int netfront_accel_probe(struct net_device *net_dev,
|
|
+ struct xenbus_device *dev);
|
|
+extern int netfront_accel_remove(struct xenbus_device *dev);
|
|
+extern void netfront_accel_set_closing(netfront_accel_vnic *vnic);
|
|
+
|
|
+extern int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic);
|
|
+
|
|
+extern void netfront_accel_debugfs_init(void);
|
|
+extern void netfront_accel_debugfs_fini(void);
|
|
+extern int netfront_accel_debugfs_create(netfront_accel_vnic *vnic);
|
|
+extern int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic);
|
|
+
|
|
+#endif /* NETFRONT_ACCEL_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_bufs.c 2008-02-26 10:54:12.000000000 +0100
|
|
@@ -0,0 +1,393 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <xen/gnttab.h>
|
|
+
|
|
+#include "accel_bufs.h"
|
|
+#include "accel_util.h"
|
|
+
|
|
+#include "accel.h"
|
|
+
|
|
+
|
|
+static int
|
|
+netfront_accel_alloc_buf_desc_blocks(struct netfront_accel_bufinfo *manager,
|
|
+ int pages)
|
|
+{
|
|
+ manager->desc_blocks =
|
|
+ kzalloc(sizeof(struct netfront_accel_pkt_desc *) *
|
|
+ NETFRONT_ACCEL_BUF_NUM_BLOCKS(pages), GFP_KERNEL);
|
|
+ if (manager->desc_blocks == NULL) {
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int
|
|
+netfront_accel_alloc_buf_lists(struct netfront_accel_bufpages *bufpages,
|
|
+ int pages)
|
|
+{
|
|
+ bufpages->page_list = kmalloc(pages * sizeof(void *), GFP_KERNEL);
|
|
+ if (bufpages->page_list == NULL) {
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ bufpages->grant_list = kzalloc(pages * sizeof(grant_ref_t), GFP_KERNEL);
|
|
+ if (bufpages->grant_list == NULL) {
|
|
+ kfree(bufpages->page_list);
|
|
+ bufpages->page_list = NULL;
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
|
|
+ struct netfront_accel_bufinfo *rx_manager,
|
|
+ struct netfront_accel_bufinfo *tx_manager,
|
|
+ int pages)
|
|
+{
|
|
+ int n, rc;
|
|
+
|
|
+ if ((rc = netfront_accel_alloc_buf_desc_blocks
|
|
+ (rx_manager, pages - (pages / sfc_netfront_buffer_split))) < 0) {
|
|
+ goto rx_fail;
|
|
+ }
|
|
+
|
|
+ if ((rc = netfront_accel_alloc_buf_desc_blocks
|
|
+ (tx_manager, pages / sfc_netfront_buffer_split)) < 0) {
|
|
+ goto tx_fail;
|
|
+ }
|
|
+
|
|
+ if ((rc = netfront_accel_alloc_buf_lists(bufpages, pages)) < 0) {
|
|
+ goto lists_fail;
|
|
+ }
|
|
+
|
|
+ for (n = 0; n < pages; n++) {
|
|
+ void *tmp = (void*)__get_free_page(GFP_KERNEL);
|
|
+ if (tmp == NULL)
|
|
+ break;
|
|
+
|
|
+ bufpages->page_list[n] = tmp;
|
|
+ }
|
|
+
|
|
+ if (n != pages) {
|
|
+ EPRINTK("%s: not enough pages: %d != %d\n", __FUNCTION__, n,
|
|
+ pages);
|
|
+ /* Only slots 0..n-1 hold pages; slot n was never written. */
|
|
+ for (n--; n >= 0; n--)
|
|
+ free_page((unsigned long)(bufpages->page_list[n]));
|
|
+ rc = -ENOMEM;
|
|
+ goto pages_fail;
|
|
+ }
|
|
+
|
|
+ bufpages->max_pages = pages;
|
|
+ bufpages->page_reqs = 0;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+ pages_fail:
|
|
+ kfree(bufpages->page_list);
|
|
+ kfree(bufpages->grant_list);
|
|
+ bufpages->page_list = NULL;
|
|
+ bufpages->grant_list = NULL;
|
|
+ lists_fail:
|
|
+ kfree(tx_manager->desc_blocks);
|
|
+ tx_manager->desc_blocks = NULL;
|
|
+
|
|
+ tx_fail:
|
|
+ kfree(rx_manager->desc_blocks);
|
|
+ rx_manager->desc_blocks = NULL;
|
|
+ rx_fail:
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+
|
|
+void netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
|
|
+ struct netfront_accel_bufinfo *rx_manager,
|
|
+ struct netfront_accel_bufinfo *tx_manager)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < bufpages->max_pages; i++) {
|
|
+ if (bufpages->grant_list[i] != 0)
|
|
+ net_accel_ungrant_page(bufpages->grant_list[i]);
|
|
+ free_page((unsigned long)(bufpages->page_list[i]));
|
|
+ }
|
|
+
|
|
+ if (bufpages->max_pages) {
|
|
+ kfree(bufpages->page_list);
|
|
+ kfree(bufpages->grant_list);
|
|
+ kfree(rx_manager->desc_blocks);
|
|
+ kfree(tx_manager->desc_blocks);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Allocate memory for the buffer manager and create a lock. If no
|
|
+ * lock is supplied its own is allocated.
|
|
+ */
|
|
+struct netfront_accel_bufinfo *netfront_accel_init_bufs(spinlock_t *lock)
|
|
+{
|
|
+ struct netfront_accel_bufinfo *res = kmalloc(sizeof(*res), GFP_KERNEL);
|
|
+ if (res != NULL) {
|
|
+ res->npages = res->nused = 0;
|
|
+ res->first_free = -1;
|
|
+
|
|
+ if (lock == NULL) {
|
|
+ res->lock = kmalloc(sizeof(*res->lock), GFP_KERNEL);
|
|
+ if (res->lock == NULL) {
|
|
+ kfree(res);
|
|
+ return NULL;
|
|
+ }
|
|
+ spin_lock_init(res->lock);
|
|
+ res->internally_locked = 1;
|
|
+ } else {
|
|
+ res->lock = lock;
|
|
+ res->internally_locked = 0;
|
|
+ }
|
|
+
|
|
+ res->desc_blocks = NULL;
|
|
+ }
|
|
+
|
|
+ return res;
|
|
+}
|
|
+
|
|
+
|
|
+void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *bufs)
|
|
+{
|
|
+ if (bufs->internally_locked)
|
|
+ kfree(bufs->lock);
|
|
+ kfree(bufs);
|
|
+}
|
|
+
|
|
+
|
|
+int netfront_accel_buf_map_request(struct xenbus_device *dev,
|
|
+ struct netfront_accel_bufpages *bufpages,
|
|
+ struct net_accel_msg *msg,
|
|
+ int pages, int offset)
|
|
+{
|
|
+ int i, mfn;
|
|
+ int err;
|
|
+
|
|
+ net_accel_msg_init(msg, NET_ACCEL_MSG_MAPBUF);
|
|
+
|
|
+ BUG_ON(pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
|
|
+
|
|
+ msg->u.mapbufs.pages = pages;
|
|
+
|
|
+ for (i = 0; i < msg->u.mapbufs.pages; i++) {
|
|
+ /*
|
|
+ * This can happen if we tried to send this message
|
|
+ * earlier but the queue was full.
|
|
+ */
|
|
+ if (bufpages->grant_list[offset+i] != 0) {
|
|
+ msg->u.mapbufs.grants[i] =
|
|
+ bufpages->grant_list[offset+i];
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ mfn = virt_to_mfn(bufpages->page_list[offset+i]);
|
|
+ VPRINTK("%s: Granting page %d, mfn %08x\n",
|
|
+ __FUNCTION__, i, mfn);
|
|
+
|
|
+ bufpages->grant_list[offset+i] =
|
|
+ net_accel_grant_page(dev, mfn, 0);
|
|
+ msg->u.mapbufs.grants[i] = bufpages->grant_list[offset+i];
|
|
+
|
|
+ if (msg->u.mapbufs.grants[i] < 0) {
|
|
+ EPRINTK("%s: Failed to grant buffer: %d\n",
|
|
+ __FUNCTION__, msg->u.mapbufs.grants[i]);
|
|
+ err = -EIO;
|
|
+ goto error;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* This is interpreted on return as the offset in the page_list */
|
|
+ msg->u.mapbufs.reqid = offset;
|
|
+
|
|
+ return 0;
|
|
+
|
|
+error:
|
|
+ /* Ungrant all the pages we've successfully granted. */
|
|
+ for (i--; i >= 0; i--) {
|
|
+ net_accel_ungrant_page(bufpages->grant_list[offset+i]);
|
|
+ bufpages->grant_list[offset+i] = 0;
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/* Process a response to a buffer request. */
|
|
+int netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
|
|
+ struct netfront_accel_bufinfo *manager,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ int msg_pages, page_offset, i, newtot;
|
|
+ int old_block_count, new_block_count;
|
|
+ u32 msg_buf;
|
|
+ unsigned long flags;
|
|
+
|
|
+ VPRINTK("%s: manager %p msg %p\n", __FUNCTION__, manager, msg);
|
|
+
|
|
+ BUG_ON(msg->id != (NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY));
|
|
+
|
|
+ msg_pages = msg->u.mapbufs.pages;
|
|
+ msg_buf = msg->u.mapbufs.buf;
|
|
+ page_offset = msg->u.mapbufs.reqid;
|
|
+
|
|
+ spin_lock_irqsave(manager->lock, flags);
|
|
+ newtot = manager->npages + msg_pages;
|
|
+ old_block_count =
|
|
+ (manager->npages + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
|
|
+ NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
|
|
+ new_block_count =
|
|
+ (newtot + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >>
|
|
+ NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
|
|
+
|
|
+ for (i = old_block_count; i < new_block_count; i++) {
|
|
+ struct netfront_accel_pkt_desc *block;
|
|
+ if (manager->desc_blocks[i] != NULL) {
|
|
+ VPRINTK("Not needed\n");
|
|
+ continue;
|
|
+ }
|
|
+ block = kzalloc(NETFRONT_ACCEL_BUFS_PER_BLOCK *
|
|
+ sizeof(netfront_accel_pkt_desc), GFP_ATOMIC);
|
|
+ if (block == NULL) {
|
|
+ spin_unlock_irqrestore(manager->lock, flags);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+ manager->desc_blocks[i] = block;
|
|
+ }
|
|
+ for (i = manager->npages; i < newtot; i++) {
|
|
+ int k, j = i - manager->npages;
|
|
+ int block_num;
|
|
+ int block_idx;
|
|
+ struct netfront_accel_pkt_desc *pkt;
|
|
+
|
|
+ block_num = i >> NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT;
|
|
+ block_idx = (NETFRONT_ACCEL_BUFS_PER_PAGE*i)
|
|
+ & (NETFRONT_ACCEL_BUFS_PER_BLOCK-1);
|
|
+
|
|
+ pkt = manager->desc_blocks[block_num] + block_idx;
|
|
+
|
|
+ for (k = 0; k < NETFRONT_ACCEL_BUFS_PER_PAGE; k++) {
|
|
+ BUG_ON(page_offset + j >= bufpages->max_pages);
|
|
+
|
|
+ pkt[k].buf_id = NETFRONT_ACCEL_BUFS_PER_PAGE * i + k;
|
|
+ pkt[k].pkt_kva = bufpages->page_list[page_offset + j] +
|
|
+ (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * k;
|
|
+ pkt[k].pkt_buff_addr = msg_buf +
|
|
+ (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) *
|
|
+ (NETFRONT_ACCEL_BUFS_PER_PAGE * j + k);
|
|
+ pkt[k].next_free = manager->first_free;
|
|
+ manager->first_free = pkt[k].buf_id;
|
|
+ *(int*)(pkt[k].pkt_kva) = pkt[k].buf_id;
|
|
+
|
|
+ VPRINTK("buf %d desc %p kva %p buffaddr %x\n",
|
|
+ pkt[k].buf_id, &(pkt[k]), pkt[k].pkt_kva,
|
|
+ pkt[k].pkt_buff_addr);
|
|
+ }
|
|
+ }
|
|
+ manager->npages = newtot;
|
|
+ spin_unlock_irqrestore(manager->lock, flags);
|
|
+ VPRINTK("Added %d pages. Total is now %d\n", msg_pages,
|
|
+ manager->npages);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+netfront_accel_pkt_desc *
|
|
+netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id)
|
|
+{
|
|
+ netfront_accel_pkt_desc *pkt;
|
|
+ int block_num = id >> NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT;
|
|
+ int block_idx = id & (NETFRONT_ACCEL_BUFS_PER_BLOCK - 1);
|
|
+ BUG_ON(id >= manager->npages * NETFRONT_ACCEL_BUFS_PER_PAGE);
|
|
+ BUG_ON(block_idx >= NETFRONT_ACCEL_BUFS_PER_BLOCK);
|
|
+ pkt = manager->desc_blocks[block_num] + block_idx;
|
|
+ return pkt;
|
|
+}
|
|
+
|
|
+
|
|
+/* Allocate a buffer from the buffer manager; returns NULL if none is free */
|
|
+netfront_accel_pkt_desc *
|
|
+netfront_accel_buf_get(struct netfront_accel_bufinfo *manager)
|
|
+{
|
|
+ int bufno = -1;
|
|
+ netfront_accel_pkt_desc *buf = NULL;
|
|
+ unsigned long flags = 0;
|
|
+
|
|
+ /* Any spare? (unlocked peek; checked again once the lock is held) */
|
|
+ if (manager->first_free == -1)
|
|
+ return NULL;
|
|
+ /* Take lock */
|
|
+ if (manager->internally_locked)
|
|
+ spin_lock_irqsave(manager->lock, flags);
|
|
+ bufno = manager->first_free;
|
|
+ if (bufno != -1) {
|
|
+ buf = netfront_accel_buf_find(manager, bufno);
|
|
+ manager->first_free = buf->next_free;
|
|
+ manager->nused++;
|
|
+ }
|
|
+ /* Release lock */
|
|
+ if (manager->internally_locked)
|
|
+ spin_unlock_irqrestore(manager->lock, flags);
|
|
+
|
|
+ /* Tell the world (buf is still NULL if the free list emptied meanwhile) */
|
|
+ VPRINTK("Allocated buffer %i, buffaddr %x\n", bufno,
|
|
+ buf != NULL ? buf->pkt_buff_addr : 0);
|
|
+
|
|
+ return buf;
|
|
+}
|
|
+
|
|
+
|
|
+/* Release a buffer back to the buffer manager pool */
|
|
+int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, u16 id)
|
|
+{
|
|
+ netfront_accel_pkt_desc *buf = netfront_accel_buf_find(manager, id);
|
|
+ unsigned long flags = 0;
|
|
+ unsigned was_empty = 0;
|
|
+ int bufno = id;
|
|
+
|
|
+ VPRINTK("Freeing buffer %i\n", id);
|
|
+ BUG_ON(id == (u16)-1);
|
|
+
|
|
+ if (manager->internally_locked)
|
|
+ spin_lock_irqsave(manager->lock, flags);
|
|
+
|
|
+ if (manager->first_free == -1)
|
|
+ was_empty = 1;
|
|
+
|
|
+ buf->next_free = manager->first_free;
|
|
+ manager->first_free = bufno;
|
|
+ manager->nused--;
|
|
+
|
|
+ if (manager->internally_locked)
|
|
+ spin_unlock_irqrestore(manager->lock, flags);
|
|
+
|
|
+ return was_empty;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_bufs.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,181 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NETFRONT_ACCEL_BUFS_H
|
|
+#define NETFRONT_ACCEL_BUFS_H
|
|
+
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+#include "accel_msg_iface.h"
|
|
+
|
|
+
|
|
+/*! Buffer descriptor structure */
|
|
+typedef struct netfront_accel_pkt_desc {
|
|
+ int buf_id;
|
|
+ u32 pkt_buff_addr;
|
|
+ void *pkt_kva;
|
|
+ /* This is the socket buffer currently married to this buffer */
|
|
+ struct sk_buff *skb;
|
|
+ int next_free;
|
|
+} netfront_accel_pkt_desc;
|
|
+
|
|
+
|
|
+#define NETFRONT_ACCEL_DEFAULT_BUF_PAGES (384)
|
|
+#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT (4)
|
|
+#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK \
|
|
+ (1 << (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT))
|
|
+#define NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT (1)
|
|
+#define NETFRONT_ACCEL_BUFS_PER_PAGE \
|
|
+ (1 << (NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT))
|
|
+#define NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT \
|
|
+ (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT + \
|
|
+ NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT)
|
|
+#define NETFRONT_ACCEL_BUFS_PER_BLOCK \
|
|
+ (1 << NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT)
|
|
+#define NETFRONT_ACCEL_BUF_NUM_BLOCKS(max_pages) \
|
|
+ (((max_pages)+NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK-1) / \
|
|
+ NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK)
|
|
+
|
|
+/*! Buffer management structure. */
|
|
+struct netfront_accel_bufinfo {
|
|
+ /* number added to this manager */
|
|
+ unsigned npages;
|
|
+ /* number currently used from this manager */
|
|
+ unsigned nused;
|
|
+
|
|
+ int first_free;
|
|
+
|
|
+ int internally_locked;
|
|
+ spinlock_t *lock;
|
|
+
|
|
+ /*
|
|
+ * array of pointers (length NETFRONT_ACCEL_BUF_NUM_BLOCKS) to
|
|
+ * pkt descs
|
|
+ */
|
|
+ struct netfront_accel_pkt_desc **desc_blocks;
|
|
+};
|
|
+
|
|
+
|
|
+struct netfront_accel_bufpages {
|
|
+ /* length of lists of pages/grants */
|
|
+ int max_pages;
|
|
+ /* list of pages allocated for network buffers */
|
|
+ void **page_list;
|
|
+ /* list of grants for the above pages */
|
|
+ grant_ref_t *grant_list;
|
|
+
|
|
+ /* number of page requests that have been made */
|
|
+ unsigned page_reqs;
|
|
+};
|
|
+
|
|
+
|
|
+/*! Allocate memory for the buffer manager, set up locks etc.
|
|
+ * Optionally takes a lock to use, if not supplied it makes its own.
|
|
+ *
|
|
+ * \return pointer to netfront_accel_bufinfo structure that represents the
|
|
+ * buffer manager
|
|
+ */
|
|
+extern struct netfront_accel_bufinfo *
|
|
+netfront_accel_init_bufs(spinlock_t *lock);
|
|
+
|
|
+/*! Allocate memory for the buffers
|
|
+ */
|
|
+extern int
|
|
+netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages,
|
|
+ struct netfront_accel_bufinfo *rx_res,
|
|
+ struct netfront_accel_bufinfo *tx_res,
|
|
+ int pages);
|
|
+extern void
|
|
+netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages,
|
|
+ struct netfront_accel_bufinfo *rx_res,
|
|
+ struct netfront_accel_bufinfo *tx_res);
|
|
+
|
|
+/*! Release memory for the buffer manager, buffers, etc.
|
|
+ *
|
|
+ * \param manager pointer to netfront_accel_bufinfo structure that
|
|
+ * represents the buffer manager
|
|
+ */
|
|
+extern void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *manager);
|
|
+
|
|
+/*! Release a buffer.
|
|
+ *
|
|
+ * \param manager The buffer manager which owns the buffer.
|
|
+ * \param id The buffer identifier.
|
|
+ */
|
|
+extern int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager,
|
|
+ u16 id);
|
|
+
|
|
+/*! Get the packet descriptor associated with a buffer id.
|
|
+ *
|
|
+ * \param manager The buffer manager which owns the buffer.
|
|
+ * \param id The buffer identifier.
|
|
+ *
|
|
+ * The returned value is the packet descriptor for this buffer.
|
|
+ */
|
|
+extern netfront_accel_pkt_desc *
|
|
+netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id);
|
|
+
|
|
+
|
|
+/*! Fill out a message request for some buffers to be mapped by the
|
|
+ * back end driver
|
|
+ *
|
|
+ * \param bufpages The page and grant lists holding the buffers to map
|
|
+ * \param msg Pointer to a net_accel_msg to complete.
|
|
+ * \return 0 on success
|
|
+ */
|
|
+extern int
|
|
+netfront_accel_buf_map_request(struct xenbus_device *dev,
|
|
+ struct netfront_accel_bufpages *bufpages,
|
|
+ struct net_accel_msg *msg,
|
|
+ int pages, int offset);
|
|
+
|
|
+/*! Process a response to a buffer request.
|
|
+ *
|
|
+ * Deal with a received message from the back end in response to our
|
|
+ * request for buffers
|
|
+ *
|
|
+ * \param manager The buffer manager
|
|
+ * \param msg The received message from the back end describing new
|
|
+ * buffers
|
|
+ * \return 0 on success
|
|
+ */
|
|
+extern int
|
|
+netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages,
|
|
+ struct netfront_accel_bufinfo *manager,
|
|
+ struct net_accel_msg *msg);
|
|
+
|
|
+
|
|
+/*! Allocate a buffer from the buffer manager
|
|
+ *
|
|
+ * \param manager The buffer manager data structure
|
|
+ * \return Pointer to the buffer descriptor (its buf_id is the id of the
|
|
+ * allocated buffer), or NULL if no buffer is free.
|
|
+ */
|
|
+struct netfront_accel_pkt_desc *
|
|
+netfront_accel_buf_get(struct netfront_accel_bufinfo *manager);
|
|
+
|
|
+#endif /* NETFRONT_ACCEL_BUFS_H */
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_debugfs.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,227 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/fs.h>
|
|
+#include <linux/debugfs.h>
|
|
+
|
|
+#include "accel.h"
|
|
+
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+static struct dentry *sfc_debugfs_root = NULL;
|
|
+#endif
|
|
+
|
|
+void netfront_accel_debugfs_init(void)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+ sfc_debugfs_root = debugfs_create_dir(frontend_name, NULL);
|
|
+#endif
|
|
+}
|
|
+
|
|
+
|
|
+void netfront_accel_debugfs_fini(void)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+ if (sfc_debugfs_root)
|
|
+ debugfs_remove(sfc_debugfs_root);
|
|
+#endif
|
|
+}
|
|
+
|
|
+
|
|
+int netfront_accel_debugfs_create(netfront_accel_vnic *vnic)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+ if (sfc_debugfs_root == NULL)
|
|
+ return -ENOENT;
|
|
+
|
|
+ vnic->dbfs_dir = debugfs_create_dir(vnic->net_dev->name,
|
|
+ sfc_debugfs_root);
|
|
+ if (vnic->dbfs_dir == NULL)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ vnic->netdev_dbfs.fastpath_rx_pkts = debugfs_create_u32
|
|
+ ("fastpath_rx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_pkts);
|
|
+ vnic->netdev_dbfs.fastpath_rx_bytes = debugfs_create_u32
|
|
+ ("fastpath_rx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_bytes);
|
|
+ vnic->netdev_dbfs.fastpath_rx_errors = debugfs_create_u32
|
|
+ ("fastpath_rx_errors", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_errors);
|
|
+ vnic->netdev_dbfs.fastpath_tx_pkts = debugfs_create_u32
|
|
+ ("fastpath_tx_pkts", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_pkts);
|
|
+ vnic->netdev_dbfs.fastpath_tx_bytes = debugfs_create_u32
|
|
+ ("fastpath_tx_bytes", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_bytes);
|
|
+ vnic->netdev_dbfs.fastpath_tx_errors = debugfs_create_u32
|
|
+ ("fastpath_tx_errors", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_errors);
|
|
+
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+ vnic->dbfs.irq_count = debugfs_create_u64
|
|
+ ("irq_count", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.irq_count);
|
|
+ vnic->dbfs.useless_irq_count = debugfs_create_u64
|
|
+ ("useless_irq_count", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.useless_irq_count);
|
|
+ vnic->dbfs.poll_schedule_count = debugfs_create_u64
|
|
+ ("poll_schedule_count", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.poll_schedule_count);
|
|
+ vnic->dbfs.poll_call_count = debugfs_create_u64
|
|
+ ("poll_call_count", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.poll_call_count);
|
|
+ vnic->dbfs.poll_reschedule_count = debugfs_create_u64
|
|
+ ("poll_reschedule_count", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.poll_reschedule_count);
|
|
+ vnic->dbfs.queue_stops = debugfs_create_u64
|
|
+ ("queue_stops", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.queue_stops);
|
|
+ vnic->dbfs.queue_wakes = debugfs_create_u64
|
|
+ ("queue_wakes", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.queue_wakes);
|
|
+ vnic->dbfs.ssr_bursts = debugfs_create_u64
|
|
+ ("ssr_bursts", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.ssr_bursts);
|
|
+ vnic->dbfs.ssr_drop_stream = debugfs_create_u64
|
|
+ ("ssr_drop_stream", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.ssr_drop_stream);
|
|
+ vnic->dbfs.ssr_misorder = debugfs_create_u64
|
|
+ ("ssr_misorder", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.ssr_misorder);
|
|
+ vnic->dbfs.ssr_slow_start = debugfs_create_u64
|
|
+ ("ssr_slow_start", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.ssr_slow_start);
|
|
+ vnic->dbfs.ssr_merges = debugfs_create_u64
|
|
+ ("ssr_merges", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.ssr_merges);
|
|
+ vnic->dbfs.ssr_too_many = debugfs_create_u64
|
|
+ ("ssr_too_many", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.ssr_too_many);
|
|
+ vnic->dbfs.ssr_new_stream = debugfs_create_u64
|
|
+ ("ssr_new_stream", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.ssr_new_stream);
|
|
+
|
|
+ vnic->dbfs.fastpath_tx_busy = debugfs_create_u64
|
|
+ ("fastpath_tx_busy", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_tx_busy);
|
|
+ vnic->dbfs.fastpath_tx_completions = debugfs_create_u64
|
|
+ ("fastpath_tx_completions", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_tx_completions);
|
|
+ vnic->dbfs.fastpath_tx_pending_max = debugfs_create_u32
|
|
+ ("fastpath_tx_pending_max", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_tx_pending_max);
|
|
+ vnic->dbfs.event_count = debugfs_create_u64
|
|
+ ("event_count", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.event_count);
|
|
+ vnic->dbfs.bad_event_count = debugfs_create_u64
|
|
+ ("bad_event_count", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.bad_event_count);
|
|
+ vnic->dbfs.event_count_since_irq = debugfs_create_u32
|
|
+ ("event_count_since_irq", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.event_count_since_irq);
|
|
+ vnic->dbfs.events_per_irq_max = debugfs_create_u32
|
|
+ ("events_per_irq_max", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.events_per_irq_max);
|
|
+ vnic->dbfs.fastpath_frm_trunc = debugfs_create_u64
|
|
+ ("fastpath_frm_trunc", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_frm_trunc);
|
|
+ vnic->dbfs.fastpath_crc_bad = debugfs_create_u64
|
|
+ ("fastpath_crc_bad", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_crc_bad);
|
|
+ vnic->dbfs.fastpath_csum_bad = debugfs_create_u64
|
|
+ ("fastpath_csum_bad", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_csum_bad);
|
|
+ vnic->dbfs.fastpath_rights_bad = debugfs_create_u64
|
|
+ ("fastpath_rights_bad", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_rights_bad);
|
|
+ vnic->dbfs.fastpath_discard_other = debugfs_create_u64
|
|
+ ("fastpath_discard_other", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.fastpath_discard_other);
|
|
+ vnic->dbfs.rx_no_desc_trunc = debugfs_create_u64
|
|
+ ("rx_no_desc_trunc", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.rx_no_desc_trunc);
|
|
+ vnic->dbfs.events_per_poll_max = debugfs_create_u32
|
|
+ ("events_per_poll_max", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.events_per_poll_max);
|
|
+ vnic->dbfs.events_per_poll_rx_max = debugfs_create_u32
|
|
+ ("events_per_poll_rx_max", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.events_per_poll_rx_max);
|
|
+ vnic->dbfs.events_per_poll_tx_max = debugfs_create_u32
|
|
+ ("events_per_poll_tx_max", S_IRUSR | S_IRGRP | S_IROTH,
|
|
+ vnic->dbfs_dir, &vnic->stats.events_per_poll_tx_max);
|
|
+#endif
|
|
+#endif
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic)
|
|
+{
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+ if (vnic->dbfs_dir != NULL) {
|
|
+ debugfs_remove(vnic->netdev_dbfs.fastpath_rx_pkts);
|
|
+ debugfs_remove(vnic->netdev_dbfs.fastpath_rx_bytes);
|
|
+ debugfs_remove(vnic->netdev_dbfs.fastpath_rx_errors);
|
|
+ debugfs_remove(vnic->netdev_dbfs.fastpath_tx_pkts);
|
|
+ debugfs_remove(vnic->netdev_dbfs.fastpath_tx_bytes);
|
|
+ debugfs_remove(vnic->netdev_dbfs.fastpath_tx_errors);
|
|
+
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+ debugfs_remove(vnic->dbfs.irq_count);
|
|
+ debugfs_remove(vnic->dbfs.useless_irq_count);
|
|
+ debugfs_remove(vnic->dbfs.poll_schedule_count);
|
|
+ debugfs_remove(vnic->dbfs.poll_call_count);
|
|
+ debugfs_remove(vnic->dbfs.poll_reschedule_count);
|
|
+ debugfs_remove(vnic->dbfs.queue_stops);
|
|
+ debugfs_remove(vnic->dbfs.queue_wakes);
|
|
+ debugfs_remove(vnic->dbfs.ssr_bursts);
|
|
+ debugfs_remove(vnic->dbfs.ssr_drop_stream);
|
|
+ debugfs_remove(vnic->dbfs.ssr_misorder);
|
|
+ debugfs_remove(vnic->dbfs.ssr_slow_start);
|
|
+ debugfs_remove(vnic->dbfs.ssr_merges);
|
|
+ debugfs_remove(vnic->dbfs.ssr_too_many);
|
|
+ debugfs_remove(vnic->dbfs.ssr_new_stream);
|
|
+
|
|
+ debugfs_remove(vnic->dbfs.fastpath_tx_busy);
|
|
+ debugfs_remove(vnic->dbfs.fastpath_tx_completions);
|
|
+ debugfs_remove(vnic->dbfs.fastpath_tx_pending_max);
|
|
+ debugfs_remove(vnic->dbfs.event_count);
|
|
+ debugfs_remove(vnic->dbfs.bad_event_count);
|
|
+ debugfs_remove(vnic->dbfs.event_count_since_irq);
|
|
+ debugfs_remove(vnic->dbfs.events_per_irq_max);
|
|
+ debugfs_remove(vnic->dbfs.fastpath_frm_trunc);
|
|
+ debugfs_remove(vnic->dbfs.fastpath_crc_bad);
|
|
+ debugfs_remove(vnic->dbfs.fastpath_csum_bad);
|
|
+ debugfs_remove(vnic->dbfs.fastpath_rights_bad);
|
|
+ debugfs_remove(vnic->dbfs.fastpath_discard_other);
|
|
+ debugfs_remove(vnic->dbfs.rx_no_desc_trunc);
|
|
+ debugfs_remove(vnic->dbfs.events_per_poll_max);
|
|
+ debugfs_remove(vnic->dbfs.events_per_poll_rx_max);
|
|
+ debugfs_remove(vnic->dbfs.events_per_poll_tx_max);
|
|
+#endif
|
|
+ debugfs_remove(vnic->dbfs_dir);
|
|
+ }
|
|
+#endif
|
|
+ return 0;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_msg.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,564 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/stddef.h>
|
|
+#include <linux/errno.h>
|
|
+
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_util.h"
|
|
+#include "accel_bufs.h"
|
|
+
|
|
+#include "netfront.h" /* drivers/xen/netfront/netfront.h */
|
|
+
|
|
+static void vnic_start_interrupts(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ /* Prime our interrupt */
|
|
+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
|
|
+ if (!netfront_accel_vi_enable_interrupts(vnic)) {
|
|
+ /* Cripes, that was quick, better pass it up */
|
|
+ netfront_accel_disable_net_interrupts(vnic);
|
|
+ vnic->irq_enabled = 0;
|
|
+ NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
|
|
+ netif_rx_schedule(vnic->net_dev);
|
|
+ } else {
|
|
+ /*
|
|
+ * Nothing yet, make sure we get interrupts through
|
|
+ * back end
|
|
+ */
|
|
+ vnic->irq_enabled = 1;
|
|
+ netfront_accel_enable_net_interrupts(vnic);
|
|
+ }
|
|
+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
|
|
+}
|
|
+
|
|
+
|
|
+static void vnic_stop_interrupts(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
|
|
+ netfront_accel_disable_net_interrupts(vnic);
|
|
+ vnic->irq_enabled = 0;
|
|
+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
|
|
+}
|
|
+
|
|
+
|
|
+static void vnic_start_fastpath(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ struct net_device *net_dev = vnic->net_dev;
|
|
+ unsigned long flags;
|
|
+
|
|
+ DPRINTK("%s\n", __FUNCTION__);
|
|
+
|
|
+ spin_lock_irqsave(&vnic->tx_lock, flags);
|
|
+ vnic->tx_enabled = 1;
|
|
+ spin_unlock_irqrestore(&vnic->tx_lock, flags);
|
|
+
|
|
+ netif_poll_disable(net_dev);
|
|
+ vnic->poll_enabled = 1;
|
|
+ netif_poll_enable(net_dev);
|
|
+
|
|
+ vnic_start_interrupts(vnic);
|
|
+}
|
|
+
|
|
+
|
|
+void vnic_stop_fastpath(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ struct net_device *net_dev = vnic->net_dev;
|
|
+ struct netfront_info *np = (struct netfront_info *)netdev_priv(net_dev);
|
|
+ unsigned long flags1, flags2;
|
|
+
|
|
+ DPRINTK("%s\n", __FUNCTION__);
|
|
+
|
|
+ vnic_stop_interrupts(vnic);
|
|
+
|
|
+ spin_lock_irqsave(&vnic->tx_lock, flags1);
|
|
+ vnic->tx_enabled = 0;
|
|
+ spin_lock_irqsave(&np->tx_lock, flags2);
|
|
+ if (vnic->tx_skb != NULL) {
|
|
+ dev_kfree_skb_any(vnic->tx_skb);
|
|
+ vnic->tx_skb = NULL;
|
|
+ if (netfront_check_queue_ready(net_dev)) {
|
|
+ netif_wake_queue(net_dev);
|
|
+ NETFRONT_ACCEL_STATS_OP
|
|
+ (vnic->stats.queue_wakes++);
|
|
+ }
|
|
+ }
|
|
+ spin_unlock_irqrestore(&np->tx_lock, flags2);
|
|
+ spin_unlock_irqrestore(&vnic->tx_lock, flags1);
|
|
+
|
|
+ /* Must prevent polls and hold lock to modify poll_enabled */
|
|
+ netif_poll_disable(net_dev);
|
|
+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags1);
|
|
+ vnic->poll_enabled = 0;
|
|
+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1);
|
|
+ netif_poll_enable(net_dev);
|
|
+}
|
|
+
|
|
+
|
|
+static void netfront_accel_interface_up(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ if (!vnic->backend_netdev_up) {
|
|
+ vnic->backend_netdev_up = 1;
|
|
+
|
|
+ if (vnic->frontend_ready)
|
|
+ vnic_start_fastpath(vnic);
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+static void netfront_accel_interface_down(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ if (vnic->backend_netdev_up) {
|
|
+ vnic->backend_netdev_up = 0;
|
|
+
|
|
+ if (vnic->frontend_ready)
|
|
+ vnic_stop_fastpath(vnic);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Add the buffers described by a map-buffers message to the rx or tx pool. */
|
|
+static int vnic_add_bufs(netfront_accel_vnic *vnic,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ int rc, offset;
|
|
+ struct netfront_accel_bufinfo *bufinfo;
|
|
+
|
|
+ BUG_ON(msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ);
|
|
+ /* The request id is used as the page offset of this batch. */
|
|
+ offset = msg->u.mapbufs.reqid;
|
|
+ /* Offsets below the split point select the rx pool, the rest tx. */
|
|
+ if (offset < vnic->bufpages.max_pages -
|
|
+ (vnic->bufpages.max_pages / sfc_netfront_buffer_split)) {
|
|
+ bufinfo = vnic->rx_bufs;
|
|
+ } else
|
|
+ bufinfo = vnic->tx_bufs;
|
|
+
|
|
+ /* Add the new buffers to the selected (rx or tx) pool. */
|
|
+ if ((rc = netfront_accel_add_bufs(&vnic->bufpages, bufinfo, msg)) == 0) {
|
|
+ netfront_accel_vi_add_bufs(vnic, bufinfo == vnic->rx_bufs);
|
|
+
|
|
+ if (offset + msg->u.mapbufs.pages == vnic->bufpages.max_pages) {
|
|
+ VPRINTK("%s: got all buffers back\n", __FUNCTION__);
|
|
+ vnic->frontend_ready = 1;
|
|
+ if (vnic->backend_netdev_up)
|
|
+ vnic_start_fastpath(vnic);
|
|
+ } else {
|
|
+ VPRINTK("%s: got buffers back %d %d\n", __FUNCTION__,
|
|
+ offset, msg->u.mapbufs.pages);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+
|
|
+/* The largest [o] such that (1u << o) <= n. Requires n > 0. */
|
|
+
|
|
+inline unsigned log2_le(unsigned long n) {
|
|
+ unsigned order = 1;
|
|
+ while ((1ul << order) <= n) ++order;
|
|
+ return (order - 1);
|
|
+}
|
|
+
|
|
+static int vnic_send_buffer_requests(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_bufpages *bufpages)
|
|
+{
|
|
+ int pages, offset, rc = 0, sent = 0;
|
|
+ struct net_accel_msg msg;
|
|
+
|
|
+ while (bufpages->page_reqs < bufpages->max_pages) {
|
|
+ offset = bufpages->page_reqs;
|
|
+
|
|
+ pages = pow2(log2_le(bufpages->max_pages -
|
|
+ bufpages->page_reqs));
|
|
+ pages = pages < NET_ACCEL_MSG_MAX_PAGE_REQ ?
|
|
+ pages : NET_ACCEL_MSG_MAX_PAGE_REQ;
|
|
+
|
|
+ BUG_ON(offset < 0);
|
|
+ BUG_ON(pages <= 0);
|
|
+
|
|
+ rc = netfront_accel_buf_map_request(vnic->dev, bufpages,
|
|
+ &msg, pages, offset);
|
|
+ if (rc == 0) {
|
|
+ rc = net_accel_msg_send(vnic->shared_page,
|
|
+ &vnic->to_dom0, &msg);
|
|
+ if (rc < 0) {
|
|
+ VPRINTK("%s: queue full, stopping for now\n",
|
|
+ __FUNCTION__);
|
|
+ break;
|
|
+ }
|
|
+ sent++;
|
|
+ } else {
|
|
+ EPRINTK("%s: problem with grant, stopping for now\n",
|
|
+ __FUNCTION__);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ bufpages->page_reqs += pages;
|
|
+ }
|
|
+
|
|
+ if (sent)
|
|
+ net_accel_msg_notify(vnic->msg_channel_irq);
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * In response to dom0 saying "my queue is full", we reply with this
|
|
+ * when it is no longer full
|
|
+ */
|
|
+inline void vnic_set_queue_not_full(netfront_accel_vnic *vnic)
|
|
+{
|
|
+	/* Notify the remote end only if this call is the one that set the bit. */
|
|
+	if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B,
|
|
+			      (unsigned long *)&vnic->shared_page->aflags))
|
|
+		notify_remote_via_irq(vnic->msg_channel_irq);
|
|
+	else
|
|
+		VPRINTK("queue not full bit already set, not signalling\n");
|
|
+}
|
+}
|
|
+
|
|
+/*
|
|
+ * Notify dom0 that the queue we want to use is full, it should
|
|
+ * respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course
|
|
+ */
|
|
+inline void vnic_set_queue_full(netfront_accel_vnic *vnic)
|
|
+{
|
|
+
|
|
+ if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
|
|
+ (unsigned long *)&vnic->shared_page->aflags))
|
|
+ notify_remote_via_irq(vnic->msg_channel_irq);
|
|
+ else
|
|
+ VPRINTK("queue full bit already set, not signalling\n");
|
|
+}
|
|
+
|
|
+
|
|
+static int vnic_check_hello_version(unsigned version)
|
|
+{
|
|
+ if (version > NET_ACCEL_MSG_VERSION) {
|
|
+ /* Newer protocol, we must refuse */
|
|
+ return -EPROTO;
|
|
+ }
|
|
+
|
|
+ if (version < NET_ACCEL_MSG_VERSION) {
|
|
+ /*
|
|
+ * We are newer, so have discretion to accept if we
|
|
+ * wish. For now however, just reject
|
|
+ */
|
|
+ return -EPROTO;
|
|
+ }
|
|
+
|
|
+ BUG_ON(version != NET_ACCEL_MSG_VERSION);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static int vnic_process_hello_msg(netfront_accel_vnic *vnic,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ int err = 0;
|
|
+ unsigned pages = sfc_netfront_max_pages;
|
|
+
|
|
+ if (vnic_check_hello_version(msg->u.hello.version) < 0) {
|
|
+ msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY
|
|
+ | NET_ACCEL_MSG_ERROR;
|
|
+ msg->u.hello.version = NET_ACCEL_MSG_VERSION;
|
|
+ } else {
|
|
+ vnic->backend_netdev_up
|
|
+ = vnic->shared_page->net_dev_up;
|
|
+
|
|
+ msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY;
|
|
+ msg->u.hello.version = NET_ACCEL_MSG_VERSION;
|
|
+ if (msg->u.hello.max_pages &&
|
|
+ msg->u.hello.max_pages < pages)
|
|
+ pages = msg->u.hello.max_pages;
|
|
+ msg->u.hello.max_pages = pages;
|
|
+
|
|
+ /* Half of pages for rx, half for tx */
|
|
+ err = netfront_accel_alloc_buffer_mem(&vnic->bufpages,
|
|
+ vnic->rx_bufs,
|
|
+ vnic->tx_bufs,
|
|
+ pages);
|
|
+ if (err)
|
|
+ msg->id |= NET_ACCEL_MSG_ERROR;
|
|
+ }
|
|
+
|
|
+ /* Send reply */
|
|
+ net_accel_msg_reply_notify(vnic->shared_page, vnic->msg_channel_irq,
|
|
+ &vnic->to_dom0, msg);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+static int vnic_process_localmac_msg(netfront_accel_vnic *vnic,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ cuckoo_hash_mac_key key;
|
|
+
|
|
+ if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
|
|
+ DPRINTK("MAC has moved, could be local: " MAC_FMT "\n",
|
|
+ MAC_ARG(msg->u.localmac.mac));
|
|
+ key = cuckoo_mac_to_key(msg->u.localmac.mac);
|
|
+ spin_lock_irqsave(&vnic->table_lock, flags);
|
|
+ /* Try to remove it, not a big deal if not there */
|
|
+ cuckoo_hash_remove(&vnic->fastpath_table,
|
|
+ (cuckoo_hash_key *)&key);
|
|
+ spin_unlock_irqrestore(&vnic->table_lock, flags);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static
|
|
+int vnic_process_rx_msg(netfront_accel_vnic *vnic,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ switch (msg->id) {
|
|
+ case NET_ACCEL_MSG_HELLO:
|
|
+ /* Hello, reply with Reply */
|
|
+ DPRINTK("got Hello, with version %.8x\n",
|
|
+ msg->u.hello.version);
|
|
+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_NONE);
|
|
+ err = vnic_process_hello_msg(vnic, msg);
|
|
+ if (err == 0)
|
|
+ vnic->msg_state = NETFRONT_ACCEL_MSG_HELLO;
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_SETHW:
|
|
+ /* Hardware info message */
|
|
+ DPRINTK("got H/W info\n");
|
|
+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HELLO);
|
|
+ err = netfront_accel_vi_init(vnic, &msg->u.hw);
|
|
+ if (err == 0)
|
|
+ vnic->msg_state = NETFRONT_ACCEL_MSG_HW;
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY:
|
|
+ VPRINTK("Got mapped buffers back\n");
|
|
+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
|
|
+ err = vnic_add_bufs(vnic, msg);
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR:
|
|
+ /* No buffers. Can't use the fast path. */
|
|
+ EPRINTK("Got mapped buffers error. Cannot accelerate.\n");
|
|
+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
|
|
+ err = -EIO;
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_LOCALMAC:
|
|
+ /* Should be add, remove not currently used */
|
|
+ EPRINTK_ON(!(msg->u.localmac.flags & NET_ACCEL_MSG_ADD));
|
|
+ BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW);
|
|
+ err = vnic_process_localmac_msg(vnic, msg);
|
|
+ break;
|
|
+ default:
|
|
+ EPRINTK("Huh? Message code is 0x%x\n", msg->id);
|
|
+ err = -EPROTO;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/* Process an IRQ received from back end driver */
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
+void netfront_accel_msg_from_bend(struct work_struct *context)
|
|
+#else
|
|
+void netfront_accel_msg_from_bend(void *context)
|
|
+#endif
|
|
+{
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
+ netfront_accel_vnic *vnic =
|
|
+ container_of(context, netfront_accel_vnic, msg_from_bend);
|
|
+#else
|
|
+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
|
|
+#endif
|
|
+ struct net_accel_msg msg;
|
|
+ int err, queue_was_full = 0;
|
|
+
|
|
+ mutex_lock(&vnic->vnic_mutex);
|
|
+
|
|
+ /*
|
|
+ * This happens when the shared pages have been unmapped but
|
|
+ * the workqueue has yet to be flushed
|
|
+ */
|
|
+ if (!vnic->dom0_state_is_setup)
|
|
+ goto unlock_out;
|
|
+
|
|
+ while ((vnic->shared_page->aflags & NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK)
|
|
+ != 0) {
|
|
+ if (vnic->shared_page->aflags &
|
|
+ NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL) {
|
|
+ /* We've been told there may now be space. */
|
|
+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B,
|
|
+ (unsigned long *)&vnic->shared_page->aflags);
|
|
+ }
|
|
+
|
|
+ if (vnic->shared_page->aflags &
|
|
+ NET_ACCEL_MSG_AFLAGS_QUEUE0FULL) {
|
|
+ /*
|
|
+ * There will be space at the end of this
|
|
+ * function if we can make any.
|
|
+ */
|
|
+ clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B,
|
|
+ (unsigned long *)&vnic->shared_page->aflags);
|
|
+ queue_was_full = 1;
|
|
+ }
|
|
+
|
|
+ if (vnic->shared_page->aflags &
|
|
+ NET_ACCEL_MSG_AFLAGS_NETUPDOWN) {
|
|
+ DPRINTK("%s: net interface change\n", __FUNCTION__);
|
|
+ clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B,
|
|
+ (unsigned long *)&vnic->shared_page->aflags);
|
|
+ if (vnic->shared_page->net_dev_up)
|
|
+ netfront_accel_interface_up(vnic);
|
|
+ else
|
|
+ netfront_accel_interface_down(vnic);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Pull msg out of shared memory */
|
|
+ while ((err = net_accel_msg_recv(vnic->shared_page, &vnic->from_dom0,
|
|
+ &msg)) == 0) {
|
|
+ err = vnic_process_rx_msg(vnic, &msg);
|
|
+
|
|
+ if (err != 0)
|
|
+ goto done;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Send any pending buffer map request messages that we can,
|
|
+ * and mark domU->dom0 as full if necessary.
|
|
+ */
|
|
+ if (vnic->msg_state == NETFRONT_ACCEL_MSG_HW &&
|
|
+ vnic->bufpages.page_reqs < vnic->bufpages.max_pages) {
|
|
+ if (vnic_send_buffer_requests(vnic, &vnic->bufpages) == -ENOSPC)
|
|
+ vnic_set_queue_full(vnic);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * If there are no messages then this is not an error. It
|
|
+ * just means that we've finished processing the queue.
|
|
+ */
|
|
+ if (err == -ENOENT)
|
|
+ err = 0;
|
|
+ done:
|
|
+ /* We will now have made space in the dom0->domU queue if we can */
|
|
+ if (queue_was_full)
|
|
+ vnic_set_queue_not_full(vnic);
|
|
+
|
|
+ if (err != 0) {
|
|
+ EPRINTK("%s returned %d\n", __FUNCTION__, err);
|
|
+ netfront_accel_set_closing(vnic);
|
|
+ }
|
|
+
|
|
+ unlock_out:
|
|
+ mutex_unlock(&vnic->vnic_mutex);
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+
|
|
+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
|
|
+ struct pt_regs *unused)
|
|
+{
|
|
+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
|
|
+ VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
|
|
+
|
|
+ queue_work(netfront_accel_workqueue, &vnic->msg_from_bend);
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+/* Process an interrupt received from the NIC via backend */
|
|
+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
|
|
+ struct pt_regs *unused)
|
|
+{
|
|
+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
|
|
+ struct net_device *net_dev = vnic->net_dev;
|
|
+ unsigned long flags;
|
|
+
|
|
+ VPRINTK("net irq %d from device %s\n", irq, vnic->dev->nodename);
|
|
+
|
|
+ NETFRONT_ACCEL_STATS_OP(vnic->stats.irq_count++);
|
|
+
|
|
+ BUG_ON(net_dev==NULL);
|
|
+
|
|
+ spin_lock_irqsave(&vnic->irq_enabled_lock, flags);
|
|
+ if (vnic->irq_enabled) {
|
|
+ netfront_accel_disable_net_interrupts(vnic);
|
|
+ vnic->irq_enabled = 0;
|
|
+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
|
|
+
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+ vnic->stats.poll_schedule_count++;
|
|
+ if (vnic->stats.event_count_since_irq >
|
|
+ vnic->stats.events_per_irq_max)
|
|
+ vnic->stats.events_per_irq_max =
|
|
+ vnic->stats.event_count_since_irq;
|
|
+ vnic->stats.event_count_since_irq = 0;
|
|
+#endif
|
|
+ netif_rx_schedule(net_dev);
|
|
+ }
|
|
+ else {
|
|
+ spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
|
|
+ NETFRONT_ACCEL_STATS_OP(vnic->stats.useless_irq_count++);
|
|
+ DPRINTK("%s: irq when disabled\n", __FUNCTION__);
|
|
+ }
|
|
+
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+
|
|
+void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac,
|
|
+ u32 ip, u16 port, u8 protocol)
|
|
+{
|
|
+ unsigned long lock_state;
|
|
+ struct net_accel_msg *msg;
|
|
+
|
|
+ msg = net_accel_msg_start_send(vnic->shared_page, &vnic->to_dom0,
|
|
+ &lock_state);
|
|
+
|
|
+ if (msg == NULL)
|
|
+ return;
|
|
+
|
|
+ net_accel_msg_init(msg, NET_ACCEL_MSG_FASTPATH);
|
|
+ msg->u.fastpath.flags = NET_ACCEL_MSG_REMOVE;
|
|
+ memcpy(msg->u.fastpath.mac, mac, ETH_ALEN);
|
|
+
|
|
+ msg->u.fastpath.port = port;
|
|
+ msg->u.fastpath.ip = ip;
|
|
+ msg->u.fastpath.proto = protocol;
|
|
+
|
|
+ net_accel_msg_complete_send_notify(vnic->shared_page, &vnic->to_dom0,
|
|
+ &lock_state, vnic->msg_channel_irq);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_netfront.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,328 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/netdevice.h>
|
|
+
|
|
+/* drivers/xen/netfront/netfront.h */
|
|
+#include "netfront.h"
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_bufs.h"
|
|
+#include "accel_util.h"
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_ssr.h"
|
|
+
|
|
+#ifdef EFX_GCOV
|
|
+#include "gcov.h"
|
|
+#endif
|
|
+
|
|
+/*
+ * Fetch the accelerator vnic stored in the accel_priv field of the
+ * netfront private data belonging to net device _nd.
+ */
+#define NETFRONT_ACCEL_VNIC_FROM_NETDEV(_nd)				\
+	((netfront_accel_vnic *)((struct netfront_info *)netdev_priv(_nd))->accel_priv)
|
|
+
|
|
+/*
+ * Transmit hook: try to post skb with netfront_accel_vi_tx_post().
+ *
+ * Returns 0 if transmit is disabled on the vnic or the post reported
+ * NETFRONT_ACCEL_STATUS_CANT, and 1 for any other status.  On
+ * NETFRONT_ACCEL_STATUS_BUSY the skb is parked in vnic->tx_skb and the
+ * net device queue is stopped.
+ */
+static int netfront_accel_netdev_start_xmit(struct sk_buff *skb,
+					    struct net_device *net_dev)
+{
+	netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+	struct netfront_info *info =
+		(struct netfront_info *)netdev_priv(net_dev);
+	unsigned long vnic_flags, info_flags;
+	int status;
+	int rc = 0;
+
+	BUG_ON(vnic == NULL);
+
+	/* The vnic tx lock is held for the whole attempt */
+	spin_lock_irqsave(&vnic->tx_lock, vnic_flags);
+
+	if (vnic->tx_enabled) {
+		status = netfront_accel_vi_tx_post(vnic, skb);
+
+		if (status == NETFRONT_ACCEL_STATUS_BUSY) {
+			BUG_ON(vnic->net_dev != net_dev);
+			DPRINTK("%s stopping queue\n", __FUNCTION__);
+
+			/*
+			 * tx_skb is written with both netfront's tx_lock
+			 * and the vnic tx_lock held
+			 */
+			spin_lock_irqsave(&info->tx_lock, info_flags);
+			BUG_ON(vnic->tx_skb != NULL);
+			vnic->tx_skb = skb;
+			netif_stop_queue(net_dev);
+			spin_unlock_irqrestore(&info->tx_lock, info_flags);
+
+			NETFRONT_ACCEL_STATS_OP(vnic->stats.queue_stops++);
+		}
+
+		rc = (status != NETFRONT_ACCEL_STATUS_CANT);
+	}
+
+	spin_unlock_irqrestore(&vnic->tx_lock, vnic_flags);
+
+	return rc;
+}
|
|
+
|
|
+
|
|
+/*
+ * Poll hook: run netfront_accel_vi_poll() with *budget as the
+ * allowance and subtract the amount it reports done from *budget.
+ *
+ * Returns 1 when the whole allowance was consumed (more to do),
+ * 0 otherwise, including when polling is disabled on the vnic.
+ */
+static int netfront_accel_netdev_poll(struct net_device *net_dev, int *budget)
+{
+	netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+	int quota, completed;
+
+	BUG_ON(vnic == NULL);
+
+	/* Unlocked check is safe: whoever modifies the flag excludes polls */
+	if (!vnic->poll_enabled)
+		return 0;
+
+	quota = *budget;
+	completed = netfront_accel_vi_poll(vnic, quota);
+	*budget -= completed;
+
+	NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_call_count++);
+
+	VPRINTK("%s: done %d allowed %d\n",
+		__FUNCTION__, completed, quota);
+
+	netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state);
+
+	if (completed >= quota) {
+		/* Allowance used up, so ask to be polled again */
+		NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_reschedule_count++);
+		return 1;
+	}
+
+	return 0; /* Done */
+}
|
|
+
|
|
+
|
|
+/*
+ * Handle netfront's request to start napi interrupt mode, i.e. to
+ * enable interrupts because it has finished polling.
+ *
+ * Returns 1 if netfront_accel_vi_enable_interrupts() returned zero
+ * (there was something to do), otherwise 0.
+ */
+static int netfront_accel_start_napi_interrupts(struct net_device *net_dev)
+{
+	netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+	unsigned long irq_flags;
+
+	BUG_ON(vnic == NULL);
+
+	/* No lock needed: the writer excludes poll before modifying this */
+	if (!vnic->poll_enabled)
+		return 0;
+
+	/* Something was already there; let the caller know it has work */
+	if (!netfront_accel_vi_enable_interrupts(vnic))
+		return 1;
+
+	spin_lock_irqsave(&vnic->irq_enabled_lock, irq_flags);
+	vnic->irq_enabled = 1;
+	netfront_accel_enable_net_interrupts(vnic);
+	spin_unlock_irqrestore(&vnic->irq_enabled_lock, irq_flags);
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Handle netfront's request to stop napi interrupt mode (i.e. disable
+ * interrupts as it's starting to poll).  Nothing is changed when
+ * polling is not enabled on the vnic.
+ */
+static void netfront_accel_stop_napi_interrupts(struct net_device *net_dev)
+{
+	netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+	unsigned long irq_flags;
+
+	BUG_ON(vnic == NULL);
+
+	spin_lock_irqsave(&vnic->irq_enabled_lock, irq_flags);
+
+	if (vnic->poll_enabled) {
+		netfront_accel_disable_net_interrupts(vnic);
+		vnic->irq_enabled = 0;
+	}
+
+	spin_unlock_irqrestore(&vnic->irq_enabled_lock, irq_flags);
+}
|
|
+
|
|
+
|
|
+/* Returns non-zero when the vnic is not holding a parked tx_skb */
+static int netfront_accel_check_ready(struct net_device *net_dev)
+{
+	netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+
+	BUG_ON(vnic == NULL);
+
+	/* netfront's tx_lock protects this read of tx_skb */
+	return !vnic->tx_skb;
+}
|
|
+
|
|
+
|
|
+/*
+ * Add to *stats the growth of the vnic's fastpath counters since the
+ * previous call, then remember the current counters for next time.
+ * Always returns 0.
+ */
+static int netfront_accel_get_stats(struct net_device *net_dev,
+				    struct net_device_stats *stats)
+{
+	netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev);
+	struct netfront_accel_netdev_stats snap;
+	struct netfront_accel_netdev_stats *prev;
+
+	BUG_ON(vnic == NULL);
+
+	prev = &vnic->stats_last_read;
+
+	/* Take a snapshot of the live counters first */
+	snap.fastpath_rx_pkts   = vnic->netdev_stats.fastpath_rx_pkts;
+	snap.fastpath_rx_bytes  = vnic->netdev_stats.fastpath_rx_bytes;
+	snap.fastpath_rx_errors = vnic->netdev_stats.fastpath_rx_errors;
+	snap.fastpath_tx_pkts   = vnic->netdev_stats.fastpath_tx_pkts;
+	snap.fastpath_tx_bytes  = vnic->netdev_stats.fastpath_tx_bytes;
+	snap.fastpath_tx_errors = vnic->netdev_stats.fastpath_tx_errors;
+
+	/* Receive side deltas */
+	stats->rx_packets += (snap.fastpath_rx_pkts - prev->fastpath_rx_pkts);
+	stats->rx_bytes   += (snap.fastpath_rx_bytes - prev->fastpath_rx_bytes);
+	stats->rx_errors  += (snap.fastpath_rx_errors -
+			      prev->fastpath_rx_errors);
+
+	/* Transmit side deltas */
+	stats->tx_packets += (snap.fastpath_tx_pkts - prev->fastpath_tx_pkts);
+	stats->tx_bytes   += (snap.fastpath_tx_bytes - prev->fastpath_tx_bytes);
+	stats->tx_errors  += (snap.fastpath_tx_errors -
+			      prev->fastpath_tx_errors);
+
+	*prev = snap;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* Callback table passed to netfront_accelerator_loaded() at module init */
+struct netfront_accel_hooks accel_hooks = {
+	.new_device	= netfront_accel_probe,
+	.remove		= netfront_accel_remove,
+	.netdev_poll	= netfront_accel_netdev_poll,
+	.start_xmit	= netfront_accel_netdev_start_xmit,
+	.start_napi_irq	= netfront_accel_start_napi_interrupts,
+	.stop_napi_irq	= netfront_accel_stop_napi_interrupts,
+	.check_ready	= netfront_accel_check_ready,
+	.get_stats	= netfront_accel_get_stats,
+};
|
|
+
|
|
+
|
|
+/* "max_pages" module parameter */
+unsigned int sfc_netfront_max_pages = NETFRONT_ACCEL_DEFAULT_BUF_PAGES;
+module_param_named(max_pages, sfc_netfront_max_pages, uint, 0644);
+MODULE_PARM_DESC(max_pages, "Number of buffer pages to request");
+
+/* "buffer_split" module parameter */
+unsigned int sfc_netfront_buffer_split = 2;
+module_param_named(buffer_split, sfc_netfront_buffer_split, uint, 0644);
+MODULE_PARM_DESC(buffer_split,
+		 "Fraction of buffers to use for TX, rest for RX");
+
+
+/* Name used for the workqueue and when (un)registering with netfront */
+const char *frontend_name = "sfc_netfront";
+
+/* Created in netfront_accel_init(), destroyed in netfront_accel_exit() */
+struct workqueue_struct *netfront_accel_workqueue;
|
|
+
|
|
+/*
+ * Module entry point.
+ *
+ * Does nothing (and returns 0) in the initial Xen domain.  Otherwise it
+ * creates the driver workqueue, sets up debugfs and registers
+ * accel_hooks with netfront through netfront_accelerator_loaded().
+ *
+ * Returns 0 on success, -ENOMEM if the workqueue cannot be created and
+ * -EINVAL if netfront rejects the accelerator version.
+ */
+static int __init netfront_accel_init(void)
+{
+	int rc;
+#ifdef EFX_GCOV
+	gcov_provider_init(THIS_MODULE);
+#endif
+
+	/*
+	 * If we're running on dom0, netfront hasn't initialised
+	 * itself, so we need to keep away
+	 */
+	if (is_initial_xendomain())
+		return 0;
+
+	if (!is_pow2(sizeof(struct net_accel_msg)))
+		EPRINTK("%s: bad structure size\n", __FUNCTION__);
+
+	netfront_accel_workqueue = create_workqueue(frontend_name);
+	if (netfront_accel_workqueue == NULL) {
+		/* Nothing else has been set up yet; skip the full unwind */
+		EPRINTK("%s: failed to create workqueue\n", __FUNCTION__);
+		rc = -ENOMEM;
+		goto fail_wq;
+	}
+
+	netfront_accel_debugfs_init();
+
+	rc = netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION,
+					 frontend_name, &accel_hooks);
+
+	if (rc != 0) {
+		/*
+		 * Both negative and positive results are treated as a
+		 * version mismatch.  In future may want to add backwards
+		 * compatibility and accept certain subsets of previous
+		 * versions (the rc > 0 case).
+		 */
+		EPRINTK("Xen netfront accelerator version mismatch\n");
+		rc = -EINVAL;
+		goto fail;
+	}
+
+	return 0;
+
+ fail:
+	netfront_accel_debugfs_fini();
+	flush_workqueue(netfront_accel_workqueue);
+	destroy_workqueue(netfront_accel_workqueue);
+ fail_wq:
+#ifdef EFX_GCOV
+	gcov_provider_fini(THIS_MODULE);
+#endif
+	return rc;
+}
+module_init(netfront_accel_init);
|
|
+
|
|
+/*
+ * Module exit point: detach from netfront, then tear down debugfs and
+ * the workqueue.  Nothing is done in the initial Xen domain.
+ */
+static void __exit netfront_accel_exit(void)
+{
+	if (is_initial_xendomain())
+		return;
+
+	DPRINTK("%s: unhooking\n", __FUNCTION__);
+
+	/* Detach from the ordinary netfront driver */
+	netfront_accelerator_stop(frontend_name);
+
+	DPRINTK("%s: done\n", __FUNCTION__);
+
+	netfront_accel_debugfs_fini();
+
+	/* Drain the workqueue before destroying it */
+	flush_workqueue(netfront_accel_workqueue);
+	destroy_workqueue(netfront_accel_workqueue);
+
+#ifdef EFX_GCOV
+	gcov_provider_fini(THIS_MODULE);
+#endif
+}
+module_exit(netfront_accel_exit);
|
|
+
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_ssr.c 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,308 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/socket.h>
|
|
+#include <linux/in.h>
|
|
+#include <linux/ip.h>
|
|
+#include <linux/tcp.h>
|
|
+#include <linux/list.h>
|
|
+#include <net/ip.h>
|
|
+#include <net/checksum.h>
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_util.h"
|
|
+#include "accel_bufs.h"
|
|
+
|
|
+#include "accel_ssr.h"
|
|
+
|
|
+/* Returns 1 if the list head's next pointer is non-NULL, else 0 */
+static inline int list_valid(struct list_head *lh)
+{
+	return lh->next ? 1 : 0;
+}
|
|
+
|
|
+static void netfront_accel_ssr_deliver (struct netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_ssr_state *st,
|
|
+ struct netfront_accel_ssr_conn *c);
|
|
+
|
|
+/** Construct the SSR state.
+ *
+ * Initialises both connection lists and pre-allocates up to eight
+ * connection entries onto the free list; allocation stops early at
+ * the first kmalloc() failure.
+ *
+ * @v st         The SSR state
+ */
+void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st)
+{
+	struct netfront_accel_ssr_conn *conn;
+	unsigned remaining;
+
+	INIT_LIST_HEAD(&st->conns);
+	INIT_LIST_HEAD(&st->free_conns);
+
+	for (remaining = 8; remaining > 0; --remaining) {
+		conn = kmalloc(sizeof(*conn), GFP_KERNEL);
+		if (!conn)
+			break;
+
+		conn->skb = NULL;
+		conn->n_in_order_pkts = 0;
+		list_add(&conn->link, &st->free_conns);
+	}
+}
|
|
+
|
|
+
|
|
+/** Destroy the SSR state.
+ *
+ * Frees every entry on the free and active connection lists.  An skb
+ * still held by an active connection is delivered before its entry is
+ * freed.
+ *
+ * @v vnic       The VNIC, passed through to delivery
+ * @v st         The SSR state
+ */
+void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
+			     struct netfront_accel_ssr_state *st)
+{
+	struct netfront_accel_ssr_conn *conn;
+
+	/* Both lists must agree on whether init has run */
+	BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns));
+
+	/* Nothing to do if netfront_accel_ssr_init() was never called */
+	if (!list_valid(&st->conns))
+		return;
+
+	/* Entries on the free list must not be holding an skb */
+	while (!list_empty(&st->free_conns)) {
+		conn = list_entry(st->free_conns.prev,
+				  struct netfront_accel_ssr_conn, link);
+		list_del(&conn->link);
+		BUG_ON(conn->skb != NULL);
+		kfree(conn);
+	}
+
+	/* Active entries: push out anything held, then free */
+	while (!list_empty(&st->conns)) {
+		conn = list_entry(st->conns.prev,
+				  struct netfront_accel_ssr_conn, link);
+		list_del(&conn->link);
+		if (conn->skb != NULL)
+			netfront_accel_ssr_deliver(vnic, st, conn);
+		kfree(conn);
+	}
+}
|
|
+
|
|
+
|
|
+/** Recalculate the IP checksum if needed and deliver to the OS
+ *
+ * The connection must be holding an skb; afterwards it holds none.
+ *
+ * @v vnic       The VNIC (statistics only)
+ * @v st         The SSR state
+ * @v c          The SSR connection state
+ */
+static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic,
+				       struct netfront_accel_ssr_state *st,
+				       struct netfront_accel_ssr_conn *c)
+{
+	struct sk_buff *held = c->skb;
+	struct iphdr *ip;
+
+	BUG_ON(held == NULL);
+
+	/*
+	 * A non-empty frag_list means packets were chained together, so
+	 * the IP header changed and its checksum has to be redone.
+	 */
+	if (skb_shinfo(held)->frag_list) {
+		ip = c->iph;
+		NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts);
+		ip->check = 0;
+		ip->check = ip_fast_csum((unsigned char *) ip, ip->ihl);
+	}
+
+	VPRINTK("%s: %d\n", __FUNCTION__, held->len);
+
+	netif_receive_skb(held);
+	c->skb = NULL;
+}
|
|
+
|
|
+
|
|
+/** Push held skbs down into network stack.
+ *
+ * @v vnic       The VNIC
+ * @v st         SSR state
+ *
+ * Only called if we are tracking one or more connections.  After
+ * delivery, the entry at the tail of the connection list is moved to
+ * the free list if it has been idle for too long.
+ */
+void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
+				       struct netfront_accel_ssr_state *st)
+{
+	struct netfront_accel_ssr_conn *conn, *tail;
+
+	BUG_ON(list_empty(&st->conns));
+
+	list_for_each_entry(conn, &st->conns, link) {
+		if (conn->skb != NULL)
+			netfront_accel_ssr_deliver(vnic, st, conn);
+	}
+
+	/* Time-out connections that have received no traffic for 20ms. */
+	tail = list_entry(st->conns.prev, struct netfront_accel_ssr_conn,
+			  link);
+	if (jiffies - tail->last_pkt_jiffies <= (HZ / 50 + 1))
+		return;
+
+	NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream);
+	list_del(&tail->link);
+	list_add(&tail->link, &st->free_conns);
+}
|
|
+
|
|
+
|
|
+/** Process SKB and decide whether to dispatch it to the stack now or
+ * later.
+ *
+ * @v vnic       The VNIC (statistics and delivery)
+ * @v st         SSR state
+ * @v skb        SKB to examine
+ * @ret rc       0 => deliver SKB to kernel now, otherwise the SKB belongs
+ *               to us.
+ */
+int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic,
+			   struct netfront_accel_ssr_state *st,
+			   struct sk_buff *skb) {
+	int data_length, dont_merge;
+	struct netfront_accel_ssr_conn *c;
+	struct iphdr *iph;
+	struct tcphdr *th;
+	unsigned th_seq;
+
+	BUG_ON(skb_shinfo(skb)->frag_list != NULL);
+	BUG_ON(skb->next != NULL);
+
+	/* We're not interested if it isn't TCP over IPv4. */
+	iph = (struct iphdr *) skb->data;
+	if (skb->protocol != htons(ETH_P_IP) ||
+	    iph->protocol != IPPROTO_TCP) {
+		return 0;
+	}
+
+	/* Ignore segments that fail csum or are fragmented. */
+	if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) |
+		     (iph->frag_off & htons(IP_MF | IP_OFFSET)))) {
+		return 0;
+	}
+
+	th = (struct tcphdr*)(skb->data + iph->ihl * 4);
+	data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4;
+	th_seq = ntohl(th->seq);
+	/* Never merge empty segments or ones carrying URG, SYN or RST */
+	dont_merge = (data_length == 0) | th->urg | th->syn | th->rst;
+
+	list_for_each_entry(c, &st->conns, link) {
+		/* Non-zero if any part of the address/port tuple differs */
+		if ((c->saddr  - iph->saddr) |
+		    (c->daddr  - iph->daddr) |
+		    (c->source - th->source) |
+		    (c->dest   - th->dest  ))
+			continue;
+
+		/* Re-insert at head of list to reduce lookup time. */
+		list_del(&c->link);
+		list_add(&c->link, &st->conns);
+		c->last_pkt_jiffies = jiffies;
+
+		if (unlikely(th_seq - c->next_seq)) {
+			/* Out-of-order, so start counting again. */
+			if (c->skb)
+				netfront_accel_ssr_deliver(vnic, st, c);
+			c->n_in_order_pkts = 0;
+			c->next_seq = th_seq + data_length;
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder);
+			return 0;
+		}
+		c->next_seq = th_seq + data_length;
+
+		if (++c->n_in_order_pkts < 300) {
+			/* May be in slow-start, so don't merge. */
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start);
+			return 0;
+		}
+
+		if (unlikely(dont_merge)) {
+			if (c->skb)
+				netfront_accel_ssr_deliver(vnic, st, c);
+			return 0;
+		}
+
+		if (c->skb) {
+			/* Grow the held packet's IP length by the new payload */
+			c->iph->tot_len = ntohs(c->iph->tot_len);
+			c->iph->tot_len += data_length;
+			c->iph->tot_len = htons(c->iph->tot_len);
+			c->th->ack_seq = th->ack_seq;
+			c->th->fin |= th->fin;
+			c->th->psh |= th->psh;
+			c->th->window = th->window;
+
+			/* Remove the headers from this skb. */
+			skb_pull(skb, skb->len - data_length);
+
+			/*
+			 * Tack the new skb onto the head skb's frag_list.
+			 * This is exactly the format that fragmented IP
+			 * datagrams are reassembled into.
+			 */
+			BUG_ON(skb->next != 0);
+			if ( ! skb_shinfo(c->skb)->frag_list)
+				skb_shinfo(c->skb)->frag_list = skb;
+			else
+				c->skb_tail->next = skb;
+			c->skb_tail = skb;
+			c->skb->len += skb->len;
+			c->skb->data_len += skb->len;
+			c->skb->truesize += skb->truesize;
+
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges);
+
+			/*
+			 * If the next packet might push this super-packet
+			 * over the limit for an IP packet, deliver it now.
+			 * This is slightly conservative, but close enough.
+			 */
+			if (c->skb->len +
+			    (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE)
+			    > 16384)
+				netfront_accel_ssr_deliver(vnic, st, c);
+
+			return 1;
+		}
+		else {
+			/* First packet to be held for this connection */
+			c->iph = iph;
+			c->th = th;
+			c->skb = skb;
+			return 1;
+		}
+	}
+
+	/* We're not yet tracking this connection. */
+
+	if (dont_merge) {
+		return 0;
+	}
+
+	if (list_empty(&st->free_conns)) {
+		/*
+		 * No spare entry, so try to recycle the tail of the active
+		 * list.  If that list is empty as well (every allocation in
+		 * netfront_accel_ssr_init() failed) there is no entry at
+		 * all, and list_entry() on the bare list head would yield
+		 * a bogus pointer.
+		 */
+		if (list_empty(&st->conns))
+			return 0;
+
+		c = list_entry(st->conns.prev,
+			       struct netfront_accel_ssr_conn,
+			       link);
+		if (c->skb) {
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many);
+			return 0;
+		}
+	}
+	else {
+		c = list_entry(st->free_conns.next,
+			       struct netfront_accel_ssr_conn,
+			       link);
+	}
+	list_del(&c->link);
+	list_add(&c->link, &st->conns);
+	c->saddr = iph->saddr;
+	c->daddr = iph->daddr;
+	c->source = th->source;
+	c->dest = th->dest;
+	c->next_seq = th_seq + data_length;
+	c->n_in_order_pkts = 0;
+	/*
+	 * Stamp the entry now so that the idle time-out in
+	 * __netfront_accel_ssr_end_of_burst() does not read an
+	 * uninitialised or stale value for a freshly tracked stream.
+	 */
+	c->last_pkt_jiffies = jiffies;
+	BUG_ON(c->skb != NULL);
+	NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream);
+	return 0;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_ssr.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,88 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NETFRONT_ACCEL_SSR_H
|
|
+#define NETFRONT_ACCEL_SSR_H
|
|
+
|
|
+#include <linux/skbuff.h>
|
|
+#include <linux/ip.h>
|
|
+#include <linux/tcp.h>
|
|
+#include <linux/list.h>
|
|
+
|
|
+#include "accel.h"
|
|
+
|
|
+/** Per-connection state for Soft Segment Reassembly (SSR). */
+
+struct netfront_accel_ssr_conn {
+	/** List linkage for this entry. */
+	struct list_head link;
+
+	/** Source and destination IP addresses used to match packets. */
+	unsigned saddr;
+	unsigned daddr;
+
+	/** Source and destination TCP ports used to match packets. */
+	unsigned short source;
+	unsigned short dest;
+
+	/** Count of in-order packets with payload seen so far. */
+	unsigned n_in_order_pkts;
+
+	/** Sequence number expected from the next in-order packet. */
+	unsigned next_seq;
+
+	/** jiffies value when a packet was last seen on this connection. */
+	unsigned long last_pkt_jiffies;
+
+	/** The SKB currently being held.  When NULL, none of the fields
+	 * below are defined.
+	 */
+	struct sk_buff *skb;
+
+	/** Tail of the frag_list of held SKBs.  Only valid once at least
+	 * one merge has happened.
+	 */
+	struct sk_buff *skb_tail;
+
+	/** IP header of the held skb. */
+	struct iphdr *iph;
+
+	/** TCP header of the held skb. */
+	struct tcphdr *th;
+};
|
|
+
|
|
+extern void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st);
|
|
+extern void netfront_accel_ssr_fini(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_ssr_state *st);
|
|
+
|
|
+extern void
|
|
+__netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_ssr_state *st);
|
|
+
|
|
+extern int netfront_accel_ssr_skb(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_ssr_state *st,
|
|
+ struct sk_buff *skb);
|
|
+
|
|
+/*
+ * Inline wrapper: only calls __netfront_accel_ssr_end_of_burst() when
+ * at least one connection is on the conns list.
+ */
+static inline void
+netfront_accel_ssr_end_of_burst (netfront_accel_vnic *vnic,
+				 struct netfront_accel_ssr_state *st) {
+	if (list_empty(&st->conns))
+		return;
+
+	__netfront_accel_ssr_end_of_burst(vnic, st);
+}
|
|
+
|
|
+#endif /* NETFRONT_ACCEL_SSR_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_tso.c 2008-02-26 10:54:12.000000000 +0100
|
|
@@ -0,0 +1,511 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/pci.h>
|
|
+#include <linux/tcp.h>
|
|
+#include <linux/ip.h>
|
|
+#include <linux/in.h>
|
|
+#include <linux/if_ether.h>
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_util.h"
|
|
+
|
|
+#include "accel_tso.h"
|
|
+
|
|
+/* Distance in bytes from p2 to p1 */
+#define PTR_DIFF(p1, p2)  ((u8*)(p1) - (u8*)(p2))
+/* Byte offsets of the network and transport headers from skb->data */
+#define ETH_HDR_LEN(skb)  PTR_DIFF ((skb)->nh.raw, (skb)->data)
+#define SKB_TCP_OFF(skb)  PTR_DIFF ((skb)->h.th, (skb)->data)
+#define SKB_IP_OFF(skb)   PTR_DIFF ((skb)->nh.iph, (skb)->data)
|
|
+
|
|
+/*
|
|
+ * Set a maximum number of buffers in each output packet to make life
|
|
+ * a little simpler - if this is reached it will just move on to
|
|
+ * another packet
|
|
+ */
|
|
+#define ACCEL_TSO_MAX_BUFFERS (6)
|
|
+
|
|
+/** TSO State.
+ *
+ * Working state for one segmentation run, gathered in a single
+ * structure so that it is easy to pass to the inline helpers.
+ */
+struct netfront_accel_tso_state {
+	/** Payload bytes that still have to be segmented */
+	unsigned int remaining_len;
+
+	/** Current TCP sequence number */
+	unsigned int seqnum;
+
+	/** Payload space left in the packet being built */
+	unsigned int packet_space;
+
+	/** Output packets built so far; each one describes its
+	 * buffers and iovecs
+	 */
+	struct netfront_accel_tso_output_packet *output_packets;
+
+	/** Number of buffers across all of output_packets */
+	unsigned int buffers;
+
+	/** Number of packets in output_packets */
+	unsigned int packets;
+
+	/** Input Fragment Cursor.
+	 *
+	 * Position within the current fragment of the incoming SKB.
+	 * Updated in place when a fragment is split across several
+	 * packets.
+	 */
+	struct {
+		/** current position */
+		void *addr;
+		/** bytes left in the fragment */
+		unsigned int len;
+	} ifc; /*  == ifc Input Fragment Cursor */
+
+	/** Parameters.
+	 *
+	 * Set up once when the TSO send starts; ip_id apart, which
+	 * advances per packet, they stay fixed for the whole run.
+	 */
+	struct {
+		/* Bytes of header */
+		unsigned int header_length;
+
+		/* Bytes to put in each outgoing segment */
+		int full_packet_size;
+
+		/* Current IP ID, host endian */
+		unsigned int ip_id;
+
+		/* Maximum payload of each output packet */
+		int gso_size;
+	} p;
+};
|
|
+
|
|
+
|
|
+/**
|
|
+ * Verify that our various assumptions about sk_buffs and the conditions
|
|
+ * under which TSO will be attempted hold true.
|
|
+ *
|
|
+ * The checks are: skb->protocol and the ethernet header both say IPv4,
|
|
+ * the IP protocol is TCP, and the TCP header ends within the linear
|
|
+ * part of the skb.  Each violated condition is passed to EPRINTK_ON();
|
|
+ * the function returns nothing.
|
|
+ *
|
|
+ * @v skb The sk_buff to check.
|
|
+ */
|
|
+static inline void tso_check_safe(struct sk_buff *skb) {
|
|
+ EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
|
|
+ EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
|
|
+ EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP);
|
|
+ EPRINTK_ON((SKB_TCP_OFF(skb)
|
|
+ + (skb->h.th->doff << 2u)) > skb_headlen(skb));
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+/** Parse the SKB header and initialise state.
+ *
+ * Fills in the fixed parameters (header length, segment sizes, first
+ * IP ID), the starting sequence number and the payload length still
+ * to be segmented, and resets the output packet list and counters.
+ *
+ * @v st         TSO state to initialise
+ * @v skb        The sk_buff about to be segmented
+ */
+static inline void tso_start(struct netfront_accel_tso_state *st,
+			     struct sk_buff *skb) {
+
+	/*
+	 * All ethernet/IP/TCP headers combined size is TCP header size
+	 * plus offset of TCP header relative to start of packet.
+	 */
+	st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb);
+	st->p.full_packet_size = (st->p.header_length
+				  + skb_shinfo(skb)->gso_size);
+	st->p.gso_size = skb_shinfo(skb)->gso_size;
+
+	/* The header fields are network order; the state is host order */
+	st->p.ip_id = ntohs(skb->nh.iph->id);
+	st->seqnum = ntohl(skb->h.th->seq);
+
+	EPRINTK_ON(skb->h.th->urg);
+	EPRINTK_ON(skb->h.th->syn);
+	EPRINTK_ON(skb->h.th->rst);
+
+	st->remaining_len = skb->len - st->p.header_length;
+
+	st->output_packets = NULL;
+	st->buffers = 0;
+	st->packets = 0;
+
+	VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n",
+		st->p.header_length, st->p.full_packet_size, st->p.gso_size,
+		st->seqnum, skb->len);
+}
|
|
+
|
|
+/**
+ * Add another NIC mapped buffer onto an output packet
+ *
+ * The per-buffer (and, for the first buffer of a packet, per-packet)
+ * bookkeeping is stored inside the buffer itself, after the first
+ * NETFRONT_ACCEL_TSO_BUF_LENGTH bytes.
+ *
+ * @v vnic       VNIC whose tx_bufs pool is used
+ * @v st         TSO state
+ * @v first      Non-zero to start a new output packet with this buffer
+ * @ret rc       0 on success, or -1 if no buffer was available
+ */
+static inline int tso_start_new_buffer(netfront_accel_vnic *vnic,
+				       struct netfront_accel_tso_state *st,
+				       int first)
+{
+	struct netfront_accel_tso_output_packet *pkt;
+	struct netfront_accel_tso_buffer *tso_buf;
+	struct netfront_accel_pkt_desc *buf;
+
+	/* Get a mapped packet buffer */
+	buf = netfront_accel_buf_get(vnic->tx_bufs);
+	if (buf == NULL) {
+		DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
+		return -1;
+	}
+
+	/* The buffer meta-data sits after the output packet meta-data */
+	tso_buf = (struct netfront_accel_tso_buffer *)
+		(buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH
+		 + sizeof(struct netfront_accel_tso_output_packet));
+	tso_buf->buf = buf;
+	tso_buf->length = 0;
+
+	if (first) {
+		/* Start a new packet and make it the head of the list */
+		pkt = (struct netfront_accel_tso_output_packet *)
+			(buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH);
+		pkt->next = st->output_packets;
+		st->output_packets = pkt;
+		tso_buf->next = NULL;
+		pkt->tso_bufs = tso_buf;
+		pkt->tso_bufs_len = 1;
+	} else {
+		/* Prepend to the buffer chain of the current packet */
+		pkt = st->output_packets;
+		tso_buf->next = pkt->tso_bufs;
+		pkt->tso_bufs = tso_buf;
+		pkt->tso_bufs_len++;
+	}
+
+	BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
+
+	st->buffers++;
+
+	/*
+	 * No skb context yet; the last packet buffer gets a non-NULL
+	 * value later
+	 */
+	buf->skb = NULL;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* Generate a new header, and prepare for the new packet.
+ *
+ * Copies the original headers into a fresh buffer and patches the
+ * sequence number, FIN/PSH flags, IP total length and IP ID for this
+ * segment.
+ *
+ * @v vnic       VNIC
+ * @v skb        Socket buffer
+ * @v st         TSO state
+ * @ret rc       0 on success, or -1 if failed to alloc header
+ */
+
+static inline
+int tso_start_new_packet(netfront_accel_vnic *vnic,
+			 struct sk_buff *skb,
+			 struct netfront_accel_tso_state *st)
+{
+	struct netfront_accel_tso_buffer *hdr_buf;
+	struct tcphdr *new_th;
+	struct iphdr *new_iph;
+	unsigned ip_length;
+
+	if (tso_start_new_buffer(vnic, st, 1) < 0) {
+		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
+		return -1;
+	}
+
+	/* tso_start_new_buffer() has just set this up */
+	hdr_buf = st->output_packets->tso_bufs;
+
+	/* Copy in the header */
+	memcpy(hdr_buf->buf->pkt_kva, skb->data, st->p.header_length);
+	hdr_buf->length = st->p.header_length;
+
+	/* Locate the TCP and IP headers inside the copy */
+	new_th = (struct tcphdr*)
+		(hdr_buf->buf->pkt_kva + SKB_TCP_OFF(skb));
+	new_iph = (struct iphdr*)
+		(hdr_buf->buf->pkt_kva + SKB_IP_OFF(skb));
+
+	/* Set to zero to encourage falcon to fill these in */
+	new_th->check = 0;
+	new_iph->check = 0;
+
+	new_th->seq = htonl(st->seqnum);
+	st->seqnum += st->p.gso_size;
+
+	if (st->remaining_len <= st->p.gso_size) {
+		/* This packet will be the last in the TSO burst. */
+		ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
+			     + st->remaining_len);
+		new_th->fin = skb->h.th->fin;
+		new_th->psh = skb->h.th->psh;
+	} else {
+		/* This packet will not finish the TSO burst. */
+		ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
+		new_th->fin = 0;
+		new_th->psh = 0;
+	}
+
+	new_iph->tot_len = htons(ip_length);
+
+	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
+	new_iph->id = st->p.ip_id++;
+	new_iph->id = htons(new_iph->id);
+
+	st->packet_space = st->p.gso_size;
+
+	st->packets++;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* Record a new input fragment in st->ifc: len bytes starting at addr. */
|
|
+static inline void tso_get_fragment(struct netfront_accel_tso_state *st,
|
|
+ int len, void *addr)
|
|
+{
|
|
+ st->ifc.len = len;
|
|
+ st->ifc.addr = addr;
|
|
+ return;
|
|
+}
|
|
+
|
|
+/* Give every buffer of every output packet in st back to vnic->tx_bufs. */
|
|
+static inline void tso_unwind(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_tso_state *st)
|
|
+{
|
|
+ struct netfront_accel_tso_buffer *tso_buf;
|
|
+ struct netfront_accel_tso_output_packet *output_packet;
|
|
+
|
|
+ DPRINTK("%s\n", __FUNCTION__);
|
|
+ /* Pop packets, and within each packet pop buffers, until both lists are empty */
|
|
+ while (st->output_packets != NULL) {
|
|
+ output_packet = st->output_packets;
|
|
+ st->output_packets = output_packet->next;
|
|
+ while (output_packet->tso_bufs != NULL) {
|
|
+ tso_buf = output_packet->tso_bufs;
|
|
+ output_packet->tso_bufs = tso_buf->next;
|
|
+
|
|
+ st->buffers --;
|
|
+ output_packet->tso_bufs_len --;
|
|
+
|
|
+ netfront_accel_buf_put(vnic->tx_bufs,
|
|
+ tso_buf->buf->buf_id);
|
|
+ }
|
|
+ }
|
|
+ BUG_ON(st->buffers != 0); /* every counted buffer must have been on a list */
|
|
+}
|
|
+
|
|
+
|
|
+/* Copy as much of the current input fragment as fits into the newest buffer of the current packet. */
|
|
+static inline
|
|
+void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_tso_state *st)
|
|
+{
|
|
+ struct netfront_accel_tso_buffer *tso_buf;
|
|
+ int n, space;
|
|
+
|
|
+ BUG_ON(st->output_packets == NULL);
|
|
+ BUG_ON(st->output_packets->tso_bufs == NULL);
|
|
+
|
|
+ tso_buf = st->output_packets->tso_bufs;
|
|
+ /* Nothing to do if the fragment, the packet or the buffer is exhausted */
|
|
+ if (st->ifc.len == 0) return;
|
|
+ if (st->packet_space == 0) return;
|
|
+ if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return;
|
|
+ /* n = smallest of: fragment bytes left, packet space left, buffer space left */
|
|
+ n = min(st->ifc.len, st->packet_space);
|
|
+
|
|
+ space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length;
|
|
+ n = min(n, space);
|
|
+
|
|
+ st->packet_space -= n;
|
|
+ st->remaining_len -= n;
|
|
+ st->ifc.len -= n;
|
|
+
|
|
+ memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
|
|
+
|
|
+ tso_buf->length += n;
|
|
+
|
|
+ BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH);
|
|
+ /* Advance the cursor past the bytes just consumed */
|
|
+ st->ifc.addr += n;
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+/* Copy a GSO skb into per-packet TX buffers and post them all; returns NETFRONT_ACCEL_STATUS_GOOD, or NETFRONT_ACCEL_STATUS_BUSY after releasing every buffer taken. */
|
|
+int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
|
|
+ struct sk_buff *skb)
|
|
+{
|
|
+ struct netfront_accel_tso_state state;
|
|
+ struct netfront_accel_tso_buffer *tso_buf = NULL;
|
|
+ struct netfront_accel_tso_output_packet *reversed_list = NULL;
|
|
+ struct netfront_accel_tso_output_packet *tmp_pkt;
|
|
+ ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS];
|
|
+ int frag_i, rc, dma_id;
|
|
+ skb_frag_t *f;
|
|
+
|
|
+ tso_check_safe(skb);
|
|
+ /* Only logged; the send still goes ahead */
|
|
+ if (skb->ip_summed != CHECKSUM_HW)
|
|
+ EPRINTK("Trying to TSO send a packet without HW checksum\n");
|
|
+
|
|
+ tso_start(&state, skb);
|
|
+
|
|
+ /*
|
|
+ * Setup the first payload fragment. If the skb header area
|
|
+ * contains exactly the headers and all payload is in the frag
|
|
+ * list things are a little simpler
|
|
+ */
|
|
+ if (skb_headlen(skb) == state.p.header_length) {
|
|
+ /* Grab the first payload fragment. */
|
|
+ BUG_ON(skb_shinfo(skb)->nr_frags < 1);
|
|
+ frag_i = 0;
|
|
+ f = &skb_shinfo(skb)->frags[frag_i];
|
|
+ tso_get_fragment(&state, f->size,
|
|
+ page_address(f->page) + f->page_offset);
|
|
+ } else {
|
|
+ int hl = state.p.header_length;
|
|
+ tso_get_fragment(&state, skb_headlen(skb) - hl,
|
|
+ skb->data + hl);
|
|
+ frag_i = -1; /* payload in the linear area comes before frags[0] */
|
|
+ }
|
|
+
|
|
+ if (tso_start_new_packet(vnic, skb, &state) < 0) {
|
|
+ DPRINTK("%s: out of first start-packet memory\n",
|
|
+ __FUNCTION__);
|
|
+ goto unwind;
|
|
+ }
|
|
+ /* Copy loop: runs until the last page fragment has been consumed */
|
|
+ while (1) {
|
|
+ tso_fill_packet_with_fragment(vnic, &state);
|
|
+
|
|
+ /* Move onto the next fragment? */
|
|
+ if (state.ifc.len == 0) {
|
|
+ if (++frag_i >= skb_shinfo(skb)->nr_frags)
|
|
+ /* End of payload reached. */
|
|
+ break;
|
|
+ f = &skb_shinfo(skb)->frags[frag_i];
|
|
+ tso_get_fragment(&state, f->size,
|
|
+ page_address(f->page) +
|
|
+ f->page_offset);
|
|
+ }
|
|
+
|
|
+ /* Start a new buffer? */
|
|
+ if ((state.output_packets->tso_bufs->length ==
|
|
+ NETFRONT_ACCEL_TSO_BUF_LENGTH) &&
|
|
+ tso_start_new_buffer(vnic, &state, 0)) {
|
|
+ DPRINTK("%s: out of start-buffer memory\n",
|
|
+ __FUNCTION__);
|
|
+ goto unwind;
|
|
+ }
|
|
+
|
|
+ /* Start a new packet? */
|
|
+ if ((state.packet_space == 0 ||
|
|
+ ((state.output_packets->tso_bufs_len >=
|
|
+ ACCEL_TSO_MAX_BUFFERS) &&
|
|
+ (state.output_packets->tso_bufs->length >=
|
|
+ NETFRONT_ACCEL_TSO_BUF_LENGTH))) &&
|
|
+ tso_start_new_packet(vnic, skb, &state) < 0) {
|
|
+ DPRINTK("%s: out of start-packet memory\n",
|
|
+ __FUNCTION__);
|
|
+ goto unwind;
|
|
+ }
|
|
+
|
|
+ }
|
|
+
|
|
+ /* Check for space */
|
|
+ if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
|
|
+ DPRINTK("%s: Not enough TX space (%d)\n",
|
|
+ __FUNCTION__, state.buffers);
|
|
+ goto unwind;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Store the skb context in the most recent buffer (i.e. the
|
|
+ * last buffer that will be sent)
|
|
+ */
|
|
+ state.output_packets->tso_bufs->buf->skb = skb;
|
|
+
|
|
+ /* Reverse the list of packets as we construct it on a stack */
|
|
+ while (state.output_packets != NULL) {
|
|
+ tmp_pkt = state.output_packets;
|
|
+ state.output_packets = tmp_pkt->next;
|
|
+ tmp_pkt->next = reversed_list;
|
|
+ reversed_list = tmp_pkt;
|
|
+ }
|
|
+
|
|
+ /* Pass off to hardware */
|
|
+ while (reversed_list != NULL) {
|
|
+ tmp_pkt = reversed_list;
|
|
+ reversed_list = tmp_pkt->next;
|
|
+
|
|
+ BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS);
|
|
+ BUG_ON(tmp_pkt->tso_bufs_len == 0);
|
|
+ /* dma_id is the buf_id of the packet's most recently added buffer */
|
|
+ dma_id = tmp_pkt->tso_bufs->buf->buf_id;
|
|
+
|
|
+ /*
|
|
+ * Make an iovec of the buffers in the list, reversing
|
|
+ * the buffers as we go as they are constructed on a
|
|
+ * stack
|
|
+ */
|
|
+ tso_buf = tmp_pkt->tso_bufs;
|
|
+ for (frag_i = tmp_pkt->tso_bufs_len - 1;
|
|
+ frag_i >= 0;
|
|
+ frag_i--) {
|
|
+ iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
|
|
+ iovecs[frag_i].iov_len = tso_buf->length;
|
|
+ tso_buf = tso_buf->next;
|
|
+ }
|
|
+
|
|
+ rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len,
|
|
+ dma_id);
|
|
+ /*
|
|
+ * We checked for space already, so it really should
|
|
+ * succeed
|
|
+ */
|
|
+ BUG_ON(rc != 0);
|
|
+ }
|
|
+
|
|
+ /* Track number of tx fastpath stats */
|
|
+ vnic->netdev_stats.fastpath_tx_bytes += skb->len;
|
|
+ vnic->netdev_stats.fastpath_tx_pkts += state.packets;
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+ {
|
|
+ unsigned n;
|
|
+ n = vnic->netdev_stats.fastpath_tx_pkts -
|
|
+ vnic->stats.fastpath_tx_completions;
|
|
+ if (n > vnic->stats.fastpath_tx_pending_max)
|
|
+ vnic->stats.fastpath_tx_pending_max = n;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ return NETFRONT_ACCEL_STATUS_GOOD;
|
|
+
|
|
+ unwind:
|
|
+ tso_unwind(vnic, &state);
|
|
+
|
|
+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
|
|
+
|
|
+ return NETFRONT_ACCEL_STATUS_BUSY;
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_tso.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,57 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NETFRONT_ACCEL_TSO_H
|
|
+#define NETFRONT_ACCEL_TSO_H
|
|
+
|
|
+#include "accel_bufs.h"
|
|
+
|
|
+/* Track the buffers used in each output packet */
|
|
+struct netfront_accel_tso_buffer {
|
|
+ struct netfront_accel_tso_buffer *next; /* next entry in the singly linked list */
|
|
+ struct netfront_accel_pkt_desc *buf; /* packet buffer descriptor this record refers to */
|
|
+ unsigned length; /* bytes of data placed in the buffer so far */
|
|
+};
|
|
+
|
|
+/* Track the output packets formed from each input packet */
|
|
+struct netfront_accel_tso_output_packet {
|
|
+ struct netfront_accel_tso_output_packet *next; /* next packet in the list */
|
|
+ struct netfront_accel_tso_buffer *tso_bufs; /* this packet's buffers, most recently added first */
|
|
+ unsigned tso_bufs_len; /* number of buffers on tso_bufs */
|
|
+};
|
|
+
|
|
+
|
|
+/*
|
|
+ * Max available space in a buffer for data once the meta-data (one
|
|
+ * tso_buffer plus one tso_output_packet record) has taken its place
|
|
+ */
|
|
+#define NETFRONT_ACCEL_TSO_BUF_LENGTH \
|
|
+ ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \
|
|
+ - sizeof(struct netfront_accel_tso_buffer) \
|
|
+ - sizeof(struct netfront_accel_tso_output_packet))
|
|
+/* Post a GSO skb on the fast path; returns NETFRONT_ACCEL_STATUS_GOOD or NETFRONT_ACCEL_STATUS_BUSY. */
|
|
+int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic,
|
|
+ struct sk_buff *skb);
|
|
+
|
|
+#endif /* NETFRONT_ACCEL_TSO_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_vi.c 2010-01-18 15:23:12.000000000 +0100
|
|
@@ -0,0 +1,1202 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/if_ether.h>
|
|
+#include <linux/ip.h>
|
|
+#include <net/checksum.h>
|
|
+#include <asm/io.h>
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_util.h"
|
|
+#include "accel_bufs.h"
|
|
+#include "accel_tso.h"
|
|
+#include "accel_ssr.h"
|
|
+#include "netfront.h"
|
|
+
|
|
+#include "etherfabric/ef_vi.h"
|
|
+
|
|
+/*
|
|
+ * Max available space in a buffer for data once meta-data (one
|
|
+ * netfront_accel_tso_buffer record) has taken its place
|
|
+ */
|
|
+#define NETFRONT_ACCEL_TX_BUF_LENGTH \
|
|
+ ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \
|
|
+ - sizeof(struct netfront_accel_tso_buffer))
|
|
+/* ACCEL_TX_MAX_BUFFERS sizes the iovec array in netfront_accel_enqueue_skb_multi() */
|
|
+#define ACCEL_TX_MAX_BUFFERS (6)
|
|
+#define ACCEL_VI_POLL_EVENTS (8)
|
|
+/* Set up the VI from hw_msg, or tear it down when hw_msg is NULL. Returns 0 after a successful set-up, -EIO on any failure and also after a teardown. */
|
|
+static
|
|
+int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic,
|
|
+ struct net_accel_msg_hw *hw_msg)
|
|
+{
|
|
+ struct ef_vi_nic_type nic_type;
|
|
+ struct net_accel_hw_falcon_b *hw_info;
|
|
+ void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva;
|
|
+ u32 *evq_gnts;
|
|
+ u32 evq_order;
|
|
+ int vi_state_size;
|
|
+ u8 vi_data[VI_MAPPINGS_SIZE];
|
|
+ /* NULL message selects the teardown half of this function */
|
|
+ if (hw_msg == NULL)
|
|
+ goto fini;
|
|
+
|
|
+ /* And create the local macs table lock */
|
|
+ spin_lock_init(&vnic->table_lock);
|
|
+
|
|
+ /* Create fastpath table, initial size 8, key length 8 */
|
|
+ if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) {
|
|
+ EPRINTK("failed to allocate fastpath table\n");
|
|
+ goto fail_cuckoo;
|
|
+ }
|
|
+ /* Remembered so that the teardown path can tell Falcon A apart */
|
|
+ vnic->hw.falcon.type = hw_msg->type;
|
|
+
|
|
+ switch (hw_msg->type) {
|
|
+ case NET_ACCEL_MSG_HWTYPE_FALCON_A:
|
|
+ hw_info = &hw_msg->resources.falcon_a.common;
|
|
+ /* Need the extra rptr register page on A1 */
|
|
+ io_kva = net_accel_map_iomem_page
|
|
+ (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt,
|
|
+ &vnic->hw.falcon.evq_rptr_mapping);
|
|
+ if (io_kva == NULL) {
|
|
+ EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__);
|
|
+ goto evq_rptr_fail;
|
|
+ }
|
|
+
|
|
+ vnic->hw.falcon.evq_rptr = io_kva +
|
|
+ (hw_info->evq_rptr & (PAGE_SIZE - 1));
|
|
+ break;
|
|
+ case NET_ACCEL_MSG_HWTYPE_FALCON_B:
|
|
+ case NET_ACCEL_MSG_HWTYPE_SIENA_A:
|
|
+ hw_info = &hw_msg->resources.falcon_b;
|
|
+ break;
|
|
+ default:
|
|
+ goto bad_type;
|
|
+ }
|
|
+
|
|
+ /**** Event Queue ****/
|
|
+
|
|
+ /* Map the event queue pages */
|
|
+ evq_gnts = hw_info->evq_mem_gnts;
|
|
+ evq_order = hw_info->evq_order;
|
|
+
|
|
+ EPRINTK_ON(hw_info->evq_offs != 0);
|
|
+
|
|
+ DPRINTK("Will map evq %d pages\n", 1 << evq_order);
|
|
+
|
|
+ evq_base =
|
|
+ net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order,
|
|
+ &vnic->evq_mapping);
|
|
+ if (evq_base == NULL) {
|
|
+ EPRINTK("%s: evq_base failed\n", __FUNCTION__);
|
|
+ goto evq_fail;
|
|
+ }
|
|
+
|
|
+ /**** Doorbells ****/
|
|
+ /* Set up the doorbell mappings. */
|
|
+ doorbell_kva =
|
|
+ net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt,
|
|
+ &vnic->hw.falcon.doorbell_mapping);
|
|
+ if (doorbell_kva == NULL) {
|
|
+ EPRINTK("%s: doorbell permission failed\n", __FUNCTION__);
|
|
+ goto doorbell_fail;
|
|
+ }
|
|
+ vnic->hw.falcon.doorbell = doorbell_kva;
|
|
+
|
|
+ /* On Falcon_B and Siena we get the rptr from the doorbell page */
|
|
+ if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B ||
|
|
+ hw_msg->type == NET_ACCEL_MSG_HWTYPE_SIENA_A) {
|
|
+ vnic->hw.falcon.evq_rptr =
|
|
+ (u32 *)((char *)vnic->hw.falcon.doorbell
|
|
+ + hw_info->evq_rptr);
|
|
+ }
|
|
+
|
|
+ /**** DMA Queue ****/
|
|
+
|
|
+ /* Set up the DMA Queues from the message. */
|
|
+ tx_dma_kva = net_accel_map_grants_contig
|
|
+ (vnic->dev, &(hw_info->txdmaq_gnt), 1,
|
|
+ &vnic->hw.falcon.txdmaq_mapping);
|
|
+ if (tx_dma_kva == NULL) {
|
|
+ EPRINTK("%s: TX dma failed\n", __FUNCTION__);
|
|
+ goto tx_dma_fail;
|
|
+ }
|
|
+
|
|
+ rx_dma_kva = net_accel_map_grants_contig
|
|
+ (vnic->dev, &(hw_info->rxdmaq_gnt), 1,
|
|
+ &vnic->hw.falcon.rxdmaq_mapping);
|
|
+ if (rx_dma_kva == NULL) {
|
|
+ EPRINTK("%s: RX dma failed\n", __FUNCTION__);
|
|
+ goto rx_dma_fail;
|
|
+ }
|
|
+
|
|
+ /* Full confession */
|
|
+ DPRINTK("Mapped H/W"
|
|
+ " Tx DMAQ grant %x -> %p\n"
|
|
+ " Rx DMAQ grant %x -> %p\n"
|
|
+ " EVQ grant %x -> %p\n",
|
|
+ hw_info->txdmaq_gnt, tx_dma_kva,
|
|
+ hw_info->rxdmaq_gnt, rx_dma_kva,
|
|
+ evq_gnts[0], evq_base
|
|
+ );
|
|
+
|
|
+ memset(vi_data, 0, sizeof(vi_data));
|
|
+
|
|
+ /* TODO BUG11305: convert efhw_arch to ef_vi_arch
|
|
+ * e.g.
|
|
+ * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch);
|
|
+ * assert(arch >= 0);
|
|
+ * nic_type.arch = arch;
|
|
+ */
|
|
+ nic_type.arch = (unsigned char)hw_info->nic_arch;
|
|
+ nic_type.variant = (char)hw_info->nic_variant;
|
|
+ nic_type.revision = (unsigned char)hw_info->nic_revision;
|
|
+
|
|
+ ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance,
|
|
+ 1 << (evq_order + PAGE_SHIFT), evq_base,
|
|
+ (void *)0xdeadbeef);
|
|
+
|
|
+ ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity,
|
|
+ hw_info->tx_capacity, hw_info->instance,
|
|
+ doorbell_kva, rx_dma_kva, tx_dma_kva, 0);
|
|
+
|
|
+ vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity,
|
|
+ hw_info->tx_capacity);
|
|
+ vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL);
|
|
+ if (vnic->vi_state == NULL) {
|
|
+ EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__);
|
|
+ goto vi_state_fail;
|
|
+ }
|
|
+ ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0);
|
|
+
|
|
+ ef_eventq_state_init(&vnic->vi);
|
|
+
|
|
+ ef_vi_state_init(&vnic->vi);
|
|
+
|
|
+ return 0;
|
|
+ /* Teardown entry; the labels below double as the error unwind, in reverse order of set-up */
|
|
+fini:
|
|
+ kfree(vnic->vi_state);
|
|
+ vnic->vi_state = NULL;
|
|
+vi_state_fail:
|
|
+ net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping);
|
|
+rx_dma_fail:
|
|
+ net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping);
|
|
+tx_dma_fail:
|
|
+ net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping);
|
|
+ vnic->hw.falcon.doorbell = NULL;
|
|
+doorbell_fail:
|
|
+ net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping);
|
|
+evq_fail:
|
|
+ if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A)
|
|
+ net_accel_unmap_iomem_page(vnic->dev,
|
|
+ vnic->hw.falcon.evq_rptr_mapping);
|
|
+ vnic->hw.falcon.evq_rptr = NULL;
|
|
+evq_rptr_fail:
|
|
+bad_type:
|
|
+ cuckoo_hash_destroy(&vnic->fastpath_table);
|
|
+fail_cuckoo:
|
|
+ return -EIO;
|
|
+}
|
|
+
|
|
+/* Constructor: leaves vi_state NULL, which netfront_accel_vi_dtor() reads as "nothing to tear down". */
|
|
+void netfront_accel_vi_ctor(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ /* Just mark the VI as uninitialised. */
|
|
+ vnic->vi_state = NULL;
|
|
+}
|
|
+
|
|
+/* Initialise the VI from hw_msg (must not be NULL); returns 0 on success or -EIO. */
|
|
+int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg)
|
|
+{
|
|
+ BUG_ON(hw_msg == NULL);
|
|
+ return netfront_accel_vi_init_fini(vnic, hw_msg);
|
|
+}
|
|
+
|
|
+/* Tear the VI down if it was initialised (vi_state != NULL); otherwise do nothing. */
|
|
+void netfront_accel_vi_dtor(netfront_accel_vnic *vnic)
|
|
+{
|
|
+ if (vnic->vi_state != NULL)
|
|
+ netfront_accel_vi_init_fini(vnic, NULL);
|
|
+}
|
|
+
|
|
+/* Queue one RX buffer descriptor and push the batch to the card when the batching rule below says so. */
|
|
+static
|
|
+void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id,
|
|
+ netfront_accel_pkt_desc *buf)
|
|
+{
|
|
+
|
|
+ int idx = vnic->rx_dma_batched;
|
|
+
|
|
+#if 0
|
|
+ VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n",
|
|
+ id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi));
|
|
+#endif
|
|
+ /* Set up a virtual buffer descriptor */
|
|
+ ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id,
|
|
+ /*rx_bytes=max*/0);
|
|
+
|
|
+ idx++;
|
|
+
|
|
+ vnic->rx_dma_level++;
|
|
+
|
|
+ /*
|
|
+ * Only push the descriptor to the card if we've reached the
|
|
+ * batch size. Otherwise, the descriptors can sit around for
|
|
+ * a while. There will be plenty available.
|
|
+ */
|
|
+ if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH ||
|
|
+ vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) {
|
|
+#if 0
|
|
+ VPRINTK("Flushing %d rx descriptors.\n", idx);
|
|
+#endif
|
|
+
|
|
+ /* Push buffer to hardware */
|
|
+ ef_vi_receive_push(&vnic->vi);
|
|
+
|
|
+ idx = 0;
|
|
+ }
|
|
+ /* Carry the count of not-yet-pushed descriptors over to the next call */
|
|
+ vnic->rx_dma_batched = idx;
|
|
+}
|
|
+
|
|
+/* Repost buffer id for RX if the ring has room, else return it to vnic->rx_bufs; then top the ring up from the pool. */
|
|
+inline
|
|
+void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id,
|
|
+ netfront_accel_pkt_desc *buf)
|
|
+{
|
|
+
|
|
+ VPRINTK("%s: %d\n", __FUNCTION__, id);
|
|
+ /* Descriptors batched but not yet pushed count against the free space */
|
|
+ if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) {
|
|
+ VPRINTK("RX space is full\n");
|
|
+ netfront_accel_buf_put(vnic->rx_bufs, id);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ VPRINTK("Completed buffer %d is reposted\n", id);
|
|
+ netfront_accel_vi_post_rx(vnic, id, buf);
|
|
+
|
|
+ /*
|
|
+ * Let's see if there's any more to be pushed out to the NIC
|
|
+ * while we're here
|
|
+ */
|
|
+ while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
|
|
+ /* Try to allocate a buffer. */
|
|
+ buf = netfront_accel_buf_get(vnic->rx_bufs);
|
|
+ if (buf == NULL)
|
|
+ break;
|
|
+
|
|
+ /* Add it to the rx dma queue. */
|
|
+ netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* When is_rx is non-zero, post RX buffers from vnic->rx_bufs until the ring or the pool runs out; otherwise only logs. */
|
|
+void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx)
|
|
+{
|
|
+
|
|
+ while (is_rx &&
|
|
+ ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) {
|
|
+ netfront_accel_pkt_desc *buf;
|
|
+
|
|
+ VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level);
|
|
+
|
|
+ /* Try to allocate a buffer. */
|
|
+ buf = netfront_accel_buf_get(vnic->rx_bufs);
|
|
+
|
|
+ if (buf == NULL)
|
|
+ break;
|
|
+
|
|
+ /* Add it to the rx dma queue. */
|
|
+ netfront_accel_vi_post_rx(vnic, buf->buf_id, buf);
|
|
+ }
|
|
+
|
|
+ VPRINTK("%s: done\n", __FUNCTION__);
|
|
+}
|
|
+
|
|
+/* Working state while one skb is copied into a chain of TX buffers. */
|
|
+struct netfront_accel_multi_state {
|
|
+ unsigned remaining_len; /* bytes of the skb not yet copied */
|
|
+
|
|
+ unsigned buffers; /* number of buffers on output_buffers */
|
|
+
|
|
+ struct netfront_accel_tso_buffer *output_buffers; /* buffers taken so far, newest first */
|
|
+
|
|
+ /* Where we are in the current fragment of the SKB. */
|
|
+ struct {
|
|
+ /* address of current position */
|
|
+ void *addr;
|
|
+ /* remaining length */
|
|
+ unsigned int len;
|
|
+ } ifc; /* == Input Fragment Cursor */
|
|
+};
|
|
+
|
|
+/* Reset st for skb: no buffers yet, cursor on the skb's linear data area. */
|
|
+static inline void multi_post_start(struct netfront_accel_multi_state *st,
|
|
+ struct sk_buff *skb)
|
|
+{
|
|
+ st->remaining_len = skb->len;
|
|
+ st->output_buffers = NULL;
|
|
+ st->buffers = 0;
|
|
+ st->ifc.len = skb_headlen(skb);
|
|
+ st->ifc.addr = skb->data;
|
|
+}
|
|
+/* Take a TX buffer and push it on st->output_buffers; returns 0, or -1 if no buffer is available. */
|
|
+static int multi_post_start_new_buffer(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_multi_state *st)
|
|
+{
|
|
+ struct netfront_accel_tso_buffer *tso_buf;
|
|
+ struct netfront_accel_pkt_desc *buf;
|
|
+
|
|
+ /* Get a mapped packet buffer */
|
|
+ buf = netfront_accel_buf_get(vnic->tx_bufs);
|
|
+ if (buf == NULL) {
|
|
+ DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /* Store a bit of meta-data at the end */
|
|
+ tso_buf = (struct netfront_accel_tso_buffer *)
|
|
+ (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
|
|
+
|
|
+ tso_buf->buf = buf;
|
|
+
|
|
+ tso_buf->length = 0;
|
|
+ /* Push on the head of the list, so the list is newest-first */
|
|
+ tso_buf->next = st->output_buffers;
|
|
+ st->output_buffers = tso_buf;
|
|
+ st->buffers++;
|
|
+ /* NOTE(review): fires when the count reaches the limit, though the caller's iovec array has room for that many - confirm intent */
|
|
+ BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS);
|
|
+
|
|
+ /*
|
|
+ * Store the context, set to NULL, last packet buffer will get
|
|
+ * non-NULL later
|
|
+ */
|
|
+ tso_buf->buf->skb = NULL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Copy as much of the current input fragment as fits into the newest output buffer. */
|
|
+static void
|
|
+multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_multi_state *st)
|
|
+{
|
|
+ struct netfront_accel_tso_buffer *tso_buf;
|
|
+ unsigned n, space;
|
|
+
|
|
+ BUG_ON(st->output_buffers == NULL);
|
|
+ tso_buf = st->output_buffers;
|
|
+ /* Nothing to do if the fragment is used up or the buffer is full */
|
|
+ if (st->ifc.len == 0) return;
|
|
+ if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return;
|
|
+
|
|
+ BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
|
|
+
|
|
+ space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length;
|
|
+ n = min(st->ifc.len, space);
|
|
+
|
|
+ memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n);
|
|
+ /* Account for the n bytes just copied on both the input and output side */
|
|
+ st->remaining_len -= n;
|
|
+ st->ifc.len -= n;
|
|
+ tso_buf->length += n;
|
|
+ st->ifc.addr += n;
|
|
+
|
|
+ BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH);
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+/* Give every buffer on st->output_buffers back to vnic->tx_bufs. */
|
|
+static inline void multi_post_unwind(netfront_accel_vnic *vnic,
|
|
+ struct netfront_accel_multi_state *st)
|
|
+{
|
|
+ struct netfront_accel_tso_buffer *tso_buf;
|
|
+
|
|
+ DPRINTK("%s\n", __FUNCTION__);
|
|
+
|
|
+ while (st->output_buffers != NULL) {
|
|
+ tso_buf = st->output_buffers;
|
|
+ st->output_buffers = tso_buf->next;
|
|
+ st->buffers--;
|
|
+ netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
|
|
+ }
|
|
+ BUG_ON(st->buffers != 0); /* every counted buffer must have been on the list */
|
|
+}
|
|
+
|
|
+/* Copy an skb into a chain of TX buffers and post it as one packet; returns NETFRONT_ACCEL_STATUS_GOOD, or NETFRONT_ACCEL_STATUS_BUSY after releasing every buffer taken. */
|
|
+static enum netfront_accel_post_status
|
|
+netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb)
|
|
+{
|
|
+ struct netfront_accel_tso_buffer *tso_buf;
|
|
+ struct netfront_accel_multi_state state;
|
|
+ ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS];
|
|
+ skb_frag_t *f;
|
|
+ int frag_i, rc, dma_id;
|
|
+
|
|
+ multi_post_start(&state, skb);
|
|
+ /* -1: the cursor starts on the linear data area, before frags[0] */
|
|
+ frag_i = -1;
|
|
+
|
|
+ if (skb->ip_summed == CHECKSUM_HW) {
|
|
+ /* Set to zero to encourage falcon to work it out for us */
|
|
+ *(u16*)(skb->h.raw + skb->csum) = 0;
|
|
+ }
|
|
+
|
|
+ if (multi_post_start_new_buffer(vnic, &state)) {
|
|
+ DPRINTK("%s: out of buffers\n", __FUNCTION__);
|
|
+ goto unwind;
|
|
+ }
|
|
+ /* Copy loop: runs until the last page fragment has been consumed */
|
|
+ while (1) {
|
|
+ multi_post_fill_buffer_with_fragment(vnic, &state);
|
|
+
|
|
+ /* Move onto the next fragment? */
|
|
+ if (state.ifc.len == 0) {
|
|
+ if (++frag_i >= skb_shinfo(skb)->nr_frags)
|
|
+ /* End of payload reached. */
|
|
+ break;
|
|
+ f = &skb_shinfo(skb)->frags[frag_i];
|
|
+ state.ifc.len = f->size;
|
|
+ state.ifc.addr = page_address(f->page) + f->page_offset;
|
|
+ }
|
|
+
|
|
+ /* Start a new buffer? */
|
|
+ if ((state.output_buffers->length ==
|
|
+ NETFRONT_ACCEL_TX_BUF_LENGTH) &&
|
|
+ multi_post_start_new_buffer(vnic, &state)) {
|
|
+ DPRINTK("%s: out of buffers\n", __FUNCTION__);
|
|
+ goto unwind;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Check for space */
|
|
+ if (ef_vi_transmit_space(&vnic->vi) < state.buffers) {
|
|
+ DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers);
|
|
+ goto unwind;
|
|
+ }
|
|
+
|
|
+ /* Store the skb in what will be the last buffer's context */
|
|
+ state.output_buffers->buf->skb = skb;
|
|
+ /* Remember dma_id of what will be the last buffer */
|
|
+ dma_id = state.output_buffers->buf->buf_id;
|
|
+
|
|
+ /*
|
|
+ * Make an iovec of the buffers in the list, reversing the
|
|
+ * buffers as we go as they are constructed on a stack
|
|
+ */
|
|
+ tso_buf = state.output_buffers;
|
|
+ for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) {
|
|
+ iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr;
|
|
+ iovecs[frag_i].iov_len = tso_buf->length;
|
|
+ tso_buf = tso_buf->next;
|
|
+ }
|
|
+
|
|
+ rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id);
|
|
+ BUG_ON(rc != 0); /* We checked for space already, so it really should succeed */
|
|
+ /* Track number of tx fastpath stats */
|
|
+ vnic->netdev_stats.fastpath_tx_bytes += skb->len;
|
|
+ vnic->netdev_stats.fastpath_tx_pkts ++;
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+ {
|
|
+ u32 n;
|
|
+ n = vnic->netdev_stats.fastpath_tx_pkts -
|
|
+ (u32)vnic->stats.fastpath_tx_completions;
|
|
+ if (n > vnic->stats.fastpath_tx_pending_max)
|
|
+ vnic->stats.fastpath_tx_pending_max = n;
|
|
+ }
|
|
+#endif
|
|
+ return NETFRONT_ACCEL_STATUS_GOOD;
|
|
+
|
|
+unwind:
|
|
+ multi_post_unwind(vnic, &state);
|
|
+
|
|
+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
|
|
+
|
|
+ return NETFRONT_ACCEL_STATUS_BUSY;
|
|
+}
|
|
+
|
|
+/* Copy an skb that fits in one TX buffer and post it; returns NETFRONT_ACCEL_STATUS_GOOD, or NETFRONT_ACCEL_STATUS_BUSY if there is no TX space or no buffer. */
|
|
+static enum netfront_accel_post_status
|
|
+netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb)
|
|
+{
|
|
+ struct netfront_accel_tso_buffer *tso_buf;
|
|
+ struct netfront_accel_pkt_desc *buf;
|
|
+ u8 *kva;
|
|
+ int rc;
|
|
+ /* Both BUSY exits happen before anything has been taken or modified */
|
|
+ if (ef_vi_transmit_space(&vnic->vi) < 1) {
|
|
+ DPRINTK("%s: No TX space\n", __FUNCTION__);
|
|
+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
|
|
+ return NETFRONT_ACCEL_STATUS_BUSY;
|
|
+ }
|
|
+
|
|
+ buf = netfront_accel_buf_get(vnic->tx_bufs);
|
|
+ if (buf == NULL) {
|
|
+ DPRINTK("%s: No buffer for TX\n", __FUNCTION__);
|
|
+ NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++);
|
|
+ return NETFRONT_ACCEL_STATUS_BUSY;
|
|
+ }
|
|
+
|
|
+ /* Track number of tx fastpath stats */
|
|
+ vnic->netdev_stats.fastpath_tx_pkts++;
|
|
+ vnic->netdev_stats.fastpath_tx_bytes += skb->len;
|
|
+
|
|
+#if NETFRONT_ACCEL_STATS
|
|
+ {
|
|
+ u32 n;
|
|
+ n = vnic->netdev_stats.fastpath_tx_pkts -
|
|
+ (u32)vnic->stats.fastpath_tx_completions;
|
|
+ if (n > vnic->stats.fastpath_tx_pending_max)
|
|
+ vnic->stats.fastpath_tx_pending_max = n;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ /* Store the context */
|
|
+ buf->skb = skb;
|
|
+
|
|
+ kva = buf->pkt_kva;
|
|
+
|
|
+ if (skb->ip_summed == CHECKSUM_HW) {
|
|
+ /* Set to zero to encourage falcon to work it out for us */
|
|
+ *(u16*)(skb->h.raw + skb->csum) = 0;
|
|
+ }
|
|
+ NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
|
|
+ (skb, idx, frag_data, frag_len, {
|
|
+ /* Copy in payload */
|
|
+ VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva);
|
|
+ memcpy(kva, frag_data, frag_len);
|
|
+ kva += frag_len;
|
|
+ });
|
|
+
|
|
+ VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__,
|
|
+ buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr);
|
|
+
|
|
+
|
|
+ /* Set up the TSO meta-data for a single buffer/packet */
|
|
+ tso_buf = (struct netfront_accel_tso_buffer *)
|
|
+ (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
|
|
+ tso_buf->next = NULL;
|
|
+ tso_buf->buf = buf;
|
|
+ tso_buf->length = skb->len;
|
|
+
|
|
+ rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len,
|
|
+ buf->buf_id);
|
|
+ /* We checked for space already, so it really should succeed */
|
|
+ BUG_ON(rc != 0);
|
|
+
|
|
+ return NETFRONT_ACCEL_STATUS_GOOD;
|
|
+}
|
|
+
|
|
+/* Decide whether skb can use the fast path and hand it to the TSO, single-buffer or multi-buffer sender; returns that sender's status, or NETFRONT_ACCEL_STATUS_CANT. */
|
|
+enum netfront_accel_post_status
|
|
+netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb)
|
|
+{
|
|
+ struct ethhdr *pkt_eth_hdr;
|
|
+ struct iphdr *pkt_ipv4_hdr;
|
|
+ int value, try_fastpath;
|
|
+
|
|
+ /*
|
|
+ * This assumes that the data field points to the dest mac
|
|
+ * address.
|
|
+ */
|
|
+ cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data);
|
|
+
|
|
+ /*
|
|
+ * NB very important that all things that could return "CANT"
|
|
+ * are tested before things that return "BUSY" as if it
|
|
+ * returns "BUSY" it is assumed that it won't return "CANT"
|
|
+ * next time it is tried
|
|
+ */
|
|
+
|
|
+ /*
|
|
+ * Do a fastpath send if fast path table lookup returns true.
|
|
+ * We do this without the table lock and so may get the wrong
|
|
+ * answer, but current opinion is that's not a big problem
|
|
+ */
|
|
+ try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table,
|
|
+ (cuckoo_hash_key *)(&key), &value);
|
|
+
|
|
+ if (!try_fastpath) {
|
|
+ VPRINTK("try fast path false for mac: " MAC_FMT "\n",
|
|
+ MAC_ARG(skb->data));
|
|
+
|
|
+ return NETFRONT_ACCEL_STATUS_CANT;
|
|
+ }
|
|
+
|
|
+ /* Check to see if the packet can be sent. */
|
|
+ if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) {
|
|
+ EPRINTK("%s: Packet header is too small\n", __FUNCTION__);
|
|
+ return NETFRONT_ACCEL_STATUS_CANT;
|
|
+ }
|
|
+ /* The IPv4 header is taken to follow the Ethernet header directly */
|
|
+ pkt_eth_hdr = (void*)skb->data;
|
|
+ pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1);
|
|
+
|
|
+ if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) {
|
|
+ DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__,
|
|
+ be16_to_cpu(pkt_eth_hdr->h_proto));
|
|
+ return NETFRONT_ACCEL_STATUS_CANT;
|
|
+ }
|
|
+
|
|
+ if (pkt_ipv4_hdr->protocol != IPPROTO_TCP &&
|
|
+ pkt_ipv4_hdr->protocol != IPPROTO_UDP) {
|
|
+ DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n",
|
|
+ __FUNCTION__, pkt_ipv4_hdr->protocol);
|
|
+ return NETFRONT_ACCEL_STATUS_CANT;
|
|
+ }
|
|
+
|
|
+ VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len,
|
|
+ skb_shinfo(skb)->gso_size);
|
|
+ /* Dispatch: GSO skbs first, then by whether the skb fits one buffer */
|
|
+ if (skb_shinfo(skb)->gso_size) {
|
|
+ return netfront_accel_enqueue_skb_tso(vnic, skb);
|
|
+ }
|
|
+
|
|
+ if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) {
|
|
+ return netfront_accel_enqueue_skb_single(vnic, skb);
|
|
+ }
|
|
+
|
|
+ return netfront_accel_enqueue_skb_multi(vnic, skb);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Copy the data to required end destination. NB. len is the total new
|
|
+ * length of the socket buffer, not the amount of data to copy
|
|
+ */
|
|
+inline
|
|
+int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb,
|
|
+ struct netfront_accel_pkt_desc *buf, int len)
|
|
+{
|
|
+ int i, extra = len - skb->len;
|
|
+ char c = 0; /* sink for the cache-warming reads; must start with a defined value */
|
|
+ int pkt_stride = vnic->rx_pkt_stride;
|
|
+ int skb_stride = vnic->rx_skb_stride;
|
|
+ char *skb_start;
|
|
+
|
|
+ /*
|
|
+ * This pulls stuff into the cache - have seen performance
|
|
+ * benefit in this, but disabled by default
|
|
+ */
|
|
+ skb_start = skb->data;
|
|
+ if (pkt_stride) {
|
|
+ for (i = 0; i < len; i += pkt_stride) {
|
|
+ c += ((volatile char*)(buf->pkt_kva))[i];
|
|
+ }
|
|
+ }
|
|
+ if (skb_stride) {
|
|
+ for (i = skb->len; i < len ; i += skb_stride) {
|
|
+ c += ((volatile char*)(skb_start))[i];
|
|
+ }
|
|
+ }
|
|
+ /* Append the extra bytes from the packet buffer if the skb has tailroom for them */
|
|
+ if (skb_tailroom(skb) >= extra) {
|
|
+ memcpy(skb_put(skb, extra), buf->pkt_kva, extra);
|
|
+ return 0;
|
|
+ }
|
|
+ /* Not enough tailroom: nothing was copied */
|
|
+ return -ENOSPC;
|
|
+}
|
|
+
|
|
+
|
|
+/*
+ * Abandon any partially assembled multi-fragment packet: free the skb
+ * held in the jumbo state (if there is one) and mark reassembly as no
+ * longer in progress.
+ */
+static void discard_jumbo_state(netfront_accel_vnic *vnic)
+{
+	struct sk_buff *partial = vnic->jumbo_state.skb;
+
+	if (partial != NULL) {
+		dev_kfree_skb_any(partial);
+		vnic->jumbo_state.skb = NULL;
+	}
+
+	vnic->jumbo_state.in_progress = 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Finish off a fully received packet and hand it to the network stack.
+ *
+ * Along the way the source MAC is added to the fastpath table if it is
+ * not there yet, and a packet whose destination MAC is not ours is
+ * reported through netfront_accel_msg_tx_fastpath().
+ */
+static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic,
+					   struct sk_buff *skb)
+{
+	unsigned char *src_mac = skb->data + ETH_ALEN;
+	cuckoo_hash_mac_key key;
+	struct net_device *net_dev;
+	unsigned long flags;
+	int value;
+
+	key = cuckoo_mac_to_key(src_mac);
+
+	/*
+	 * If this is a MAC address that we want to do fast path TX
+	 * to, and we don't already, add it to the fastpath table.
+	 * The initial lookup is done without the table lock and so
+	 * may get the wrong answer, but current opinion is that's not
+	 * a big problem
+	 */
+	if (is_valid_ether_addr(src_mac) &&
+	    !cuckoo_hash_lookup(&vnic->fastpath_table,
+				(cuckoo_hash_key *)&key, &value)) {
+		spin_lock_irqsave(&vnic->table_lock, flags);
+		cuckoo_hash_add_check(&vnic->fastpath_table,
+				      (cuckoo_hash_key *)&key, 1, 1);
+		spin_unlock_irqrestore(&vnic->table_lock, flags);
+	}
+
+	/* compare_ether_addr() is non-zero when the addresses differ */
+	if (compare_ether_addr(skb->data, vnic->mac)) {
+		struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
+		char *l4_hdr;
+		u16 port;
+
+		DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n",
+			__FUNCTION__, MAC_ARG(skb->data));
+
+		/* The transport header follows the variable length IP header */
+		l4_hdr = (char *)ip + 4 * ip->ihl;
+
+		if (ip->protocol != IPPROTO_TCP) {
+			/* Anything that is not TCP is treated as UDP */
+			EPRINTK_ON(ip->protocol != IPPROTO_UDP);
+			port = ((struct udphdr *)l4_hdr)->dest;
+		} else {
+			port = ((struct tcphdr *)l4_hdr)->dest;
+		}
+
+		netfront_accel_msg_tx_fastpath(vnic, skb->data,
+					       ip->daddr, port,
+					       ip->protocol);
+	}
+
+	net_dev = vnic->net_dev;
+	skb->dev = net_dev;
+	skb->protocol = eth_type_trans(skb, net_dev);
+	/* CHECKSUM_UNNECESSARY as hardware has done it already */
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	/* Deliver directly unless the SSR code has taken the skb */
+	if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb))
+		netif_receive_skb(skb);
+}
|
|
+
|
|
+
|
|
+/*
+ * Process one RX or RX_DISCARD event.
+ *
+ * For an RX event the received buffer is copied into an skb.  Packets
+ * spanning several buffers are reassembled using vnic->jumbo_state:
+ * "sop" marks the first fragment and "cont" says that more fragments
+ * follow.  The DMA buffer is always handed back with
+ * netfront_accel_vi_post_rx_or_free() once its contents have been
+ * dealt with.
+ *
+ * Returns 1 if a complete packet was passed to
+ * netfront_accel_vi_rx_complete(), and 0 otherwise (fragment stored,
+ * packet dropped, or discard/error event).
+ */
+static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic,
+					      ef_event *ev)
+{
+	struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs;
+	struct netfront_accel_pkt_desc *buf = NULL;
+	struct sk_buff *skb;
+	int id, len, sop = 0, cont = 0;
+
+	VPRINTK("Rx event.\n");
+	/*
+	 * Complete the receive operation, and get the request id of
+	 * the buffer
+	 */
+	id = ef_vi_receive_done(&vnic->vi, ev);
+
+	if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) {
+		EPRINTK("Rx packet %d is invalid\n", id);
+		/* Carry on round the loop if more events */
+		goto bad_packet;
+	}
+	/* Get our buffer descriptor */
+	buf = netfront_accel_buf_find(bufinfo, id);
+
+	len = EF_EVENT_RX_BYTES(*ev);
+
+	/* An RX buffer has been removed from the DMA ring. */
+	vnic->rx_dma_level--;
+
+	if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) {
+		sop = EF_EVENT_RX_SOP(*ev);
+		cont = EF_EVENT_RX_CONT(*ev);
+
+		skb = vnic->jumbo_state.skb;
+
+		VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n",
+			id, len, sop, cont);
+
+		if (sop) {
+			if (!vnic->jumbo_state.in_progress) {
+				vnic->jumbo_state.in_progress = 1;
+				BUG_ON(vnic->jumbo_state.skb != NULL);
+			} else {
+				/*
+				 * This fragment shows a missing tail in
+				 * previous one, but is itself possibly OK
+				 */
+				DPRINTK("sop and in_progress => no tail\n");
+
+				/* Release the socket buffer we already had */
+				discard_jumbo_state(vnic);
+
+				/* Now start processing this fragment */
+				vnic->jumbo_state.in_progress = 1;
+				skb = NULL;
+			}
+		} else if (!vnic->jumbo_state.in_progress) {
+			DPRINTK("!sop and !in_progress => missing head\n");
+			goto missing_head;
+		}
+
+		if (!cont) {
+			/* Update state for next time */
+			vnic->jumbo_state.in_progress = 0;
+			vnic->jumbo_state.skb = NULL;
+		} else if (!vnic->jumbo_state.in_progress) {
+			DPRINTK("cont and !in_progress => missing head\n");
+			goto missing_head;
+		}
+
+		if (skb == NULL) {
+			BUG_ON(!sop);
+
+			/*
+			 * A single-buffer packet gets an skb of exactly
+			 * the right size; the first fragment of a longer
+			 * packet gets one sized from the device MTU.
+			 */
+			if (!cont)
+				skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC);
+			else
+				skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN,
+						GFP_ATOMIC);
+
+			if (skb == NULL) {
+				DPRINTK("%s: Couldn't get an rx skb.\n",
+					__FUNCTION__);
+				netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+				/*
+				 * Dropping this fragment means we
+				 * should discard the rest too
+				 */
+				discard_jumbo_state(vnic);
+
+				/* Carry on round the loop if more events */
+				return 0;
+			}
+
+		}
+
+		/* Copy the data to required end destination */
+		if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) {
+			/*
+			 * No space in the skb - suggests > MTU packet
+			 * received
+			 */
+			EPRINTK("%s: Rx packet too large (%d)\n",
+				__FUNCTION__, len);
+			netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+			/*
+			 * discard_jumbo_state() only frees the skb that
+			 * is still recorded in jumbo_state.  An skb that
+			 * was allocated just above, or whose reference
+			 * was already cleared by the !cont case, is not
+			 * recorded there and has to be freed here or it
+			 * would be leaked.
+			 */
+			if (skb != vnic->jumbo_state.skb)
+				dev_kfree_skb_any(skb);
+			discard_jumbo_state(vnic);
+			return 0;
+		}
+
+		/* Put the buffer back in the DMA queue. */
+		netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+
+		if (cont) {
+			/* More to come: remember the skb for the next event */
+			vnic->jumbo_state.skb = skb;
+
+			return 0;
+		} else {
+			/* Track number of rx fastpath packets */
+			vnic->netdev_stats.fastpath_rx_pkts++;
+			vnic->netdev_stats.fastpath_rx_bytes += len;
+
+			netfront_accel_vi_rx_complete(vnic, skb);
+
+			return 1;
+		}
+	} else {
+		BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD);
+
+		/* Log and count the discard according to its reason */
+		if (EF_EVENT_RX_DISCARD_TYPE(*ev)
+		    == EF_EVENT_RX_DISCARD_TRUNC) {
+			DPRINTK("%s: " EF_EVENT_FMT
+				" buffer %d FRM_TRUNC q_id %d\n",
+				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc);
+		} else if (EF_EVENT_RX_DISCARD_TYPE(*ev)
+			   == EF_EVENT_RX_DISCARD_OTHER) {
+			DPRINTK("%s: " EF_EVENT_FMT
+				" buffer %d RX_DISCARD_OTHER q_id %d\n",
+				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_discard_other);
+		} else if (EF_EVENT_RX_DISCARD_TYPE(*ev) ==
+			   EF_EVENT_RX_DISCARD_CSUM_BAD) {
+			DPRINTK("%s: " EF_EVENT_FMT
+				" buffer %d DISCARD CSUM_BAD q_id %d\n",
+				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_csum_bad);
+		} else if (EF_EVENT_RX_DISCARD_TYPE(*ev) ==
+			   EF_EVENT_RX_DISCARD_CRC_BAD) {
+			DPRINTK("%s: " EF_EVENT_FMT
+				" buffer %d DISCARD CRC_BAD q_id %d\n",
+				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_crc_bad);
+		} else {
+			BUG_ON(EF_EVENT_RX_DISCARD_TYPE(*ev) !=
+			       EF_EVENT_RX_DISCARD_RIGHTS);
+			DPRINTK("%s: " EF_EVENT_FMT
+				" buffer %d DISCARD RIGHTS q_id %d\n",
+				__FUNCTION__, EF_EVENT_PRI_ARG(*ev), id,
+				EF_EVENT_RX_DISCARD_Q_ID(*ev) );
+			NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_rights_bad);
+		}
+	}
+
+	/* discard type drops through here */
+
+bad_packet:
+	/* Release the socket buffer we already had */
+	discard_jumbo_state(vnic);
+
+missing_head:
+	BUG_ON(vnic->jumbo_state.in_progress != 0);
+	BUG_ON(vnic->jumbo_state.skb != NULL);
+
+	/* buf is only valid when the id passed the range check above */
+	if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE)
+		/* Put the buffer back in the DMA queue. */
+		netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf);
+
+	vnic->netdev_stats.fastpath_rx_errors++;
+
+	DPRINTK("%s experienced bad packet/missing fragment error: %d \n",
+		__FUNCTION__, ev->rx.flags);
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Retry the transmit of the skb parked in vnic->tx_skb, if there is
+ * one.  Unless the retry reports BUSY again, the parked skb is
+ * forgotten and the netfront queue is woken if it is ready.
+ */
+static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic)
+{
+	struct netfront_info *netfront =
+		(struct netfront_info *)netdev_priv(vnic->net_dev);
+	unsigned long irq_flags;
+	int status;
+
+	/*
+	 * We hold the vnic tx_lock which is sufficient to exclude
+	 * writes to tx_skb
+	 */
+	if (vnic->tx_skb == NULL)
+		return;
+
+	DPRINTK("%s trying to send spare buffer\n", __FUNCTION__);
+
+	status = netfront_accel_vi_tx_post(vnic, vnic->tx_skb);
+
+	if (status != NETFRONT_ACCEL_STATUS_BUSY) {
+		DPRINTK("%s restarting tx\n", __FUNCTION__);
+
+		/*
+		 * Need netfront tx_lock and vnic tx_lock to
+		 * write tx_skb
+		 */
+		spin_lock_irqsave(&netfront->tx_lock, irq_flags);
+
+		vnic->tx_skb = NULL;
+
+		if (netfront_check_queue_ready(vnic->net_dev)) {
+			netif_wake_queue(vnic->net_dev);
+			NETFRONT_ACCEL_STATS_OP
+				(vnic->stats.queue_wakes++);
+		}
+
+		spin_unlock_irqrestore(&netfront->tx_lock, irq_flags);
+	}
+
+	/*
+	 * Should never get a CANT, as it checks that before
+	 * deciding it was BUSY first time round
+	 */
+	BUG_ON(status == NETFRONT_ACCEL_STATUS_CANT);
+}
|
|
+
|
|
+
|
|
+/*
+ * Release every buffer in the chain starting at tso_buf: free the
+ * attached skb, if any, and return the buffer to vnic->tx_bufs.  When
+ * is_last is set, also try to send a pending tx_skb afterwards.
+ */
+static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic,
+					   struct netfront_accel_tso_buffer *tso_buf,
+					   int is_last)
+{
+	struct netfront_accel_tso_buffer *following;
+
+	/*
+	 * We get a single completion for every call to
+	 * ef_vi_transmitv so handle any other buffers which are part
+	 * of the same packet
+	 */
+	for (; tso_buf != NULL; tso_buf = following) {
+		if (tso_buf->buf->skb != NULL) {
+			dev_kfree_skb_any(tso_buf->buf->skb);
+			tso_buf->buf->skb = NULL;
+		}
+
+		/* Pick up the link before the buffer is given back */
+		following = tso_buf->next;
+
+		netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id);
+	}
+
+	/*
+	 * If this was the last one in the batch, we try and send any
+	 * pending tx_skb.  There should now be buffers and
+	 * descriptors
+	 */
+	if (is_last)
+		netfront_accel_vi_not_busy(vnic);
+}
|
|
+
|
|
+
|
|
+/*
+ * Process one TX completion event: unbundle the request ids it covers
+ * and release the buffers of each completed transmit, all under the
+ * vnic tx_lock.
+ */
+static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic,
+					       ef_event *ev)
+{
+	ef_request_id ids[EF_VI_TRANSMIT_BATCH];
+	unsigned long irq_flags;
+	int idx, n_ids;
+
+	/* Get the request ids for this tx completion event. */
+	n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids);
+
+	/* Take the tx buffer spin lock and hold for the duration */
+	spin_lock_irqsave(&vnic->tx_lock, irq_flags);
+
+	for (idx = 0; idx < n_ids; ++idx) {
+		struct netfront_accel_pkt_desc *buf;
+		struct netfront_accel_tso_buffer *tso_buf;
+		int is_last = (idx == (n_ids-1));
+
+		VPRINTK("Tx packet %d complete\n", ids[idx]);
+		buf = netfront_accel_buf_find(vnic->tx_bufs, ids[idx]);
+		NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++);
+
+		/* The TSO bookkeeping sits right after the packet data */
+		tso_buf = (struct netfront_accel_tso_buffer *)
+			(buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH);
+		BUG_ON(tso_buf->buf != buf);
+
+		netfront_accel_vi_tx_complete(vnic, tso_buf, is_last);
+	}
+
+	spin_unlock_irqrestore(&vnic->tx_lock, irq_flags);
+}
|
|
+
|
|
+
|
|
+/*
+ * Poll the event queue and dispatch RX, RX_DISCARD, TX and
+ * RX_NO_DESC_TRUNC events until either the queue is empty or
+ * rx_packets complete packets have been received.
+ *
+ * rx_packets must be positive.  Returns the number of complete packets
+ * received.
+ */
+int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets)
+{
+	ef_event ev[ACCEL_VI_POLL_EVENTS];
+	ef_event *cur;
+	int budget = rx_packets, n_events, idx;
+#if NETFRONT_ACCEL_STATS
+	int total_evs = 0, rx_evs = 0, tx_evs = 0;
+#endif
+	BUG_ON(rx_packets <= 0);
+
+	n_events = ef_eventq_poll(&vnic->vi, ev,
+				  min(budget, ACCEL_VI_POLL_EVENTS));
+	idx = 0;
+	NETFRONT_ACCEL_STATS_OP(total_evs += n_events);
+
+	VPRINTK("%s: %d events\n", __FUNCTION__, n_events);
+
+	/* Loop over each event */
+	while (n_events) {
+		cur = &ev[idx];
+
+		VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__,
+			EF_EVENT_PRI_ARG(*cur),
+			(unsigned long)(vnic->vi.evq_state->evq_ptr));
+
+		if ((EF_EVENT_TYPE(*cur) == EF_EVENT_TYPE_RX) ||
+		    (EF_EVENT_TYPE(*cur) == EF_EVENT_TYPE_RX_DISCARD)) {
+			/* Only a completed packet uses up budget */
+			budget -= netfront_accel_vi_poll_process_rx(vnic, cur);
+			BUG_ON(budget < 0);
+			NETFRONT_ACCEL_STATS_OP(rx_evs++);
+		} else if (EF_EVENT_TYPE(*cur) == EF_EVENT_TYPE_TX) {
+			netfront_accel_vi_poll_process_tx(vnic, cur);
+			NETFRONT_ACCEL_STATS_OP(tx_evs++);
+		} else if (EF_EVENT_TYPE(*cur) ==
+			   EF_EVENT_TYPE_RX_NO_DESC_TRUNC) {
+			DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n",
+				__FUNCTION__, EF_EVENT_PRI_ARG(*cur));
+			discard_jumbo_state(vnic);
+			NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++);
+		} else {
+			EPRINTK("Unexpected event " EF_EVENT_FMT "\n",
+				EF_EVENT_PRI_ARG(*cur));
+			NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++);
+		}
+
+		/* Still events left in the current batch? */
+		if (++idx != n_events)
+			continue;
+
+		/* Carry on round the loop if more events and more space */
+		if (budget == 0)
+			break;
+
+		n_events = ef_eventq_poll(&vnic->vi, ev,
+					  min(budget,
+					      ACCEL_VI_POLL_EVENTS));
+		idx = 0;
+		NETFRONT_ACCEL_STATS_OP(total_evs += n_events);
+	}
+
+#if NETFRONT_ACCEL_STATS
+	vnic->stats.event_count += total_evs;
+	vnic->stats.event_count_since_irq += total_evs;
+	if (total_evs > vnic->stats.events_per_poll_max)
+		vnic->stats.events_per_poll_max = total_evs;
+	if (rx_evs > vnic->stats.events_per_poll_rx_max)
+		vnic->stats.events_per_poll_rx_max = rx_evs;
+	if (tx_evs > vnic->stats.events_per_poll_tx_max)
+		vnic->stats.events_per_poll_tx_max = tx_evs;
+#endif
+
+	return rx_packets - budget;
+}
|
|
+
|
|
+
|
|
+/*
+ * Ask the hardware for a wakeup when the next event arrives.
+ *
+ * Returns 0 without requesting a wakeup if an event is already
+ * waiting in the event queue, and 1 once the wakeup has been
+ * requested.
+ */
+int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic)
+{
+	u32 sw_evq_ptr;
+
+	/* Sanity-check the vnic before it is used for anything at all */
+	BUG_ON(vnic == NULL);
+	BUG_ON(vnic->vi.evq_state == NULL);
+
+	VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state);
+
+	/* Do a quick check for an event. */
+	if (ef_eventq_has_event(&vnic->vi)) {
+		VPRINTK("%s: found event\n", __FUNCTION__);
+		return 0;
+	}
+
+	VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n",
+		vnic->evq_state.evq_ptr, vnic->vi.evq_mask);
+
+	/* Request a wakeup from the hardware. */
+	sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask;
+
+	BUG_ON(vnic->hw.falcon.evq_rptr == NULL);
+
+	VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr,
+		vnic->hw.falcon.evq_rptr);
+	/* The value written is the masked queue pointer shifted right by 3 */
+	*(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3);
+
+	return 1;
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/accel_xenbus.c 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,776 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/stddef.h>
|
|
+#include <linux/errno.h>
|
|
+
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/gnttab.h>
|
|
+
|
|
+#include "accel.h"
|
|
+#include "accel_util.h"
|
|
+#include "accel_msg_iface.h"
|
|
+#include "accel_bufs.h"
|
|
+#include "accel_ssr.h"
|
|
+/* drivers/xen/netfront/netfront.h */
|
|
+#include "netfront.h"
|
|
+
|
|
+/*
+ * Move the frontend into the Closing state, both in the vnic's own
+ * record and through net_accel_update_state().
+ */
+void netfront_accel_set_closing(netfront_accel_vnic *vnic)
+{
+	const XenbusState closing = XenbusStateClosing;
+
+	vnic->frontend_state = closing;
+	net_accel_update_state(vnic->dev, closing);
+}
|
|
+
|
|
+
|
|
+/*
+ * Xenbus watch callback for the frontend's "mac" node: re-read the
+ * MAC address into vnic->mac.  A failure to read is only logged.
+ */
+static void mac_address_change(struct xenbus_watch *watch,
+			       const char **vec, unsigned int len)
+{
+	/* The watch is embedded in the vnic it belongs to */
+	netfront_accel_vnic *vnic = container_of(watch, netfront_accel_vnic,
+						 mac_address_watch);
+	int rc;
+
+	DPRINTK("%s\n", __FUNCTION__);
+
+	rc = net_accel_xen_net_read_mac(vnic->dev, vnic->mac);
+	if (rc == 0)
+		return;
+
+	EPRINTK("%s: failed to read mac (%d)\n", __FUNCTION__, rc);
+}
|
|
+
|
|
+
|
|
+/*
+ * Register a xenbus watch on <nodename>/mac that calls
+ * mac_address_change().
+ *
+ * Returns 0 on success.  On failure the error from
+ * xenbus_watch_path2() is returned and the watch's node pointer is
+ * cleared.
+ */
+static int setup_mac_address_watch(struct xenbus_device *dev,
+				   netfront_accel_vnic *vnic)
+{
+	int err;
+
+	DPRINTK("Setting watch on %s/%s\n", dev->nodename, "mac");
+
+	err = xenbus_watch_path2(dev, dev->nodename, "mac",
+				 &vnic->mac_address_watch,
+				 mac_address_change);
+	if (!err)
+		return 0;
+
+	EPRINTK("%s: Failed to register xenbus watch: %d\n",
+		__FUNCTION__, err);
+	vnic->mac_address_watch.node = NULL;
+
+	return err;
+}
|
|
+
|
|
+
|
|
+/*
+ * Grant access to a page and publish the grant reference through
+ * xenbus as <nodename>/<name>.
+ *
+ * On success 0 is returned and the reference is stored in *gnt_ref.
+ * On failure a negative error is returned, the grant (if it was made)
+ * is revoked again and *gnt_ref is left untouched.
+ */
+static int make_named_grant(struct xenbus_device *dev, void *page,
+			    const char *name, grant_ref_t *gnt_ref)
+{
+	struct xenbus_transaction tr;
+	int err;
+	/*
+	 * Deliberately a signed int rather than grant_ref_t: a failed
+	 * grant is reported as a negative value, which an unsigned
+	 * type could never show.
+	 */
+	int gnt;
+
+	gnt = net_accel_grant_page(dev, virt_to_mfn(page), 0);
+	if (gnt < 0)
+		return gnt;
+
+	/* Publish the reference, retrying if the transaction raced */
+	do {
+		err = xenbus_transaction_start(&tr);
+		if (err != 0) {
+			EPRINTK("%s: transaction start failed %d\n",
+				__FUNCTION__, err);
+			goto fail;
+		}
+		err = xenbus_printf(tr, dev->nodename, name, "%d", gnt);
+		if (err != 0) {
+			EPRINTK("%s: xenbus_printf failed %d\n", __FUNCTION__,
+				err);
+			xenbus_transaction_end(tr, 1);
+			goto fail;
+		}
+		err = xenbus_transaction_end(tr, 0);
+	} while (err == -EAGAIN);
+
+	if (err != 0) {
+		EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
+		goto fail;
+	}
+
+	*gnt_ref = gnt;
+
+	return 0;
+
+fail:
+	/* Nothing was published, so do not leave the page granted */
+	net_accel_ungrant_page(gnt);
+
+	return err;
+}
|
|
+
|
|
+
|
|
+/*
+ * Revoke a grant and remove its <nodename>/<name> entry from xenbus.
+ *
+ * The grant is always revoked; the return value only reflects the
+ * xenbus update (0 on success, otherwise the error that stopped it).
+ */
+static int remove_named_grant(struct xenbus_device *dev,
+			      const char *name, grant_ref_t gnt_ref)
+{
+	struct xenbus_transaction tr;
+	int err;
+
+	net_accel_ungrant_page(gnt_ref);
+
+	/* Retry the removal for as long as the commit reports -EAGAIN */
+	for (;;) {
+		err = xenbus_transaction_start(&tr);
+		if (err != 0) {
+			EPRINTK("%s: transaction start failed %d\n",
+				__FUNCTION__, err);
+			return err;
+		}
+
+		err = xenbus_rm(tr, dev->nodename, name);
+		if (err != 0) {
+			EPRINTK("%s: xenbus_rm failed %d\n", __FUNCTION__,
+				err);
+			xenbus_transaction_end(tr, 1);
+			return err;
+		}
+
+		err = xenbus_transaction_end(tr, 0);
+		if (err != -EAGAIN)
+			break;
+	}
+
+	if (err != 0) {
+		EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err);
+		return err;
+	}
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Allocate and initialise the vnic state for a netfront device, read
+ * the device's MAC address and set a watch on it.
+ *
+ * Returns the new vnic, or an ERR_PTR(): -EALREADY if the device
+ * already has accelerator state, -ENOMEM if allocation fails, or the
+ * error from reading the MAC / registering the watch.
+ */
+static
+netfront_accel_vnic *netfront_accel_vnic_ctor(struct net_device *net_dev,
+					      struct xenbus_device *dev)
+{
+	struct netfront_info *np =
+		(struct netfront_info *)netdev_priv(net_dev);
+	netfront_accel_vnic *vnic;
+	int err;
+
+	/*
+	 * A bug in earlier versions of Xen accel plugin system meant
+	 * you could be probed twice for the same device on suspend
+	 * cancel.  Be tolerant of that.
+	 */
+	if (np->accel_priv != NULL)
+		return ERR_PTR(-EALREADY);
+
+	/* Alloc mem for state */
+	vnic = kzalloc(sizeof(netfront_accel_vnic), GFP_KERNEL);
+	if (vnic == NULL) {
+		EPRINTK("%s: no memory for vnic state\n", __FUNCTION__);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock_init(&vnic->tx_lock);
+
+	mutex_init(&vnic->vnic_mutex);
+	mutex_lock(&vnic->vnic_mutex);
+
+	/* Store so state can be retrieved from device */
+	BUG_ON(np->accel_priv != NULL);
+	np->accel_priv = vnic;
+	vnic->dev = dev;
+	vnic->net_dev = net_dev;
+	spin_lock_init(&vnic->irq_enabled_lock);
+	netfront_accel_ssr_init(&vnic->ssr_state);
+
+	init_waitqueue_head(&vnic->state_wait_queue);
+	vnic->backend_state = XenbusStateUnknown;
+	vnic->frontend_state = XenbusStateClosed;
+	vnic->removing = 0;
+	vnic->domU_state_is_setup = 0;
+	vnic->dom0_state_is_setup = 0;
+	vnic->poll_enabled = 0;
+	vnic->tx_enabled = 0;
+	vnic->tx_skb = NULL;
+
+	/* INIT_WORK() takes a different number of arguments before 2.6.20 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
+	INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend);
+#else
+	INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend, vnic);
+#endif
+
+	netfront_accel_debugfs_create(vnic);
+
+	mutex_unlock(&vnic->vnic_mutex);
+
+	err = net_accel_xen_net_read_mac(dev, vnic->mac);
+	if (!err) {
+		/* Setup a watch on the frontend's MAC address */
+		err = setup_mac_address_watch(dev, vnic);
+	}
+	if (!err)
+		return vnic;
+
+	/* Either MAC step failed: undo everything done above */
+	mutex_lock(&vnic->vnic_mutex);
+
+	netfront_accel_debugfs_remove(vnic);
+
+	netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
+
+	EPRINTK_ON(vnic->tx_skb != NULL);
+
+	vnic->frontend_state = XenbusStateUnknown;
+	net_accel_update_state(dev, XenbusStateUnknown);
+
+	mutex_unlock(&vnic->vnic_mutex);
+
+	np->accel_priv = NULL;
+	kfree(vnic);
+
+	return ERR_PTR(err);
+}
|
|
+
|
|
+
|
|
+/*
+ * Tear down a vnic: drop the MAC watch, flush outstanding work,
+ * release the debugfs and SSR state, set the frontend state to
+ * Unknown and free the vnic.
+ */
+static void netfront_accel_vnic_dtor(netfront_accel_vnic *vnic)
+{
+	struct netfront_info *np =
+		(struct netfront_info *)netdev_priv(vnic->net_dev);
+
+	/*
+	 * Now we don't hold the lock any more it is safe to remove
+	 * this watch and synchronise with the completion of
+	 * watches
+	 */
+	DPRINTK("%s: unregistering xenbus mac watch\n", __FUNCTION__);
+	unregister_xenbus_watch(&vnic->mac_address_watch);
+	kfree(vnic->mac_address_watch.node);
+
+	flush_workqueue(netfront_accel_workqueue);
+
+	mutex_lock(&vnic->vnic_mutex);
+
+	netfront_accel_debugfs_remove(vnic);
+	netfront_accel_ssr_fini(vnic, &vnic->ssr_state);
+
+	EPRINTK_ON(vnic->tx_skb != NULL);
+
+	vnic->frontend_state = XenbusStateUnknown;
+	net_accel_update_state(vnic->dev, XenbusStateUnknown);
+
+	mutex_unlock(&vnic->vnic_mutex);
+
+	/* Detach from the netfront device before the memory goes away */
+	np->accel_priv = NULL;
+	kfree(vnic);
+}
|
|
+
|
|
+
|
|
+/*
+ * Set up the state this domain shares with the backend: buffer
+ * bookkeeping, the control and message pages (granted and published
+ * through xenbus), and the message and net event channels (also
+ * published through xenbus).
+ *
+ * Returns 0 on success.  On failure everything set up so far is undone
+ * in reverse order and a negative error is returned.
+ */
+static int vnic_setup_domU_shared_state(struct xenbus_device *dev,
+					netfront_accel_vnic *vnic)
+{
+	struct xenbus_transaction tr;
+	int err;
+	int msgs_per_queue;
+
+
+	DPRINTK("Setting up domU shared state.\n");
+
+	/* Each of the two queues gets half of the message page */
+	msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg);
+
+	/* Allocate buffer state */
+	vnic->tx_bufs = netfront_accel_init_bufs(&vnic->tx_lock);
+	if (vnic->tx_bufs == NULL) {
+		err = -ENOMEM;
+		EPRINTK("%s: Failed to allocate tx buffers\n", __FUNCTION__);
+		goto fail_tx_bufs;
+	}
+
+	vnic->rx_bufs = netfront_accel_init_bufs(NULL);
+	if (vnic->rx_bufs == NULL) {
+		err = -ENOMEM;
+		EPRINTK("%s: Failed to allocate rx buffers\n", __FUNCTION__);
+		goto fail_rx_bufs;
+	}
+
+	/*
+	 * This allocates two pages, one for the shared page and one
+	 * for the message queue.
+	 */
+	vnic->shared_page = (struct net_accel_shared_page *)
+		__get_free_pages(GFP_KERNEL, 1);
+	if (vnic->shared_page == NULL) {
+		EPRINTK("%s: no memory for shared pages\n", __FUNCTION__);
+		err = -ENOMEM;
+		goto fail_shared_page;
+	}
+
+	/* First half of the second page: messages from dom0 */
+	net_accel_msg_init_queue
+		(&vnic->from_dom0, &vnic->shared_page->queue0,
+		 (struct net_accel_msg *)((u8*)vnic->shared_page + PAGE_SIZE),
+		 msgs_per_queue);
+
+	/* Second half of the second page: messages to dom0 */
+	net_accel_msg_init_queue
+		(&vnic->to_dom0, &vnic->shared_page->queue1,
+		 (struct net_accel_msg *)((u8*)vnic->shared_page +
+					  (3 * PAGE_SIZE / 2)),
+		 msgs_per_queue);
+
+	vnic->msg_state = NETFRONT_ACCEL_MSG_NONE;
+
+	err = make_named_grant(dev, vnic->shared_page, "accel-ctrl-page",
+			       &vnic->ctrl_page_gnt);
+	if (err) {
+		EPRINTK("couldn't make ctrl-page named grant\n");
+		goto fail_ctrl_page_grant;
+	}
+
+	err = make_named_grant(dev, (u8*)vnic->shared_page + PAGE_SIZE,
+			       "accel-msg-page", &vnic->msg_page_gnt);
+	if (err) {
+		EPRINTK("couldn't make msg-page named grant\n");
+		goto fail_msg_page_grant;
+	}
+
+	/* Create xenbus msg event channel */
+	err = bind_listening_port_to_irqhandler
+		(dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
+		 SA_SAMPLE_RANDOM, "vnicctrl", vnic);
+	if (err < 0) {
+		EPRINTK("Couldn't bind msg event channel\n");
+		goto fail_msg_irq;
+	}
+	vnic->msg_channel_irq = err;
+	vnic->msg_channel = irq_to_evtchn_port(vnic->msg_channel_irq);
+
+	/* Create xenbus net event channel */
+	err = bind_listening_port_to_irqhandler
+		(dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
+		 SA_SAMPLE_RANDOM, "vnicfront", vnic);
+	if (err < 0) {
+		EPRINTK("Couldn't bind net event channel\n");
+		goto fail_net_irq;
+	}
+	vnic->net_channel_irq = err;
+	vnic->net_channel = irq_to_evtchn_port(vnic->net_channel_irq);
+	/* Want to ensure we don't get interrupts before we're ready */
+	netfront_accel_disable_net_interrupts(vnic);
+
+	DPRINTK("otherend %d has msg ch %u (%u) and net ch %u (%u)\n",
+		dev->otherend_id, vnic->msg_channel, vnic->msg_channel_irq,
+		vnic->net_channel, vnic->net_channel_irq);
+
+	/* Publish both event channels, retrying if the transaction raced */
+	do {
+		err = xenbus_transaction_start(&tr);
+		if (err != 0) {
+			EPRINTK("%s: Transaction start failed %d\n",
+				__FUNCTION__, err);
+			goto fail_transaction;
+		}
+
+		err = xenbus_printf(tr, dev->nodename, "accel-msg-channel",
+				    "%u", vnic->msg_channel);
+		if (err != 0) {
+			EPRINTK("%s: event channel xenbus write failed %d\n",
+				__FUNCTION__, err);
+			xenbus_transaction_end(tr, 1);
+			goto fail_transaction;
+		}
+
+		err = xenbus_printf(tr, dev->nodename, "accel-net-channel",
+				    "%u", vnic->net_channel);
+		if (err != 0) {
+			EPRINTK("%s: net channel xenbus write failed %d\n",
+				__FUNCTION__, err);
+			xenbus_transaction_end(tr, 1);
+			goto fail_transaction;
+		}
+
+		err = xenbus_transaction_end(tr, 0);
+	} while (err == -EAGAIN);
+
+	if (err != 0) {
+		EPRINTK("%s: Transaction end failed %d\n", __FUNCTION__, err);
+		goto fail_transaction;
+	}
+
+	DPRINTK("Completed setting up domU shared state\n");
+
+	return 0;
+
+	/*
+	 * Error unwinding.  Each label undoes the steps that had
+	 * succeeded before the jump, newest first, so the msg-page
+	 * grant (made second) is removed before the ctrl-page grant
+	 * (made first).
+	 */
+fail_transaction:
+
+	unbind_from_irqhandler(vnic->net_channel_irq, vnic);
+fail_net_irq:
+
+	unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
+fail_msg_irq:
+
+	remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
+fail_msg_page_grant:
+
+	remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
+fail_ctrl_page_grant:
+
+	free_pages((unsigned long)vnic->shared_page, 1);
+	vnic->shared_page = NULL;
+fail_shared_page:
+
+	netfront_accel_fini_bufs(vnic->rx_bufs);
+fail_rx_bufs:
+
+	netfront_accel_fini_bufs(vnic->tx_bufs);
+fail_tx_bufs:
+
+	/*
+	 * Undo the memory allocation created when we got the HELLO
+	 *
+	 * NOTE(review): by this point rx_bufs/tx_bufs have already been
+	 * passed to netfront_accel_fini_bufs() (or were never
+	 * allocated), whereas vnic_remove_domU_shared_state() makes
+	 * these calls in the opposite order - confirm that
+	 * netfront_accel_free_buffer_mem() tolerates this.
+	 */
+	netfront_accel_free_buffer_mem(&vnic->bufpages,
+				       vnic->rx_bufs,
+				       vnic->tx_bufs);
+
+	DPRINTK("Failed to setup domU shared state with code %d\n", err);
+
+	return err;
+}
|
|
+
|
|
+
|
|
+/*
+ * Undo vnic_setup_domU_shared_state(): withdraw the event channel
+ * entries from xenbus, unbind both irq handlers, remove the page
+ * grants, and free the shared pages and the buffer state.
+ */
+static void vnic_remove_domU_shared_state(struct xenbus_device *dev,
+					  netfront_accel_vnic *vnic)
+{
+	struct xenbus_transaction tr;
+
+	/*
+	 * Don't remove any watches because we currently hold the
+	 * mutex and the watches take the mutex.
+	 */
+
+	DPRINTK("%s: removing event channel irq handlers %d %d\n",
+		__FUNCTION__, vnic->net_channel_irq, vnic->msg_channel_irq);
+
+	/* Best effort: give up quietly if no transaction can be started */
+	for (;;) {
+		if (xenbus_transaction_start(&tr) != 0)
+			break;
+		xenbus_rm(tr, dev->nodename, "accel-msg-channel");
+		xenbus_rm(tr, dev->nodename, "accel-net-channel");
+		if (xenbus_transaction_end(tr, 0) != -EAGAIN)
+			break;
+	}
+
+	unbind_from_irqhandler(vnic->net_channel_irq, vnic);
+	unbind_from_irqhandler(vnic->msg_channel_irq, vnic);
+
+	/* ungrant pages for msg channel */
+	remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt);
+	remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt);
+	free_pages((unsigned long)vnic->shared_page, 1);
+	vnic->shared_page = NULL;
+
+	/* ungrant pages for buffers, and free buffer memory */
+	netfront_accel_free_buffer_mem(&vnic->bufpages,
+				       vnic->rx_bufs,
+				       vnic->tx_bufs);
+	netfront_accel_fini_bufs(vnic->rx_bufs);
+	netfront_accel_fini_bufs(vnic->tx_bufs);
+}
|
|
+
|
|
+
|
|
+/*
+ * Set up the state that depends on the backend: construct the virtual
+ * interface and queue a pass of the message handler.  The dev argument
+ * is not used.
+ */
+static void vnic_setup_dom0_shared_state(struct xenbus_device *dev,
+					 netfront_accel_vnic *vnic)
+{
+	struct work_struct *msg_work = &vnic->msg_from_bend;
+
+	DPRINTK("Setting up dom0 shared state\n");
+
+	netfront_accel_vi_ctor(vnic);
+
+	/*
+	 * Message processing will be enabled when this function
+	 * returns, but we might have missed an interrupt.  Schedule a
+	 * check just in case.
+	 */
+	queue_work(netfront_accel_workqueue, msg_work);
+}
|
|
+
|
|
+
|
|
+/*
+ * Undo vnic_setup_dom0_shared_state(): stop the fastpath first, then
+ * destroy the virtual interface.  The dev argument is not used.
+ */
+static void vnic_remove_dom0_shared_state(struct xenbus_device *dev,
+					  netfront_accel_vnic *vnic)
+{
+	DPRINTK("Removing dom0 shared state\n");
+
+	vnic_stop_fastpath(vnic);
+	netfront_accel_vi_dtor(vnic);
+}
|
|
+
|
|
+
|
|
+/*************************************************************************/
|
|
+
|
|
+/*
|
|
+ * The following code handles accelstate changes between the frontend
|
|
+ * and the backend. In response to transitions, calls the following
|
|
+ * functions in matching pairs:
|
|
+ *
|
|
+ * vnic_setup_domU_shared_state
|
|
+ * vnic_remove_domU_shared_state
|
|
+ *
|
|
+ * vnic_setup_dom0_shared_state
|
|
+ * vnic_remove_dom0_shared_state
|
|
+ *
|
|
+ * Valid state transitions for DomU are as follows:
|
|
+ *
|
|
+ * Closed->Init on probe or in response to Init from dom0
|
|
+ *
|
|
+ * Init->Connected in response to Init from dom0
|
|
+ * Init->Closing on error providing dom0 is in Init
|
|
+ * Init->Closed on remove or in response to Closing from dom0
|
|
+ *
|
|
+ * Connected->Closing on error/remove
|
|
+ * Connected->Closed in response to Closing from dom0
|
|
+ *
|
|
+ * Closing->Closed in response to Closing from dom0
|
|
+ *
|
|
+ */
|
|
+
|
|
+
|
|
+/* Function to deal with Xenbus accel state change in backend */
|
|
+static void netfront_accel_backend_accel_changed(netfront_accel_vnic *vnic,
|
|
+ XenbusState backend_state)
|
|
+{
|
|
+ struct xenbus_device *dev = vnic->dev;
|
|
+ XenbusState frontend_state;
|
|
+ int state = (int)XenbusStateUnknown; /* used if the accelstate read below fails */
|
|
+
|
|
+ DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n",
|
|
+ __FUNCTION__, xenbus_strstate(vnic->backend_state),
|
|
+ xenbus_strstate(backend_state), dev->nodename, dev->otherend);
|
|
+
|
|
+ /*
|
|
+ * Ignore duplicate state changes. This can happen if the
|
|
+ * backend changes state twice in quick succession and the
|
|
+ * first watch fires in the frontend after the second
|
|
+ * transition has completed.
|
|
+ */
|
|
+ if (vnic->backend_state == backend_state)
|
|
+ return;
|
|
+
|
|
+ vnic->backend_state = backend_state;
|
|
+ frontend_state = vnic->frontend_state;
|
|
+
|
|
+ switch (backend_state) {
|
|
+ case XenbusStateInitialising:
|
|
+ /*
|
|
+ * It's possible for us to miss the closed state from
|
|
+ * dom0, so do the work here.
|
|
+ */
|
|
+ if (vnic->domU_state_is_setup) {
|
|
+ vnic_remove_domU_shared_state(dev, vnic);
|
|
+ vnic->domU_state_is_setup = 0;
|
|
+ }
|
|
+
|
|
+ if (frontend_state != XenbusStateInitialising) {
|
|
+ /* Make sure the backend doesn't go away. */
|
|
+ frontend_state = XenbusStateInitialising;
|
|
+ net_accel_update_state(dev, frontend_state);
|
|
+ xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state);
|
|
+ backend_state = (XenbusState)state;
|
|
+ if (backend_state != XenbusStateInitialising)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ /* Start the new connection. */
|
|
+ if (!vnic->removing) {
|
|
+ BUG_ON(vnic->domU_state_is_setup);
|
|
+ if (vnic_setup_domU_shared_state(dev, vnic) == 0) {
|
|
+ vnic->domU_state_is_setup = 1;
|
|
+ frontend_state = XenbusStateConnected;
|
|
+ } else
|
|
+ frontend_state = XenbusStateClosing;
|
|
+ }
|
|
+ break;
|
|
+ case XenbusStateConnected:
|
|
+ if (vnic->domU_state_is_setup &&
|
|
+ !vnic->dom0_state_is_setup) {
|
|
+ vnic_setup_dom0_shared_state(dev, vnic);
|
|
+ vnic->dom0_state_is_setup = 1;
|
|
+ }
|
|
+ break;
|
|
+ default:
|
|
+ case XenbusStateClosing:
|
|
+ if (vnic->dom0_state_is_setup) {
|
|
+ vnic_remove_dom0_shared_state(dev, vnic);
|
|
+ vnic->dom0_state_is_setup = 0;
|
|
+ }
|
|
+ frontend_state = XenbusStateClosed;
|
|
+ break;
|
|
+ case XenbusStateUnknown:
|
|
+ case XenbusStateClosed:
|
|
+ if (vnic->domU_state_is_setup) {
|
|
+ vnic_remove_domU_shared_state(dev, vnic);
|
|
+ vnic->domU_state_is_setup = 0;
|
|
+ }
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (frontend_state != vnic->frontend_state) {
|
|
+ DPRINTK("Switching from state %s (%d) to %s (%d)\n",
|
|
+ xenbus_strstate(vnic->frontend_state),
|
|
+ vnic->frontend_state,
|
|
+ xenbus_strstate(frontend_state), frontend_state);
|
|
+ vnic->frontend_state = frontend_state;
|
|
+ net_accel_update_state(dev, frontend_state);
|
|
+ }
|
|
+
|
|
+ wake_up(&vnic->state_wait_queue);
|
|
+}
|
|
+
|
|
+
|
|
+static void backend_accel_state_change(struct xenbus_watch *watch,
|
|
+ const char **vec, unsigned int len)
|
|
+{
|
|
+ int state;
|
|
+ netfront_accel_vnic *vnic;
|
|
+ struct xenbus_device *dev;
|
|
+
|
|
+ DPRINTK("%s\n", __FUNCTION__);
|
|
+
|
|
+ vnic = container_of(watch, struct netfront_accel_vnic,
|
|
+ backend_accel_watch);
|
|
+
|
|
+ mutex_lock(&vnic->vnic_mutex);
|
|
+
|
|
+ dev = vnic->dev;
|
|
+
|
|
+ state = (int)XenbusStateUnknown;
|
|
+ xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state);
|
|
+ netfront_accel_backend_accel_changed(vnic, state);
|
|
+
|
|
+ mutex_unlock(&vnic->vnic_mutex);
|
|
+}
|
|
+
|
|
+
|
|
+static int setup_dom0_accel_watch(struct xenbus_device *dev,
|
|
+ netfront_accel_vnic *vnic)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ DPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate");
|
|
+
|
|
+ err = xenbus_watch_path2(dev, dev->otherend, "accelstate",
|
|
+ &vnic->backend_accel_watch,
|
|
+ backend_accel_state_change);
|
|
+ if (err) {
|
|
+ EPRINTK("%s: Failed to register xenbus watch: %d\n",
|
|
+ __FUNCTION__, err);
|
|
+ goto fail;
|
|
+ }
|
|
+ return 0;
|
|
+ fail:
|
|
+ vnic->backend_accel_watch.node = NULL;
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+int netfront_accel_probe(struct net_device *net_dev, struct xenbus_device *dev)
|
|
+{
|
|
+ netfront_accel_vnic *vnic;
|
|
+ int err;
|
|
+
|
|
+ DPRINTK("Probe passed device %s\n", dev->nodename);
|
|
+
|
|
+ vnic = netfront_accel_vnic_ctor(net_dev, dev);
|
|
+ if (IS_ERR(vnic))
|
|
+ return PTR_ERR(vnic);
|
|
+
|
|
+ /*
|
|
+ * Setup a watch on the backend accel state. This sets things
|
|
+ * going.
|
|
+ */
|
|
+ err = setup_dom0_accel_watch(dev, vnic);
|
|
+ if (err) {
|
|
+ netfront_accel_vnic_dtor(vnic);
|
|
+ EPRINTK("%s: probe failed with code %d\n", __FUNCTION__, err);
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Indicate to the other end that we're ready to start unless
|
|
+ * the watch has already fired.
|
|
+ */
|
|
+ mutex_lock(&vnic->vnic_mutex);
|
|
+ VPRINTK("setup success, updating accelstate\n");
|
|
+ if (vnic->frontend_state == XenbusStateClosed) {
|
|
+ vnic->frontend_state = XenbusStateInitialising;
|
|
+ net_accel_update_state(dev, XenbusStateInitialising);
|
|
+ }
|
|
+ mutex_unlock(&vnic->vnic_mutex);
|
|
+
|
|
+ DPRINTK("Probe done device %s\n", dev->nodename);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int netfront_accel_remove(struct xenbus_device *dev)
|
|
+{
|
|
+ struct netfront_info *np =
|
|
+ (struct netfront_info *)dev->dev.driver_data;
|
|
+ netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv;
|
|
+
|
|
+ DPRINTK("%s %s\n", __FUNCTION__, dev->nodename);
|
|
+
|
|
+ BUG_ON(vnic == NULL);
|
|
+
|
|
+ mutex_lock(&vnic->vnic_mutex);
|
|
+
|
|
+ /* Reject any attempts to connect. */
|
|
+ vnic->removing = 1;
|
|
+
|
|
+ /* Close any existing connection. */
|
|
+ if (vnic->frontend_state == XenbusStateConnected) {
|
|
+ vnic->frontend_state = XenbusStateClosing;
|
|
+ net_accel_update_state(dev, XenbusStateClosing);
|
|
+ }
|
|
+
|
|
+ mutex_unlock(&vnic->vnic_mutex);
|
|
+
|
|
+ DPRINTK("%s waiting for release of %s\n", __FUNCTION__, dev->nodename);
|
|
+
|
|
+ /*
|
|
+ * Wait for the xenbus watch to release the shared resources.
|
|
+ * This indicates that dom0 has made the transition
|
|
+ * Closing->Closed or that dom0 was in Closed or Init and no
|
|
+ * resources were mapped.
|
|
+ */
|
|
+ wait_event(vnic->state_wait_queue,
|
|
+ !vnic->domU_state_is_setup);
|
|
+
|
|
+ /*
|
|
+ * Now we don't need this watch anymore it is safe to remove
|
|
+ * it (and so synchronise with it completing if outstanding)
|
|
+ */
|
|
+ DPRINTK("%s: unregistering xenbus accel watch\n",
|
|
+ __FUNCTION__);
|
|
+ unregister_xenbus_watch(&vnic->backend_accel_watch);
|
|
+ kfree(vnic->backend_accel_watch.node);
|
|
+
|
|
+ netfront_accel_vnic_dtor(vnic);
|
|
+
|
|
+ DPRINTK("%s done %s\n", __FUNCTION__, dev->nodename);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon.h 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,172 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author slp
|
|
+ * \brief Falcon specific definitions
|
|
+ * \date 2004/08
|
|
+ */
|
|
+
|
|
+#ifndef __EF_VI_FALCON_H__
|
|
+#define __EF_VI_FALCON_H__
|
|
+
|
|
+#define EFHW_4K 0x00001000u
|
|
+#define EFHW_8K 0x00002000u
|
|
+
|
|
+/* include the autogenerated register definitions */
|
|
+
|
|
+#include "ef_vi_falcon_core.h"
|
|
+#include "ef_vi_falcon_desc.h"
|
|
+#include "ef_vi_falcon_event.h"
|
|
+
|
|
+
|
|
+/*----------------------------------------------------------------------------
|
|
+ *
|
|
+ * Helpers to turn bit shifts into dword shifts and check that the bit fields
|
|
+ * haven't overflowed the dword etc. Aim is to preserve consistency with the
|
|
+ * autogenerated headers - once stable we could hard code.
|
|
+ *
|
|
+ *---------------------------------------------------------------------------*/
|
|
+
|
|
+/* mask constructors */
|
|
+#define __FALCON_MASK(WIDTH,T) ((((T)1) << (WIDTH)) - 1)
|
|
+#define __EFVI_MASK32(WIDTH) __FALCON_MASK((WIDTH),uint32_t)
|
|
+#define __EFVI_MASK64(WIDTH) __FALCON_MASK((WIDTH),uint64_t)
|
|
+
|
|
+#define __EFVI_FALCON_MASKFIELD32(LBN, WIDTH) ((uint32_t) \
|
|
+ (__EFVI_MASK32(WIDTH) << (LBN)))
|
|
+
|
|
+/* constructors for fields which span the first and second dwords */
|
|
+#define __LW(LBN) (32 - (LBN)) /* bits from LBN up to the end of dword 1 */
|
|
+#define LOW(v, LBN, WIDTH) ((uint32_t) \
|
|
+ (((v) & __EFVI_MASK64(__LW((LBN)))) << (LBN)))
|
|
+#define HIGH(v, LBN, WIDTH) ((uint32_t)(((v) >> __LW((LBN))) & \
|
|
+ __EFVI_MASK64((WIDTH - __LW((LBN))))))
|
|
+/* constructors for fields within the second dword */
|
|
+#define __DW2(LBN) ((LBN) - 32)
|
|
+
|
|
+/* constructors for fields which span the second and third dwords */
|
|
+#define __LW2(LBN) (64 - (LBN)) /* bits from LBN up to the end of dword 2 */
|
|
+#define LOW2(v, LBN, WIDTH) ((uint32_t) \
|
|
+ (((v) & __EFVI_MASK64(__LW2((LBN)))) << ((LBN) - 32)))
|
|
+#define HIGH2(v, LBN, WIDTH) ((uint32_t) \
|
|
+ (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN))))))
|
|
+
|
|
+/* constructors for fields within the third dword */
|
|
+#define __DW3(LBN) ((LBN) - 64)
|
|
+
|
|
+
|
|
+/* constructors for fields which span the third and fourth dwords */
|
|
+#define __LW3(LBN) (96 - (LBN)) /* bits from LBN up to the end of dword 3 */
|
|
+#define LOW3(v, LBN, WIDTH) ((uint32_t) \
|
|
+ (((v) & __EFVI_MASK64(__LW3((LBN)))) << ((LBN) - 64)))
|
|
+#define HIGH3(v, LBN, WIDTH) ((uint32_t) \
|
|
+ (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN))))))
|
|
+
|
|
+/* constructors for fields within the fourth dword */
|
|
+#define __DW4(LBN) ((LBN) - 96)
|
|
+
|
|
+/* checks that the autogenerated headers are consistent with our model */
|
|
+#define WIDTHCHCK(a, b) ef_assert((a) == (b))
|
|
+#define RANGECHCK(v, WIDTH) \
|
|
+ ef_assert(((uint64_t)(v) & ~(__EFVI_MASK64((WIDTH)))) == 0)
|
|
+
|
|
+/* fields within the first dword */
|
|
+#define DWCHCK(LBN, WIDTH) ef_assert(((LBN) >= 0) &&(((LBN)+(WIDTH)) <= 32))
|
|
+
|
|
+/* fields which span the first and second dwords */
|
|
+#define LWCHK(LBN, WIDTH) ef_assert((WIDTH) >= __LW((LBN)))
|
|
+
|
|
+/*----------------------------------------------------------------------------
|
|
+ *
|
|
+ * Buffer virtual addresses (4K buffers)
|
|
+ *
|
|
+ *---------------------------------------------------------------------------*/
|
|
+
|
|
+/* Form a buffer virtual address from buffer ID and offset. If the offset
|
|
+** is larger than the buffer size, then the buffer indexed will be
|
|
+** calculated appropriately. It is the responsibility of the caller to
|
|
+** ensure that they have valid buffers programmed at that address.
|
|
+*/
|
|
+#define EFVI_FALCON_VADDR_4K_S (12)
|
|
+#define EFVI_FALCON_VADDR_M 0xfffff /* post shift mask */
|
|
+
|
|
+
|
|
+#define EFVI_FALCON_BUFFER_4K_ADDR(id,off) \
|
|
+ (((id) << EFVI_FALCON_VADDR_4K_S) + (off))
|
|
+
|
|
+#define EFVI_FALCON_BUFFER_4K_PAGE(vaddr) \
|
|
+ (((vaddr) >> EFVI_FALCON_VADDR_4K_S) & EFVI_FALCON_VADDR_M)
|
|
+
|
|
+#define EFVI_FALCON_BUFFER_4K_OFF(vaddr) \
|
|
+ ((vaddr) & __EFVI_MASK32(EFVI_FALCON_VADDR_4K_S))
|
|
+
|
|
+
|
|
+/*----------------------------------------------------------------------------
|
|
+ *
|
|
+ * Clock rates (in kHz, despite the _HZ suffix)
|
|
+ *
|
|
+ *---------------------------------------------------------------------------*/
|
|
+
|
|
+#define EFVI_FALCON_CLOCK_ASIC_HZ (125000)
|
|
+#define EFVI_FALCON_CLOCK_FPGA_HZ (62500)
|
|
+#define EFVI_FALCON_CLOCK_HZ EFVI_FALCON_CLOCK_ASIC_HZ
|
|
+
|
|
+
|
|
+/*----------------------------------------------------------------------------
|
|
+ *
|
|
+ * Timers
|
|
+ *
|
|
+ *---------------------------------------------------------------------------*/
|
|
+
|
|
+/* Event-Queue Timer granularity - measured in us
|
|
+ Given by: 4096 * 3 cycle * clock period */
|
|
+
|
|
+#define EFVI_FALCON_EVQTIMER_PERIOD_US ((4096 * 3 * 1000) / EFVI_FALCON_CLOCK_HZ)
|
|
+
|
|
+/* mode bits */
|
|
+#define EFVI_FALCON_TIMER_MODE_DIS 0 /* disabled */
|
|
+#define EFVI_FALCON_TIMER_MODE_RUN 1 /* started counting right away */
|
|
+#define EFVI_FALCON_TIMER_MODE_HOLD 2 /* trigger mode (user queues) */
|
|
+
|
|
+#define EFVI_FALCON_EVQTIMER_HOLD (EFVI_FALCON_TIMER_MODE_HOLD << TIMER_MODE_LBN)
|
|
+#define EFVI_FALCON_EVQTIMER_RUN (EFVI_FALCON_TIMER_MODE_RUN << TIMER_MODE_LBN)
|
|
+#define EFVI_FALCON_EVQTIMER_DISABLE (EFVI_FALCON_TIMER_MODE_DIS << TIMER_MODE_LBN)
|
|
+
|
|
+
|
|
+/* ---- ef_vi_event helpers --- */
|
|
+
|
|
+#define EFVI_FALCON_EVENT_CODE(evp) \
|
|
+ ((evp)->u64 & EFVI_FALCON_EVENT_CODE_MASK)
|
|
+
|
|
+#define EFVI_FALCON_EVENT_SW_DATA_MASK 0x0000ffff
|
|
+
|
|
+#define __EFVI_FALCON_OPEN_MASK(WIDTH) ((((uint64_t)1) << (WIDTH)) - 1)
|
|
+
|
|
+#define EFVI_FALCON_EVENT_CODE_MASK \
|
|
+ (__EFVI_FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN)
|
|
+
|
|
+
|
|
+#endif /* __EF_VI_FALCON_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon_core.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,1075 @@
|
|
+
|
|
+#define EFVI_FALCON_EXTENDED_P_BAR 1
|
|
+
|
|
+//////////////---- Bus Interface Unit Registers C Header ----//////////////
|
|
+#define IOM_IND_ADR_REG_OFST 0x0 // IO-mapped indirect access address register
|
|
+ #define IOM_AUTO_ADR_INC_EN_LBN 16
|
|
+ #define IOM_AUTO_ADR_INC_EN_WIDTH 1
|
|
+ #define IOM_IND_ADR_LBN 0
|
|
+ #define IOM_IND_ADR_WIDTH 16
|
|
+#define IOM_IND_DAT_REG_OFST 0x4 // IO-mapped indirect access data register
|
|
+ #define IOM_IND_DAT_LBN 0
|
|
+ #define IOM_IND_DAT_WIDTH 32
|
|
+#define ADR_REGION_REG_KER_OFST 0x0 // Address region register
|
|
+#define ADR_REGION_REG_OFST 0x0 // Address region register
|
|
+ #define ADR_REGION3_LBN 96
|
|
+ #define ADR_REGION3_WIDTH 18
|
|
+ #define ADR_REGION2_LBN 64
|
|
+ #define ADR_REGION2_WIDTH 18
|
|
+ #define ADR_REGION1_LBN 32
|
|
+ #define ADR_REGION1_WIDTH 18
|
|
+ #define ADR_REGION0_LBN 0
|
|
+ #define ADR_REGION0_WIDTH 18
|
|
+#define INT_EN_REG_KER_OFST 0x10 // Kernel driver Interrupt enable register
|
|
+ #define KER_INT_CHAR_LBN 4
|
|
+ #define KER_INT_CHAR_WIDTH 1
|
|
+ #define KER_INT_KER_LBN 3
|
|
+ #define KER_INT_KER_WIDTH 1
|
|
+ #define ILL_ADR_ERR_INT_EN_KER_LBN 2
|
|
+ #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1
|
|
+ #define SRM_PERR_INT_EN_KER_LBN 1
|
|
+ #define SRM_PERR_INT_EN_KER_WIDTH 1
|
|
+ #define DRV_INT_EN_KER_LBN 0
|
|
+ #define DRV_INT_EN_KER_WIDTH 1
|
|
+#define INT_EN_REG_CHAR_OFST 0x20 // Char Driver interrupt enable register
|
|
+ #define CHAR_INT_CHAR_LBN 4
|
|
+ #define CHAR_INT_CHAR_WIDTH 1
|
|
+ #define CHAR_INT_KER_LBN 3
|
|
+ #define CHAR_INT_KER_WIDTH 1
|
|
+ #define ILL_ADR_ERR_INT_EN_CHAR_LBN 2
|
|
+ #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1
|
|
+ #define SRM_PERR_INT_EN_CHAR_LBN 1
|
|
+ #define SRM_PERR_INT_EN_CHAR_WIDTH 1
|
|
+ #define DRV_INT_EN_CHAR_LBN 0
|
|
+ #define DRV_INT_EN_CHAR_WIDTH 1
|
|
+#define INT_ADR_REG_KER_OFST 0x30 // Interrupt host address for Kernel driver
|
|
+ #define INT_ADR_KER_LBN 0
|
|
+ #define INT_ADR_KER_WIDTH 64
|
|
+ #define DRV_INT_KER_LBN 32
|
|
+ #define DRV_INT_KER_WIDTH 1
|
|
+ #define EV_FF_HALF_INT_KER_LBN 3
|
|
+ #define EV_FF_HALF_INT_KER_WIDTH 1
|
|
+ #define EV_FF_FULL_INT_KER_LBN 2
|
|
+ #define EV_FF_FULL_INT_KER_WIDTH 1
|
|
+ #define ILL_ADR_ERR_INT_KER_LBN 1
|
|
+ #define ILL_ADR_ERR_INT_KER_WIDTH 1
|
|
+ #define SRAM_PERR_INT_KER_LBN 0
|
|
+ #define SRAM_PERR_INT_KER_WIDTH 1
|
|
+#define INT_ADR_REG_CHAR_OFST 0x40 // Interrupt host address for Char driver
|
|
+ #define INT_ADR_CHAR_LBN 0
|
|
+ #define INT_ADR_CHAR_WIDTH 64
|
|
+ #define DRV_INT_CHAR_LBN 32
|
|
+ #define DRV_INT_CHAR_WIDTH 1
|
|
+ #define EV_FF_HALF_INT_CHAR_LBN 3
|
|
+ #define EV_FF_HALF_INT_CHAR_WIDTH 1
|
|
+ #define EV_FF_FULL_INT_CHAR_LBN 2
|
|
+ #define EV_FF_FULL_INT_CHAR_WIDTH 1
|
|
+ #define ILL_ADR_ERR_INT_CHAR_LBN 1
|
|
+ #define ILL_ADR_ERR_INT_CHAR_WIDTH 1
|
|
+ #define SRAM_PERR_INT_CHAR_LBN 0
|
|
+ #define SRAM_PERR_INT_CHAR_WIDTH 1
|
|
+#define INT_ISR0_B0_OFST 0x90 // B0 only
|
|
+#define INT_ISR1_B0_OFST 0xA0
|
|
+#define INT_ACK_REG_KER_A1_OFST 0x50 // Kernel interrupt acknowledge register
|
|
+ #define RESERVED_LBN 0
|
|
+ #define RESERVED_WIDTH 32
|
|
+#define INT_ACK_REG_CHAR_A1_OFST 0x60 // CHAR interrupt acknowledge register
|
|
+ #define RESERVED_LBN 0
|
|
+ #define RESERVED_WIDTH 32
|
|
+//////////////---- Global CSR Registers C Header ----//////////////
|
|
+#define STRAP_REG_KER_OFST 0x200 // ASIC strap status register
|
|
+#define STRAP_REG_OFST 0x200 // ASIC strap status register
|
|
+ #define ONCHIP_SRAM_LBN 16
|
|
+ #define ONCHIP_SRAM_WIDTH 0
|
|
+ #define STRAP_ISCSI_EN_LBN 3
|
|
+ #define STRAP_ISCSI_EN_WIDTH 1
|
|
+ #define STRAP_PINS_LBN 0
|
|
+ #define STRAP_PINS_WIDTH 3
|
|
+#define GPIO_CTL_REG_KER_OFST 0x210 // GPIO control register
|
|
+#define GPIO_CTL_REG_OFST 0x210 // GPIO control register
|
|
+ #define GPIO_OEN_LBN 24
|
|
+ #define GPIO_OEN_WIDTH 4
|
|
+ #define GPIO_OUT_LBN 16
|
|
+ #define GPIO_OUT_WIDTH 4
|
|
+ #define GPIO_IN_LBN 8
|
|
+ #define GPIO_IN_WIDTH 4
|
|
+ #define GPIO_PWRUP_VALUE_LBN 0
|
|
+ #define GPIO_PWRUP_VALUE_WIDTH 4
|
|
+#define GLB_CTL_REG_KER_OFST 0x220 // Global control register
|
|
+#define GLB_CTL_REG_OFST 0x220 // Global control register
|
|
+ #define SWRST_LBN 0
|
|
+ #define SWRST_WIDTH 1
|
|
+#define FATAL_INTR_REG_KER_OFST 0x230 // Fatal interrupt register for Kernel
|
|
+ #define PCI_BUSERR_INT_KER_EN_LBN 43
|
|
+ #define PCI_BUSERR_INT_KER_EN_WIDTH 1
|
|
+ #define SRAM_OOB_INT_KER_EN_LBN 42
|
|
+ #define SRAM_OOB_INT_KER_EN_WIDTH 1
|
|
+ #define BUFID_OOB_INT_KER_EN_LBN 41
|
|
+ #define BUFID_OOB_INT_KER_EN_WIDTH 1
|
|
+ #define MEM_PERR_INT_KER_EN_LBN 40
|
|
+ #define MEM_PERR_INT_KER_EN_WIDTH 1
|
|
+ #define RBUF_OWN_INT_KER_EN_LBN 39
|
|
+ #define RBUF_OWN_INT_KER_EN_WIDTH 1
|
|
+ #define TBUF_OWN_INT_KER_EN_LBN 38
|
|
+ #define TBUF_OWN_INT_KER_EN_WIDTH 1
|
|
+ #define RDESCQ_OWN_INT_KER_EN_LBN 37
|
|
+ #define RDESCQ_OWN_INT_KER_EN_WIDTH 1
|
|
+ #define TDESCQ_OWN_INT_KER_EN_LBN 36
|
|
+ #define TDESCQ_OWN_INT_KER_EN_WIDTH 1
|
|
+ #define EVQ_OWN_INT_KER_EN_LBN 35
|
|
+ #define EVQ_OWN_INT_KER_EN_WIDTH 1
|
|
+ #define EVFF_OFLO_INT_KER_EN_LBN 34
|
|
+ #define EVFF_OFLO_INT_KER_EN_WIDTH 1
|
|
+ #define ILL_ADR_INT_KER_EN_LBN 33
|
|
+ #define ILL_ADR_INT_KER_EN_WIDTH 1
|
|
+ #define SRM_PERR_INT_KER_EN_LBN 32
|
|
+ #define SRM_PERR_INT_KER_EN_WIDTH 1
|
|
+ #define PCI_BUSERR_INT_KER_LBN 11
|
|
+ #define PCI_BUSERR_INT_KER_WIDTH 1
|
|
+ #define SRAM_OOB_INT_KER_LBN 10
|
|
+ #define SRAM_OOB_INT_KER_WIDTH 1
|
|
+ #define BUFID_OOB_INT_KER_LBN 9
|
|
+ #define BUFID_OOB_INT_KER_WIDTH 1
|
|
+ #define MEM_PERR_INT_KER_LBN 8
|
|
+ #define MEM_PERR_INT_KER_WIDTH 1
|
|
+ #define RBUF_OWN_INT_KER_LBN 7
|
|
+ #define RBUF_OWN_INT_KER_WIDTH 1
|
|
+ #define TBUF_OWN_INT_KER_LBN 6
|
|
+ #define TBUF_OWN_INT_KER_WIDTH 1
|
|
+ #define RDESCQ_OWN_INT_KER_LBN 5
|
|
+ #define RDESCQ_OWN_INT_KER_WIDTH 1
|
|
+ #define TDESCQ_OWN_INT_KER_LBN 4
|
|
+ #define TDESCQ_OWN_INT_KER_WIDTH 1
|
|
+ #define EVQ_OWN_INT_KER_LBN 3
|
|
+ #define EVQ_OWN_INT_KER_WIDTH 1
|
|
+ #define EVFF_OFLO_INT_KER_LBN 2
|
|
+ #define EVFF_OFLO_INT_KER_WIDTH 1
|
|
+ #define ILL_ADR_INT_KER_LBN 1
|
|
+ #define ILL_ADR_INT_KER_WIDTH 1
|
|
+ #define SRM_PERR_INT_KER_LBN 0
|
|
+ #define SRM_PERR_INT_KER_WIDTH 1
|
|
+#define FATAL_INTR_REG_OFST 0x240 // Fatal interrupt register for Char
|
|
+ #define PCI_BUSERR_INT_CHAR_EN_LBN 43
|
|
+ #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1
|
|
+ #define SRAM_OOB_INT_CHAR_EN_LBN 42
|
|
+ #define SRAM_OOB_INT_CHAR_EN_WIDTH 1
|
|
+ #define BUFID_OOB_INT_CHAR_EN_LBN 41
|
|
+ #define BUFID_OOB_INT_CHAR_EN_WIDTH 1
|
|
+ #define MEM_PERR_INT_CHAR_EN_LBN 40
|
|
+ #define MEM_PERR_INT_CHAR_EN_WIDTH 1
|
|
+ #define RBUF_OWN_INT_CHAR_EN_LBN 39
|
|
+ #define RBUF_OWN_INT_CHAR_EN_WIDTH 1
|
|
+ #define TBUF_OWN_INT_CHAR_EN_LBN 38
|
|
+ #define TBUF_OWN_INT_CHAR_EN_WIDTH 1
|
|
+ #define RDESCQ_OWN_INT_CHAR_EN_LBN 37
|
|
+ #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1
|
|
+ #define TDESCQ_OWN_INT_CHAR_EN_LBN 36
|
|
+ #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1
|
|
+ #define EVQ_OWN_INT_CHAR_EN_LBN 35
|
|
+ #define EVQ_OWN_INT_CHAR_EN_WIDTH 1
|
|
+ #define EVFF_OFLO_INT_CHAR_EN_LBN 34
|
|
+ #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1
|
|
+ #define ILL_ADR_INT_CHAR_EN_LBN 33
|
|
+ #define ILL_ADR_INT_CHAR_EN_WIDTH 1
|
|
+ #define SRM_PERR_INT_CHAR_EN_LBN 32
|
|
+ #define SRM_PERR_INT_CHAR_EN_WIDTH 1
|
|
+ #define FATAL_INTR_REG_EN_BITS 0xffffffffffffffffULL
|
|
+ #define PCI_BUSERR_INT_CHAR_LBN 11
|
|
+ #define PCI_BUSERR_INT_CHAR_WIDTH 1
|
|
+ #define SRAM_OOB_INT_CHAR_LBN 10
|
|
+ #define SRAM_OOB_INT_CHAR_WIDTH 1
|
|
+ #define BUFID_OOB_INT_CHAR_LBN 9
|
|
+ #define BUFID_OOB_INT_CHAR_WIDTH 1
|
|
+ #define MEM_PERR_INT_CHAR_LBN 8
|
|
+ #define MEM_PERR_INT_CHAR_WIDTH 1
|
|
+ #define RBUF_OWN_INT_CHAR_LBN 7
|
|
+ #define RBUF_OWN_INT_CHAR_WIDTH 1
|
|
+ #define TBUF_OWN_INT_CHAR_LBN 6
|
|
+ #define TBUF_OWN_INT_CHAR_WIDTH 1
|
|
+ #define RDESCQ_OWN_INT_CHAR_LBN 5
|
|
+ #define RDESCQ_OWN_INT_CHAR_WIDTH 1
|
|
+ #define TDESCQ_OWN_INT_CHAR_LBN 4
|
|
+ #define TDESCQ_OWN_INT_CHAR_WIDTH 1
|
|
+ #define EVQ_OWN_INT_CHAR_LBN 3
|
|
+ #define EVQ_OWN_INT_CHAR_WIDTH 1
|
|
+ #define EVFF_OFLO_INT_CHAR_LBN 2
|
|
+ #define EVFF_OFLO_INT_CHAR_WIDTH 1
|
|
+ #define ILL_ADR_INT_CHAR_LBN 1
|
|
+ #define ILL_ADR_INT_CHAR_WIDTH 1
|
|
+ #define SRM_PERR_INT_CHAR_LBN 0
|
|
+ #define SRM_PERR_INT_CHAR_WIDTH 1
|
|
+#define DP_CTRL_REG_OFST 0x250 // Datapath control register
|
|
+ #define FLS_EVQ_ID_LBN 0
|
|
+ #define FLS_EVQ_ID_WIDTH 12
|
|
+#define MEM_STAT_REG_KER_OFST 0x260 // Memory status register
|
|
+#define MEM_STAT_REG_OFST 0x260 // Memory status register
|
|
+ #define MEM_PERR_VEC_LBN 53
|
|
+ #define MEM_PERR_VEC_WIDTH 38
|
|
+ #define MBIST_CORR_LBN 38
|
|
+ #define MBIST_CORR_WIDTH 15
|
|
+ #define MBIST_ERR_LBN 0
|
|
+ #define MBIST_ERR_WIDTH 38
|
|
+#define DEBUG_REG_KER_OFST 0x270 // Debug register
|
|
+#define DEBUG_REG_OFST 0x270 // Debug register
|
|
+ #define DEBUG_BLK_SEL2_LBN 47
|
|
+ #define DEBUG_BLK_SEL2_WIDTH 3
|
|
+ #define DEBUG_BLK_SEL1_LBN 44
|
|
+ #define DEBUG_BLK_SEL1_WIDTH 3
|
|
+ #define DEBUG_BLK_SEL0_LBN 41
|
|
+ #define DEBUG_BLK_SEL0_WIDTH 3
|
|
+ #define MISC_DEBUG_ADDR_LBN 36
|
|
+ #define MISC_DEBUG_ADDR_WIDTH 5
|
|
+ #define SERDES_DEBUG_ADDR_LBN 31
|
|
+ #define SERDES_DEBUG_ADDR_WIDTH 5
|
|
+ #define EM_DEBUG_ADDR_LBN 26
|
|
+ #define EM_DEBUG_ADDR_WIDTH 5
|
|
+ #define SR_DEBUG_ADDR_LBN 21
|
|
+ #define SR_DEBUG_ADDR_WIDTH 5
|
|
+ #define EV_DEBUG_ADDR_LBN 16
|
|
+ #define EV_DEBUG_ADDR_WIDTH 5
|
|
+ #define RX_DEBUG_ADDR_LBN 11
|
|
+ #define RX_DEBUG_ADDR_WIDTH 5
|
|
+ #define TX_DEBUG_ADDR_LBN 6
|
|
+ #define TX_DEBUG_ADDR_WIDTH 5
|
|
+ #define BIU_DEBUG_ADDR_LBN 1
|
|
+ #define BIU_DEBUG_ADDR_WIDTH 5
|
|
+ #define DEBUG_EN_LBN 0
|
|
+ #define DEBUG_EN_WIDTH 1
|
|
+#define DRIVER_REG0_KER_OFST 0x280 // Driver scratch register 0
|
|
+#define DRIVER_REG0_OFST 0x280 // Driver scratch register 0
|
|
+ #define DRIVER_DW0_LBN 0
|
|
+ #define DRIVER_DW0_WIDTH 32
|
|
+#define DRIVER_REG1_KER_OFST 0x290 // Driver scratch register 1
|
|
+#define DRIVER_REG1_OFST 0x290 // Driver scratch register 1
|
|
+ #define DRIVER_DW1_LBN 0
|
|
+ #define DRIVER_DW1_WIDTH 32
|
|
+#define DRIVER_REG2_KER_OFST 0x2A0 // Driver scratch register 2
|
|
+#define DRIVER_REG2_OFST 0x2A0 // Driver scratch register 2
|
|
+ #define DRIVER_DW2_LBN 0
|
|
+ #define DRIVER_DW2_WIDTH 32
|
|
+#define DRIVER_REG3_KER_OFST 0x2B0 // Driver scratch register 3
|
|
+#define DRIVER_REG3_OFST 0x2B0 // Driver scratch register 3
|
|
+ #define DRIVER_DW3_LBN 0
|
|
+ #define DRIVER_DW3_WIDTH 32
|
|
+#define DRIVER_REG4_KER_OFST 0x2C0 // Driver scratch register 4
|
|
+#define DRIVER_REG4_OFST 0x2C0 // Driver scratch register 4
|
|
+ #define DRIVER_DW4_LBN 0
|
|
+ #define DRIVER_DW4_WIDTH 32
|
|
+#define DRIVER_REG5_KER_OFST 0x2D0 // Driver scratch register 5
|
|
+#define DRIVER_REG5_OFST 0x2D0 // Driver scratch register 5
|
|
+ #define DRIVER_DW5_LBN 0
|
|
+ #define DRIVER_DW5_WIDTH 32
|
|
+#define DRIVER_REG6_KER_OFST 0x2E0 // Driver scratch register 6
|
|
+#define DRIVER_REG6_OFST 0x2E0 // Driver scratch register 6
|
|
+ #define DRIVER_DW6_LBN 0
|
|
+ #define DRIVER_DW6_WIDTH 32
|
|
+#define DRIVER_REG7_KER_OFST 0x2F0 // Driver scratch register 7
|
|
+#define DRIVER_REG7_OFST 0x2F0 // Driver scratch register 7
|
|
+ #define DRIVER_DW7_LBN 0
|
|
+ #define DRIVER_DW7_WIDTH 32
|
|
+#define ALTERA_BUILD_REG_KER_OFST 0x300 // Altera build register
|
|
+#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register
|
|
+ #define ALTERA_BUILD_VER_LBN 0
|
|
+ #define ALTERA_BUILD_VER_WIDTH 32
|
|
+
|
|
+/* so called CSR spare register
|
|
+ - contains separate parity enable bits for the various internal memory blocks */
|
|
+#define MEM_PARITY_ERR_EN_REG_KER 0x310
|
|
+#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64
|
|
+#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38
|
|
+#define MEM_PARITY_TX_DATA_EN_LBN 72
|
|
+#define MEM_PARITY_TX_DATA_EN_WIDTH 2
|
|
+
|
|
+//////////////---- Event & Timer Module Registers C Header ----//////////////
|
|
+
|
|
+#if EFVI_FALCON_EXTENDED_P_BAR
|
|
+#define EVQ_RPTR_REG_KER_OFST 0x11B00 // Event queue read pointer register
|
|
+#else
|
|
+#define EVQ_RPTR_REG_KER_OFST 0x1B00 // Event queue read pointer register
|
|
+#endif
|
|
+
|
|
+#define EVQ_RPTR_REG_OFST 0xFA0000 // Event queue read pointer register array.
|
|
+ #define EVQ_RPTR_LBN 0
|
|
+ #define EVQ_RPTR_WIDTH 15
|
|
+
|
|
+#if EFVI_FALCON_EXTENDED_P_BAR
|
|
+#define EVQ_PTR_TBL_KER_OFST 0x11A00 // Event queue pointer table for kernel access
|
|
+#else
|
|
+#define EVQ_PTR_TBL_KER_OFST 0x1A00 // Event queue pointer table for kernel access
|
|
+#endif
|
|
+
|
|
+#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 // Event queue pointer table for char direct access
|
|
+ #define EVQ_WKUP_OR_INT_EN_LBN 39
|
|
+ #define EVQ_WKUP_OR_INT_EN_WIDTH 1
|
|
+ #define EVQ_NXT_WPTR_LBN 24
|
|
+ #define EVQ_NXT_WPTR_WIDTH 15
|
|
+ #define EVQ_EN_LBN 23
|
|
+ #define EVQ_EN_WIDTH 1
|
|
+ #define EVQ_SIZE_LBN 20
|
|
+ #define EVQ_SIZE_WIDTH 3
|
|
+ #define EVQ_BUF_BASE_ID_LBN 0
|
|
+ #define EVQ_BUF_BASE_ID_WIDTH 20
|
|
+#define TIMER_CMD_REG_KER_OFST 0x420 // Timer table for kernel access. Page-mapped
|
|
+#define TIMER_CMD_REG_PAGE4_OFST 0x8420 // Timer table for user-level access. Page-mapped. For lowest 1K queues.
|
|
+#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 // Timer table for user-level access. Page-mapped. For upper 3K queues.
|
|
+#define TIMER_TBL_OFST 0xF70000 // Timer table for char driver direct access
|
|
+ #define TIMER_MODE_LBN 12
|
|
+ #define TIMER_MODE_WIDTH 2
|
|
+ #define TIMER_VAL_LBN 0
|
|
+ #define TIMER_VAL_WIDTH 12
|
|
+ #define TIMER_MODE_INT_HLDOFF 2
|
|
+ #define EVQ_BUF_SIZE_LBN 0
|
|
+ #define EVQ_BUF_SIZE_WIDTH 1
|
|
+#define DRV_EV_REG_KER_OFST 0x440 // Driver generated event register
|
|
+#define DRV_EV_REG_OFST 0x440 // Driver generated event register
|
|
+ #define DRV_EV_QID_LBN 64
|
|
+ #define DRV_EV_QID_WIDTH 12
|
|
+ #define DRV_EV_DATA_LBN 0
|
|
+ #define DRV_EV_DATA_WIDTH 64
|
|
+#define EVQ_CTL_REG_KER_OFST 0x450 // Event queue control register
|
|
+#define EVQ_CTL_REG_OFST 0x450 // Event queue control register
|
|
+ #define RX_EVQ_WAKEUP_MASK_B0_LBN 15
|
|
+ #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6
|
|
+ #define EVQ_OWNERR_CTL_LBN 14
|
|
+ #define EVQ_OWNERR_CTL_WIDTH 1
|
|
+ #define EVQ_FIFO_AF_TH_LBN 8
|
|
+ #define EVQ_FIFO_AF_TH_WIDTH 6
|
|
+ #define EVQ_FIFO_NOTAF_TH_LBN 0
|
|
+ #define EVQ_FIFO_NOTAF_TH_WIDTH 6
|
|
+//////////////---- SRAM Module Registers C Header ----//////////////
|
|
+#define BUF_TBL_CFG_REG_KER_OFST 0x600 // Buffer table configuration register
|
|
+#define BUF_TBL_CFG_REG_OFST 0x600 // Buffer table configuration register
|
|
+ #define BUF_TBL_MODE_LBN 3
|
|
+ #define BUF_TBL_MODE_WIDTH 1
|
|
+#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 // SRAM receive descriptor cache configuration register
|
|
+#define SRM_RX_DC_CFG_REG_OFST 0x610 // SRAM receive descriptor cache configuration register
|
|
+ #define SRM_RX_DC_BASE_ADR_LBN 0
|
|
+ #define SRM_RX_DC_BASE_ADR_WIDTH 21
|
|
+#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 // SRAM transmit descriptor cache configuration register
|
|
+#define SRM_TX_DC_CFG_REG_OFST 0x620 // SRAM transmit descriptor cache configuration register
|
|
+ #define SRM_TX_DC_BASE_ADR_LBN 0
|
|
+ #define SRM_TX_DC_BASE_ADR_WIDTH 21
|
|
+#define SRM_CFG_REG_KER_OFST 0x630 // SRAM configuration register
|
|
+#define SRM_CFG_REG_OFST 0x630 // SRAM configuration register
|
|
+ #define SRAM_OOB_ADR_INTEN_LBN 5
|
|
+ #define SRAM_OOB_ADR_INTEN_WIDTH 1
|
|
+ #define SRAM_OOB_BUF_INTEN_LBN 4
|
|
+ #define SRAM_OOB_BUF_INTEN_WIDTH 1
|
|
+ #define SRAM_BT_INIT_EN_LBN 3
|
|
+ #define SRAM_BT_INIT_EN_WIDTH 1
|
|
+ #define SRM_NUM_BANK_LBN 2
|
|
+ #define SRM_NUM_BANK_WIDTH 1
|
|
+ #define SRM_BANK_SIZE_LBN 0
|
|
+ #define SRM_BANK_SIZE_WIDTH 2
|
|
+#define BUF_TBL_UPD_REG_KER_OFST 0x650 // Buffer table update register
|
|
+#define BUF_TBL_UPD_REG_OFST 0x650 // Buffer table update register
|
|
+ #define BUF_UPD_CMD_LBN 63
|
|
+ #define BUF_UPD_CMD_WIDTH 1
|
|
+ #define BUF_CLR_CMD_LBN 62
|
|
+ #define BUF_CLR_CMD_WIDTH 1
|
|
+ #define BUF_CLR_END_ID_LBN 32
|
|
+ #define BUF_CLR_END_ID_WIDTH 20
|
|
+ #define BUF_CLR_START_ID_LBN 0
|
|
+ #define BUF_CLR_START_ID_WIDTH 20
|
|
+#define SRM_UPD_EVQ_REG_KER_OFST 0x660 // Buffer table update register
|
|
+#define SRM_UPD_EVQ_REG_OFST 0x660 // Buffer table update register
|
|
+ #define SRM_UPD_EVQ_ID_LBN 0
|
|
+ #define SRM_UPD_EVQ_ID_WIDTH 12
|
|
+#define SRAM_PARITY_REG_KER_OFST 0x670 // SRAM parity register.
|
|
+#define SRAM_PARITY_REG_OFST 0x670 // SRAM parity register.
|
|
+ #define FORCE_SRAM_PERR_LBN 0
|
|
+ #define FORCE_SRAM_PERR_WIDTH 1
|
|
+
|
|
+#if EFVI_FALCON_EXTENDED_P_BAR
|
|
+#define BUF_HALF_TBL_KER_OFST 0x18000 // Buffer table in half buffer table mode direct access by kernel driver
|
|
+#else
|
|
+#define BUF_HALF_TBL_KER_OFST 0x8000 // Buffer table in half buffer table mode direct access by kernel driver
|
|
+#endif
|
|
+
|
|
+
|
|
+#define BUF_HALF_TBL_OFST 0x800000 // Buffer table in half buffer table mode direct access by char driver
|
|
+ #define BUF_ADR_HBUF_ODD_LBN 44
|
|
+ #define BUF_ADR_HBUF_ODD_WIDTH 20
|
|
+ #define BUF_OWNER_ID_HBUF_ODD_LBN 32
|
|
+ #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12
|
|
+ #define BUF_ADR_HBUF_EVEN_LBN 12
|
|
+ #define BUF_ADR_HBUF_EVEN_WIDTH 20
|
|
+ #define BUF_OWNER_ID_HBUF_EVEN_LBN 0
|
|
+ #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12
|
|
+
|
|
+
|
|
+#if EFVI_FALCON_EXTENDED_P_BAR
|
|
+#define BUF_FULL_TBL_KER_OFST 0x18000 // Buffer table in full buffer table mode direct access by kernel driver
|
|
+#else
|
|
+#define BUF_FULL_TBL_KER_OFST 0x8000 // Buffer table in full buffer table mode direct access by kernel driver
|
|
+#endif
|
|
+
|
|
+
|
|
+
|
|
+
|
|
+#define BUF_FULL_TBL_OFST 0x800000 // Buffer table in full buffer table mode direct access by char driver
|
|
+ #define IP_DAT_BUF_SIZE_LBN 50
|
|
+ #define IP_DAT_BUF_SIZE_WIDTH 1
|
|
+ #define BUF_ADR_REGION_LBN 48
|
|
+ #define BUF_ADR_REGION_WIDTH 2
|
|
+ #define BUF_ADR_FBUF_LBN 14
|
|
+ #define BUF_ADR_FBUF_WIDTH 34
|
|
+ #define BUF_OWNER_ID_FBUF_LBN 0
|
|
+ #define BUF_OWNER_ID_FBUF_WIDTH 14
|
|
+#define SRM_DBG_REG_OFST 0x3000000 // SRAM debug access
|
|
+ #define SRM_DBG_LBN 0
|
|
+ #define SRM_DBG_WIDTH 64
|
|
+//////////////---- RX Datapath Registers C Header ----//////////////
|
|
+
|
|
+#define RX_CFG_REG_KER_OFST 0x800 // Receive configuration register
|
|
+#define RX_CFG_REG_OFST 0x800 // Receive configuration register
|
|
+
|
|
+#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029)
|
|
+# if !defined(FALCON_128K_RXFIFO)
|
|
+# define FALCON_128K_RXFIFO
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#if defined(FALCON_128K_RXFIFO)
|
|
+
|
|
+/* new for B0 */
|
|
+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48
|
|
+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
|
|
+ #define RX_INGR_EN_B0_LBN 47
|
|
+ #define RX_INGR_EN_B0_WIDTH 1
|
|
+ #define RX_TOEP_IPV4_B0_LBN 46
|
|
+ #define RX_TOEP_IPV4_B0_WIDTH 1
|
|
+ #define RX_HASH_ALG_B0_LBN 45
|
|
+ #define RX_HASH_ALG_B0_WIDTH 1
|
|
+ #define RX_HASH_INSERT_HDR_B0_LBN 44
|
|
+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
|
|
+/* moved for B0 */
|
|
+ #define RX_DESC_PUSH_EN_B0_LBN 43
|
|
+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
|
|
+ #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */
|
|
+ #define RX_RDW_PATCH_EN_WIDTH 1
|
|
+ #define RX_PCI_BURST_SIZE_B0_LBN 39
|
|
+ #define RX_PCI_BURST_SIZE_B0_WIDTH 3
|
|
+ #define RX_OWNERR_CTL_B0_LBN 38
|
|
+ #define RX_OWNERR_CTL_B0_WIDTH 1
|
|
+ #define RX_XON_TX_TH_B0_LBN 33
|
|
+ #define RX_XON_TX_TH_B0_WIDTH 5
|
|
+ #define RX_XOFF_TX_TH_B0_LBN 28
|
|
+ #define RX_XOFF_TX_TH_B0_WIDTH 5
|
|
+ #define RX_USR_BUF_SIZE_B0_LBN 19
|
|
+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
|
|
+ #define RX_XON_MAC_TH_B0_LBN 10
|
|
+ #define RX_XON_MAC_TH_B0_WIDTH 9
|
|
+ #define RX_XOFF_MAC_TH_B0_LBN 1
|
|
+ #define RX_XOFF_MAC_TH_B0_WIDTH 9
|
|
+ #define RX_XOFF_MAC_EN_B0_LBN 0
|
|
+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
|
|
+
|
|
+#elif !defined(FALCON_PRE_02020029)
|
|
+/* new for B0 */
|
|
+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46
|
|
+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
|
|
+ #define RX_INGR_EN_B0_LBN 45
|
|
+ #define RX_INGR_EN_B0_WIDTH 1
|
|
+ #define RX_TOEP_IPV4_B0_LBN 44
|
|
+ #define RX_TOEP_IPV4_B0_WIDTH 1
|
|
+ #define RX_HASH_ALG_B0_LBN 43
|
|
+ #define RX_HASH_ALG_B0_WIDTH 1 /* single-bit field: bits 42 and 44 belong to the neighbouring fields */
|
|
+ #define RX_HASH_INSERT_HDR_B0_LBN 42
|
|
+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
|
|
+/* moved for B0 */
|
|
+ #define RX_DESC_PUSH_EN_B0_LBN 41
|
|
+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
|
|
+ #define RX_PCI_BURST_SIZE_B0_LBN 37
|
|
+ #define RX_PCI_BURST_SIZE_B0_WIDTH 3
|
|
+ #define RX_OWNERR_CTL_B0_LBN 36
|
|
+ #define RX_OWNERR_CTL_B0_WIDTH 1
|
|
+ #define RX_XON_TX_TH_B0_LBN 31
|
|
+ #define RX_XON_TX_TH_B0_WIDTH 5
|
|
+ #define RX_XOFF_TX_TH_B0_LBN 26
|
|
+ #define RX_XOFF_TX_TH_B0_WIDTH 5
|
|
+ #define RX_USR_BUF_SIZE_B0_LBN 17
|
|
+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
|
|
+ #define RX_XON_MAC_TH_B0_LBN 9
|
|
+ #define RX_XON_MAC_TH_B0_WIDTH 8
|
|
+ #define RX_XOFF_MAC_TH_B0_LBN 1
|
|
+ #define RX_XOFF_MAC_TH_B0_WIDTH 8
|
|
+ #define RX_XOFF_MAC_EN_B0_LBN 0
|
|
+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
|
|
+
|
|
+#else
|
|
+/* new for B0 */
|
|
+ #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44
|
|
+ #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1
|
|
+ #define RX_INGR_EN_B0_LBN 43
|
|
+ #define RX_INGR_EN_B0_WIDTH 1
|
|
+ #define RX_TOEP_IPV4_B0_LBN 42
|
|
+ #define RX_TOEP_IPV4_B0_WIDTH 1
|
|
+ #define RX_HASH_ALG_B0_LBN 41
|
|
+ #define RX_HASH_ALG_B0_WIDTH 1 /* single-bit field: bits 40 and 42 belong to the neighbouring fields */
|
|
+ #define RX_HASH_INSERT_HDR_B0_LBN 40
|
|
+ #define RX_HASH_INSERT_HDR_B0_WIDTH 1
|
|
+/* moved for B0 */
|
|
+ #define RX_DESC_PUSH_EN_B0_LBN 35 /* NOTE(review): same LBN as RX_PCI_BURST_SIZE_B0_LBN in this #else branch -- confirm against the register spec */
|
|
+ #define RX_DESC_PUSH_EN_B0_WIDTH 1
|
|
+ #define RX_PCI_BURST_SIZE_B0_LBN 35
|
|
+ #define RX_PCI_BURST_SIZE_B0_WIDTH 2
|
|
+ #define RX_OWNERR_CTL_B0_LBN 34
|
|
+ #define RX_OWNERR_CTL_B0_WIDTH 1
|
|
+ #define RX_XON_TX_TH_B0_LBN 29
|
|
+ #define RX_XON_TX_TH_B0_WIDTH 5
|
|
+ #define RX_XOFF_TX_TH_B0_LBN 24
|
|
+ #define RX_XOFF_TX_TH_B0_WIDTH 5
|
|
+ #define RX_USR_BUF_SIZE_B0_LBN 15
|
|
+ #define RX_USR_BUF_SIZE_B0_WIDTH 9
|
|
+ #define RX_XON_MAC_TH_B0_LBN 8
|
|
+ #define RX_XON_MAC_TH_B0_WIDTH 7
|
|
+ #define RX_XOFF_MAC_TH_B0_LBN 1
|
|
+ #define RX_XOFF_MAC_TH_B0_WIDTH 7
|
|
+ #define RX_XOFF_MAC_EN_B0_LBN 0
|
|
+ #define RX_XOFF_MAC_EN_B0_WIDTH 1
|
|
+
|
|
+#endif
|
|
+
|
|
+/* A0/A1 */
|
|
+ #define RX_PUSH_EN_A1_LBN 35
|
|
+ #define RX_PUSH_EN_A1_WIDTH 1
|
|
+ #define RX_PCI_BURST_SIZE_A1_LBN 31
|
|
+ #define RX_PCI_BURST_SIZE_A1_WIDTH 3
|
|
+ #define RX_OWNERR_CTL_A1_LBN 30
|
|
+ #define RX_OWNERR_CTL_A1_WIDTH 1
|
|
+ #define RX_XON_TX_TH_A1_LBN 25
|
|
+ #define RX_XON_TX_TH_A1_WIDTH 5
|
|
+ #define RX_XOFF_TX_TH_A1_LBN 20
|
|
+ #define RX_XOFF_TX_TH_A1_WIDTH 5
|
|
+ #define RX_USR_BUF_SIZE_A1_LBN 11
|
|
+ #define RX_USR_BUF_SIZE_A1_WIDTH 9
|
|
+ #define RX_XON_MAC_TH_A1_LBN 6
|
|
+ #define RX_XON_MAC_TH_A1_WIDTH 5
|
|
+ #define RX_XOFF_MAC_TH_A1_LBN 1
|
|
+ #define RX_XOFF_MAC_TH_A1_WIDTH 5
|
|
+ #define RX_XOFF_MAC_EN_A1_LBN 0
|
|
+ #define RX_XOFF_MAC_EN_A1_WIDTH 1
|
|
+
|
|
+#define RX_FILTER_CTL_REG_OFST 0x810 // Receive filter control registers
|
|
+ #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40
|
|
+ #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1
|
|
+ #define UDP_FULL_SRCH_LIMIT_LBN 32
|
|
+ #define UDP_FULL_SRCH_LIMIT_WIDTH 8
|
|
+ #define NUM_KER_LBN 24
|
|
+ #define NUM_KER_WIDTH 2
|
|
+ #define UDP_WILD_SRCH_LIMIT_LBN 16
|
|
+ #define UDP_WILD_SRCH_LIMIT_WIDTH 8
|
|
+ #define TCP_WILD_SRCH_LIMIT_LBN 8
|
|
+ #define TCP_WILD_SRCH_LIMIT_WIDTH 8
|
|
+ #define TCP_FULL_SRCH_LIMIT_LBN 0
|
|
+ #define TCP_FULL_SRCH_LIMIT_WIDTH 8
|
|
+#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 // Receive flush descriptor queue register
|
|
+#define RX_FLUSH_DESCQ_REG_OFST 0x820 // Receive flush descriptor queue register
|
|
+ #define RX_FLUSH_DESCQ_CMD_LBN 24
|
|
+ #define RX_FLUSH_DESCQ_CMD_WIDTH 1
|
|
+ #define RX_FLUSH_EVQ_ID_LBN 12
|
|
+ #define RX_FLUSH_EVQ_ID_WIDTH 12
|
|
+ #define RX_FLUSH_DESCQ_LBN 0
|
|
+ #define RX_FLUSH_DESCQ_WIDTH 12
|
|
+#define RX_DESC_UPD_REG_KER_OFST 0x830 // Kernel receive descriptor update register. Page-mapped
|
|
+#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 // Char & user receive descriptor update register. Page-mapped. For lowest 1K queues.
|
|
+#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 // Char & user receive descriptor update register. Page-mapped. For upper 3K queues.
|
|
+ #define RX_DESC_WPTR_LBN 96
|
|
+ #define RX_DESC_WPTR_WIDTH 12
|
|
+ #define RX_DESC_PUSH_CMD_LBN 95
|
|
+ #define RX_DESC_PUSH_CMD_WIDTH 1
|
|
+ #define RX_DESC_LBN 0
|
|
+ #define RX_DESC_WIDTH 64
|
|
+ #define RX_KER_DESC_LBN 0
|
|
+ #define RX_KER_DESC_WIDTH 64
|
|
+ #define RX_USR_DESC_LBN 0
|
|
+ #define RX_USR_DESC_WIDTH 32
|
|
+#define RX_DC_CFG_REG_KER_OFST 0x840 // Receive descriptor cache configuration register
|
|
+#define RX_DC_CFG_REG_OFST 0x840 // Receive descriptor cache configuration register
|
|
+ #define RX_DC_SIZE_LBN 0
|
|
+ #define RX_DC_SIZE_WIDTH 2
|
|
+#define RX_DC_PF_WM_REG_KER_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
|
|
+#define RX_DC_PF_WM_REG_OFST 0x850 // Receive descriptor cache pre-fetch watermark register
|
|
+ #define RX_DC_PF_LWM_LO_LBN 0
|
|
+ #define RX_DC_PF_LWM_LO_WIDTH 6
|
|
+
|
|
+#define RX_RSS_TKEY_B0_OFST 0x860 // RSS Toeplitz hash key (B0 only)
|
|
+
|
|
+#define RX_NODESC_DROP_REG 0x880
|
|
+ #define RX_NODESC_DROP_CNT_LBN 0
|
|
+ #define RX_NODESC_DROP_CNT_WIDTH 16
|
|
+
|
|
+#define XM_TX_CFG_REG_OFST 0x1230
|
|
+ #define XM_AUTO_PAD_LBN 5
|
|
+ #define XM_AUTO_PAD_WIDTH 1
|
|
+
|
|
+#define RX_FILTER_TBL0_OFST 0xF00000 // Receive filter table - even entries
|
|
+ #define RSS_EN_0_B0_LBN 110
|
|
+ #define RSS_EN_0_B0_WIDTH 1
|
|
+ #define SCATTER_EN_0_B0_LBN 109
|
|
+ #define SCATTER_EN_0_B0_WIDTH 1
|
|
+ #define TCP_UDP_0_LBN 108
|
|
+ #define TCP_UDP_0_WIDTH 1
|
|
+ #define RXQ_ID_0_LBN 96
|
|
+ #define RXQ_ID_0_WIDTH 12
|
|
+ #define DEST_IP_0_LBN 64
|
|
+ #define DEST_IP_0_WIDTH 32
|
|
+ #define DEST_PORT_TCP_0_LBN 48
|
|
+ #define DEST_PORT_TCP_0_WIDTH 16
|
|
+ #define SRC_IP_0_LBN 16
|
|
+ #define SRC_IP_0_WIDTH 32
|
|
+ #define SRC_TCP_DEST_UDP_0_LBN 0
|
|
+ #define SRC_TCP_DEST_UDP_0_WIDTH 16
|
|
+#define RX_FILTER_TBL1_OFST 0xF00010 // Receive filter table - odd entries
|
|
+ #define RSS_EN_1_B0_LBN 110
|
|
+ #define RSS_EN_1_B0_WIDTH 1
|
|
+ #define SCATTER_EN_1_B0_LBN 109
|
|
+ #define SCATTER_EN_1_B0_WIDTH 1
|
|
+ #define TCP_UDP_1_LBN 108
|
|
+ #define TCP_UDP_1_WIDTH 1
|
|
+ #define RXQ_ID_1_LBN 96
|
|
+ #define RXQ_ID_1_WIDTH 12
|
|
+ #define DEST_IP_1_LBN 64
|
|
+ #define DEST_IP_1_WIDTH 32
|
|
+ #define DEST_PORT_TCP_1_LBN 48
|
|
+ #define DEST_PORT_TCP_1_WIDTH 16
|
|
+ #define SRC_IP_1_LBN 16
|
|
+ #define SRC_IP_1_WIDTH 32
|
|
+ #define SRC_TCP_DEST_UDP_1_LBN 0
|
|
+ #define SRC_TCP_DEST_UDP_1_WIDTH 16
|
|
+
|
|
+#if EFVI_FALCON_EXTENDED_P_BAR
|
|
+#define RX_DESC_PTR_TBL_KER_OFST 0x11800 // Receive descriptor pointer kernel access
|
|
+#else
|
|
+#define RX_DESC_PTR_TBL_KER_OFST 0x1800 // Receive descriptor pointer kernel access
|
|
+#endif
|
|
+
|
|
+
|
|
+#define RX_DESC_PTR_TBL_OFST 0xF40000 // Receive descriptor pointer table
|
|
+ #define RX_ISCSI_DDIG_EN_LBN 88
|
|
+ #define RX_ISCSI_DDIG_EN_WIDTH 1
|
|
+ #define RX_ISCSI_HDIG_EN_LBN 87
|
|
+ #define RX_ISCSI_HDIG_EN_WIDTH 1
|
|
+ #define RX_DESC_PREF_ACT_LBN 86
|
|
+ #define RX_DESC_PREF_ACT_WIDTH 1
|
|
+ #define RX_DC_HW_RPTR_LBN 80
|
|
+ #define RX_DC_HW_RPTR_WIDTH 6
|
|
+ #define RX_DESCQ_HW_RPTR_LBN 68
|
|
+ #define RX_DESCQ_HW_RPTR_WIDTH 12
|
|
+ #define RX_DESCQ_SW_WPTR_LBN 56
|
|
+ #define RX_DESCQ_SW_WPTR_WIDTH 12
|
|
+ #define RX_DESCQ_BUF_BASE_ID_LBN 36
|
|
+ #define RX_DESCQ_BUF_BASE_ID_WIDTH 20
|
|
+ #define RX_DESCQ_EVQ_ID_LBN 24
|
|
+ #define RX_DESCQ_EVQ_ID_WIDTH 12
|
|
+ #define RX_DESCQ_OWNER_ID_LBN 10
|
|
+ #define RX_DESCQ_OWNER_ID_WIDTH 14
|
|
+ #define RX_DESCQ_LABEL_LBN 5
|
|
+ #define RX_DESCQ_LABEL_WIDTH 5
|
|
+ #define RX_DESCQ_SIZE_LBN 3
|
|
+ #define RX_DESCQ_SIZE_WIDTH 2
|
|
+ #define RX_DESCQ_TYPE_LBN 2
|
|
+ #define RX_DESCQ_TYPE_WIDTH 1
|
|
+ #define RX_DESCQ_JUMBO_LBN 1
|
|
+ #define RX_DESCQ_JUMBO_WIDTH 1
|
|
+ #define RX_DESCQ_EN_LBN 0
|
|
+ #define RX_DESCQ_EN_WIDTH 1
|
|
+
|
|
+
|
|
+#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 // RSS indirection table (B0 only)
|
|
+ #define RX_RSS_INDIR_ENT_B0_LBN 0
|
|
+ #define RX_RSS_INDIR_ENT_B0_WIDTH 6
|
|
+
|
|
+//////////////---- TX Datapath Registers C Header ----//////////////
|
|
+#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 // Transmit flush descriptor queue register
|
|
+#define TX_FLUSH_DESCQ_REG_OFST 0xA00 // Transmit flush descriptor queue register
|
|
+ #define TX_FLUSH_DESCQ_CMD_LBN 12
|
|
+ #define TX_FLUSH_DESCQ_CMD_WIDTH 1
|
|
+ #define TX_FLUSH_DESCQ_LBN 0
|
|
+ #define TX_FLUSH_DESCQ_WIDTH 12
|
|
+#define TX_DESC_UPD_REG_KER_OFST 0xA10 // Kernel transmit descriptor update register. Page-mapped
|
|
+#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 // Char & user transmit descriptor update register. Page-mapped
|
|
+#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 // Char & user transmit descriptor update register. Page-mapped
|
|
+ #define TX_DESC_WPTR_LBN 96
|
|
+ #define TX_DESC_WPTR_WIDTH 12
|
|
+ #define TX_DESC_PUSH_CMD_LBN 95
|
|
+ #define TX_DESC_PUSH_CMD_WIDTH 1
|
|
+ #define TX_DESC_LBN 0
|
|
+ #define TX_DESC_WIDTH 95
|
|
+ #define TX_KER_DESC_LBN 0
|
|
+ #define TX_KER_DESC_WIDTH 64
|
|
+ #define TX_USR_DESC_LBN 0
|
|
+ #define TX_USR_DESC_WIDTH 64
|
|
+#define TX_DC_CFG_REG_KER_OFST 0xA20 // Transmit descriptor cache configuration register
|
|
+#define TX_DC_CFG_REG_OFST 0xA20 // Transmit descriptor cache configuration register
|
|
+ #define TX_DC_SIZE_LBN 0
|
|
+ #define TX_DC_SIZE_WIDTH 2
|
|
+
|
|
+#if EFVI_FALCON_EXTENDED_P_BAR
|
|
+#define TX_DESC_PTR_TBL_KER_OFST 0x11900 // Transmit descriptor pointer.
|
|
+#else
|
|
+#define TX_DESC_PTR_TBL_KER_OFST 0x1900 // Transmit descriptor pointer.
|
|
+#endif
|
|
+
|
|
+
|
|
+#define TX_DESC_PTR_TBL_OFST 0xF50000 // Transmit descriptor pointer
|
|
+ #define TX_NON_IP_DROP_DIS_B0_LBN 91
|
|
+ #define TX_NON_IP_DROP_DIS_B0_WIDTH 1
|
|
+ #define TX_IP_CHKSM_DIS_B0_LBN 90
|
|
+ #define TX_IP_CHKSM_DIS_B0_WIDTH 1
|
|
+ #define TX_TCP_CHKSM_DIS_B0_LBN 89
|
|
+ #define TX_TCP_CHKSM_DIS_B0_WIDTH 1
|
|
+ #define TX_DESCQ_EN_LBN 88
|
|
+ #define TX_DESCQ_EN_WIDTH 1
|
|
+ #define TX_ISCSI_DDIG_EN_LBN 87
|
|
+ #define TX_ISCSI_DDIG_EN_WIDTH 1
|
|
+ #define TX_ISCSI_HDIG_EN_LBN 86
|
|
+ #define TX_ISCSI_HDIG_EN_WIDTH 1
|
|
+ #define TX_DC_HW_RPTR_LBN 80
|
|
+ #define TX_DC_HW_RPTR_WIDTH 6
|
|
+ #define TX_DESCQ_HW_RPTR_LBN 68
|
|
+ #define TX_DESCQ_HW_RPTR_WIDTH 12
|
|
+ #define TX_DESCQ_SW_WPTR_LBN 56
|
|
+ #define TX_DESCQ_SW_WPTR_WIDTH 12
|
|
+ #define TX_DESCQ_BUF_BASE_ID_LBN 36
|
|
+ #define TX_DESCQ_BUF_BASE_ID_WIDTH 20
|
|
+ #define TX_DESCQ_EVQ_ID_LBN 24
|
|
+ #define TX_DESCQ_EVQ_ID_WIDTH 12
|
|
+ #define TX_DESCQ_OWNER_ID_LBN 10
|
|
+ #define TX_DESCQ_OWNER_ID_WIDTH 14
|
|
+ #define TX_DESCQ_LABEL_LBN 5
|
|
+ #define TX_DESCQ_LABEL_WIDTH 5
|
|
+ #define TX_DESCQ_SIZE_LBN 3
|
|
+ #define TX_DESCQ_SIZE_WIDTH 2
|
|
+ #define TX_DESCQ_TYPE_LBN 1
|
|
+ #define TX_DESCQ_TYPE_WIDTH 2
|
|
+ #define TX_DESCQ_FLUSH_LBN 0
|
|
+ #define TX_DESCQ_FLUSH_WIDTH 1
|
|
+#define TX_CFG_REG_KER_OFST 0xA50 // Transmit configuration register
|
|
+#define TX_CFG_REG_OFST 0xA50 // Transmit configuration register
|
|
+ #define TX_IP_ID_P1_OFS_LBN 32
|
|
+ #define TX_IP_ID_P1_OFS_WIDTH 15
|
|
+ #define TX_IP_ID_P0_OFS_LBN 16
|
|
+ #define TX_IP_ID_P0_OFS_WIDTH 15
|
|
+ #define TX_TURBO_EN_LBN 3
|
|
+ #define TX_TURBO_EN_WIDTH 1
|
|
+ #define TX_OWNERR_CTL_LBN 2
|
|
+ #define TX_OWNERR_CTL_WIDTH 1 /* one bit at LBN 2; bit 3 is TX_TURBO_EN */
|
|
+ #define TX_NON_IP_DROP_DIS_LBN 1
|
|
+ #define TX_NON_IP_DROP_DIS_WIDTH 1
|
|
+ #define TX_IP_ID_REP_EN_LBN 0
|
|
+ #define TX_IP_ID_REP_EN_WIDTH 1
|
|
+#define TX_RESERVED_REG_KER_OFST 0xA80 // Transmit configuration register
|
|
+#define TX_RESERVED_REG_OFST 0xA80 // Transmit configuration register
|
|
+ #define TX_CSR_PUSH_EN_LBN 89
|
|
+ #define TX_CSR_PUSH_EN_WIDTH 1
|
|
+ #define TX_RX_SPACER_LBN 64
|
|
+ #define TX_RX_SPACER_WIDTH 8
|
|
+ #define TX_SW_EV_EN_LBN 59
|
|
+ #define TX_SW_EV_EN_WIDTH 1
|
|
+ #define TX_RX_SPACER_EN_LBN 57
|
|
+ #define TX_RX_SPACER_EN_WIDTH 1
|
|
+ #define TX_CSR_PREF_WD_TMR_LBN 24
|
|
+ #define TX_CSR_PREF_WD_TMR_WIDTH 16
|
|
+ #define TX_CSR_ONLY1TAG_LBN 21
|
|
+ #define TX_CSR_ONLY1TAG_WIDTH 1
|
|
+ #define TX_PREF_THRESHOLD_LBN 19
|
|
+ #define TX_PREF_THRESHOLD_WIDTH 2
|
|
+ #define TX_ONE_PKT_PER_Q_LBN 18
|
|
+ #define TX_ONE_PKT_PER_Q_WIDTH 1
|
|
+ #define TX_DIS_NON_IP_EV_LBN 17
|
|
+ #define TX_DIS_NON_IP_EV_WIDTH 1
|
|
+ #define TX_DMA_SPACER_LBN 8
|
|
+ #define TX_DMA_SPACER_WIDTH 8
|
|
+ #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7
|
|
+ #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1
|
|
+ #define TX_TCP_DIS_A1_LBN 7
|
|
+ #define TX_TCP_DIS_A1_WIDTH 1
|
|
+ #define TX_IP_DIS_A1_LBN 6
|
|
+ #define TX_IP_DIS_A1_WIDTH 1
|
|
+ #define TX_MAX_CPL_LBN 2
|
|
+ #define TX_MAX_CPL_WIDTH 2
|
|
+ #define TX_MAX_PREF_LBN 0
|
|
+ #define TX_MAX_PREF_WIDTH 2
|
|
+#define TX_VLAN_REG_OFST 0xAE0 // Transmit VLAN tag register
|
|
+ #define TX_VLAN_EN_LBN 127
|
|
+ #define TX_VLAN_EN_WIDTH 1
|
|
+ #define TX_VLAN7_PORT1_EN_LBN 125
|
|
+ #define TX_VLAN7_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN7_PORT0_EN_LBN 124
|
|
+ #define TX_VLAN7_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN7_LBN 112
|
|
+ #define TX_VLAN7_WIDTH 12
|
|
+ #define TX_VLAN6_PORT1_EN_LBN 109
|
|
+ #define TX_VLAN6_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN6_PORT0_EN_LBN 108
|
|
+ #define TX_VLAN6_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN6_LBN 96
|
|
+ #define TX_VLAN6_WIDTH 12
|
|
+ #define TX_VLAN5_PORT1_EN_LBN 93
|
|
+ #define TX_VLAN5_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN5_PORT0_EN_LBN 92
|
|
+ #define TX_VLAN5_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN5_LBN 80
|
|
+ #define TX_VLAN5_WIDTH 12
|
|
+ #define TX_VLAN4_PORT1_EN_LBN 77
|
|
+ #define TX_VLAN4_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN4_PORT0_EN_LBN 76
|
|
+ #define TX_VLAN4_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN4_LBN 64
|
|
+ #define TX_VLAN4_WIDTH 12
|
|
+ #define TX_VLAN3_PORT1_EN_LBN 61
|
|
+ #define TX_VLAN3_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN3_PORT0_EN_LBN 60
|
|
+ #define TX_VLAN3_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN3_LBN 48
|
|
+ #define TX_VLAN3_WIDTH 12
|
|
+ #define TX_VLAN2_PORT1_EN_LBN 45
|
|
+ #define TX_VLAN2_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN2_PORT0_EN_LBN 44
|
|
+ #define TX_VLAN2_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN2_LBN 32
|
|
+ #define TX_VLAN2_WIDTH 12
|
|
+ #define TX_VLAN1_PORT1_EN_LBN 29
|
|
+ #define TX_VLAN1_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN1_PORT0_EN_LBN 28
|
|
+ #define TX_VLAN1_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN1_LBN 16
|
|
+ #define TX_VLAN1_WIDTH 12
|
|
+ #define TX_VLAN0_PORT1_EN_LBN 13
|
|
+ #define TX_VLAN0_PORT1_EN_WIDTH 1
|
|
+ #define TX_VLAN0_PORT0_EN_LBN 12
|
|
+ #define TX_VLAN0_PORT0_EN_WIDTH 1
|
|
+ #define TX_VLAN0_LBN 0
|
|
+ #define TX_VLAN0_WIDTH 12
|
|
+#define TX_FIL_CTL_REG_OFST 0xAF0 // Transmit filter control register
|
|
+ #define TX_MADR1_FIL_EN_LBN 65
|
|
+ #define TX_MADR1_FIL_EN_WIDTH 1
|
|
+ #define TX_MADR0_FIL_EN_LBN 64
|
|
+ #define TX_MADR0_FIL_EN_WIDTH 1
|
|
+ #define TX_IPFIL31_PORT1_EN_LBN 63
|
|
+ #define TX_IPFIL31_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL31_PORT0_EN_LBN 62
|
|
+ #define TX_IPFIL31_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL30_PORT1_EN_LBN 61
|
|
+ #define TX_IPFIL30_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL30_PORT0_EN_LBN 60
|
|
+ #define TX_IPFIL30_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL29_PORT1_EN_LBN 59
|
|
+ #define TX_IPFIL29_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL29_PORT0_EN_LBN 58
|
|
+ #define TX_IPFIL29_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL28_PORT1_EN_LBN 57
|
|
+ #define TX_IPFIL28_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL28_PORT0_EN_LBN 56
|
|
+ #define TX_IPFIL28_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL27_PORT1_EN_LBN 55
|
|
+ #define TX_IPFIL27_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL27_PORT0_EN_LBN 54
|
|
+ #define TX_IPFIL27_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL26_PORT1_EN_LBN 53
|
|
+ #define TX_IPFIL26_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL26_PORT0_EN_LBN 52
|
|
+ #define TX_IPFIL26_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL25_PORT1_EN_LBN 51
|
|
+ #define TX_IPFIL25_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL25_PORT0_EN_LBN 50
|
|
+ #define TX_IPFIL25_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL24_PORT1_EN_LBN 49
|
|
+ #define TX_IPFIL24_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL24_PORT0_EN_LBN 48
|
|
+ #define TX_IPFIL24_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL23_PORT1_EN_LBN 47
|
|
+ #define TX_IPFIL23_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL23_PORT0_EN_LBN 46
|
|
+ #define TX_IPFIL23_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL22_PORT1_EN_LBN 45
|
|
+ #define TX_IPFIL22_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL22_PORT0_EN_LBN 44
|
|
+ #define TX_IPFIL22_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL21_PORT1_EN_LBN 43
|
|
+ #define TX_IPFIL21_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL21_PORT0_EN_LBN 42
|
|
+ #define TX_IPFIL21_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL20_PORT1_EN_LBN 41
|
|
+ #define TX_IPFIL20_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL20_PORT0_EN_LBN 40
|
|
+ #define TX_IPFIL20_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL19_PORT1_EN_LBN 39
|
|
+ #define TX_IPFIL19_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL19_PORT0_EN_LBN 38
|
|
+ #define TX_IPFIL19_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL18_PORT1_EN_LBN 37
|
|
+ #define TX_IPFIL18_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL18_PORT0_EN_LBN 36
|
|
+ #define TX_IPFIL18_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL17_PORT1_EN_LBN 35
|
|
+ #define TX_IPFIL17_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL17_PORT0_EN_LBN 34
|
|
+ #define TX_IPFIL17_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL16_PORT1_EN_LBN 33
|
|
+ #define TX_IPFIL16_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL16_PORT0_EN_LBN 32
|
|
+ #define TX_IPFIL16_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL15_PORT1_EN_LBN 31
|
|
+ #define TX_IPFIL15_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL15_PORT0_EN_LBN 30
|
|
+ #define TX_IPFIL15_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL14_PORT1_EN_LBN 29
|
|
+ #define TX_IPFIL14_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL14_PORT0_EN_LBN 28
|
|
+ #define TX_IPFIL14_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL13_PORT1_EN_LBN 27
|
|
+ #define TX_IPFIL13_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL13_PORT0_EN_LBN 26
|
|
+ #define TX_IPFIL13_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL12_PORT1_EN_LBN 25
|
|
+ #define TX_IPFIL12_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL12_PORT0_EN_LBN 24
|
|
+ #define TX_IPFIL12_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL11_PORT1_EN_LBN 23
|
|
+ #define TX_IPFIL11_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL11_PORT0_EN_LBN 22
|
|
+ #define TX_IPFIL11_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL10_PORT1_EN_LBN 21
|
|
+ #define TX_IPFIL10_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL10_PORT0_EN_LBN 20
|
|
+ #define TX_IPFIL10_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL9_PORT1_EN_LBN 19
|
|
+ #define TX_IPFIL9_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL9_PORT0_EN_LBN 18
|
|
+ #define TX_IPFIL9_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL8_PORT1_EN_LBN 17
|
|
+ #define TX_IPFIL8_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL8_PORT0_EN_LBN 16
|
|
+ #define TX_IPFIL8_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL7_PORT1_EN_LBN 15
|
|
+ #define TX_IPFIL7_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL7_PORT0_EN_LBN 14
|
|
+ #define TX_IPFIL7_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL6_PORT1_EN_LBN 13
|
|
+ #define TX_IPFIL6_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL6_PORT0_EN_LBN 12
|
|
+ #define TX_IPFIL6_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL5_PORT1_EN_LBN 11
|
|
+ #define TX_IPFIL5_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL5_PORT0_EN_LBN 10
|
|
+ #define TX_IPFIL5_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL4_PORT1_EN_LBN 9
|
|
+ #define TX_IPFIL4_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL4_PORT0_EN_LBN 8
|
|
+ #define TX_IPFIL4_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL3_PORT1_EN_LBN 7
|
|
+ #define TX_IPFIL3_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL3_PORT0_EN_LBN 6
|
|
+ #define TX_IPFIL3_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL2_PORT1_EN_LBN 5
|
|
+ #define TX_IPFIL2_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL2_PORT0_EN_LBN 4
|
|
+ #define TX_IPFIL2_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL1_PORT1_EN_LBN 3
|
|
+ #define TX_IPFIL1_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL1_PORT0_EN_LBN 2
|
|
+ #define TX_IPFIL1_PORT0_EN_WIDTH 1
|
|
+ #define TX_IPFIL0_PORT1_EN_LBN 1
|
|
+ #define TX_IPFIL0_PORT1_EN_WIDTH 1
|
|
+ #define TX_IPFIL0_PORT0_EN_LBN 0
|
|
+ #define TX_IPFIL0_PORT0_EN_WIDTH 1
|
|
+#define TX_IPFIL_TBL_OFST 0xB00 // Transmit IP source address filter table
|
|
+ #define TX_IPFIL_MASK_LBN 32
|
|
+ #define TX_IPFIL_MASK_WIDTH 32
|
|
+ #define TX_IP_SRC_ADR_LBN 0
|
|
+ #define TX_IP_SRC_ADR_WIDTH 32
|
|
+#define TX_PACE_REG_A1_OFST 0xF80000 // Transmit pace control register
|
|
+#define TX_PACE_REG_B0_OFST 0xA90 // Transmit pace control register
|
|
+ #define TX_PACE_SB_AF_LBN 19
|
|
+ #define TX_PACE_SB_AF_WIDTH 10
|
|
+ #define TX_PACE_SB_NOTAF_LBN 9
|
|
+ #define TX_PACE_SB_NOTAF_WIDTH 10
|
|
+ #define TX_PACE_FB_BASE_LBN 5
|
|
+ #define TX_PACE_FB_BASE_WIDTH 4
|
|
+ #define TX_PACE_BIN_TH_LBN 0
|
|
+ #define TX_PACE_BIN_TH_WIDTH 5
|
|
+#define TX_PACE_TBL_A1_OFST 0xF80040 // Transmit pacing table
|
|
+#define TX_PACE_TBL_FIRST_QUEUE_A1 4
|
|
+#define TX_PACE_TBL_B0_OFST 0xF80000 // Transmit pacing table
|
|
+#define TX_PACE_TBL_FIRST_QUEUE_B0 0
|
|
+ #define TX_PACE_LBN 0
|
|
+ #define TX_PACE_WIDTH 5
|
|
+
|
|
+//////////////---- EE/Flash Registers C Header ----//////////////
|
|
+#define EE_SPI_HCMD_REG_KER_OFST 0x100 // SPI host command register
|
|
+#define EE_SPI_HCMD_REG_OFST 0x100 // SPI host command register
|
|
+ #define EE_SPI_HCMD_CMD_EN_LBN 31
|
|
+ #define EE_SPI_HCMD_CMD_EN_WIDTH 1
|
|
+ #define EE_WR_TIMER_ACTIVE_LBN 28
|
|
+ #define EE_WR_TIMER_ACTIVE_WIDTH 1
|
|
+ #define EE_SPI_HCMD_SF_SEL_LBN 24
|
|
+ #define EE_SPI_HCMD_SF_SEL_WIDTH 1
|
|
+ #define EE_SPI_HCMD_DABCNT_LBN 16
|
|
+ #define EE_SPI_HCMD_DABCNT_WIDTH 5
|
|
+ #define EE_SPI_HCMD_READ_LBN 15
|
|
+ #define EE_SPI_HCMD_READ_WIDTH 1
|
|
+ #define EE_SPI_HCMD_DUBCNT_LBN 12
|
|
+ #define EE_SPI_HCMD_DUBCNT_WIDTH 2
|
|
+ #define EE_SPI_HCMD_ADBCNT_LBN 8
|
|
+ #define EE_SPI_HCMD_ADBCNT_WIDTH 2
|
|
+ #define EE_SPI_HCMD_ENC_LBN 0
|
|
+ #define EE_SPI_HCMD_ENC_WIDTH 8
|
|
+#define EE_SPI_HADR_REG_KER_OFST 0x110 // SPI host address register
|
|
+#define EE_SPI_HADR_REG_OFST 0x110 // SPI host address register
|
|
+ #define EE_SPI_HADR_DUBYTE_LBN 24
|
|
+ #define EE_SPI_HADR_DUBYTE_WIDTH 8
|
|
+ #define EE_SPI_HADR_ADR_LBN 0
|
|
+ #define EE_SPI_HADR_ADR_WIDTH 24
|
|
+#define EE_SPI_HDATA_REG_KER_OFST 0x120 // SPI host data register
|
|
+#define EE_SPI_HDATA_REG_OFST 0x120 // SPI host data register
|
|
+ #define EE_SPI_HDATA3_LBN 96
|
|
+ #define EE_SPI_HDATA3_WIDTH 32
|
|
+ #define EE_SPI_HDATA2_LBN 64
|
|
+ #define EE_SPI_HDATA2_WIDTH 32
|
|
+ #define EE_SPI_HDATA1_LBN 32
|
|
+ #define EE_SPI_HDATA1_WIDTH 32
|
|
+ #define EE_SPI_HDATA0_LBN 0
|
|
+ #define EE_SPI_HDATA0_WIDTH 32
|
|
+#define EE_BASE_PAGE_REG_KER_OFST 0x130 // Expansion ROM base mirror register
|
|
+#define EE_BASE_PAGE_REG_OFST 0x130 // Expansion ROM base mirror register
|
|
+ #define EE_EXP_ROM_WINDOW_BASE_LBN 16
|
|
+ #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13
|
|
+ #define EE_EXPROM_MASK_LBN 0
|
|
+ #define EE_EXPROM_MASK_WIDTH 13
|
|
+#define EE_VPD_CFG0_REG_KER_OFST 0x140 // SPI/VPD configuration register
|
|
+#define EE_VPD_CFG0_REG_OFST 0x140 // SPI/VPD configuration register
|
|
+ #define EE_SF_FASTRD_EN_LBN 127
|
|
+ #define EE_SF_FASTRD_EN_WIDTH 1
|
|
+ #define EE_SF_CLOCK_DIV_LBN 120
|
|
+ #define EE_SF_CLOCK_DIV_WIDTH 7
|
|
+ #define EE_VPD_WIP_POLL_LBN 119
|
|
+ #define EE_VPD_WIP_POLL_WIDTH 1
|
|
+ #define EE_VPDW_LENGTH_LBN 80
|
|
+ #define EE_VPDW_LENGTH_WIDTH 15
|
|
+ #define EE_VPDW_BASE_LBN 64
|
|
+ #define EE_VPDW_BASE_WIDTH 15
|
|
+ #define EE_VPD_WR_CMD_EN_LBN 56
|
|
+ #define EE_VPD_WR_CMD_EN_WIDTH 8
|
|
+ #define EE_VPD_BASE_LBN 32
|
|
+ #define EE_VPD_BASE_WIDTH 24
|
|
+ #define EE_VPD_LENGTH_LBN 16
|
|
+ #define EE_VPD_LENGTH_WIDTH 13
|
|
+ #define EE_VPD_AD_SIZE_LBN 8
|
|
+ #define EE_VPD_AD_SIZE_WIDTH 5
|
|
+ #define EE_VPD_ACCESS_ON_LBN 5
|
|
+ #define EE_VPD_ACCESS_ON_WIDTH 1
|
|
+#define EE_VPD_SW_CNTL_REG_KER_OFST 0x150 // VPD access SW control register
|
|
+#define EE_VPD_SW_CNTL_REG_OFST 0x150 // VPD access SW control register
|
|
+ #define EE_VPD_CYCLE_PENDING_LBN 31
|
|
+ #define EE_VPD_CYCLE_PENDING_WIDTH 1
|
|
+ #define EE_VPD_CYC_WRITE_LBN 28
|
|
+ #define EE_VPD_CYC_WRITE_WIDTH 1
|
|
+ #define EE_VPD_CYC_ADR_LBN 0
|
|
+ #define EE_VPD_CYC_ADR_WIDTH 15
|
|
+#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 // VPD access SW data register
|
|
+#define EE_VPD_SW_DATA_REG_OFST 0x160 // VPD access SW data register
|
|
+ #define EE_VPD_CYC_DAT_LBN 0
|
|
+ #define EE_VPD_CYC_DAT_WIDTH 32
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,43 @@
|
|
+//////////////---- Descriptors C Headers ----//////////////
|
|
+// Receive Kernel IP Descriptor
|
|
+ #define RX_KER_BUF_SIZE_LBN 48
|
|
+ #define RX_KER_BUF_SIZE_WIDTH 14
|
|
+ #define RX_KER_BUF_REGION_LBN 46
|
|
+ #define RX_KER_BUF_REGION_WIDTH 2
|
|
+ #define RX_KER_BUF_REGION0_DECODE 0
|
|
+ #define RX_KER_BUF_REGION1_DECODE 1
|
|
+ #define RX_KER_BUF_REGION2_DECODE 2
|
|
+ #define RX_KER_BUF_REGION3_DECODE 3
|
|
+ #define RX_KER_BUF_ADR_LBN 0
|
|
+ #define RX_KER_BUF_ADR_WIDTH 46
|
|
+// Receive User IP Descriptor
|
|
+ #define RX_USR_2BYTE_OFS_LBN 20
|
|
+ #define RX_USR_2BYTE_OFS_WIDTH 12
|
|
+ #define RX_USR_BUF_ID_LBN 0
|
|
+ #define RX_USR_BUF_ID_WIDTH 20
|
|
+// Transmit Kernel IP Descriptor
|
|
+ #define TX_KER_PORT_LBN 63
|
|
+ #define TX_KER_PORT_WIDTH 1
|
|
+ #define TX_KER_CONT_LBN 62
|
|
+ #define TX_KER_CONT_WIDTH 1
|
|
+ #define TX_KER_BYTE_CNT_LBN 48
|
|
+ #define TX_KER_BYTE_CNT_WIDTH 14
|
|
+ #define TX_KER_BUF_REGION_LBN 46
|
|
+ #define TX_KER_BUF_REGION_WIDTH 2
|
|
+ #define TX_KER_BUF_REGION0_DECODE 0
|
|
+ #define TX_KER_BUF_REGION1_DECODE 1
|
|
+ #define TX_KER_BUF_REGION2_DECODE 2
|
|
+ #define TX_KER_BUF_REGION3_DECODE 3
|
|
+ #define TX_KER_BUF_ADR_LBN 0
|
|
+ #define TX_KER_BUF_ADR_WIDTH 46
|
|
+// Transmit User IP Descriptor
|
|
+ #define TX_USR_PORT_LBN 47
|
|
+ #define TX_USR_PORT_WIDTH 1
|
|
+ #define TX_USR_CONT_LBN 46
|
|
+ #define TX_USR_CONT_WIDTH 1
|
|
+ #define TX_USR_BYTE_CNT_LBN 33
|
|
+ #define TX_USR_BYTE_CNT_WIDTH 13
|
|
+ #define TX_USR_BUF_ID_LBN 13
|
|
+ #define TX_USR_BUF_ID_WIDTH 20
|
|
+ #define TX_USR_BYTE_OFS_LBN 0
|
|
+ #define TX_USR_BYTE_OFS_WIDTH 13
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/ef_vi_falcon_event.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,123 @@
|
|
+//////////////---- Events Format C Header ----//////////////
|
|
+//////////////---- Event entry ----//////////////
|
|
+ #define EV_CODE_LBN 60
|
|
+ #define EV_CODE_WIDTH 4
|
|
+ #define RX_IP_EV_DECODE 0
|
|
+ #define TX_IP_EV_DECODE 2
|
|
+ #define DRIVER_EV_DECODE 5
|
|
+ #define GLOBAL_EV_DECODE 6
|
|
+ #define DRV_GEN_EV_DECODE 7
|
|
+ #define EV_DATA_LBN 0
|
|
+ #define EV_DATA_WIDTH 60
|
|
+//////////////---- Receive IP events for both Kernel & User event queues ----//////////////
|
|
+ #define RX_EV_PKT_OK_LBN 56
|
|
+ #define RX_EV_PKT_OK_WIDTH 1
|
|
+ #define RX_EV_BUF_OWNER_ID_ERR_LBN 54
|
|
+ #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1
|
|
+ #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52
|
|
+ #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1
|
|
+ #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51
|
|
+ #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1
|
|
+ #define RX_EV_ETH_CRC_ERR_LBN 50
|
|
+ #define RX_EV_ETH_CRC_ERR_WIDTH 1
|
|
+ #define RX_EV_FRM_TRUNC_LBN 49
|
|
+ #define RX_EV_FRM_TRUNC_WIDTH 1
|
|
+ #define RX_EV_DRIB_NIB_LBN 48
|
|
+ #define RX_EV_DRIB_NIB_WIDTH 1
|
|
+ #define RX_EV_TOBE_DISC_LBN 47
|
|
+ #define RX_EV_TOBE_DISC_WIDTH 1
|
|
+ #define RX_EV_PKT_TYPE_LBN 44
|
|
+ #define RX_EV_PKT_TYPE_WIDTH 3
|
|
+ #define RX_EV_PKT_TYPE_ETH_DECODE 0
|
|
+ #define RX_EV_PKT_TYPE_LLC_DECODE 1
|
|
+ #define RX_EV_PKT_TYPE_JUMBO_DECODE 2
|
|
+ #define RX_EV_PKT_TYPE_VLAN_DECODE 3
|
|
+ #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4
|
|
+ #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5
|
|
+ #define RX_EV_HDR_TYPE_LBN 42
|
|
+ #define RX_EV_HDR_TYPE_WIDTH 2
|
|
+ #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0
|
|
+ #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1
|
|
+ #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2
|
|
+ #define RX_EV_HDR_TYPE_NON_IP_DECODE 3
|
|
+ #define RX_EV_DESC_Q_EMPTY_LBN 41
|
|
+ #define RX_EV_DESC_Q_EMPTY_WIDTH 1
|
|
+ #define RX_EV_MCAST_HASH_MATCH_LBN 40
|
|
+ #define RX_EV_MCAST_HASH_MATCH_WIDTH 1
|
|
+ #define RX_EV_MCAST_PKT_LBN 39
|
|
+ #define RX_EV_MCAST_PKT_WIDTH 1
|
|
+ #define RX_EV_Q_LABEL_LBN 32
|
|
+ #define RX_EV_Q_LABEL_WIDTH 5
|
|
+ #define RX_JUMBO_CONT_LBN 31
|
|
+ #define RX_JUMBO_CONT_WIDTH 1
|
|
+ #define RX_SOP_LBN 15
|
|
+ #define RX_SOP_WIDTH 1
|
|
+ #define RX_PORT_LBN 30
|
|
+ #define RX_PORT_WIDTH 1
|
|
+ #define RX_EV_BYTE_CNT_LBN 16
|
|
+ #define RX_EV_BYTE_CNT_WIDTH 14
|
|
+ #define RX_iSCSI_PKT_OK_LBN 14
|
|
+ #define RX_iSCSI_PKT_OK_WIDTH 1
|
|
+ #define RX_ISCSI_DDIG_ERR_LBN 13
|
|
+ #define RX_ISCSI_DDIG_ERR_WIDTH 1
|
|
+ #define RX_ISCSI_HDIG_ERR_LBN 12
|
|
+ #define RX_ISCSI_HDIG_ERR_WIDTH 1
|
|
+ #define RX_EV_DESC_PTR_LBN 0
|
|
+ #define RX_EV_DESC_PTR_WIDTH 12
|
|
+//////////////---- Transmit IP events for both Kernel & User event queues ----//////////////
|
|
+ #define TX_EV_PKT_ERR_LBN 38
|
|
+ #define TX_EV_PKT_ERR_WIDTH 1
|
|
+ #define TX_EV_PKT_TOO_BIG_LBN 37
|
|
+ #define TX_EV_PKT_TOO_BIG_WIDTH 1
|
|
+ #define TX_EV_Q_LABEL_LBN 32
|
|
+ #define TX_EV_Q_LABEL_WIDTH 5
|
|
+ #define TX_EV_PORT_LBN 16
|
|
+ #define TX_EV_PORT_WIDTH 1
|
|
+ #define TX_EV_WQ_FF_FULL_LBN 15
|
|
+ #define TX_EV_WQ_FF_FULL_WIDTH 1
|
|
+ #define TX_EV_BUF_OWNER_ID_ERR_LBN 14
|
|
+ #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1
|
|
+ #define TX_EV_COMP_LBN 12
|
|
+ #define TX_EV_COMP_WIDTH 1
|
|
+ #define TX_EV_DESC_PTR_LBN 0
|
|
+ #define TX_EV_DESC_PTR_WIDTH 12
|
|
+//////////////---- Char or Kernel driver events ----//////////////
|
|
+ #define DRIVER_EV_SUB_CODE_LBN 56
|
|
+ #define DRIVER_EV_SUB_CODE_WIDTH 4
|
|
+ #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0
|
|
+ #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1
|
|
+ #define EVQ_INIT_DONE_EV_DECODE 0x2
|
|
+ #define EVQ_NOT_EN_EV_DECODE 0x3
|
|
+ #define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4
|
|
+ #define SRM_UPD_DONE_EV_DECODE 0x5
|
|
+ #define WAKE_UP_EV_DECODE 0x6
|
|
+ #define TX_PKT_NON_TCP_UDP_DECODE 0x9
|
|
+ #define TIMER_EV_DECODE 0xA
|
|
+ #define RX_DSC_ERROR_EV_DECODE 0xE
|
|
+ #define DRIVER_EV_TX_DESCQ_ID_LBN 0
|
|
+ #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12
|
|
+ #define DRIVER_EV_RX_DESCQ_ID_LBN 0
|
|
+ #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12
|
|
+ #define DRIVER_EV_EVQ_ID_LBN 0
|
|
+ #define DRIVER_EV_EVQ_ID_WIDTH 12
|
|
+ #define DRIVER_TMR_ID_LBN 0
|
|
+ #define DRIVER_TMR_ID_WIDTH 12
|
|
+ #define DRIVER_EV_SRM_UPD_LBN 0
|
|
+ #define DRIVER_EV_SRM_UPD_WIDTH 2
|
|
+ #define SRM_CLR_EV_DECODE 0
|
|
+ #define SRM_UPD_EV_DECODE 1
|
|
+ #define SRM_ILLCLR_EV_DECODE 2
|
|
+//////////////---- Global events. Sent to both event queue 0 and 4. ----//////////////
|
|
+ #define XFP_PHY_INTR_LBN 10
|
|
+ #define XFP_PHY_INTR_WIDTH 1
|
|
+ #define XG_PHY_INTR_LBN 9
|
|
+ #define XG_PHY_INTR_WIDTH 1
|
|
+ #define G_PHY1_INTR_LBN 8
|
|
+ #define G_PHY1_INTR_WIDTH 1
|
|
+ #define G_PHY0_INTR_LBN 7
|
|
+ #define G_PHY0_INTR_WIDTH 1
|
|
+//////////////---- Driver generated events ----//////////////
|
|
+ #define DRV_GEN_EV_CODE_LBN 60
|
|
+ #define DRV_GEN_EV_CODE_WIDTH 4
|
|
+ #define DRV_GEN_EV_DATA_LBN 0
|
|
+ #define DRV_GEN_EV_DATA_WIDTH 60
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/ef_vi_internal.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,256 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Really-and-truly-honestly internal stuff for libef.
|
|
+ * \date 2004/06/13
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_ul */
|
|
+#ifndef __CI_EF_VI_INTERNAL_H__
|
|
+#define __CI_EF_VI_INTERNAL_H__
|
|
+
|
|
+
|
|
+/* These flags share space with enum ef_vi_flags. */
|
|
+#define EF_VI_BUG5692_WORKAROUND 0x10000
|
|
+
|
|
+
|
|
+/* ***********************************************************************
|
|
+ * COMPILATION CONTROL FLAGS (see ef_vi.h for "workaround" controls)
|
|
+ */
|
|
+
|
|
+#define EF_VI_DO_MAGIC_CHECKS 1
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Headers
|
|
+ */
|
|
+
|
|
+#include <etherfabric/ef_vi.h>
|
|
+#include "sysdep.h"
|
|
+#include "ef_vi_falcon.h"
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Debugging.
|
|
+ */
|
|
+
|
|
+#ifndef NDEBUG
|
|
+
|
|
+# define _ef_assert(exp, file, line) BUG_ON(!(exp));
|
|
+
|
|
+# define _ef_assert2(exp, x, y, file, line) do { \
|
|
+ if (unlikely(!(exp))) \
|
|
+ BUG(); \
|
|
+ } while (0)
|
|
+
|
|
+#else
|
|
+
|
|
+# define _ef_assert(exp, file, line)
|
|
+# define _ef_assert2(e, x, y, file, line)
|
|
+
|
|
+#endif
|
|
+
|
|
+#define ef_assert(a) do{ _ef_assert((a),__FILE__,__LINE__); } while(0)
|
|
+#define ef_assert_equal(a,b) _ef_assert2((a)==(b),(a),(b),__FILE__,__LINE__)
|
|
+#define ef_assert_eq ef_assert_equal
|
|
+#define ef_assert_lt(a,b) _ef_assert2((a)<(b),(a),(b),__FILE__,__LINE__)
|
|
+#define ef_assert_le(a,b) _ef_assert2((a)<=(b),(a),(b),__FILE__,__LINE__)
|
|
+#define ef_assert_nequal(a,b) _ef_assert2((a)!=(b),(a),(b),__FILE__,__LINE__)
|
|
+#define ef_assert_ne ef_assert_nequal
|
|
+#define ef_assert_ge(a,b) _ef_assert2((a)>=(b),(a),(b),__FILE__,__LINE__)
|
|
+#define ef_assert_gt(a,b) _ef_assert2((a)>(b),(a),(b),__FILE__,__LINE__)
|
|
+
|
|
+/**********************************************************************
|
|
+ * Debug checks. ******************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+#ifdef NDEBUG
|
|
+# define EF_VI_MAGIC_SET(p, type)
|
|
+# define EF_VI_CHECK_VI(p)
|
|
+# define EF_VI_CHECK_EVENT_Q(p)
|
|
+# define EF_VI_CHECK_IOBUFSET(p)
|
|
+# define EF_VI_CHECK_FILTER(p)
|
|
+# define EF_VI_CHECK_SHMBUF(p)
|
|
+# define EF_VI_CHECK_PT_EP(p)
|
|
+#else
|
|
+# define EF_VI 0x3
|
|
+# define EF_EPLOCK 0x6
|
|
+# define EF_IOBUFSET 0x9
|
|
+# define EF_FILTER 0xa
|
|
+# define EF_SHMBUF 0x11
|
|
+
|
|
+# define EF_VI_MAGIC(p, type) \
|
|
+ (((unsigned)(type) << 28) | \
|
|
+ (((unsigned)(intptr_t)(p)) & 0x0fffffffu))
|
|
+
|
|
+# if !EF_VI_DO_MAGIC_CHECKS
|
|
+# define EF_VI_MAGIC_SET(p, type)
|
|
+# define EF_VI_MAGIC_CHECK(p, type)
|
|
+# else
|
|
+# define EF_VI_MAGIC_SET(p, type) \
|
|
+ do { \
|
|
+ (p)->magic = EF_VI_MAGIC((p), (type)); \
|
|
+ } while (0)
|
|
+
|
|
+# define EF_VI_MAGIC_OKAY(p, type) \
|
|
+ ((p)->magic == EF_VI_MAGIC((p), (type)))
|
|
+
|
|
+# define EF_VI_MAGIC_CHECK(p, type) \
|
|
+ ef_assert(EF_VI_MAGIC_OKAY((p), (type)))
|
|
+
|
|
+#endif /* EF_VI_DO_MAGIC_CHECKS */
|
|
+/* Sanity-check macros: each expands to ONE statement (safe under an unbraced if); callers supply the ';'. */
|
|
+# define EF_VI_CHECK_VI(p) do { \
|
|
+ ef_assert(p); \
|
|
+ EF_VI_MAGIC_CHECK((p), EF_VI); } while (0)
|
|
+
|
|
+# define EF_VI_CHECK_EVENT_Q(p) do { \
|
|
+ ef_assert(p); \
|
|
+ EF_VI_MAGIC_CHECK((p), EF_VI); \
|
|
+ ef_assert((p)->evq_base); \
|
|
+ ef_assert((p)->evq_mask); } while (0)
|
|
+
|
|
+# define EF_VI_CHECK_PT_EP(p) do { \
|
|
+ ef_assert(p); \
|
|
+ EF_VI_MAGIC_CHECK((p), EF_VI); \
|
|
+ ef_assert((p)->ep_state); } while (0)
|
|
+
|
|
+# define EF_VI_CHECK_IOBUFSET(p) do { \
|
|
+ ef_assert(p); \
|
|
+ EF_VI_MAGIC_CHECK((p), EF_IOBUFSET); } while (0)
|
|
+
|
|
+# define EF_VI_CHECK_FILTER(p) do { \
|
|
+ ef_assert(p); \
|
|
+ EF_VI_MAGIC_CHECK((p), EF_FILTER); } while (0)
|
|
+
|
|
+# define EF_VI_CHECK_SHMBUF(p) do { \
|
|
+ ef_assert(p); \
|
|
+ EF_VI_MAGIC_CHECK((p), EF_SHMBUF); } while (0)
|
|
+
|
|
+#endif
|
|
+
|
|
+#ifndef NDEBUG
|
|
+# define EF_DRIVER_MAGIC 0x00f00ba4
|
|
+# define EF_ASSERT_THIS_DRIVER_VALID(driver) \
|
|
+ do{ ef_assert(driver); \
|
|
+ EF_VI_MAGIC_CHECK((driver), EF_DRIVER_MAGIC); \
|
|
+ ef_assert((driver)->init); }while(0)
|
|
+
|
|
+# define EF_ASSERT_DRIVER_VALID() EF_ASSERT_THIS_DRIVER_VALID(&ci_driver)
|
|
+#else
|
|
+# define EF_ASSERT_THIS_DRIVER_VALID(driver)
|
|
+# define EF_ASSERT_DRIVER_VALID()
|
|
+#endif
|
|
+
|
|
+
|
|
+/* *************************************
|
|
+ * Power of 2 FIFO
|
|
+ */
|
|
+
|
|
+#define EF_VI_FIFO2_M(f, x) ((x) & ((f)->fifo_mask))
|
|
+#define ef_vi_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \
|
|
+ (f)->fifo_rd_i <= (f)->fifo_mask && \
|
|
+ (f)->fifo_wr_i <= (f)->fifo_mask && \
|
|
+ EF_VI_IS_POW2((f)->fifo_mask+1u))
|
|
+
|
|
+#define ef_vi_fifo2_init(f, cap) \
|
|
+ do{ ef_assert(EF_VI_IS_POW2((cap) + 1)); \
|
|
+ (f)->fifo_rd_i = (f)->fifo_wr_i = 0u; \
|
|
+ (f)->fifo_mask = (cap); \
|
|
+ }while(0)
|
|
+
|
|
+#define ef_vi_fifo2_is_empty(f) ((f)->fifo_rd_i == (f)->fifo_wr_i)
|
|
+#define ef_vi_fifo2_capacity(f) ((f)->fifo_mask)
|
|
+#define ef_vi_fifo2_buf_size(f) ((f)->fifo_mask + 1u)
|
|
+#define ef_vi_fifo2_end(f) ((f)->fifo + ef_vi_fifo2_buf_size(f))
|
|
+#define ef_vi_fifo2_peek(f) ((f)->fifo[(f)->fifo_rd_i])
|
|
+#define ef_vi_fifo2_poke(f) ((f)->fifo[(f)->fifo_wr_i])
|
|
+#define ef_vi_fifo2_num(f) EF_VI_FIFO2_M((f),(f)->fifo_wr_i-(f)->fifo_rd_i)
|
|
+
|
|
+#define ef_vi_fifo2_wr_prev(f) \
|
|
+ do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i - 1u); }while(0)
|
|
+#define ef_vi_fifo2_wr_next(f) \
|
|
+ do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i + 1u); }while(0)
|
|
+#define ef_vi_fifo2_rd_adv(f, n) \
|
|
+ do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + (n)); }while(0)
|
|
+#define ef_vi_fifo2_rd_prev(f) \
|
|
+ do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i - 1u); }while(0)
|
|
+#define ef_vi_fifo2_rd_next(f) \
|
|
+ do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + 1u); }while(0)
|
|
+
|
|
+#define ef_vi_fifo2_put(f, v) \
|
|
+ do{ ef_vi_fifo2_poke(f) = (v); ef_vi_fifo2_wr_next(f); }while(0)
|
|
+#define ef_vi_fifo2_get(f, pv) \
|
|
+ do{ *(pv) = ef_vi_fifo2_peek(f); ef_vi_fifo2_rd_next(f); }while(0)
|
|
+
|
|
+
|
|
+/* *********************************************************************
|
|
+ * Eventq handling
|
|
+ */
|
|
+
|
|
+typedef union {
|
|
+ uint64_t u64;
|
|
+ struct {
|
|
+ uint32_t a;
|
|
+ uint32_t b;
|
|
+ } opaque;
|
|
+} ef_vi_event;
|
|
+
|
|
+
|
|
+#define EF_VI_EVENT_OFFSET(q, i) \
|
|
+ (((q)->evq_state->evq_ptr - (i) * sizeof(ef_vi_event)) & (q)->evq_mask)
|
|
+
|
|
+#define EF_VI_EVENT_PTR(q, i) \
|
|
+ ((ef_vi_event*) ((q)->evq_base + EF_VI_EVENT_OFFSET((q), (i))))
|
|
+
|
|
+/* *********************************************************************
|
|
+ * Miscellaneous goodies
|
|
+ */
|
|
+#ifdef NDEBUG
|
|
+# define EF_VI_DEBUG(x)
|
|
+#else
|
|
+# define EF_VI_DEBUG(x) x
|
|
+#endif
|
|
+
|
|
+#define EF_VI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u))
|
|
+#define EF_VI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u))
|
|
+#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align)-1u))
|
|
+#define EF_VI_PTR_ALIGN_BACK(p, align) \
|
|
+ ((char*)EF_VI_ALIGN_BACK(((intptr_t)(p)), ((intptr_t)(align))))
|
|
+#define EF_VI_IS_POW2(x) ((x) && ! ((x) & ((x) - 1)))
|
|
+
|
|
+
|
|
+/* ********************************************************************
|
|
+ */
|
|
+
|
|
+extern void falcon_vi_init(ef_vi*, void* vvis ) EF_VI_HF;
|
|
+extern void ef_eventq_state_init(ef_vi* evq) EF_VI_HF;
|
|
+extern void __ef_init(void) EF_VI_HF;
|
|
+
|
|
+
|
|
+#endif /* __CI_EF_VI_INTERNAL_H__ */
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/etherfabric/ef_vi.h 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,647 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \brief Virtual Interface
|
|
+ * \date 2007/05/16
|
|
+ */
|
|
+
|
|
+#ifndef __EFAB_EF_VI_H__
|
|
+#define __EFAB_EF_VI_H__
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Primitive types ****************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+/* We standardise on the types from stdint.h and synthesise these types
|
|
+ * for compilers/platforms that don't provide them */
|
|
+
|
|
+# include <linux/types.h>
|
|
+# define EF_VI_ALIGN(x) __attribute__ ((aligned (x)))
|
|
+# define ef_vi_inline static inline
|
|
+
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Types **************************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+typedef uint32_t ef_eventq_ptr;
|
|
+
|
|
+typedef uint64_t ef_addr;
|
|
+typedef char* ef_vi_ioaddr_t;
|
|
+
|
|
+/**********************************************************************
|
|
+ * ef_event ***********************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+/*! \i_ef_vi A DMA request identifier.
|
|
+**
|
|
+** This is an integer token specified by the transport and associated
|
|
+** with a DMA request. It is returned to the VI user with DMA completion
|
|
+** events. It is typically used to identify the buffer associated with
|
|
+** the transfer.
|
|
+*/
|
|
+typedef int ef_request_id;
|
|
+
|
|
+typedef union {
|
|
+ uint64_t u64[1];
|
|
+ uint32_t u32[2];
|
|
+} ef_vi_qword;
|
|
+
|
|
+typedef ef_vi_qword ef_hw_event;
|
|
+
|
|
+#define EF_REQUEST_ID_BITS 16u
|
|
+#define EF_REQUEST_ID_MASK ((1u << EF_REQUEST_ID_BITS) - 1u)
|
|
+
|
|
+/*! \i_ef_event An [ef_event] is a token that identifies something that
|
|
+** has happened. Examples include packets received, packets transmitted
|
|
+** and errors.
|
|
+*/
|
|
+typedef union {
|
|
+ struct {
|
|
+ ef_hw_event ev;
|
|
+ unsigned type :16;
|
|
+ } generic;
|
|
+ struct {
|
|
+ ef_hw_event ev;
|
|
+ unsigned type :16;
|
|
+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
|
|
+ unsigned q_id :16;
|
|
+ unsigned len :16;
|
|
+ unsigned flags :16;
|
|
+ } rx;
|
|
+ struct { /* This *must* have same layout as [rx]. */
|
|
+ ef_hw_event ev;
|
|
+ unsigned type :16;
|
|
+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
|
|
+ unsigned q_id :16;
|
|
+ unsigned len :16;
|
|
+ unsigned flags :16;
|
|
+ unsigned subtype :16;
|
|
+ } rx_discard;
|
|
+ struct {
|
|
+ ef_hw_event ev;
|
|
+ unsigned type :16;
|
|
+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
|
|
+ unsigned q_id :16;
|
|
+ } tx;
|
|
+ struct {
|
|
+ ef_hw_event ev;
|
|
+ unsigned type :16;
|
|
+ /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/
|
|
+ unsigned q_id :16;
|
|
+ unsigned subtype :16;
|
|
+ } tx_error;
|
|
+ struct {
|
|
+ ef_hw_event ev;
|
|
+ unsigned type :16;
|
|
+ unsigned q_id :16;
|
|
+ } rx_no_desc_trunc;
|
|
+ struct {
|
|
+ ef_hw_event ev;
|
|
+ unsigned type :16;
|
|
+ unsigned data;
|
|
+ } sw;
|
|
+} ef_event;
|
|
+
|
|
+
|
|
+#define EF_EVENT_TYPE(e) ((e).generic.type)
|
|
+enum {
|
|
+ /** Good data was received. */
|
|
+ EF_EVENT_TYPE_RX,
|
|
+ /** Packets have been sent. */
|
|
+ EF_EVENT_TYPE_TX,
|
|
+ /** Data received and buffer consumed, but something is wrong. */
|
|
+ EF_EVENT_TYPE_RX_DISCARD,
|
|
+ /** Transmit of packet failed. */
|
|
+ EF_EVENT_TYPE_TX_ERROR,
|
|
+ /** Received packet was truncated due to lack of descriptors. */
|
|
+ EF_EVENT_TYPE_RX_NO_DESC_TRUNC,
|
|
+ /** Software generated event. */
|
|
+ EF_EVENT_TYPE_SW,
|
|
+ /** Event queue overflow. */
|
|
+ EF_EVENT_TYPE_OFLOW,
|
|
+};
|
|
+
|
|
+#define EF_EVENT_RX_BYTES(e) ((e).rx.len)
|
|
+#define EF_EVENT_RX_Q_ID(e) ((e).rx.q_id)
|
|
+#define EF_EVENT_RX_CONT(e) ((e).rx.flags & EF_EVENT_FLAG_CONT)
|
|
+#define EF_EVENT_RX_SOP(e) ((e).rx.flags & EF_EVENT_FLAG_SOP)
|
|
+#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK)
|
|
+#define EF_EVENT_FLAG_SOP 0x1
|
|
+#define EF_EVENT_FLAG_CONT 0x2
|
|
+#define EF_EVENT_FLAG_ISCSI_OK 0x4
|
|
+
|
|
+#define EF_EVENT_TX_Q_ID(e) ((e).tx.q_id)
|
|
+
|
|
+#define EF_EVENT_RX_DISCARD_Q_ID(e) ((e).rx_discard.q_id)
|
|
+#define EF_EVENT_RX_DISCARD_LEN(e) ((e).rx_discard.len)
|
|
+#define EF_EVENT_RX_DISCARD_TYPE(e) ((e).rx_discard.subtype)
|
|
+enum {
|
|
+ EF_EVENT_RX_DISCARD_CSUM_BAD,
|
|
+ EF_EVENT_RX_DISCARD_CRC_BAD,
|
|
+ EF_EVENT_RX_DISCARD_TRUNC,
|
|
+ EF_EVENT_RX_DISCARD_RIGHTS,
|
|
+ EF_EVENT_RX_DISCARD_OTHER,
|
|
+};
|
|
+
|
|
+#define EF_EVENT_TX_ERROR_Q_ID(e) ((e).tx_error.q_id)
|
|
+#define EF_EVENT_TX_ERROR_TYPE(e) ((e).tx_error.subtype)
|
|
+enum {
|
|
+ EF_EVENT_TX_ERROR_RIGHTS,
|
|
+ EF_EVENT_TX_ERROR_OFLOW,
|
|
+ EF_EVENT_TX_ERROR_2BIG,
|
|
+ EF_EVENT_TX_ERROR_BUS,
|
|
+};
|
|
+
|
|
+#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e) ((e).rx_no_desc_trunc.q_id)
|
|
+
|
|
+#define EF_EVENT_SW_DATA_MASK 0xffff
|
|
+#define EF_EVENT_SW_DATA(e) ((e).sw.data)
|
|
+
|
|
+#define EF_EVENT_FMT "[ev:%x:%08x:%08x]"
|
|
+#define EF_EVENT_PRI_ARG(e) (unsigned) (e).generic.type, \
|
|
+ (unsigned) (e).generic.ev.u32[1], \
|
|
+ (unsigned) (e).generic.ev.u32[0]
|
|
+
|
|
+#define EF_GET_HW_EV(e) ((e).generic.ev)
|
|
+#define EF_GET_HW_EV_PTR(e) (&(e).generic.ev)
|
|
+#define EF_GET_HW_EV_U64(e) ((e).generic.ev.u64[0])
|
|
+
|
|
+
|
|
+/* ***************** */
|
|
+
|
|
+/*! Used by netif shared state. Must use types of explicit size. */
|
|
+typedef struct {
|
|
+ uint16_t rx_last_desc_ptr; /* for RX duplicates */
|
|
+ uint8_t bad_sop; /* bad SOP detected */
|
|
+ uint8_t frag_num; /* next fragment #, 0=>SOP */
|
|
+} ef_rx_dup_state_t;
|
|
+
|
|
+
|
|
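+/* Number of entries in ef_eventq_state.rx_dup_state[] (presumably max DMA queues per event queue; original note: "Max number of ports on any SF NIC"). */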
|
|
+#define EFAB_DMAQS_PER_EVQ_MAX 32
|
|
+
|
|
+typedef struct {
|
|
+ ef_eventq_ptr evq_ptr;
|
|
+ int32_t trashed;
|
|
+ ef_rx_dup_state_t rx_dup_state[EFAB_DMAQS_PER_EVQ_MAX];
|
|
+} ef_eventq_state;
|
|
+
|
|
+
|
|
+/*! \i_ef_base [ef_iovec] is similar to the standard [struct iovec]. An
|
|
+** array of these is used to designate a scatter/gather list of I/O
|
|
+** buffers.
|
|
+*/
|
|
+typedef struct {
|
|
+ ef_addr iov_base EF_VI_ALIGN(8);
|
|
+ unsigned iov_len;
|
|
+} ef_iovec;
|
|
+
|
|
+/* Falcon constants */
|
|
+#define TX_EV_DESC_PTR_LBN 0
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * ef_vi **************************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+enum ef_vi_flags {
|
|
+ EF_VI_RX_SCATTER = 0x1,
|
|
+ EF_VI_ISCSI_RX_HDIG = 0x2,
|
|
+ EF_VI_ISCSI_TX_HDIG = 0x4,
|
|
+ EF_VI_ISCSI_RX_DDIG = 0x8,
|
|
+ EF_VI_ISCSI_TX_DDIG = 0x10,
|
|
+ EF_VI_TX_PHYS_ADDR = 0x20,
|
|
+ EF_VI_RX_PHYS_ADDR = 0x40,
|
|
+ EF_VI_TX_IP_CSUM_DIS = 0x80,
|
|
+ EF_VI_TX_TCPUDP_CSUM_DIS= 0x100,
|
|
+ EF_VI_TX_TCPUDP_ONLY = 0x200,
|
|
+ /* Flags in range 0xXXXX0000 are for internal use. */
|
|
+};
|
|
+
|
|
+typedef struct {
|
|
+ uint32_t added;
|
|
+ uint32_t removed;
|
|
+} ef_vi_txq_state;
|
|
+
|
|
+typedef struct {
|
|
+ uint32_t added;
|
|
+ uint32_t removed;
|
|
+} ef_vi_rxq_state;
|
|
+
|
|
+typedef struct {
|
|
+ uint32_t mask;
|
|
+ void* doorbell;
|
|
+ void* descriptors;
|
|
+ uint16_t* ids;
|
|
+ unsigned misalign_mask;
|
|
+} ef_vi_txq;
|
|
+
|
|
+typedef struct {
|
|
+ uint32_t mask;
|
|
+ void* doorbell;
|
|
+ void* descriptors;
|
|
+ uint16_t* ids;
|
|
+} ef_vi_rxq;
|
|
+
|
|
+typedef struct {
|
|
+ ef_eventq_state evq;
|
|
+ ef_vi_txq_state txq;
|
|
+ ef_vi_rxq_state rxq;
|
|
+ /* Followed by request id fifos. */
|
|
+} ef_vi_state;
|
|
+
|
|
+/*! \i_ef_vi A virtual interface.
|
|
+**
|
|
+** An [ef_vi] represents a virtual interface on a specific NIC. A
|
|
+** virtual interface is a collection of an event queue and two DMA queues
|
|
+** used to pass Ethernet frames between the transport implementation and
|
|
+** the network.
|
|
+*/
|
|
+typedef struct ef_vi {
|
|
+ unsigned magic;
|
|
+
|
|
+ unsigned vi_resource_id;
|
|
+ unsigned vi_resource_handle_hack;
|
|
+ unsigned vi_i;
|
|
+
|
|
+ char* vi_mem_mmap_ptr;
|
|
+ int vi_mem_mmap_bytes;
|
|
+ char* vi_io_mmap_ptr;
|
|
+ int vi_io_mmap_bytes;
|
|
+
|
|
+ ef_eventq_state* evq_state;
|
|
+ char* evq_base;
|
|
+ unsigned evq_mask;
|
|
+ ef_vi_ioaddr_t evq_timer_reg;
|
|
+
|
|
+ ef_vi_txq vi_txq;
|
|
+ ef_vi_rxq vi_rxq;
|
|
+ ef_vi_state* ep_state;
|
|
+ enum ef_vi_flags vi_flags;
|
|
+} ef_vi;
|
|
+
|
|
+
|
|
+enum ef_vi_arch {
|
|
+ EF_VI_ARCH_FALCON,
|
|
+};
|
|
+
|
|
+
|
|
+struct ef_vi_nic_type {
|
|
+ unsigned char arch;
|
|
+ char variant;
|
|
+ unsigned char revision;
|
|
+};
|
|
+
|
|
+
|
|
+/* This structure is opaque to the client & used to pass mapping data
|
|
+ * from the resource manager to the ef_vi lib. for ef_vi_init().
|
|
+ */
|
|
+struct vi_mappings {
|
|
+ uint32_t signature;
|
|
+# define VI_MAPPING_VERSION 0x02 /*Byte: Increment me if struct altered*/
|
|
+# define VI_MAPPING_SIGNATURE (0xBA1150 + VI_MAPPING_VERSION)
|
|
+
|
|
+ struct ef_vi_nic_type nic_type;
|
|
+
|
|
+ int vi_instance;
|
|
+
|
|
+ unsigned evq_bytes;
|
|
+ char* evq_base;
|
|
+ ef_vi_ioaddr_t evq_timer_reg;
|
|
+
|
|
+ unsigned rx_queue_capacity;
|
|
+ ef_vi_ioaddr_t rx_dma_ef1;
|
|
+ char* rx_dma_falcon;
|
|
+ ef_vi_ioaddr_t rx_bell;
|
|
+
|
|
+ unsigned tx_queue_capacity;
|
|
+ ef_vi_ioaddr_t tx_dma_ef1;
|
|
+ char* tx_dma_falcon;
|
|
+ ef_vi_ioaddr_t tx_bell;
|
|
+};
|
|
+/* This is used by clients to allocate a suitably sized buffer for the
|
|
+ * resource manager to fill & ef_vi_init() to use. */
|
|
+#define VI_MAPPINGS_SIZE (sizeof(struct vi_mappings))
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * ef_config **********************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+struct ef_config_t {
|
|
+ int log; /* debug logging level */
|
|
+};
|
|
+
|
|
+extern struct ef_config_t ef_config;
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * ef_vi **************************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+/* Initialise [data_area] with information required to initialise an ef_vi.
|
|
+ * In the following, an unused param should be set to NULL. Note the case
|
|
+ * marked (*) of [iobuf_mmap] for falcon/driver; for normal driver this
|
|
+ * must be NULL.
|
|
+ *
|
|
+ * \param data_area [in,out] required, must ref at least VI_MAPPINGS_SIZE
|
|
+ * bytes
|
|
+ * \param evq_capacity [in] number of events in event queue. Specify 0 for
|
|
+ * no event queue. NOTE(review): not in the prototype below.
|
|
+ * \param rxq_capacity [in] number of descriptors in RX DMA queue. Specify
|
|
+ * 0 for no RX queue.
|
|
+ * \param txq_capacity [in] number of descriptors in TX DMA queue. Specify
|
|
+ * 0 for no TX queue.
|
|
+ * \param mmap_info [in] mem-map info for resource. NOTE(review): not in the prototype below.
|
|
+ * \param io_mmap [in] ef1, required
|
|
+ * falcon, required
|
|
+ * \param iobuf_mmap [in] ef1, UL: unused
|
|
+ * falcon, UL: required
|
|
+ */
|
|
+extern void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type,
|
|
+ unsigned rxq_capacity,
|
|
+ unsigned txq_capacity, int instance,
|
|
+ void* io_mmap, void* iobuf_mmap_rx,
|
|
+ void* iobuf_mmap_tx, enum ef_vi_flags);
|
|
+
|
|
+
|
|
+extern void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type,
|
|
+ int instance, unsigned evq_bytes,
|
|
+ void* base, void* timer_reg);
|
|
+
|
|
+ef_vi_inline unsigned ef_vi_resource_id(ef_vi* vi)
|
|
+{
|
|
+ return vi->vi_resource_id;
|
|
+}
|
|
+
|
|
+ef_vi_inline enum ef_vi_flags ef_vi_flags(ef_vi* vi)
|
|
+{
|
|
+ return vi->vi_flags;
|
|
+}
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Receive interface **************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+/*! \i_ef_vi Returns the amount of space in the RX descriptor ring.
|
|
+**
|
|
+** \return the amount of space in the queue.
|
|
+*/
|
|
+ef_vi_inline int ef_vi_receive_space(ef_vi* vi)
|
|
+{
|
|
+ ef_vi_rxq_state* qs = &vi->ep_state->rxq;
|
|
+ return vi->vi_rxq.mask - (qs->added - qs->removed);
|
|
+}
|
|
+
|
|
+
|
|
+/*! \i_ef_vi Returns the fill level of the RX descriptor ring.
|
|
+**
|
|
+** \return the fill level of the queue.
|
|
+*/
|
|
+ef_vi_inline int ef_vi_receive_fill_level(ef_vi* vi)
|
|
+{
|
|
+ ef_vi_rxq_state* qs = &vi->ep_state->rxq;
|
|
+ return qs->added - qs->removed;
|
|
+}
|
|
+
|
|
+/*! \i_ef_vi Returns the total capacity of the RX descriptor ring (the value of vi_rxq.mask). */
|
|
+ef_vi_inline int ef_vi_receive_capacity(ef_vi* vi)
|
|
+{
|
|
+ return vi->vi_rxq.mask;
|
|
+}
|
|
+
|
|
+/*! \i_ef_vi Complete a receive operation.
|
|
+**
|
|
+** When a receive completion event is received, it should be passed to
|
|
+** this function. The request-id for the buffer that the packet was
|
|
+** delivered to is returned.
|
|
+**
|
|
+** After this function returns, more space may be available in the
|
|
+** receive queue.
|
|
+*/
|
|
+extern ef_request_id ef_vi_receive_done(const ef_vi*, const ef_event*);
|
|
+
|
|
+/*! \i_ef_vi Return request ID indicated by a receive event
|
|
+ */
|
|
+ef_vi_inline ef_request_id ef_vi_receive_request_id(const ef_vi* vi,
|
|
+ const ef_event* ef_ev)
|
|
+{
|
|
+ const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev);
|
|
+ return ev->u32[0] & vi->vi_rxq.mask;
|
|
+}
|
|
+
|
|
+
|
|
+/*! \i_ef_vi Form a receive descriptor.
|
|
+**
|
|
+** If \c initial_rx_bytes is zero use a reception size at least as large
|
|
+** as an MTU.
|
|
+*/
|
|
+extern int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
|
|
+ int initial_rx_bytes);
|
|
+
|
|
+/*! \i_ef_vi Submit initialised receive descriptors to the NIC. */
|
|
+extern void ef_vi_receive_push(ef_vi* vi);
|
|
+
|
|
+/*! \i_ef_vi Post a buffer on the receive queue.
|
|
+**
|
|
+** \return 0 on success, or -EAGAIN if the receive queue is full
|
|
+*/
|
|
+extern int ef_vi_receive_post(ef_vi*, ef_addr addr,
|
|
+ ef_request_id dma_id);
|
|
+
|
|
+/**********************************************************************
|
|
+ * Transmit interface *************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+/*! \i_ef_vi Return the amount of space (in descriptors) in the transmit
|
|
+** queue.
|
|
+**
|
|
+** \return the amount of space in the queue (in descriptors)
|
|
+*/
|
|
+ef_vi_inline int ef_vi_transmit_space(ef_vi* vi)
|
|
+{
|
|
+ ef_vi_txq_state* qs = &vi->ep_state->txq;
|
|
+ return vi->vi_txq.mask - (qs->added - qs->removed);
|
|
+}
|
|
+
|
|
+
|
|
+/*! \i_ef_vi Returns the fill level of the TX descriptor ring.
|
|
+**
|
|
+** \return the fill level of the queue.
|
|
+*/
|
|
+ef_vi_inline int ef_vi_transmit_fill_level(ef_vi* vi)
|
|
+{
|
|
+ ef_vi_txq_state* qs = &vi->ep_state->txq;
|
|
+ return qs->added - qs->removed;
|
|
+}
|
|
+
|
|
+
|
|
+/*! \i_ef_vi Returns the total capacity of the TX descriptor ring.
|
|
+**
|
|
+** \return the capacity of the queue.
|
|
+*/
|
|
+ef_vi_inline int ef_vi_transmit_capacity(ef_vi* vi)
|
|
+{
|
|
+ return vi->vi_txq.mask;
|
|
+}
|
|
+
|
|
+
|
|
+/*! \i_ef_vi Transmit a packet.
|
|
+**
|
|
+** \param bytes must be greater than ETH_ZLEN.
|
|
+** \return -EAGAIN if the transmit queue is full, or 0 on success
|
|
+*/
|
|
+extern int ef_vi_transmit(ef_vi*, ef_addr, int bytes, ef_request_id dma_id);
|
|
+
|
|
+/*! \i_ef_vi Transmit a packet using a gather list.
|
|
+**
|
|
+** \param iov_len must be greater than zero
|
|
+** \param iov the first must be non-zero in length (but others need not)
|
|
+**
|
|
+** \return -EAGAIN if the queue is full, or 0 on success
|
|
+*/
|
|
+extern int ef_vi_transmitv(ef_vi*, const ef_iovec* iov, int iov_len,
|
|
+ ef_request_id dma_id);
|
|
+
|
|
+/*! \i_ef_vi Initialise a DMA request.
|
|
+**
|
|
+** \return -EAGAIN if the queue is full, or 0 on success
|
|
+*/
|
|
+extern int ef_vi_transmit_init(ef_vi*, ef_addr, int bytes,
|
|
+ ef_request_id dma_id);
|
|
+
|
|
+/*! \i_ef_vi Initialise a DMA request from a gather list.
|
|
+**
|
|
+** \return -EAGAIN if the queue is full, or 0 on success
|
|
+*/
|
|
+extern int ef_vi_transmitv_init(ef_vi*, const ef_iovec*, int iov_len,
|
|
+ ef_request_id dma_id);
|
|
+
|
|
+/*! \i_ef_vi Submit DMA requests to the NIC.
|
|
+**
|
|
+** The DMA requests must have been initialised using
|
|
+** ef_vi_transmit_init() or ef_vi_transmitv_init().
|
|
+*/
|
|
+extern void ef_vi_transmit_push(ef_vi*);
|
|
+
|
|
+
|
|
+/*! \i_ef_vi Maximum number of transmit completions per transmit event. */
|
|
+#define EF_VI_TRANSMIT_BATCH 64
|
|
+
|
|
+/*! \i_ef_vi Determine the set of [ef_request_id]s for each DMA request
|
|
+** which has been completed by a given transmit completion
|
|
+** event.
|
|
+**
|
|
+** \param ids must point to an array of length EF_VI_TRANSMIT_BATCH
|
|
+** \return the number of valid [ef_request_id]s (can be zero)
|
|
+*/
|
|
+extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event*,
|
|
+ ef_request_id* ids);
|
|
+
|
|
+
|
|
+/*! \i_ef_event Returns true if ef_eventq_poll() will return event(s). */
|
|
+extern int ef_eventq_has_event(ef_vi* vi);
|
|
+
|
|
+/*! \i_ef_event Returns true if there are quite a few events in the event
|
|
+** queue.
|
|
+**
|
|
+** This looks ahead in the event queue, so has the property that it will
|
|
+** not ping-pong a cache-line when it is called concurrently with events
|
|
+** being delivered.
|
|
+*/
|
|
+extern int ef_eventq_has_many_events(ef_vi* evq, int look_ahead);
|
|
+
|
|
+/*! Type of function to handle unknown events arriving on event queue
|
|
+** Return CI_TRUE iff the event has been handled.
|
|
+*/
|
|
+typedef int/*bool*/ ef_event_handler_fn(void* priv, ef_vi* evq, ef_event* ev);
|
|
+
|
|
+/*! Standard poll exception routine */
|
|
+extern int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq,
|
|
+ ef_event* ev);
|
|
+
|
|
+/*! \i_ef_event Retrieve events from the event queue, handle RX/TX events
|
|
+** and pass any others to an exception handler function
|
|
+**
|
|
+** \return The number of events retrieved.
|
|
+*/
|
|
+extern int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
|
|
+ ef_event_handler_fn *exception, void *expt_priv);
|
|
+
|
|
+/*! \i_ef_event Retrieve events from the event queue.
|
|
+**
|
|
+** \return The number of events retrieved.
|
|
+*/
|
|
+ef_vi_inline int ef_eventq_poll(ef_vi* evq, ef_event* evs, int evs_len)
|
|
+{
|
|
+ return ef_eventq_poll_evs(evq, evs, evs_len,
|
|
+ &ef_eventq_poll_exception, (void*)0);
|
|
+}
|
|
+
|
|
+/*! \i_ef_event Returns the capacity of an event queue. */
|
|
+ef_vi_inline int ef_eventq_capacity(ef_vi* vi)
|
|
+{
|
|
+ return (vi->evq_mask + 1u) / sizeof(ef_hw_event);
|
|
+}
|
|
+
|
|
+/* Returns the instance ID of [vi] */
|
|
+ef_vi_inline unsigned ef_vi_instance(ef_vi* vi)
|
|
+{ return vi->vi_i; }
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Initialisation *****************************************************
|
|
+ **********************************************************************/
|
|
+
|
|
+/*! Return size of state buffer of an initialised VI. */
|
|
+extern int ef_vi_state_bytes(ef_vi*);
|
|
+
|
|
+/*! Return size of buffer needed for VI state given sizes of RX and TX
|
|
+** DMA queues. Queue sizes must be legal sizes (power of 2), or 0 (no
|
|
+** queue).
|
|
+*/
|
|
+extern int ef_vi_calc_state_bytes(int rxq_size, int txq_size);
|
|
+
|
|
+/*! Initialise [ef_vi] from the provided resources. [vi_info] must have been
|
|
+** created by ef_make_vi_data() & remains owned by the caller.
|
|
+*/
|
|
+extern void ef_vi_init(ef_vi*, void* vi_info, ef_vi_state* state,
|
|
+ ef_eventq_state* evq_state, enum ef_vi_flags);
|
|
+
|
|
+extern void ef_vi_state_init(ef_vi*);
|
|
+extern void ef_eventq_state_init(ef_vi*);
|
|
+
|
|
+/*! Convert an efhw device arch to ef_vi_arch, or returns -1 if not
|
|
+** recognised.
|
|
+*/
|
|
+extern int ef_vi_arch_from_efhw_arch(int efhw_arch);
|
|
+
|
|
+
|
|
+#endif /* __EFAB_EF_VI_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/falcon_event.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,346 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Routine to poll event queues.
|
|
+ * \date 2003/03/04
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_lib_ef */
|
|
+#include "ef_vi_internal.h"
|
|
+
|
|
+/* Be worried about this on byteswapped machines */
+/* Some chipsets write the two halves of an event in arbitrary order
+** (bug4539), so a slot only counts as holding an event once BOTH 32-bit
+** halves differ from the all-ones null pattern.
+*/
+#define EF_VI_IS_EVENT(evp)					\
+	((evp)->opaque.a != (uint32_t)-1 &&			\
+	 (evp)->opaque.b != (uint32_t)-1)
+
+
+/* IS_DEBUG is 0 when NDEBUG is defined and 1 otherwise. */
+#if defined(NDEBUG)
+# define IS_DEBUG 0
+#else
+# define IS_DEBUG 1
+#endif
|
|
+
|
|
+
|
|
+/*! Check for RX events with inconsistent SOP/CONT
+**
+** Updates the per-queue duplicate-detection state selected by the
+** event's queue label.  The fragment counter is only consulted and
+** maintained when the BUG5692 workaround flag is set on [vi] or in
+** debug builds; otherwise bad_sop simply stays latched until the next
+** SOP event.
+**
+** Returns true if this event should be discarded
+*/
+ef_vi_inline int ef_eventq_is_rx_sop_cont_bad_efab(ef_vi* vi,
+						   const ef_vi_qword* ev)
+{
+	const unsigned q_label = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+	const unsigned is_sop = QWORD_TEST_BIT(RX_SOP, *ev);
+	ef_rx_dup_state_t* st;
+
+	ef_assert(vi);
+	ef_assert_lt(q_label, EFAB_DMAQS_PER_EVQ_MAX);
+
+	st = &vi->evq_state->rx_dup_state[q_label];
+
+	if( (vi->vi_flags & EF_VI_BUG5692_WORKAROUND) || IS_DEBUG ) {
+		const unsigned is_cont = QWORD_TEST_BIT(RX_JUMBO_CONT, *ev);
+
+		/* bad_sop should latch till the next sop; it is also set
+		 * when SOP disagrees with "this is fragment zero". */
+		st->bad_sop = (st->bad_sop && !is_sop) ||
+			( !!is_sop != (st->frag_num == 0) );
+
+		/* The byte count is not checked against the fragment
+		 * number and user rx buffer size here because the buffer
+		 * size is not known at this level - the nearest caller
+		 * should probably perform that check.
+		 */
+		if( is_cont )
+			st->frag_num = st->frag_num + 1;
+		else
+			st->frag_num = 0;
+	}
+	else {
+		st->bad_sop = (st->bad_sop && !is_sop);
+	}
+
+	return st->bad_sop;
+}
|
|
+
|
|
+
|
|
+/*! Detect an RX event that repeats the previous descriptor pointer.
+**
+** The last descriptor pointer seen is tracked per queue label.  If the
+** pointer in [ev] differs from it, the state is updated and 0 is
+** returned.  If it is the same, the queue is marked bad_sop, [ev_out]
+** is filled in as an EF_EVENT_TYPE_RX_NO_DESC_TRUNC event for that
+** queue and 1 is returned.
+*/
+ef_vi_inline int falcon_rx_check_dup(ef_vi* evq, ef_event* ev_out,
+				     const ef_vi_qword* ev)
+{
+	unsigned q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+	uint16_t desc_ptr = QWORD_GET_U(RX_EV_DESC_PTR, *ev);
+	ef_rx_dup_state_t* rx_dup_state = &evq->evq_state->rx_dup_state[q_id];
+
+	if(likely( desc_ptr != rx_dup_state->rx_last_desc_ptr )) {
+		rx_dup_state->rx_last_desc_ptr = desc_ptr;
+		return 0;
+	}
+
+	rx_dup_state->rx_last_desc_ptr = desc_ptr;
+	rx_dup_state->bad_sop = 1;
+	/* NOTE(review): frag_num is only reset in debug builds, although
+	 * ef_eventq_is_rx_sop_cont_bad_efab() also uses it when the
+	 * BUG5692 workaround flag is set - confirm this is intended. */
+#ifndef NDEBUG
+	rx_dup_state->frag_num = 0;
+#endif
+	/* A duplicate must be a truncated, not-OK, zero-length event. */
+	BUG_ON(!QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev));
+	BUG_ON( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev));
+	/* Spelled as "!= 0": the former "!x == 0" parsed as "(!x) == 0". */
+	BUG_ON(QWORD_GET_U(RX_EV_BYTE_CNT, *ev) != 0);
+	ev_out->rx_no_desc_trunc.type = EF_EVENT_TYPE_RX_NO_DESC_TRUNC;
+	ev_out->rx_no_desc_trunc.q_id = q_id;
+	return 1;
+}
|
|
+
|
|
+
|
|
+/*! Decode a hardware RX event [ev] into [ev_out].
+**
+** Events with RX_EV_PKT_OK set become EF_EVENT_TYPE_RX; all others
+** become EF_EVENT_TYPE_RX_DISCARD with a subtype naming the first
+** matching error.  Queue label, byte count and the SOP / CONT /
+** ISCSI_OK flags are copied in both cases.
+*/
+ef_vi_inline void falcon_rx_event(ef_event* ev_out, const ef_vi_qword* ev)
+{
+	unsigned flags = 0;
+
+	if( QWORD_TEST_BIT(RX_SOP, *ev) )
+		flags = EF_EVENT_FLAG_SOP;
+	if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) )
+		flags |= EF_EVENT_FLAG_CONT;
+	if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) )
+		flags |= EF_EVENT_FLAG_ISCSI_OK;
+
+	if(likely( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev) )) {
+		ev_out->rx.type = EF_EVENT_TYPE_RX;
+		ev_out->rx.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+		ev_out->rx.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
+		ev_out->rx.flags = flags;
+		return;
+	}
+
+	ev_out->rx_discard.type = EF_EVENT_TYPE_RX_DISCARD;
+	ev_out->rx_discard.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev);
+	ev_out->rx_discard.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev);
+	/* Flags on discards are a hack for ptloop compatibility:
+	 * ?? TODO purge */
+	ev_out->rx_discard.flags = flags;
+
+	/* Order matters here: more fundamental errors first. */
+	if( QWORD_TEST_BIT(RX_EV_BUF_OWNER_ID_ERR, *ev) )
+		ev_out->rx_discard.subtype = EF_EVENT_RX_DISCARD_RIGHTS;
+	else if( QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev) )
+		ev_out->rx_discard.subtype = EF_EVENT_RX_DISCARD_TRUNC;
+	else if( QWORD_TEST_BIT(RX_EV_ETH_CRC_ERR, *ev) )
+		ev_out->rx_discard.subtype = EF_EVENT_RX_DISCARD_CRC_BAD;
+	else if( QWORD_TEST_BIT(RX_EV_IP_HDR_CHKSUM_ERR, *ev) ||
+		 QWORD_TEST_BIT(RX_EV_TCP_UDP_CHKSUM_ERR, *ev) )
+		ev_out->rx_discard.subtype = EF_EVENT_RX_DISCARD_CSUM_BAD;
+	else
+		ev_out->rx_discard.subtype = EF_EVENT_RX_DISCARD_OTHER;
+}
|
|
+
|
|
+
|
|
+/*! Decode a hardware TX event [ev] into [ev_out].
+**
+** Events with TX_EV_COMP set become EF_EVENT_TYPE_TX; all others become
+** EF_EVENT_TYPE_TX_ERROR with a subtype naming the first matching error
+** bit.
+*/
+ef_vi_inline void falcon_tx_event(ef_event* ev_out, const ef_vi_qword* ev)
+{
+	/* Danger danger!  No matter what we ask for wrt batching, we
+	** will get a batched event every 16 descriptors, and we also
+	** get dma-queue-empty events.  i.e. Duplicates are expected.
+	**
+	** In addition, if it's been requested in the descriptor, we
+	** get an event per descriptor.  (We don't currently request
+	** this).
+	*/
+	if(likely( QWORD_TEST_BIT(TX_EV_COMP, *ev) )) {
+		ev_out->tx.type = EF_EVENT_TYPE_TX;
+		ev_out->tx.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
+		return;
+	}
+
+	ev_out->tx_error.type = EF_EVENT_TYPE_TX_ERROR;
+	ev_out->tx_error.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev);
+
+	/* NOTE(review): if none of the four bits below is set, subtype is
+	 * left unwritten - confirm whether that can happen. */
+	if(likely( QWORD_TEST_BIT(TX_EV_BUF_OWNER_ID_ERR, *ev) ))
+		ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_RIGHTS;
+	else if(likely( QWORD_TEST_BIT(TX_EV_WQ_FF_FULL, *ev) ))
+		ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_OFLOW;
+	else if(likely( QWORD_TEST_BIT(TX_EV_PKT_TOO_BIG, *ev) ))
+		ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_2BIG;
+	else if(likely( QWORD_TEST_BIT(TX_EV_PKT_ERR, *ev) ))
+		ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_BUS;
+}
|
|
+
|
|
+
|
|
+/* Clear the RX_EV_PKT_OK bit in the copied hardware event held in [ev]. */
+static void mark_bad(ef_event* ev)
+{
+	const uint64_t pkt_ok_bit = (uint64_t) 1u << RX_EV_PKT_OK_LBN;
+
+	ev->generic.ev.u64[0] &= ~pkt_ok_bit;
+}
|
|
+
|
|
+
|
|
+/*! Poll the event queue of [evq], writing up to [evs_len] decoded
+** events to [evs].
+**
+** Each ring entry is copied out, the read pointer advanced and the
+** ring slot reset to all-ones before the copy is decoded.  RX and TX
+** events are reported; entries with any other event code are consumed
+** without being reported.  [exception] and [expt_priv] are not used by
+** this implementation.
+**
+** \return The number of events written to [evs].  If the slot at
+** offset 1 already holds an event, a single EF_EVENT_TYPE_OFLOW event
+** is written instead and 1 is returned.
+*/
+int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len,
+		       ef_event_handler_fn *exception, void *expt_priv)
+{
+	int remaining = evs_len;
+
+	EF_VI_CHECK_EVENT_Q(evq);
+	ef_assert(evs);
+	ef_assert_gt(evs_len, 0);
+
+	if(unlikely( EF_VI_IS_EVENT(EF_VI_EVENT_PTR(evq, 1)) ))
+		goto overflow;
+
+	do {
+		/* Read the event out of the ring, then fiddle with the
+		 * copied version.  Reason is that the ring is likely to
+		 * get pushed out of cache by another event being
+		 * delivered by hardware. */
+		ef_vi_event* slot = EF_VI_EVENT_PTR(evq, 0);
+		int produced = 0;
+
+		if( ! EF_VI_IS_EVENT(slot) )
+			break;
+		evs->generic.ev.u64[0] = cpu_to_le64 (slot->u64);
+		evq->evq_state->evq_ptr += sizeof(ef_vi_event);
+		slot->u64 = (uint64_t)(int64_t) -1;
+
+		/* Ugly: Exploit the fact that event code lies in top
+		 * bits of event. */
+		ef_assert_ge(EV_CODE_LBN, 32u);
+		switch( evs->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) {
+		case RX_IP_EV_DECODE:
+			/* Look for duplicate desc_ptr: it signals that a
+			 * jumbo frame was truncated because we ran out
+			 * of descriptors. */
+			if(unlikely( falcon_rx_check_dup
+				     (evq, evs, &evs->generic.ev) )) {
+				produced = 1;
+				break;
+			}
+			/* Cope with FalconA1 bugs where RX gives
+			 * inconsistent RX events.  Mark events as bad
+			 * until SOP becomes consistent again.  The check
+			 * has side effects - order is important.
+			 */
+			if(unlikely( ef_eventq_is_rx_sop_cont_bad_efab
+				     (evq, &evs->generic.ev) ))
+				mark_bad(evs);
+			falcon_rx_event(evs, &evs->generic.ev);
+			produced = 1;
+			break;
+
+		case TX_IP_EV_DECODE:
+			falcon_tx_event(evs, &evs->generic.ev);
+			produced = 1;
+			break;
+
+		default:
+			/* Consumed, but not reported to the caller. */
+			break;
+		}
+
+		if( produced ) {
+			--remaining;
+			++evs;
+		}
+	} while( remaining );
+
+	return evs_len - remaining;
+
+
+ overflow:
+	evs->generic.type = EF_EVENT_TYPE_OFLOW;
+	evs->generic.ev.u64[0] = (uint64_t)((int64_t)-1);
+	return 1;
+}
|
|
+
|
|
+
|
|
+/*! Default exception handler for event polling.
+**
+** Returns 1 (handled) only for a driver event whose sub-code is
+** EVQ_INIT_DONE_EV_DECODE, which is ignored; returns 0 for everything
+** else.  [priv] and [evq] are not used.
+*/
+int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, ef_event* ev)
+{
+	const uint32_t code = ev->generic.ev.u32[1] >> (EV_CODE_LBN - 32u);
+
+	if( code != DRIVER_EV_DECODE )
+		return 0;
+
+	/* EVQ initialised event: ignore. */
+	return QWORD_GET_U(DRIVER_EV_SUB_CODE, ev->generic.ev) ==
+		EVQ_INIT_DONE_EV_DECODE;
+}
|
|
+
|
|
+
|
|
+/*! Call [fn] for each slot of [vi]'s event ring that holds an event.
+**
+** Slots are visited at offsets 0, -1, -2, ... for one full lap of the
+** ring.  [fn] receives [arg], [vi], the relative position, the slot's
+** absolute index in the ring and a pointer to the event.  If
+** [stop_at_end] is non-zero the walk stops at the first slot that does
+** not hold an event.
+*/
+void ef_eventq_iterate(ef_vi* vi,
+		       void (*fn)(void* arg, ef_vi*, int rel_pos,
+				  int abs_pos, void* event),
+		       void* arg, int stop_at_end)
+{
+	const int n_slots = (vi->evq_mask + 1) / sizeof(ef_vi_event);
+	int rel;
+
+	for( rel = 0; rel < n_slots; ++rel ) {
+		ef_vi_event* slot = EF_VI_EVENT_PTR(vi, -rel);
+
+		if( ! EF_VI_IS_EVENT(slot) ) {
+			if( stop_at_end )
+				return;
+			continue;
+		}
+		fn(arg, vi, rel,
+		   EF_VI_EVENT_OFFSET(vi, -rel) / sizeof(ef_vi_event),
+		   slot);
+	}
+}
|
|
+
|
|
+
|
|
+/*! Returns non-zero if the slot at the current read position of [vi]'s
+** event queue holds an event.
+*/
+int ef_eventq_has_event(ef_vi* vi)
+{
+	ef_vi_event* head = EF_VI_EVENT_PTR(vi, 0);
+
+	return EF_VI_IS_EVENT(head);
+}
|
|
+
|
|
+
|
|
+/*! Returns non-zero if the slot [look_ahead] entries beyond the current
+** read position holds an event.  [look_ahead] must not be negative.
+*/
+int ef_eventq_has_many_events(ef_vi* vi, int look_ahead)
+{
+	ef_vi_event* slot;
+
+	ef_assert_ge(look_ahead, 0);
+	slot = EF_VI_EVENT_PTR(vi, -look_ahead);
+	return EF_VI_IS_EVENT(slot);
+}
|
|
+
|
|
+
|
|
+/*! Count pending events whose event code equals EF_EVENT_TYPE_RX.
+**
+** Walks the ring from the current read position (offsets 0, -1, ...)
+** and stops at the first slot that does not hold an event.  The walk
+** is limited to one lap of the ring, so a ring whose slots all hold
+** events cannot make it loop forever.
+**
+** \return The number of matching events found.
+*/
+int ef_eventq_has_rx_event(ef_vi* vi)
+{
+	/* Same slot-count computation as ef_eventq_iterate(). */
+	const int n_slots = (vi->evq_mask + 1) / sizeof(ef_vi_event);
+	int i, n_evs = 0;
+
+	for( i = 0; i > -n_slots; --i ) {
+		ef_vi_event* ev = EF_VI_EVENT_PTR(vi, i);
+
+		if( ! EF_VI_IS_EVENT(ev) )
+			break;
+		/* NOTE(review): this compares a hardware event code with
+		 * the software EF_EVENT_TYPE_RX value - confirm the two
+		 * numbering schemes agree. */
+		if( EFVI_FALCON_EVENT_CODE(ev) == EF_EVENT_TYPE_RX )
+			n_evs++;
+	}
+	return n_evs;
+}
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/falcon_vi.c 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,473 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr, stg
|
|
+ * \brief Falcon-specific VI
|
|
+ * \date 2006/11/30
|
|
+ */
|
|
+
|
|
+#include "ef_vi_internal.h"
|
|
+
|
|
+
|
|
+#define EFVI_FALCON_DMA_TX_FRAG 1
|
|
+
|
|
+
|
|
+/* TX descriptor for both physical and virtual packet transfers */
|
|
+typedef union {
|
|
+ uint32_t dword[2];
|
|
+} ef_vi_falcon_dma_tx_buf_desc;
|
|
+typedef ef_vi_falcon_dma_tx_buf_desc ef_vi_falcon_dma_tx_phys_desc;
|
|
+
|
|
+
|
|
+/* RX descriptor for physical addressed transfers */
|
|
+typedef union {
|
|
+ uint32_t dword[2];
|
|
+} ef_vi_falcon_dma_rx_phys_desc;
|
|
+
|
|
+
|
|
+/* RX descriptor for virtual packet transfers */
|
|
+typedef struct {
|
|
+ uint32_t dword[1];
|
|
+} ef_vi_falcon_dma_rx_buf_desc;
|
|
+
|
|
+/* Buffer table index */
|
|
+typedef uint32_t ef_vi_buffer_addr_t;
|
|
+
|
|
+/*! Mask [src_dma_addr] with __FALCON_MASK(46, int64_t). */
+ef_vi_inline int64_t dma_addr_to_u46(int64_t src_dma_addr)
+{
+	const int64_t mask46 = __FALCON_MASK(46, int64_t);
+
+	return src_dma_addr & mask46;
+}
|
|
+
|
|
+/*! Set up a physical-address based RX descriptor with a specified length.
+**
+** dword[1] receives the buffer size, the region (currently always 0)
+** and the high part of the address; dword[0] receives the low part.
+*/
+ef_vi_inline void
+__falcon_dma_rx_calc_ip_phys(ef_vi_dma_addr_t dest_pa,
+			     ef_vi_falcon_dma_rx_phys_desc *desc,
+			     int bytes)
+{
+	int region = 0;		/* TODO fixme */
+	int64_t addr46 = dma_addr_to_u46(dest_pa);	/* lower 46 bits */
+
+	/* Field layout and argument range checks. */
+	DWCHCK(__DW2(RX_KER_BUF_SIZE_LBN),  RX_KER_BUF_SIZE_WIDTH);
+	DWCHCK(__DW2(RX_KER_BUF_REGION_LBN),RX_KER_BUF_REGION_WIDTH);
+
+	LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
+
+	RANGECHCK(bytes,  RX_KER_BUF_SIZE_WIDTH);
+	RANGECHCK(region, RX_KER_BUF_REGION_WIDTH);
+
+	ef_assert(desc);
+
+	desc->dword[0] = LOW(addr46,
+			     RX_KER_BUF_ADR_LBN,
+			     RX_KER_BUF_ADR_WIDTH);
+
+	desc->dword[1] = ((bytes << __DW2(RX_KER_BUF_SIZE_LBN)) |
+			  (region << __DW2(RX_KER_BUF_REGION_LBN)) |
+			  (HIGH(addr46,
+				RX_KER_BUF_ADR_LBN,
+				RX_KER_BUF_ADR_WIDTH)));
+}
|
|
+
|
|
+/*! Set up a virtual buffer descriptor for an IPMODE transfer.
+**
+** dword[1] receives port, continuation flag, byte count and the high
+** part of the buffer id; dword[0] receives the low part of the buffer
+** id and the byte offset.
+*/
+ef_vi_inline void
+__falcon_dma_tx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, unsigned bytes,
+			    int port, int frag,
+			    ef_vi_falcon_dma_tx_buf_desc *desc)
+{
+	/* Field layout checks. */
+	DWCHCK(__DW2(TX_USR_PORT_LBN), TX_USR_PORT_WIDTH);
+	DWCHCK(__DW2(TX_USR_CONT_LBN), TX_USR_CONT_WIDTH);
+	DWCHCK(__DW2(TX_USR_BYTE_CNT_LBN), TX_USR_BYTE_CNT_WIDTH);
+	/* NOTE(review): this checks the RX kernel address field although
+	 * the value split with HIGH()/LOW() below is TX_USR_BUF_ID -
+	 * confirm which field was meant. */
+	LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH);
+	DWCHCK(TX_USR_BYTE_OFS_LBN, TX_USR_BYTE_OFS_WIDTH);
+
+	/* Argument range checks. */
+	RANGECHCK(bytes,   TX_USR_BYTE_CNT_WIDTH);
+	RANGECHCK(port,    TX_USR_PORT_WIDTH);
+	RANGECHCK(frag,    TX_USR_CONT_WIDTH);
+	RANGECHCK(buf_id,  TX_USR_BUF_ID_WIDTH);
+	RANGECHCK(buf_ofs, TX_USR_BYTE_OFS_WIDTH);
+
+	ef_assert(desc);
+
+	desc->dword[0] = ((LOW(buf_id,
+			       TX_USR_BUF_ID_LBN,
+			       (TX_USR_BUF_ID_WIDTH))) |
+			  (buf_ofs << TX_USR_BYTE_OFS_LBN));
+
+	desc->dword[1] = ((port   <<  __DW2(TX_USR_PORT_LBN))      |
+			  (frag   <<  __DW2(TX_USR_CONT_LBN))      |
+			  (bytes  <<  __DW2(TX_USR_BYTE_CNT_LBN))  |
+			  (HIGH(buf_id,
+				TX_USR_BUF_ID_LBN,
+				TX_USR_BUF_ID_WIDTH)));
+}
|
|
+
|
|
+/*! Build a TX buffer descriptor from a combined 4K-page buffer address.
+**
+** TODO FIXME [buf_vaddr] consists of the buffer index in the high bits
+** and an offset in the low bits.  Assumptions permeate the code that
+** these can be rolled into one 32bit value, so this is currently
+** preserved for Falcon.  But we should change to support 8K pages.
+*/
+ef_vi_inline void
+falcon_dma_tx_calc_ip_buf_4k(unsigned buf_vaddr, unsigned bytes,
+			     int port, int frag,
+			     ef_vi_falcon_dma_tx_buf_desc *desc)
+{
+	__falcon_dma_tx_calc_ip_buf(EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr),
+				    EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr),
+				    bytes, port, frag, desc);
+}
|
|
+
|
|
+/*! Build a TX buffer descriptor; forwards to the 4K-page variant. */
+ef_vi_inline void
+falcon_dma_tx_calc_ip_buf(unsigned buf_vaddr, unsigned bytes, int port,
+			  int frag, ef_vi_falcon_dma_tx_buf_desc *desc)
+{
+	falcon_dma_tx_calc_ip_buf_4k(buf_vaddr, bytes, port, frag, desc);
+}
|
|
+
|
|
+/*! Set up a virtual buffer based RX descriptor.
+**
+** [buf_ofs] must be even; it is stored in units of two bytes alongside
+** [buf_id] in dword[0].
+*/
+ef_vi_inline void
+__falcon_dma_rx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs,
+			    ef_vi_falcon_dma_rx_buf_desc *desc)
+{
+	unsigned ofs_2byte;
+
+	/* check alignment of buffer offset and pack */
+	ef_assert((buf_ofs & 0x1) == 0);
+	ofs_2byte = buf_ofs >> 1;
+
+	DWCHCK(RX_USR_2BYTE_OFS_LBN, RX_USR_2BYTE_OFS_WIDTH);
+	DWCHCK(RX_USR_BUF_ID_LBN, RX_USR_BUF_ID_WIDTH);
+
+	RANGECHCK(ofs_2byte, RX_USR_2BYTE_OFS_WIDTH);
+	RANGECHCK(buf_id, RX_USR_BUF_ID_WIDTH);
+
+	ef_assert(desc);
+
+	desc->dword[0] = ((ofs_2byte << RX_USR_2BYTE_OFS_LBN) |
+			  (buf_id << RX_USR_BUF_ID_LBN));
+}
|
|
+
|
|
+/*! Build an RX buffer descriptor from a combined 4K-page buffer address.
+**
+** TODO FIXME [buf_vaddr] consists of the buffer index in the high bits
+** and an offset in the low bits.  Assumptions permeate the code that
+** these can be rolled into one 32bit value, so this is currently
+** preserved for Falcon.  But we should change to support 8K pages.
+*/
+ef_vi_inline void
+falcon_dma_rx_calc_ip_buf_4k(unsigned buf_vaddr,
+			     ef_vi_falcon_dma_rx_buf_desc *desc)
+{
+	__falcon_dma_rx_calc_ip_buf(EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr),
+				    EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr),
+				    desc);
+}
|
|
+
|
|
+/*! Build an RX buffer descriptor; forwards to the 4K-page variant. */
+ef_vi_inline void
+falcon_dma_rx_calc_ip_buf(unsigned buf_vaddr,
+			  ef_vi_falcon_dma_rx_buf_desc *desc)
+{
+	falcon_dma_rx_calc_ip_buf_4k(buf_vaddr, desc);
+}
|
|
+
|
|
+
|
|
+/*! Convert an ef_addr to an ef_vi_dma_addr_t by a plain cast. */
+ef_vi_inline ef_vi_dma_addr_t ef_physaddr(ef_addr efaddr)
+{
+	ef_vi_dma_addr_t dma = (ef_vi_dma_addr_t) efaddr;
+
+	return dma;
+}
|
|
+
|
|
+
|
|
+/*! Convert an ef_addr to a buffer table index.
+** Asserts that the value fits in 32 bits, i.e. that this was not a
+** physical address.
+*/
+ef_vi_inline ef_vi_buffer_addr_t ef_bufaddr(ef_addr efaddr)
+{
+	const uint64_t limit = (uint64_t)1 << 32;
+
+	ef_assert(efaddr < limit);
+
+	return (ef_vi_buffer_addr_t) efaddr;
+}
|
|
+
|
|
+
|
|
+/*! Set up a physical-address based descriptor for an IPMODE transfer.
+**
+** dword[1] receives port, continuation flag, byte count, region
+** (currently always 0) and the high part of the 46-bit address;
+** dword[0] receives the low 32 bits of [src_dma_addr].
+*/
+ef_vi_inline void
+falcon_dma_tx_calc_ip_phys(ef_vi_dma_addr_t src_dma_addr, unsigned bytes,
+			   int port, int frag,
+			   ef_vi_falcon_dma_tx_phys_desc *desc)
+{
+	int region = 0;		/* FIXME */
+	int64_t addr46 = dma_addr_to_u46(src_dma_addr);	/* lower 46 bits */
+
+	/* Field layout checks. */
+	DWCHCK(__DW2(TX_KER_PORT_LBN),      TX_KER_PORT_WIDTH);
+	DWCHCK(__DW2(TX_KER_CONT_LBN),      TX_KER_CONT_WIDTH);
+	DWCHCK(__DW2(TX_KER_BYTE_CNT_LBN),  TX_KER_BYTE_CNT_WIDTH);
+	DWCHCK(__DW2(TX_KER_BUF_REGION_LBN),TX_KER_BUF_REGION_WIDTH);
+
+	LWCHK(TX_KER_BUF_ADR_LBN, TX_KER_BUF_ADR_WIDTH);
+
+	/* Argument range checks. */
+	RANGECHCK(port,   TX_KER_PORT_WIDTH);
+	RANGECHCK(frag,   TX_KER_CONT_WIDTH);
+	RANGECHCK(bytes,  TX_KER_BYTE_CNT_WIDTH);
+	RANGECHCK(region, TX_KER_BUF_REGION_WIDTH);
+
+	desc->dword[1] = ((port   << __DW2(TX_KER_PORT_LBN))      |
+			  (frag   << __DW2(TX_KER_CONT_LBN))      |
+			  (bytes  << __DW2(TX_KER_BYTE_CNT_LBN))  |
+			  (region << __DW2(TX_KER_BUF_REGION_LBN)) |
+			  (HIGH(addr46,
+				TX_KER_BUF_ADR_LBN,
+				TX_KER_BUF_ADR_WIDTH)));
+
+	/* The address field starts at bit 0, so the low dword is simply
+	 * the low 32 bits of the address. */
+	ef_assert_equal(TX_KER_BUF_ADR_LBN, 0);
+	desc->dword[0] = (uint32_t) src_dma_addr;
+}
|
|
+
|
|
+
|
|
+/*! Falcon-specific initialisation of [vi] from the mappings in [vvis].
+**
+** [vvis] is interpreted as a struct vi_mappings.  For each DMA queue
+** with non-zero capacity this sets the queue mask, doorbell address,
+** descriptor ring and request-id array.  The id arrays are laid out
+** directly after *vi->ep_state, TX ids first and then RX ids.  For NIC
+** variant 'A' the BUG5391 and BUG5692 workarounds are enabled.
+*/
+void falcon_vi_init(ef_vi* vi, void* vvis)
+{
+	struct vi_mappings *vm = (struct vi_mappings*)vvis;
+	uint16_t* next_ids;
+
+	ef_assert(vi);
+	ef_assert(vvis);
+	ef_assert_equal(vm->signature, VI_MAPPING_SIGNATURE);
+	ef_assert_equal(vm->nic_type.arch, EF_VI_ARCH_FALCON);
+
+	/* Initialise masks to zero, so that ef_vi_state_init() will
+	** not do any harm when we don't have DMA queues. */
+	vi->vi_txq.mask = 0;
+	vi->vi_rxq.mask = 0;
+
+	/* Used for BUG5391_WORKAROUND. */
+	vi->vi_txq.misalign_mask = 0;
+
+	/* Initialise doorbell addresses to a distinctive small value
+	** which will cause a segfault, to trap doorbell pushes to VIs
+	** without DMA queues. */
+	vi->vi_txq.doorbell = (ef_vi_ioaddr_t)0xdb;
+	vi->vi_rxq.doorbell = (ef_vi_ioaddr_t)0xdb;
+
+	next_ids = (uint16_t*) (vi->ep_state + 1);
+
+	if( vm->tx_queue_capacity ) {
+		vi->vi_txq.mask = vm->tx_queue_capacity - 1;
+		vi->vi_txq.doorbell = vm->tx_bell + 12;
+		vi->vi_txq.descriptors = vm->tx_dma_falcon;
+		vi->vi_txq.ids = next_ids;
+		/* RX ids, if any, follow the TX ids. */
+		next_ids += vi->vi_txq.mask + 1;
+		/* Check that the id fifo fits in the space allocated. */
+		ef_assert_le((char*) (vi->vi_txq.ids + vm->tx_queue_capacity),
+			     (char*) vi->ep_state
+			     + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
+						      vm->tx_queue_capacity));
+	}
+	if( vm->rx_queue_capacity ) {
+		vi->vi_rxq.mask = vm->rx_queue_capacity - 1;
+		vi->vi_rxq.doorbell = vm->rx_bell + 12;
+		vi->vi_rxq.descriptors = vm->rx_dma_falcon;
+		vi->vi_rxq.ids = next_ids;
+		/* Check that the id fifo fits in the space allocated. */
+		ef_assert_le((char*) (vi->vi_rxq.ids + vm->rx_queue_capacity),
+			     (char*) vi->ep_state
+			     + ef_vi_calc_state_bytes(vm->rx_queue_capacity,
+						      vm->tx_queue_capacity));
+	}
+
+	if( vm->nic_type.variant == 'A' ) {
+		vi->vi_txq.misalign_mask = 15;    /* BUG5391_WORKAROUND */
+		vi->vi_flags |= EF_VI_BUG5692_WORKAROUND;
+	}
+}
|
|
+
|
|
+
|
|
+/*! Write TX descriptors for the [iov_len] segments of [iov], without
+** ringing the doorbell.
+**
+** Each segment is split so that no descriptor crosses a 4K boundary;
+** when the queue has a misalign mask, a misaligned chunk is also
+** limited so that misalignment plus length stays within 512 bytes.
+** With EF_VI_ISCSI_TX_DDIG set, the final 4 bytes of the vector are
+** not transmitted.  [dma_id] is stored against the last descriptor
+** written.
+**
+** \return 0 on success, or -EAGAIN if the queue fills up, in which
+** case the queue's added count is restored to its value on entry.
+*/
+int ef_vi_transmitv_init(ef_vi* vi, const ef_iovec* iov, int iov_len,
+			 ef_request_id dma_id)
+{
+	ef_vi_txq* q = &vi->vi_txq;
+	ef_vi_txq_state* qs = &vi->ep_state->txq;
+	const unsigned added_on_entry = qs->added;
+	const int ddig = (vi->vi_flags & EF_VI_ISCSI_TX_DDIG) != 0;
+	ef_vi_falcon_dma_tx_buf_desc* dp;
+	ef_addr dma_addr;
+	unsigned seg_left, chunk, di;
+	unsigned last_len = 0;
+
+	ef_assert(iov_len > 0);
+	ef_assert(iov);
+	ef_assert_equal((dma_id & EF_REQUEST_ID_MASK), dma_id);
+	ef_assert_nequal(dma_id, 0xffff);
+
+	dma_addr = iov->iov_base;
+	seg_left = iov->iov_len;
+
+	if( ddig ) {
+		/* Last 4 bytes of placeholder for digest must be
+		 * removed for h/w */
+		ef_assert(seg_left > 4);
+		last_len = iov[iov_len - 1].iov_len;
+		if( last_len <= 4 ) {
+			/* The digest spills into the previous segment:
+			 * drop the last one and shorten its predecessor. */
+			ef_assert(iov_len > 1);
+			--iov_len;
+			last_len = iov[iov_len - 1].iov_len - (4 - last_len);
+		}
+		else {
+			last_len -= 4;
+		}
+		if( iov_len == 1 )
+			seg_left = last_len;
+	}
+
+	for( ;; ) {
+		unsigned misalign;
+		int frag;
+
+		if( qs->added - qs->removed >= q->mask ) {
+			qs->added = added_on_entry;
+			return -EAGAIN;
+		}
+
+		/* Bytes up to the next 4K boundary, capped to what is
+		 * left of this segment. */
+		chunk = (~((unsigned) dma_addr) & 0xfff) + 1;
+		if( chunk > seg_left )
+			chunk = seg_left;
+
+		/* BUG5391_WORKAROUND */
+		misalign = (unsigned) dma_addr & q->misalign_mask;
+		if( misalign && chunk + misalign > 512 )
+			chunk = 512 - misalign;
+
+		di = qs->added++ & q->mask;
+		dp = (ef_vi_falcon_dma_tx_buf_desc*) q->descriptors + di;
+
+		/* Only the very last descriptor has the frag bit clear. */
+		frag = (iov_len == 1 && chunk == seg_left) ?
+			0 : EFVI_FALCON_DMA_TX_FRAG;
+
+		if( vi->vi_flags & EF_VI_TX_PHYS_ADDR )
+			falcon_dma_tx_calc_ip_phys
+				(ef_physaddr(dma_addr), chunk, /*port*/ 0,
+				 frag, dp);
+		else
+			falcon_dma_tx_calc_ip_buf
+				(ef_bufaddr(dma_addr), chunk, /*port*/ 0,
+				 frag, dp);
+
+		dma_addr += chunk;
+		seg_left -= chunk;
+
+		if( seg_left != 0 )
+			continue;
+
+		/* Segment finished: move on to the next one, if any. */
+		if( --iov_len == 0 )
+			break;
+		++iov;
+		dma_addr = iov->iov_base;
+		seg_left = iov->iov_len;
+		if( ddig && iov_len == 1 )
+			seg_left = last_len;
+	}
+
+	q->ids[di] = (uint16_t) dma_id;
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*! Write the TX queue's current write pointer (added count masked to
+** the ring size) to the TX doorbell, after an ef_vi_wiob() barrier.
+*/
+void ef_vi_transmit_push(ef_vi* vi)
+{
+	unsigned wptr;
+
+	ef_vi_wiob();
+	wptr = vi->ep_state->txq.added & vi->vi_txq.mask;
+	writel(wptr << __DW4(TX_DESC_WPTR_LBN), vi->vi_txq.doorbell);
+}
|
|
+
|
|
+
|
|
+/*! Write one RX descriptor for [addr], without ringing the doorbell.
+**
+** The value of initial_rx_bytes is used to set RX_KER_BUF_SIZE in an
+** initial receive descriptor here if physical addressing is being used.
+** A value of zero represents 16384 bytes.  This is okay, because the
+** caller must provide a buffer that is > MTU, and the mac should filter
+** anything bigger than that.
+**
+** \return 0 on success, or -EAGAIN if ef_vi_receive_space() reports no
+** room in the queue.
+*/
+int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id,
+		       int initial_rx_bytes)
+{
+	ef_vi_rxq* q = &vi->vi_rxq;
+	ef_vi_rxq_state* qs = &vi->ep_state->rxq;
+	unsigned di;
+
+	if( ! ef_vi_receive_space(vi) )
+		return -EAGAIN;
+
+	di = qs->added++ & q->mask;
+	/* The slot being claimed must hold the 0xffff free marker. */
+	ef_assert_equal(q->ids[di], 0xffff);
+	q->ids[di] = (uint16_t) dma_id;
+
+	if( vi->vi_flags & EF_VI_RX_PHYS_ADDR ) {
+		ef_vi_falcon_dma_rx_phys_desc* dp;
+		dp = (ef_vi_falcon_dma_rx_phys_desc*)
+			q->descriptors + di;
+		__falcon_dma_rx_calc_ip_phys(addr, dp,
+					     initial_rx_bytes);
+	}
+	else {
+		ef_vi_falcon_dma_rx_buf_desc* dp;
+		dp = (ef_vi_falcon_dma_rx_buf_desc*)
+			q->descriptors + di;
+		falcon_dma_rx_calc_ip_buf(ef_bufaddr(addr), dp);
+	}
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*! Write an RX descriptor (initial_rx_bytes of 0) and, if that
+** succeeded, ring the RX doorbell.
+**
+** \return The result of ef_vi_receive_init().
+*/
+int ef_vi_receive_post(ef_vi* vi, ef_addr addr, ef_request_id dma_id)
+{
+	int rc = ef_vi_receive_init(vi, addr, dma_id, 0);
+
+	if( rc != 0 )
+		return rc;
+	ef_vi_receive_push(vi);
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*! Write the RX queue's current write pointer (added count masked to
+** the ring size) to the RX doorbell, after an ef_vi_wiob() barrier.
+*/
+void ef_vi_receive_push(ef_vi* vi)
+{
+	unsigned wptr;
+
+	ef_vi_wiob();
+	wptr = vi->ep_state->rxq.added & vi->vi_rxq.mask;
+	writel (wptr << __DW4(RX_DESC_WPTR_LBN), vi->vi_rxq.doorbell);
+}
|
|
+
|
|
+
|
|
+/*! Complete the RX descriptor named by [ef_ev].
+**
+** The descriptor index is the low dword of the hardware event masked
+** to the ring size.  The request id stored at that index is returned,
+** the slot is reset to the 0xffff free marker and the queue's removed
+** count is incremented.
+*/
+ef_request_id ef_vi_receive_done(const ef_vi* vi, const ef_event* ef_ev)
+{
+	const ef_vi_qword* hw_ev = EF_GET_HW_EV_PTR(*ef_ev);
+	const unsigned di = hw_ev->u32[0] & vi->vi_rxq.mask;
+	ef_request_id done_id;
+
+	ef_assert(EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX ||
+		  EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX_DISCARD);
+
+	/* Detect spurious / duplicate RX events.  We may need to modify
+	** this code so that we are robust if they happen. */
+	ef_assert_equal(di, vi->ep_state->rxq.removed & vi->vi_rxq.mask);
+
+	/* We only support 1 port: so events should be in order. */
+	ef_assert(vi->vi_rxq.ids[di] != 0xffff);
+
+	done_id = vi->vi_rxq.ids[di];
+	vi->vi_rxq.ids[di] = 0xffff;
+	++vi->ep_state->rxq.removed;
+
+	return done_id;
+}
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/pt_tx.c 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,91 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Packet-mode transmit interface.
|
|
+ * \date 2003/04/02
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_lib_ef */
|
|
+#include "ef_vi_internal.h"
|
|
+
|
|
+
|
|
+/*! Write TX descriptors for a single buffer of [len] bytes at [base],
+** without ringing the doorbell.
+**
+** \return The result of ef_vi_transmitv_init() for a one-entry vector.
+*/
+int ef_vi_transmit_init(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
+{
+	ef_iovec single = { base, len };
+
+	return ef_vi_transmitv_init(vi, &single, 1, dma_id);
+}
|
|
+
|
|
+
|
|
+/*! Write TX descriptors for a single buffer and, if that succeeded,
+** ring the TX doorbell.
+**
+** \return The result of ef_vi_transmitv_init().
+*/
+int ef_vi_transmit(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id)
+{
+	ef_iovec single = { base, len };
+	int rc;
+
+	rc = ef_vi_transmitv_init(vi, &single, 1, dma_id);
+	if( rc != 0 )
+		return rc;
+	ef_vi_transmit_push(vi);
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*! Write TX descriptors for the vector [iov] and, if that succeeded,
+** ring the TX doorbell.
+**
+** \return The result of ef_vi_transmitv_init().
+*/
+int ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, int iov_len,
+		    ef_request_id dma_id)
+{
+	int rc = ef_vi_transmitv_init(vi, iov, iov_len, dma_id);
+
+	if( rc != 0 )
+		return rc;
+	ef_vi_transmit_push(vi);
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*! Collect the request ids completed by the TX event [__ev].
+**
+** Walks the TX ring from the queue's removed position up to and
+** including the descriptor index in the low dword of the hardware
+** event, advancing the removed count as it goes.  Every slot holding
+** an id other than the 0xffff marker is copied to [ids] and reset to
+** 0xffff.
+**
+** \return The number of ids written to [ids].
+*/
+int ef_vi_transmit_unbundle(ef_vi* vi, const ef_event* __ev,
+			    ef_request_id* ids)
+{
+	ef_vi_txq* q = &vi->vi_txq;
+	ef_vi_txq_state* qs = &vi->ep_state->txq;
+	const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*__ev);
+	const unsigned stop = (ev->u32[0] + 1) & q->mask;
+	unsigned i;
+	int n_ids = 0;
+
+	ef_assert(EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX ||
+		  EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX_ERROR);
+
+	/* Shouldn't be batching more than 64 descriptors, and should not go
+	** backwards. */
+	ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), 64);
+	/* Should not complete more than we've posted. */
+	ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask),
+		     qs->added - qs->removed);
+
+	i = qs->removed & q->mask;
+	while( i != stop ) {
+		if( q->ids[i] != 0xffff ) {
+			ids[n_ids++] = q->ids[i];
+			q->ids[i] = 0xffff;
+		}
+		i = ++qs->removed & q->mask;
+	}
+
+	ef_assert_le(n_ids, EF_VI_TRANSMIT_BATCH);
+
+	return n_ids;
+}
|
|
+
|
|
+/*! \cidoxg_end */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/sysdep.h 2009-04-07 13:58:48.000000000 +0200
|
|
@@ -0,0 +1,185 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author stg
|
|
+ * \brief System dependent support for ef vi lib
|
|
+ * \date 2007/05/10
|
|
+ */
|
|
+
|
|
+/*! \cidoxg_include_ci_ul */
|
|
+#ifndef __CI_CIUL_SYSDEP_LINUX_H__
|
|
+#define __CI_CIUL_SYSDEP_LINUX_H__
|
|
+
|
|
+
|
|
+#define ef_vi_wiob() mmiowb()
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Kernel version compatibility
|
|
+ */
|
|
+
|
|
+#if defined(__GNUC__)
|
|
+
|
|
+/* Linux kernel doesn't have stdint.h or [u]intptr_t. */
|
|
+# if !defined(LINUX_VERSION_CODE)
|
|
+# include <linux/version.h>
|
|
+# endif
|
|
+# include <asm/io.h>
|
|
+
|
|
+/* In Linux 2.6.24, linux/types.h has uintptr_t */
|
|
+# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
|
|
+# if BITS_PER_LONG == 32
|
|
+ typedef __u32 uintptr_t;
|
|
+# else
|
|
+ typedef __u64 uintptr_t;
|
|
+# endif
|
|
+# endif
|
|
+
|
|
+/* But even 2.6.24 doesn't define intptr_t */
|
|
+# if BITS_PER_LONG == 32
|
|
+ typedef __s32 intptr_t;
|
|
+# else
|
|
+ typedef __s64 intptr_t;
|
|
+# endif
|
|
+
|
|
+# if defined(__ia64__)
|
|
+# define EF_VI_PRIx64 "lx"
|
|
+# else
|
|
+# define EF_VI_PRIx64 "llx"
|
|
+# endif
|
|
+
|
|
+# define EF_VI_HF __attribute__((visibility("hidden")))
|
|
+# define EF_VI_HV __attribute__((visibility("hidden")))
|
|
+
|
|
+# if defined(__i386__) || defined(__x86_64__) /* GCC x86/x64 */
|
|
+ typedef unsigned long long ef_vi_dma_addr_t;
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#ifndef mmiowb
|
|
+# if defined(__i386__) || defined(__x86_64__)
|
|
+# define mmiowb()
|
|
+# elif defined(__ia64__)
|
|
+# ifndef ia64_mfa
|
|
+# define ia64_mfa() asm volatile ("mf.a" ::: "memory")
|
|
+# endif
|
|
+# define mmiowb ia64_mfa
|
|
+# else
|
|
+# error "Need definition for mmiowb"
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#ifdef EFX_NOT_UPSTREAM
|
|
+
|
|
+/* Stuff for architectures/compilers not officially supported */
|
|
+
|
|
+#if !defined(__GNUC__)
|
|
+# if defined(__PPC__) /* GCC, PPC */
|
|
+ typedef unsigned long ef_vi_dma_addr_t;
|
|
+
|
|
+# ifdef __powerpc64__
|
|
+# ifdef CONFIG_SMP
|
|
+# define CI_SMP_SYNC "\n eieio \n" /* memory cache sync */
|
|
+# define CI_SMP_ISYNC "\n isync \n" /* instr cache sync */
|
|
+# else
|
|
+# define CI_SMP_SYNC
|
|
+# define CI_SMP_ISYNC
|
|
+# endif
|
|
+# else /* for ppc32 systems */
|
|
+# ifdef CONFIG_SMP
|
|
+# define CI_SMP_SYNC "\n eieio \n"
|
|
+# define CI_SMP_ISYNC "\n sync \n"
|
|
+# else
|
|
+# define CI_SMP_SYNC
|
|
+# define CI_SMP_ISYNC
|
|
+# endif
|
|
+# endif
|
|
+
|
|
+# elif defined(__ia64__) /* GCC, IA64 */
|
|
+ typedef unsigned long ef_vi_dma_addr_t;
|
|
+# else
|
|
+# error Unknown processor - GNU C
|
|
+# endif
|
|
+
|
|
+#elif defined(__PGI)
|
|
+# error PGI not supported
|
|
+
|
|
+#elif defined(__INTEL_COMPILER)
|
|
+
|
|
+/* Intel compilers v7 claim to be very gcc compatible. */
|
|
+# if __INTEL_COMPILER >= 700
|
|
+# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91)
|
|
+# define EF_VI_LIKELY(t) __builtin_expect((t), 1)
|
|
+# define EF_VI_UNLIKELY(t) __builtin_expect((t), 0)
|
|
+# endif
|
|
+# else
|
|
+# error Old Intel compiler not supported.
|
|
+# endif
|
|
+
|
|
+#else
|
|
+# error Unknown compiler.
|
|
+#endif
|
|
+
|
|
+#endif
|
|
+
|
|
+
|
|
+# include <linux/errno.h>
|
|
+
|
|
+
|
|
+/**********************************************************************
|
|
+ * Extracting bit fields.
|
|
+ */
|
|
+
|
|
+#define _QWORD_GET_LOW(f, v) \
|
|
+ (((v).u32[0] >> (f##_LBN)) & ((1u << f##_WIDTH) - 1u))
|
|
+#define _QWORD_GET_HIGH(f, v) \
|
|
+ (((v).u32[1] >> (f##_LBN - 32u)) & ((1u << f##_WIDTH) - 1u))
|
|
+#define _QWORD_GET_ANY(f, v) \
|
|
+ (((v).u64[0] >> f##_LBN) & (((uint64_t) 1u << f##_WIDTH) - 1u))
|
|
+
|
|
+#define QWORD_GET(f, v) \
|
|
+ ((f##_LBN + f##_WIDTH) <= 32u \
|
|
+ ? _QWORD_GET_LOW(f, (v)) \
|
|
+ : ((f##_LBN >= 32u) ? _QWORD_GET_HIGH(f, (v)) : _QWORD_GET_ANY(f, (v))))
|
|
+
|
|
+#define QWORD_GET_U(f, v) ((unsigned) QWORD_GET(f, (v)))
|
|
+
|
|
+#define _QWORD_TEST_BIT_LOW(f, v) ((v).u32[0] & (1u << (f##_LBN)))
|
|
+#define _QWORD_TEST_BIT_HIGH(f, v) ((v).u32[1] & (1u << (f##_LBN - 32u)))
|
|
+
|
|
+#define QWORD_TEST_BIT(f, v) \
|
|
+ (f##_LBN < 32 ? _QWORD_TEST_BIT_LOW(f, (v)) : _QWORD_TEST_BIT_HIGH(f, (v)))
|
|
+
|
|
+
|
|
+
|
|
+
|
|
+#ifndef DECLSPEC_NORETURN
|
|
+/* normally defined on Windows to expand to a declaration that the
|
|
+ function will not return */
|
|
+# define DECLSPEC_NORETURN
|
|
+#endif
|
|
+
|
|
+#endif /* __CI_CIUL_SYSDEP_LINUX_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netfront/vi_init.c 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,183 @@
|
|
+/****************************************************************************
|
|
+ * Copyright 2002-2005: Level 5 Networks Inc.
|
|
+ * Copyright 2005-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications
|
|
+ * <linux-xen-drivers@solarflare.com>
|
|
+ * <onload-dev@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * \author djr
|
|
+ * \brief Initialisation of VIs.
|
|
+ * \date 2007/06/08
|
|
+ */
|
|
+
|
|
+#include "ef_vi_internal.h"
|
|
+
|
|
+#define EF_VI_STATE_BYTES(rxq_sz, txq_sz) \
|
|
+ (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint16_t) \
|
|
+ + (txq_sz) * sizeof(uint16_t))
|
|
+
|
|
+int ef_vi_calc_state_bytes(int rxq_sz, int txq_sz)
|
|
+{
|
|
+ ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
|
|
+ ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
|
|
+
|
|
+ return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
|
|
+}
|
|
+
|
|
+
|
|
+int ef_vi_state_bytes(ef_vi* vi)
|
|
+{
|
|
+ int rxq_sz = 0, txq_sz = 0;
|
|
+ if( ef_vi_receive_capacity(vi) )
|
|
+ rxq_sz = ef_vi_receive_capacity(vi) + 1;
|
|
+ if( ef_vi_transmit_capacity(vi) )
|
|
+ txq_sz = ef_vi_transmit_capacity(vi) + 1;
|
|
+
|
|
+ ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz));
|
|
+ ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz));
|
|
+
|
|
+ return EF_VI_STATE_BYTES(rxq_sz, txq_sz);
|
|
+}
|
|
+
|
|
+
|
|
+void ef_eventq_state_init(ef_vi* evq)
|
|
+{
|
|
+ int j;
|
|
+
|
|
+ for (j = 0; j<EFAB_DMAQS_PER_EVQ_MAX; j++) {
|
|
+ ef_rx_dup_state_t *rx_dup_state =
|
|
+ &evq->evq_state->rx_dup_state[j];
|
|
+ rx_dup_state->bad_sop = 0;
|
|
+ rx_dup_state->rx_last_desc_ptr = -1;
|
|
+ rx_dup_state->frag_num = 0;
|
|
+ }
|
|
+
|
|
+ evq->evq_state->evq_ptr = 0;
|
|
+}
|
|
+
|
|
+
|
|
+void ef_vi_state_init(ef_vi* vi)
|
|
+{
|
|
+ ef_vi_state* state = vi->ep_state;
|
|
+ unsigned i;
|
|
+
|
|
+ state->txq.added = state->txq.removed = 0;
|
|
+ state->rxq.added = state->rxq.removed = 0;
|
|
+
|
|
+ if( vi->vi_rxq.mask )
|
|
+ for( i = 0; i <= vi->vi_rxq.mask; ++i )
|
|
+ vi->vi_rxq.ids[i] = (uint16_t) -1;
|
|
+ if( vi->vi_txq.mask )
|
|
+ for( i = 0; i <= vi->vi_txq.mask; ++i )
|
|
+ vi->vi_txq.ids[i] = (uint16_t) -1;
|
|
+}
|
|
+
|
|
+
|
|
+void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type nic_type,
|
|
+ int instance, unsigned evq_bytes, void* base,
|
|
+ void* timer_reg)
|
|
+{
|
|
+ struct vi_mappings* vm = (struct vi_mappings*) data_area;
|
|
+
|
|
+ vm->signature = VI_MAPPING_SIGNATURE;
|
|
+ vm->vi_instance = instance;
|
|
+ vm->nic_type = nic_type;
|
|
+ vm->evq_bytes = evq_bytes;
|
|
+ vm->evq_base = base;
|
|
+ vm->evq_timer_reg = timer_reg;
|
|
+}
|
|
+
|
|
+
|
|
+void ef_vi_init(ef_vi* vi, void* vvis, ef_vi_state* state,
|
|
+ ef_eventq_state* evq_state, enum ef_vi_flags vi_flags)
|
|
+{
|
|
+ struct vi_mappings* vm = (struct vi_mappings*) vvis;
|
|
+
|
|
+ vi->vi_i = vm->vi_instance;
|
|
+ vi->ep_state = state;
|
|
+ vi->vi_flags = vi_flags;
|
|
+
|
|
+ switch( vm->nic_type.arch ) {
|
|
+ case EF_VI_ARCH_FALCON:
|
|
+ falcon_vi_init(vi, vvis);
|
|
+ break;
|
|
+ default:
|
|
+ /* ?? TODO: We should return an error code. */
|
|
+ ef_assert(0);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if( vm->evq_bytes ) {
|
|
+ vi->evq_state = evq_state;
|
|
+ vi->evq_mask = vm->evq_bytes - 1u;
|
|
+ vi->evq_base = vm->evq_base;
|
|
+ vi->evq_timer_reg = vm->evq_timer_reg;
|
|
+ }
|
|
+
|
|
+ EF_VI_MAGIC_SET(vi, EF_VI);
|
|
+}
|
|
+
|
|
+
|
|
+/* Initialise [data_area] with information required to initialise an ef_vi.
|
|
+ * In the following, an unused param should be set to NULL. Note the case
|
|
+ * marked (*) of [iobuf_mmap] for falcon/driver; for the normal driver this
|
|
+ * must be NULL.
|
|
+ *
|
|
+ * \param data_area [in,out] required, must ref at least VI_MAPPING_SIZE
|
|
+ * bytes
|
|
+ * \param io_mmap [in] ef1, required
|
|
+ * falcon, required
|
|
+ * \param iobuf_mmap [in] ef1, unused
|
|
+ * falcon, required
|
|
+ */
|
|
+void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type nic_type,
|
|
+ unsigned rxq_capacity, unsigned txq_capacity,
|
|
+ int instance, void* io_mmap,
|
|
+ void* iobuf_mmap_rx, void* iobuf_mmap_tx,
|
|
+ enum ef_vi_flags vi_flags)
|
|
+{
|
|
+ struct vi_mappings* vm = (struct vi_mappings*) data_area;
|
|
+ int rx_desc_bytes, rxq_bytes;
|
|
+
|
|
+ ef_assert(rxq_capacity > 0 || txq_capacity > 0);
|
|
+ ef_assert(vm);
|
|
+ ef_assert(io_mmap);
|
|
+ ef_assert(iobuf_mmap_rx || iobuf_mmap_tx);
|
|
+
|
|
+ vm->signature = VI_MAPPING_SIGNATURE;
|
|
+ vm->vi_instance = instance;
|
|
+ vm->nic_type = nic_type;
|
|
+
|
|
+ rx_desc_bytes = (vi_flags & EF_VI_RX_PHYS_ADDR) ? 8 : 4;
|
|
+ rxq_bytes = rxq_capacity * rx_desc_bytes;
|
|
+ rxq_bytes = (rxq_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
|
|
+
|
|
+ if( iobuf_mmap_rx == iobuf_mmap_tx )
|
|
+ iobuf_mmap_tx = (char*) iobuf_mmap_rx + rxq_bytes;
|
|
+
|
|
+ vm->rx_queue_capacity = rxq_capacity;
|
|
+ vm->rx_dma_falcon = iobuf_mmap_rx;
|
|
+ vm->rx_bell = (char*) io_mmap + (RX_DESC_UPD_REG_KER_OFST & 4095);
|
|
+ vm->tx_queue_capacity = txq_capacity;
|
|
+ vm->tx_dma_falcon = iobuf_mmap_tx;
|
|
+ vm->tx_bell = (char*) io_mmap + (TX_DESC_UPD_REG_KER_OFST & 4095);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/Makefile 2008-02-26 10:54:12.000000000 +0100
|
|
@@ -0,0 +1,11 @@
|
|
+EXTRA_CFLAGS += -Idrivers/xen/sfc_netutil
|
|
+EXTRA_CFLAGS += -Werror
|
|
+
|
|
+ifdef GGOV
|
|
+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
|
|
+endif
|
|
+
|
|
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) := sfc_netutil.o
|
|
+
|
|
+sfc_netutil-objs := accel_cuckoo_hash.o accel_msg_iface.o accel_util.o
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,651 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/types.h> /* needed for linux/random.h */
|
|
+#include <linux/random.h>
|
|
+
|
|
+#include "accel_cuckoo_hash.h"
|
|
+#include "accel_util.h"
|
|
+
|
|
+static inline int cuckoo_hash_key_compare(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key1,
|
|
+ cuckoo_hash_key *key2)
|
|
+{
|
|
+ return !memcmp(key1, key2, hashtab->key_length);
|
|
+}
|
|
+
|
|
+
|
|
+static inline void cuckoo_hash_key_set(cuckoo_hash_key *key1,
|
|
+ cuckoo_hash_key *key2)
|
|
+{
|
|
+ *key1 = *key2;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Sets hash function parameters. Chooses "a" to be odd, 0 < a < 2^w
|
|
+ * where w is the length of the key
|
|
+ */
|
|
+static void set_hash_parameters(cuckoo_hash_table *hashtab)
|
|
+{
|
|
+	do {
|
|
+		/*
|
|
+		 * Clear first: only key_length bytes of each parameter
|
|
+		 * are randomised below
|
|
+		 */
|
|
+		hashtab->a0 = 0;
|
|
+		hashtab->a1 = 0;
|
|
+		get_random_bytes(&hashtab->a0, hashtab->key_length);
|
|
+		get_random_bytes(&hashtab->a1, hashtab->key_length);
|
|
+
|
|
+		/* Force both multipliers to be odd */
|
|
+		hashtab->a0 |= 1;
|
|
+		hashtab->a1 |= 1;
|
|
+
|
|
+		/* Draw again in the unlikely event that the two coincide */
|
|
+	} while (hashtab->a0 == hashtab->a1);
|
|
+}
|
|
+
|
|
+/* Allocate and zero both sub-tables (2^length_bits entries each) and choose
|
|
+ * the hash parameters.  Returns 0 on success or -ENOMEM if kmalloc fails. */
|
|
+int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
|
|
+		     unsigned key_length)
|
|
+{
|
|
+	char *table_mem;
|
|
+	unsigned length;
|
|
+
|
|
+	/* Validate before shifting: an oversized shift count is undefined */
|
|
+	BUG_ON(length_bits >= sizeof(unsigned) * 8);
|
|
+	BUG_ON(key_length > sizeof(cuckoo_hash_key));
|
|
+	length = 1u << length_bits;
|
|
+
|
|
+	table_mem = kmalloc(sizeof(cuckoo_hash_entry) * 2 * length, GFP_KERNEL);
|
|
+	if (table_mem == NULL)
|
|
+		return -ENOMEM;
|
|
+
|
|
+	hashtab->length = length;
|
|
+	hashtab->length_bits = length_bits;
|
|
+	hashtab->key_length = key_length;
|
|
+	hashtab->entries = 0;
|
|
+	hashtab->table0 = (cuckoo_hash_entry *)table_mem;
|
|
+	hashtab->table1 = (cuckoo_hash_entry *)
|
|
+		(table_mem + length * sizeof(cuckoo_hash_entry));
|
|
+
|
|
+	set_hash_parameters(hashtab);
|
|
+	/* Zero the table */
|
|
+	memset(hashtab->table0, 0, length * 2 * sizeof(cuckoo_hash_entry));
|
|
+	return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_init);
|
|
+
|
|
+void cuckoo_hash_destroy(cuckoo_hash_table *hashtab)
|
|
+{
|
|
+	/* Both sub-tables share one allocation; kfree(NULL) is a no-op */
|
|
+	kfree(hashtab->table0);
|
|
+}
|
|
+
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_destroy);
|
|
+
|
|
+/*
|
|
+ * This computes sizeof(cuckoo_hash) bits of hash, not all will be
|
|
+ * necessarily used, but the hash function throws away any that
|
|
+ * aren't
|
|
+ */
|
|
+static inline void cuckoo_compute_hash_helper(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *a,
|
|
+ cuckoo_hash_key *x,
|
|
+ cuckoo_hash *result)
|
|
+{
|
|
+ u64 multiply_result = 0, a_temp, x_temp;
|
|
+ u32 carry = 0;
|
|
+ u32 *a_words;
|
|
+ u32 *x_words;
|
|
+ int i;
|
|
+
|
|
+ /*
|
|
+ * As the mod and div operations in the function effectively
|
|
+ * reduce and shift the bits of the product down to just the
|
|
+ * third word, we need only compute that and return it as a
|
|
+ * result.
|
|
+ *
|
|
+ * Do enough long multiplication to get the word we need
|
|
+ */
|
|
+
|
|
+ /* This assumes things about the sizes of the key and hash */
|
|
+ BUG_ON(hashtab->key_length % sizeof(u32) != 0);
|
|
+ BUG_ON(sizeof(cuckoo_hash) != sizeof(u32));
|
|
+
|
|
+ a_words = (u32 *)a;
|
|
+ x_words = (u32 *)x;
|
|
+
|
|
+ for (i = 0; i < hashtab->key_length / sizeof(u32); i++) {
|
|
+ a_temp = a_words[i];
|
|
+ x_temp = x_words[i];
|
|
+
|
|
+ multiply_result = (a_temp * x_temp) + carry;
|
|
+ carry = (multiply_result >> 32) & 0xffffffff;
|
|
+ }
|
|
+
|
|
+ *result = multiply_result & 0xffffffff;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Want to implement (ax mod 2^w) div 2^(w-q) for odd a, 0 < a < 2^w;
|
|
+ * w is the length of the key, q is the length of the hash, I think.
|
|
+ * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
|
|
+ */
|
|
+static cuckoo_hash cuckoo_compute_hash(cuckoo_hash_table *hashtab,
|
|
+				       cuckoo_hash_key *key,
|
|
+				       cuckoo_hash_key *a)
|
|
+{
|
|
+	unsigned q = hashtab->length_bits;
|
|
+	cuckoo_hash hash;
|
|
+
|
|
+	/* Full 32-bit hash of key under multiplier a */
|
|
+	cuckoo_compute_hash_helper(hashtab, a, key, &hash);
|
|
+
|
|
+	/*
|
|
+	 * Shifting right by (32 - q) keeps exactly the top q bits, so no
|
|
+	 * mask is needed.  With q == 0 (a one-slot table) the shift count
|
|
+	 * would equal the type width, which is undefined, so the only valid
|
|
+	 * index, 0, is returned directly.
|
|
+	 */
|
|
+	hash = (q == 0) ? 0 : ((u32)hash >> (32 - q));
|
|
+
|
|
+	BUG_ON(hash >= hashtab->length);
|
|
+	return hash;
|
|
+}
|
|
+
|
|
+
|
|
+static int cuckoo_hash_lookup0(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value *value)
|
|
+{
|
|
+ cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
|
|
+
|
|
+ if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ && cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
|
|
+ key)) {
|
|
+ *value = hashtab->table0[hash].value;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int cuckoo_hash_lookup1(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value *value)
|
|
+{
|
|
+ cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
|
|
+
|
|
+ if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ && cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
|
|
+ key)) {
|
|
+ *value = hashtab->table1[hash].value;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value *value)
|
|
+{
|
|
+ return cuckoo_hash_lookup0(hashtab, key, value)
|
|
+ || cuckoo_hash_lookup1(hashtab, key, value);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_lookup);
|
|
+
|
|
+
|
|
+/* Transfer any active entries from "old_table" into hashtab */
|
|
+static int cuckoo_hash_transfer_entries(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_entry *old_table,
|
|
+ unsigned capacity)
|
|
+{
|
|
+ int i, rc;
|
|
+ cuckoo_hash_entry *entry;
|
|
+
|
|
+ hashtab->entries = 0;
|
|
+
|
|
+ for (i = 0; i < capacity; i++) {
|
|
+ entry = &old_table[i];
|
|
+ if (entry->state == CUCKOO_HASH_STATE_OCCUPIED) {
|
|
+ rc = cuckoo_hash_add(hashtab, &(entry->key),
|
|
+ entry->value, 0);
|
|
+ if (rc != 0) {
|
|
+ return rc;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int cuckoo_hash_rehash(cuckoo_hash_table *hashtab)
|
|
+{
|
|
+ cuckoo_hash_entry *new_table;
|
|
+ cuckoo_hash_table old_hashtab;
|
|
+ int resize = 0, rc, rehash_count;
|
|
+
|
|
+ /*
|
|
+ * Store old tables so we can access the existing values and
|
|
+ * copy across
|
|
+ */
|
|
+ memcpy(&old_hashtab, hashtab, sizeof(cuckoo_hash_table));
|
|
+
|
|
+ /* resize if hashtable is more than half full */
|
|
+ if (old_hashtab.entries > old_hashtab.length &&
|
|
+ old_hashtab.length_bits < 32)
|
|
+ resize = 1;
|
|
+
|
|
+ resize:
|
|
+ if (resize) {
|
|
+ new_table = kmalloc(sizeof(cuckoo_hash_entry) * 4 * hashtab->length,
|
|
+ GFP_ATOMIC);
|
|
+ if (new_table == NULL) {
|
|
+ rc = -ENOMEM;
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ hashtab->length = 2 * hashtab->length;
|
|
+ hashtab->length_bits++;
|
|
+ } else {
|
|
+ new_table = kmalloc(sizeof(cuckoo_hash_entry) * 2 * hashtab->length,
|
|
+ GFP_ATOMIC);
|
|
+ if (new_table == NULL) {
|
|
+ rc = -ENOMEM;
|
|
+ goto err;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Point hashtab to new memory region so we can try to
|
|
+ * construct new table
|
|
+ */
|
|
+ hashtab->table0 = new_table;
|
|
+ hashtab->table1 = (cuckoo_hash_entry *)
|
|
+ ((char *)new_table + hashtab->length * sizeof(cuckoo_hash_entry));
|
|
+
|
|
+ rehash_count = 0;
|
|
+
|
|
+ again:
|
|
+ /* Zero the new tables */
|
|
+ memset(new_table, 0, hashtab->length * 2 * sizeof(cuckoo_hash_entry));
|
|
+
|
|
+ /* Choose new parameters for the hash functions */
|
|
+ set_hash_parameters(hashtab);
|
|
+
|
|
+ /*
|
|
+ * Multiply old_table_length by 2 as the length refers to each
|
|
+ * table, and there are two of them. This assumes that they
|
|
+ * are arranged sequentially in memory, so assert it
|
|
+ */
|
|
+ BUG_ON(((char *)old_hashtab.table1) !=
|
|
+ ((char *)old_hashtab.table0 + old_hashtab.length
|
|
+ * sizeof(cuckoo_hash_entry)));
|
|
+ rc = cuckoo_hash_transfer_entries(hashtab, old_hashtab.table0,
|
|
+ old_hashtab.length * 2);
|
|
+ if (rc < 0) {
|
|
+ /* Problem */
|
|
+ if (rc == -ENOSPC) {
|
|
+ ++rehash_count;
|
|
+ if (rehash_count < CUCKOO_HASH_MAX_LOOP) {
|
|
+ /*
|
|
+ * Wanted to rehash, but rather than
|
|
+ * recurse we can just do it here
|
|
+ */
|
|
+ goto again;
|
|
+ } else {
|
|
+ /*
|
|
+ * Didn't manage to rehash, so let's
|
|
+ * go up a size (if we haven't already
|
|
+ * and there's space)
|
|
+ */
|
|
+ if (!resize && hashtab->length_bits < 32) {
|
|
+ resize = 1;
|
|
+ kfree(new_table);
|
|
+ goto resize;
|
|
+ }
|
|
+ else
|
|
+ goto err;
|
|
+ }
|
|
+ }
|
|
+ else
|
|
+ goto err;
|
|
+ }
|
|
+
|
|
+ /* Success, I think. Free up the old table */
|
|
+ kfree(old_hashtab.table0);
|
|
+
|
|
+ /* We should have put all the entries from old table in the new one */
|
|
+ BUG_ON(hashtab->entries != old_hashtab.entries);
|
|
+
|
|
+ return 0;
|
|
+ err:
|
|
+ EPRINTK("%s: Rehash failed, giving up\n", __FUNCTION__);
|
|
+ /* Some other error, give up, at least restore table to how it was */
|
|
+ memcpy(hashtab, &old_hashtab, sizeof(cuckoo_hash_table));
|
|
+ if (new_table)
|
|
+ kfree(new_table);
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_rehash);
|
|
+
|
|
+
|
|
+static int
|
|
+cuckoo_hash_insert_or_displace(cuckoo_hash_entry *table, unsigned hash,
|
|
+ cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value value,
|
|
+ cuckoo_hash_key *displaced_key,
|
|
+ cuckoo_hash_value *displaced_value)
|
|
+{
|
|
+ if (table[hash].state == CUCKOO_HASH_STATE_VACANT) {
|
|
+ cuckoo_hash_key_set(&(table[hash].key), key);
|
|
+ table[hash].value = value;
|
|
+ table[hash].state = CUCKOO_HASH_STATE_OCCUPIED;
|
|
+
|
|
+ return 1;
|
|
+ } else {
|
|
+ cuckoo_hash_key_set(displaced_key, &(table[hash].key));
|
|
+ *displaced_value = table[hash].value;
|
|
+ cuckoo_hash_key_set(&(table[hash].key), key);
|
|
+ table[hash].value = value;
|
|
+
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+int cuckoo_hash_add(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value value, int can_rehash)
|
|
+{
|
|
+ cuckoo_hash hash0, hash1;
|
|
+ int i, rc;
|
|
+ cuckoo_hash_key key1, key2;
|
|
+
|
|
+ cuckoo_hash_key_set(&key1, key);
|
|
+
|
|
+ again:
|
|
+ i = 0;
|
|
+ do {
|
|
+ hash0 = cuckoo_compute_hash(hashtab, &key1, &hashtab->a0);
|
|
+ if (cuckoo_hash_insert_or_displace(hashtab->table0, hash0,
|
|
+ &key1, value, &key2,
|
|
+ &value)) {
|
|
+ /* Success */
|
|
+ hashtab->entries++;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ hash1 = cuckoo_compute_hash(hashtab, &key2, &hashtab->a1);
|
|
+ if (cuckoo_hash_insert_or_displace(hashtab->table1, hash1,
|
|
+ &key2, value, &key1,
|
|
+ &value)) {
|
|
+ /* Success */
|
|
+ hashtab->entries++;
|
|
+ return 0;
|
|
+ }
|
|
+ } while (++i < CUCKOO_HASH_MAX_LOOP);
|
|
+
|
|
+ if (can_rehash) {
|
|
+ if ((rc = cuckoo_hash_rehash(hashtab)) < 0) {
|
|
+ /*
|
|
+ * Give up - this will drop whichever
|
|
+ * key/value pair we have currently displaced
|
|
+ * on the floor
|
|
+ */
|
|
+ return rc;
|
|
+ }
|
|
+ goto again;
|
|
+ }
|
|
+
|
|
+ EPRINTK("%s: failed hash add\n", __FUNCTION__);
|
|
+ /*
|
|
+ * Couldn't do it - bad as we've now removed some random thing
|
|
+ * from the table, and will just drop it on the floor. Better
|
|
+ * would be to somehow revert the table to the state it was in
|
|
+ * at the start
|
|
+ */
|
|
+ return -ENOSPC;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_add);
|
|
+
|
|
+
|
|
+/* Add key/value only if key is absent; -EBUSY if it is already stored */
|
|
+int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
|
|
+			  cuckoo_hash_key *key, cuckoo_hash_value value,
|
|
+			  int can_rehash)
|
|
+{
|
|
+	cuckoo_hash_value stored_value;
|
|
+
|
|
+	if (cuckoo_hash_lookup(hashtab, key, &stored_value))
|
|
+		return -EBUSY;
|
|
+	return cuckoo_hash_add(hashtab, key, value, can_rehash);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_add_check);
|
|
+
|
|
+
|
|
+int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key)
|
|
+{
|
|
+ cuckoo_hash hash;
|
|
+
|
|
+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
|
|
+ if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
|
|
+ cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
|
|
+ key)) {
|
|
+ hashtab->table0[hash].state = CUCKOO_HASH_STATE_VACANT;
|
|
+ hashtab->entries--;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
|
|
+ if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
|
|
+ cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
|
|
+ key)) {
|
|
+ hashtab->table1[hash].state = CUCKOO_HASH_STATE_VACANT;
|
|
+ hashtab->entries--;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ return -EINVAL;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_remove);
|
|
+
|
|
+
|
|
+int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value value)
|
|
+{
|
|
+ cuckoo_hash hash;
|
|
+
|
|
+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0);
|
|
+ if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
|
|
+ cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key),
|
|
+ key)) {
|
|
+ hashtab->table0[hash].value = value;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1);
|
|
+ if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) &&
|
|
+ cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key),
|
|
+ key)) {
|
|
+ hashtab->table1[hash].value = value;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ return -EINVAL;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_update);
|
|
+
|
|
+
|
|
+void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab)
|
|
+{
|
|
+ hashtab->iterate_index = 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_iterate_reset);
|
|
+
|
|
+
|
|
+int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key, cuckoo_hash_value *value)
|
|
+{
|
|
+ unsigned index;
|
|
+
|
|
+ while (hashtab->iterate_index < hashtab->length) {
|
|
+ index = hashtab->iterate_index;
|
|
+ ++hashtab->iterate_index;
|
|
+ if (hashtab->table0[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
|
|
+ *key = hashtab->table0[index].key;
|
|
+ *value = hashtab->table0[index].value;
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ while (hashtab->iterate_index >= hashtab->length &&
|
|
+ hashtab->iterate_index < hashtab->length * 2) {
|
|
+ index = hashtab->iterate_index - hashtab->length;
|
|
+ ++hashtab->iterate_index;
|
|
+ if (hashtab->table1[index].state == CUCKOO_HASH_STATE_OCCUPIED) {
|
|
+ *key = hashtab->table1[index].key;
|
|
+ *value = hashtab->table1[index].value;
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return -ENOSPC;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_iterate);
|
|
+
|
|
+
|
|
+#if 0
|
|
+void cuckoo_hash_valid(cuckoo_hash_table *hashtab)
|
|
+{
|
|
+ int i, entry_count = 0;
|
|
+
|
|
+ for (i=0; i < hashtab->length; i++) {
|
|
+ EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
|
|
+ hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
|
|
+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ entry_count++;
|
|
+ EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
|
|
+ hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
|
|
+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ entry_count++;
|
|
+ }
|
|
+
|
|
+ if (entry_count != hashtab->entries) {
|
|
+ EPRINTK("%s: bad count\n", __FUNCTION__);
|
|
+ cuckoo_hash_dump(hashtab);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ for (i=0; i< hashtab->length; i++) {
|
|
+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ if (i != cuckoo_compute_hash(hashtab,
|
|
+ &hashtab->table0[i].key,
|
|
+ &hashtab->a0)) {
|
|
+ EPRINTK("%s: Bad key table 0 index %d\n",
|
|
+ __FUNCTION__, i);
|
|
+ cuckoo_hash_dump(hashtab);
|
|
+ return;
|
|
+ }
|
|
+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ if (i != cuckoo_compute_hash(hashtab,
|
|
+ &hashtab->table1[i].key,
|
|
+ &hashtab->a1)) {
|
|
+ EPRINTK("%s: Bad key table 1 index %d\n",
|
|
+ __FUNCTION__, i);
|
|
+ cuckoo_hash_dump(hashtab);
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_valid);
|
|
+
|
|
+
|
|
+void cuckoo_hash_dump(cuckoo_hash_table *hashtab)
|
|
+{
|
|
+ int i, entry_count;
|
|
+
|
|
+ entry_count = 0;
|
|
+ for (i=0; i < hashtab->length; i++) {
|
|
+ EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT &&
|
|
+ hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED);
|
|
+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ entry_count++;
|
|
+ EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT &&
|
|
+ hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED);
|
|
+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ entry_count++;
|
|
+ }
|
|
+
|
|
+ EPRINTK("======================\n");
|
|
+ EPRINTK("Cuckoo hash table dump\n");
|
|
+ EPRINTK("======================\n");
|
|
+ EPRINTK("length: %d; length_bits: %d; key_length: %d\n", hashtab->length,
|
|
+ hashtab->length_bits, hashtab->key_length);
|
|
+ EPRINTK("Recorded entries: %d\n", hashtab->entries);
|
|
+ EPRINTK("Counted entries: %d\n", entry_count);
|
|
+ EPRINTK("a0: %llx; a1: %llx\n", hashtab->a0, hashtab->a1);
|
|
+ EPRINTK("-----------------------------------------\n");
|
|
+ EPRINTK("Index Occupied Key Value Index0 Index1\n");
|
|
+ EPRINTK("-----------------------------------------\n");
|
|
+ for (i=0; i< hashtab->length; i++) {
|
|
+ if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ EPRINTK("%d %d %llx %d %d %d\n", i,
|
|
+ hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED,
|
|
+ hashtab->table0[i].key, hashtab->table0[i].value,
|
|
+ cuckoo_compute_hash(hashtab, &hashtab->table0[i].key,
|
|
+ &hashtab->a0),
|
|
+ cuckoo_compute_hash(hashtab, &hashtab->table0[i].key,
|
|
+ &hashtab->a1));
|
|
+ else
|
|
+ EPRINTK("%d %d - - - -\n", i,
|
|
+ hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED);
|
|
+
|
|
+ }
|
|
+ EPRINTK("-----------------------------------------\n");
|
|
+ EPRINTK("Index Occupied Key Value Index0 Index1\n");
|
|
+ EPRINTK("-----------------------------------------\n");
|
|
+ for (i=0; i< hashtab->length; i++) {
|
|
+ if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED)
|
|
+ EPRINTK("%d %d %llx %d %d %d\n", i,
|
|
+ hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED,
|
|
+ hashtab->table1[i].key, hashtab->table1[i].value,
|
|
+ cuckoo_compute_hash(hashtab, &hashtab->table1[i].key,
|
|
+ &hashtab->a0),
|
|
+ cuckoo_compute_hash(hashtab, &hashtab->table1[i].key,
|
|
+ &hashtab->a1));
|
|
+ else
|
|
+ EPRINTK("%d %d - - - -\n", i,
|
|
+ hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED);
|
|
+ }
|
|
+ EPRINTK("======================\n");
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(cuckoo_hash_dump);
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/accel_cuckoo_hash.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,227 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * A cuckoo hash table consists of two sub tables. Each entry can
|
|
+ * hash to a position in each table. If, on entry, its position is
|
|
+ * found to be occupied, the existing element is moved to its other
|
|
+ * location. This recurses until success or a loop is found. If a
|
|
+ * loop is found the table is rehashed.
|
|
+ *
|
|
+ * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf
|
|
+ */
|
|
+
|
|
+#ifndef NET_ACCEL_CUCKOO_HASH_H
|
|
+#define NET_ACCEL_CUCKOO_HASH_H
|
|
+
|
|
+/*! Type used for hash table keys of ip pairs */
|
|
+typedef struct {
|
|
+ u32 local_ip;
|
|
+ //u32 remote_ip;
|
|
+ u16 local_port;
|
|
+ //u16 remote_port;
|
|
+ /* Technically only 1 bit, but use 16 to make key a round
|
|
+ number size */
|
|
+ u16 proto;
|
|
+} cuckoo_hash_ip_key;
|
|
+
|
|
+/*! Type used for hash table keys of mac addresses */
|
|
+typedef u64 cuckoo_hash_mac_key;
|
|
+
|
|
+/*! This type is designed to be large enough to hold all supported key
|
|
+ * sizes to avoid having to malloc storage for them.
|
|
+ */
|
|
+typedef u64 cuckoo_hash_key;
|
|
+
|
|
+/*! Type used for the values stored in the hash table */
|
|
+typedef int cuckoo_hash_value;
|
|
+
|
|
+/*! Type used for the hash used to index the table */
|
|
+typedef u32 cuckoo_hash;
|
|
+
|
|
+/*! How long to spend displacing values when adding before giving up
|
|
+ * and rehashing */
|
|
+#define CUCKOO_HASH_MAX_LOOP (hashtab->length)
|
|
+
|
|
+/*! State of hash table entry */
|
|
+typedef enum {
|
|
+ CUCKOO_HASH_STATE_VACANT = 0,
|
|
+ CUCKOO_HASH_STATE_OCCUPIED
|
|
+} cuckoo_hash_state;
|
|
+
|
|
+/*! An entry in the hash table */
|
|
+typedef struct {
|
|
+ cuckoo_hash_state state;
|
|
+ cuckoo_hash_key key;
|
|
+ cuckoo_hash_value value;
|
|
+} cuckoo_hash_entry;
|
|
+
|
|
+/*! A cuckoo hash table */
|
|
+typedef struct {
|
|
+ /*! The length of each table (NB. there are two tables of this
|
|
+ * length) */
|
|
+ unsigned length;
|
|
+ /*! The length of each table in bits */
|
|
+ unsigned length_bits;
|
|
+ /*! The length of the key in bytes */
|
|
+ unsigned key_length;
|
|
+ /*! The number of entries currently stored in the table */
|
|
+ unsigned entries;
|
|
+ /*! Index into table used by cuckoo_hash_iterate */
|
|
+ unsigned iterate_index;
|
|
+
|
|
+ /* parameter of hash functions */
|
|
+ /*! The "a" parameter of the first hash function */
|
|
+ cuckoo_hash_key a0;
|
|
+ /*! The "a" parameter of the second hash function */
|
|
+ cuckoo_hash_key a1;
|
|
+
|
|
+ /*! The first table */
|
|
+ cuckoo_hash_entry *table0;
|
|
+ /*! The second table */
|
|
+ cuckoo_hash_entry *table1;
|
|
+} cuckoo_hash_table;
|
|
+
|
|
+/*! Initialise the cuckoo hash table
|
|
+ *
|
|
+ * \param hashtab A pointer to an uninitialised hash table structure
|
|
+ * \param length_bits The number of elements in each table equals
|
|
+ * 2**length_bits
|
|
+ * \param key_length The length of the key in bytes
|
|
+ *
|
|
+ * \return 0 on success, -ENOMEM if it couldn't allocate the tables
|
|
+ */
|
|
+extern
|
|
+int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits,
|
|
+ unsigned key_length);
|
|
+
|
|
+
|
|
+/*! Destroy a hash table
|
|
+ *
|
|
+ * \param hashtab A hash table that has previously been passed to a
|
|
+ * successful call of cuckoo_hash_init()
|
|
+ */
|
|
+extern
|
|
+void cuckoo_hash_destroy(cuckoo_hash_table *hashtab);
|
|
+
|
|
+
|
|
+/*! Lookup an entry in the hash table
|
|
+ *
|
|
+ * \param hashtab The hash table in which to look.
|
|
+ * \param key Pointer to a mac address to use as the key
|
|
+ * \param value On exit set to the value stored if key was present
|
|
+ *
|
|
+ * \return 0 if not present in the table, non-zero if it is (and value
|
|
+ * is set accordingly)
|
|
+ */
|
|
+extern
|
|
+int cuckoo_hash_lookup(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value *value);
|
|
+
|
|
+/*! Add an entry to the hash table. Key must not be a duplicate of
|
|
+ * anything already in the table. If this is a risk, see
|
|
+ * cuckoo_hash_add_check
|
|
+ *
|
|
+ * \param hashtab The hash table to add the entry to
|
|
+ * \param key Pointer to a mac address to use as a key
|
|
+ * \param value The value to store
|
|
+ * \param can_rehash Flag to allow the add function to rehash the
|
|
+ * table if necessary
|
|
+ *
|
|
+ * \return 0 on success, non-zero on failure. -ENOSPC means it just
|
|
+ * couldn't find anywhere to put it - this is bad and probably means
|
|
+ * an entry has been dropped on the floor (but the entry you just
|
|
+ * tried to add may now be included)
|
|
+ */
|
|
+extern
|
|
+int cuckoo_hash_add(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value value,
|
|
+ int can_rehash);
|
|
+
|
|
+/*! Same as cuckoo_hash_add but first checks to ensure entry is not
|
|
+ * already there
|
|
+ * \return -EBUSY if already there
|
|
+ */
|
|
+
|
|
+extern
|
|
+int cuckoo_hash_add_check(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value value,
|
|
+ int can_rehash);
|
|
+/*! Remove an entry from the table
|
|
+ *
|
|
+ * \param hashtab The hash table to remove the entry from
|
|
+ * \param key The key that was used to previously add the entry
|
|
+ *
|
|
+ * \return 0 on success, -EINVAL if the entry couldn't be found
|
|
+ */
|
|
+extern
|
|
+int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key);
|
|
+
|
|
+
|
|
+/*! Helper for those using mac addresses to convert to a key for the
|
|
+ * hash table
|
|
+ */
|
|
+static inline cuckoo_hash_mac_key cuckoo_mac_to_key(const u8 *mac)
|
|
+{
|
|
+ return (cuckoo_hash_mac_key)(mac[0])
|
|
+ | (cuckoo_hash_mac_key)(mac[1]) << 8
|
|
+ | (cuckoo_hash_mac_key)(mac[2]) << 16
|
|
+ | (cuckoo_hash_mac_key)(mac[3]) << 24
|
|
+ | (cuckoo_hash_mac_key)(mac[4]) << 32
|
|
+ | (cuckoo_hash_mac_key)(mac[5]) << 40;
|
|
+}
|
|
+
|
|
+
|
|
+/*! Update an entry already in the hash table to take a new value
|
|
+ *
|
|
+ * \param hashtab The hash table to add the entry to
|
|
+ * \param key Pointer to a mac address to use as a key
|
|
+ * \param value The value to store
|
|
+ *
|
|
+ * \return 0 on success, non-zero on failure.
|
|
+ */
|
|
+int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key,
|
|
+ cuckoo_hash_value value);
|
|
+
|
|
+
|
|
+/*! Go through the hash table and return all used entries (one per call)
|
|
+ *
|
|
+ * \param hashtab The hash table to iterate over
|
|
+ * \param key Pointer to a key to take the returned key
|
|
+ * \param value Pointer to a value to take the returned value
|
|
+ *
|
|
+ * \return 0 on success (key, value set), non-zero on failure.
|
|
+ */
|
|
+int cuckoo_hash_iterate(cuckoo_hash_table *hashtab,
|
|
+ cuckoo_hash_key *key, cuckoo_hash_value *value);
|
|
+void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab);
|
|
+
|
|
+/* debug, not compiled by default */
|
|
+void cuckoo_hash_valid(cuckoo_hash_table *hashtab);
|
|
+void cuckoo_hash_dump(cuckoo_hash_table *hashtab);
|
|
+
|
|
+#endif /* NET_ACCEL_CUCKOO_HASH_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/accel_msg_iface.c 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,301 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <xen/evtchn.h>
|
|
+
|
|
+#include "accel_util.h"
|
|
+#include "accel_msg_iface.h"
|
|
+
|
|
+#define NET_ACCEL_MSG_Q_SIZE (1024)
|
|
+#define NET_ACCEL_MSG_Q_MASK (NET_ACCEL_MSG_Q_SIZE - 1)
|
|
+
|
|
+#ifdef NDEBUG
|
|
+#define NET_ACCEL_CHECK_MAGIC(_p, _errval)
|
|
+#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id)
|
|
+#else
|
|
+#define NET_ACCEL_CHECK_MAGIC(_p, _errval) \
|
|
+ do { if ((_p)->magic != NET_ACCEL_MSG_MAGIC) { \
|
|
+ printk(KERN_ERR "%s: passed invalid shared page %p!\n", \
|
|
+ __FUNCTION__, (_p)); \
|
|
+ return _errval; \
|
|
+ } } while (0) /* one statement: returns _errval from the caller on bad magic */
|
|
+#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id) \
|
|
+ printk(_t ": queue %d write %x read %x base %x limit %x\n", \
|
|
+ _id, _q->write, _q->read, _q->base, _q->limit);
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * We've been passed at least 2 pages. 1 control page and 1 or more
|
|
+ * data pages.
|
|
+ */
|
|
+int net_accel_msg_init_page(void *mem, int len, int up)
|
|
+{
|
|
+ struct net_accel_shared_page *shared_page =
|
|
+ (struct net_accel_shared_page*)mem;
|
|
+
|
|
+ if ((unsigned long)shared_page & NET_ACCEL_MSG_Q_MASK)
|
|
+ return -EINVAL;
|
|
+
|
|
+ shared_page->magic = NET_ACCEL_MSG_MAGIC;
|
|
+
|
|
+ shared_page->aflags = 0;
|
|
+
|
|
+ shared_page->net_dev_up = up;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_init_page);
|
|
+
|
|
+
|
|
+void net_accel_msg_init_queue(sh_msg_fifo2 *queue,
|
|
+ struct net_accel_msg_queue *indices,
|
|
+ struct net_accel_msg *base, int size)
|
|
+{
|
|
+ queue->fifo = base;
|
|
+ spin_lock_init(&queue->lock);
|
|
+ sh_fifo2_init(queue, size-1, &indices->read, &indices->write);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_init_queue);
|
|
+
|
|
+
|
|
+static inline int _net_accel_msg_send(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ struct net_accel_msg *msg,
|
|
+ int is_reply)
|
|
+{
|
|
+ int rc = 0;
|
|
+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
|
|
+ rmb();
|
|
+ if (is_reply) {
|
|
+ EPRINTK_ON(sh_fifo2_is_full(queue));
|
|
+ sh_fifo2_put(queue, *msg);
|
|
+ } else {
|
|
+ if (sh_fifo2_not_half_full(queue)) {
|
|
+ sh_fifo2_put(queue, *msg);
|
|
+ } else {
|
|
+ rc = -ENOSPC;
|
|
+ }
|
|
+ }
|
|
+ wmb();
|
|
+ return rc;
|
|
+}
|
|
+
|
|
+/* Notify after a batch of messages have been sent */
|
|
+void net_accel_msg_notify(int irq)
|
|
+{
|
|
+ notify_remote_via_irq(irq);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_notify);
|
|
+
|
|
+/*
|
|
+ * Send a message on the specified FIFO. Returns 0 on success, -errno
|
|
+ * on failure. The message in msg is copied to the current slot of the
|
|
+ * FIFO.
|
|
+ */
|
|
+int net_accel_msg_send(struct net_accel_shared_page *sp, sh_msg_fifo2 *q,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int rc;
|
|
+ net_accel_msg_lock_queue(q, &flags);
|
|
+ rc = _net_accel_msg_send(sp, q, msg, 0);
|
|
+ net_accel_msg_unlock_queue(q, &flags);
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_send);
|
|
+
|
|
+
|
|
+/* As net_accel_msg_send but also posts a notification to the far end. */
|
|
+int net_accel_msg_send_notify(struct net_accel_shared_page *sp, int irq,
|
|
+ sh_msg_fifo2 *q, struct net_accel_msg *msg)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int rc;
|
|
+ net_accel_msg_lock_queue(q, &flags);
|
|
+ rc = _net_accel_msg_send(sp, q, msg, 0);
|
|
+ net_accel_msg_unlock_queue(q, &flags);
|
|
+ if (rc >= 0)
|
|
+ notify_remote_via_irq(irq);
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_send_notify);
|
|
+
|
|
+
|
|
+int net_accel_msg_reply(struct net_accel_shared_page *sp, sh_msg_fifo2 *q,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int rc;
|
|
+ net_accel_msg_lock_queue(q, &flags);
|
|
+ rc = _net_accel_msg_send(sp, q, msg, 1);
|
|
+ net_accel_msg_unlock_queue(q, &flags);
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_reply);
|
|
+
|
|
+
|
|
+/* As net_accel_msg_reply but also posts a notification to the far end. */
|
|
+int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, int irq,
|
|
+ sh_msg_fifo2 *q, struct net_accel_msg *msg)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int rc;
|
|
+ net_accel_msg_lock_queue(q, &flags);
|
|
+ rc = _net_accel_msg_send(sp, q, msg, 1);
|
|
+ net_accel_msg_unlock_queue(q, &flags);
|
|
+ if (rc >= 0)
|
|
+ notify_remote_via_irq(irq);
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_reply_notify);
|
|
+
|
|
+
|
|
+/*
|
|
+ * Look at a received message, if any, so a decision can be made about
|
|
+ * whether to read it now or not. Cookie is a bit of debug which is
|
|
+ * set here and checked when passed to net_accel_msg_recv_next()
|
|
+ */
|
|
+int net_accel_msg_peek(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ struct net_accel_msg *msg, int *cookie)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int rc = 0;
|
|
+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
|
|
+ net_accel_msg_lock_queue(queue, &flags);
|
|
+ rmb();
|
|
+ if (sh_fifo2_is_empty(queue)) {
|
|
+ rc = -ENOENT;
|
|
+ } else {
|
|
+ *msg = sh_fifo2_peek(queue);
|
|
+ *cookie = *(queue->fifo_rd_i);
|
|
+ }
|
|
+ net_accel_msg_unlock_queue(queue, &flags);
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_peek);
|
|
+
|
|
+
|
|
+/*
|
|
+ * Move the queue onto the next element, used after finished with a
|
|
+ * peeked msg
|
|
+ */
|
|
+int net_accel_msg_recv_next(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue, int cookie)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
|
|
+ net_accel_msg_lock_queue(queue, &flags);
|
|
+ rmb();
|
|
+ /* Mustn't be empty */
|
|
+ BUG_ON(sh_fifo2_is_empty(queue));
|
|
+ /*
|
|
+ * Check cookie matches, i.e. we're advancing over the same message
|
|
+ * as was got using peek
|
|
+ */
|
|
+ BUG_ON(cookie != *(queue->fifo_rd_i));
|
|
+ sh_fifo2_rd_next(queue);
|
|
+ wmb();
|
|
+ net_accel_msg_unlock_queue(queue, &flags);
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_recv_next);
|
|
+
|
|
+
|
|
+/*
|
|
+ * Receive a message on the specified FIFO. Returns 0 on success,
|
|
+ * -errno on failure.
|
|
+ */
|
|
+int net_accel_msg_recv(struct net_accel_shared_page *sp, sh_msg_fifo2 *queue,
|
|
+ struct net_accel_msg *msg)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int rc = 0;
|
|
+ NET_ACCEL_CHECK_MAGIC(sp, -EINVAL);
|
|
+ net_accel_msg_lock_queue(queue, &flags);
|
|
+ rmb();
|
|
+ if (sh_fifo2_is_empty(queue)) {
|
|
+ rc = -ENOENT;
|
|
+ } else {
|
|
+ sh_fifo2_get(queue, msg);
|
|
+ }
|
|
+ wmb();
|
|
+ net_accel_msg_unlock_queue(queue, &flags);
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_recv);
|
|
+
|
|
+
|
|
+/*
|
|
+ * Start sending a message without copying. returns a pointer to a message
|
|
+ * that will be filled out in place. The queue is locked until the message
|
|
+ * is sent.
|
|
+ */
|
|
+struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue, unsigned long *flags)
|
|
+{
|
|
+ struct net_accel_msg *msg;
|
|
+ NET_ACCEL_CHECK_MAGIC(sp, NULL);
|
|
+ net_accel_msg_lock_queue(queue, flags);
|
|
+ rmb();
|
|
+ if (sh_fifo2_not_half_full(queue)) {
|
|
+ msg = sh_fifo2_pokep(queue);
|
|
+ } else {
|
|
+ net_accel_msg_unlock_queue(queue, flags);
|
|
+ msg = NULL;
|
|
+ }
|
|
+ return msg;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_start_send);
|
|
+
|
|
+
|
|
+static inline void _msg_complete(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ unsigned long *flags)
|
|
+{
|
|
+ sh_fifo2_wr_next(queue);
|
|
+ net_accel_msg_unlock_queue(queue, flags);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Complete the sending of a message started with net_accel_msg_start_send. The
|
|
+ * message is implicit since the queue was locked by _start
|
|
+ */
|
|
+void net_accel_msg_complete_send(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ unsigned long *flags)
|
|
+{
|
|
+ _msg_complete(sp, queue, flags);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_complete_send);
|
|
+
|
|
+/* As net_accel_msg_complete_send but does the notify. */
|
|
+void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ unsigned long *flags, int irq)
|
|
+{
|
|
+ _msg_complete(sp, queue, flags);
|
|
+ notify_remote_via_irq(irq);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_msg_complete_send_notify);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/accel_msg_iface.h 2010-01-18 15:23:12.000000000 +0100
|
|
@@ -0,0 +1,415 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NET_ACCEL_MSG_IFACE_H
|
|
+#define NET_ACCEL_MSG_IFACE_H
|
|
+
|
|
+#include <linux/ip.h>
|
|
+#include <linux/tcp.h>
|
|
+#include <linux/udp.h>
|
|
+#include <linux/in.h>
|
|
+#include <linux/netdevice.h>
|
|
+#include <linux/etherdevice.h>
|
|
+
|
|
+#include "accel_shared_fifo.h"
|
|
+
|
|
+#define NET_ACCEL_MSG_MAGIC (0x85465479)
|
|
+
|
|
+/*! We talk version 0.010 of the interdomain protocol */
|
|
+#define NET_ACCEL_MSG_VERSION (0x00001000)
|
|
+
|
|
+/*! Shared memory portion of inter-domain FIFO */
|
|
+struct net_accel_msg_queue {
|
|
+ u32 read;
|
|
+ u32 write;
|
|
+};
|
|
+
|
|
+
|
|
+/*
|
|
+ * The aflags in the following structure is used as follows:
|
|
+ *
|
|
+ * - each bit is set when one of the corresponding variables is
|
|
+ * changed by either end.
|
|
+ *
|
|
+ * - the end that has made the change then forwards an IRQ to the
|
|
+ * other
|
|
+ *
|
|
+ * - the IRQ handler deals with these bits either on the fast path, or
|
|
+ * for less common changes, by jumping onto the slow path.
|
|
+ *
|
|
+ * - once it has seen a change, it clears the relevant bit.
|
|
+ *
|
|
+ * aflags is accessed atomically using clear_bit, test_bit,
|
|
+ * test_and_set_bit etc
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * The following are used to signify to the other domain when the queue
|
|
+ * they want to use is full, and when it is no longer full. Could be
|
|
+ * compressed to use fewer bits but done this way for simplicity and
|
|
+ * clarity
|
|
+ */
|
|
+
|
|
+/* "dom0->domU queue" is full */
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL 0x1
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B 0
|
|
+/* "dom0->domU queue" is not full */
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL 0x2
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B 1
|
|
+/* "domU->dom0 queue" is full */
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL 0x4
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B 2
|
|
+/* "domU->dom0 queue" is not full */
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL 0x8
|
|
+#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B 3
|
|
+/* dom0 -> domU net_dev up/down events */
|
|
+#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN 0x10
|
|
+#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B 4
|
|
+
|
|
+/*
|
|
+ * Masks used to test if there are any messages for domU and dom0
|
|
+ * respectively
|
|
+ */
|
|
+#define NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK \
|
|
+ (NET_ACCEL_MSG_AFLAGS_QUEUE0FULL | \
|
|
+ NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL | \
|
|
+ NET_ACCEL_MSG_AFLAGS_NETUPDOWN)
|
|
+#define NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK \
|
|
+ (NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL | \
|
|
+ NET_ACCEL_MSG_AFLAGS_QUEUEUFULL)
|
|
+
|
|
+/*! The shared data structure used for inter-VM communication. */
|
|
+struct net_accel_shared_page {
|
|
+ /*! Sanity check */
|
|
+ u32 magic;
|
|
+ /*! Used by host/Dom0 */
|
|
+ struct net_accel_msg_queue queue0;
|
|
+ /*! Used by guest/DomU */
|
|
+ struct net_accel_msg_queue queue1;
|
|
+ /*! Atomic flags, used to communicate simple state changes */
|
|
+ u32 aflags;
|
|
+ /*! State of net_dev used for acceleration */
|
|
+ u32 net_dev_up;
|
|
+};
|
|
+
|
|
+
|
|
+enum net_accel_hw_type {
|
|
+ /*! Not a virtualisable NIC: use slow path. */
|
|
+ NET_ACCEL_MSG_HWTYPE_NONE = 0,
|
|
+ /*! NIC is Falcon-based */
|
|
+ NET_ACCEL_MSG_HWTYPE_FALCON_A = 1,
|
|
+ NET_ACCEL_MSG_HWTYPE_FALCON_B = 2,
|
|
+ NET_ACCEL_MSG_HWTYPE_SIENA_A = 3,
|
|
+};
|
|
+
|
|
+/*! The maximum number of pages used by an event queue. */
|
|
+#define EF_HW_FALCON_EVQ_PAGES 8
|
|
+
|
|
+struct net_accel_hw_falcon_b {
|
|
+ /* VI */
|
|
+ /*! Grant for Tx DMA Q */
|
|
+ u32 txdmaq_gnt;
|
|
+ /*! Grant for Rx DMA Q */
|
|
+ u32 rxdmaq_gnt;
|
|
+ /*! Machine frame number for Tx/Rx doorbell page */
|
|
+ u32 doorbell_mfn;
|
|
+ /*! Grant for Tx/Rx doorbell page */
|
|
+ u32 doorbell_gnt;
|
|
+
|
|
+ /* Event Q */
|
|
+ /*! Grants for the pages of the EVQ */
|
|
+ u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES];
|
|
+ u32 evq_offs;
|
|
+ /*! log2(pages in event Q) */
|
|
+ u32 evq_order;
|
|
+ /*! Capacity in events */
|
|
+ u32 evq_capacity;
|
|
+ /*! Eventq pointer register physical address */
|
|
+ u32 evq_rptr;
|
|
+ /*! Interface instance */
|
|
+ u32 instance;
|
|
+ /*! Capacity of RX queue */
|
|
+ u32 rx_capacity;
|
|
+ /*! Capacity of TX queue */
|
|
+ u32 tx_capacity;
|
|
+
|
|
+ /* NIC */
|
|
+ s32 nic_arch;
|
|
+ s32 nic_revision;
|
|
+ u8 nic_variant;
|
|
+};
|
|
+
|
|
+struct net_accel_hw_falcon_a {
|
|
+ struct net_accel_hw_falcon_b common;
|
|
+ u32 evq_rptr_gnt;
|
|
+};
|
|
+
|
|
+
|
|
+/*! Description of the hardware that the DomU is being given. */
|
|
+struct net_accel_msg_hw {
|
|
+ u32 type; /*!< Hardware type */
|
|
+ union {
|
|
+ struct net_accel_hw_falcon_a falcon_a;
|
|
+ struct net_accel_hw_falcon_b falcon_b;
|
|
+ } resources;
|
|
+};
|
|
+
|
|
+/*! Start-of-day handshake message. Dom0 fills in its version and
|
|
+ * sends, DomU checks, inserts its version and replies
|
|
+ */
|
|
+struct net_accel_msg_hello {
|
|
+ /*! Sender's version (set by each side in turn) */
|
|
+ u32 version;
|
|
+ /*! max pages allocated/allowed for buffers */
|
|
+ u32 max_pages;
|
|
+};
|
|
+
|
|
+/*! Maximum number of page requests that can fit in a message. */
|
|
+#define NET_ACCEL_MSG_MAX_PAGE_REQ (8)
|
|
+
|
|
+/*! Request for NIC buffers. DomU fills out pages and grants (and
|
|
+ * optionally) reqid, dom0 fills out buf and sends reply
|
|
+ */
|
|
+struct net_accel_msg_map_buffers {
|
|
+ u32 reqid; /*!< Optional request ID */
|
|
+ u32 pages; /*!< Number of pages to map */
|
|
+ u32 grants[NET_ACCEL_MSG_MAX_PAGE_REQ]; /*!< Grant ids to map */
|
|
+ u32 buf; /*!< NIC buffer address of pages obtained */
|
|
+};
|
|
+
|
|
+/*! Notification of a change to local mac address, used to filter
|
|
+ locally destined packets off the fast path */
|
|
+struct net_accel_msg_localmac {
|
|
+ u32 flags; /*!< Should this be added or removed? */
|
|
+ u8 mac[ETH_ALEN]; /*!< The mac address to filter onto slow path */
|
|
+};
|
|
+
|
|
+struct net_accel_msg_fastpath {
|
|
+ u32 flags; /*!< Should this be added or removed? */
|
|
+ u8 mac[ETH_ALEN];/*!< The mac address to filter onto fast path */
|
|
+ u16 port; /*!< The port of the connection */
|
|
+ u32 ip; /*!< The IP address of the connection */
|
|
+ u8 proto; /*!< The protocol of connection (TCP/UDP) */
|
|
+};
|
|
+
|
|
+/*! Values for the flags field of struct net_accel_msg_localmac/net_accel_msg_fastpath */
|
|
+#define NET_ACCEL_MSG_ADD 0x1
|
|
+#define NET_ACCEL_MSG_REMOVE 0x2
|
|
+
|
|
+/*! Overall message structure */
|
|
+struct net_accel_msg {
|
|
+ /*! ID specifying type of message */
|
|
+ u32 id;
|
|
+ union {
|
|
+ /*! handshake */
|
|
+ struct net_accel_msg_hello hello;
|
|
+ /*! hardware description */
|
|
+ struct net_accel_msg_hw hw;
|
|
+ /*! buffer map request */
|
|
+ struct net_accel_msg_map_buffers mapbufs;
|
|
+ /*! mac address of a local interface */
|
|
+ struct net_accel_msg_localmac localmac;
|
|
+ /*! address of a new fastpath connection */
|
|
+ struct net_accel_msg_fastpath fastpath;
|
|
+ /*! make the message a fixed size */
|
|
+ u8 pad[128 - sizeof(u32)];
|
|
+ } u;
|
|
+};
|
|
+
|
|
+
|
|
+#define NET_ACCEL_MSG_HW_TO_MSG(_u) container_of(_u, struct net_accel_msg, u.hw)
|
|
+
|
|
+/*! Inter-domain message FIFO */
|
|
+typedef struct {
|
|
+ struct net_accel_msg *fifo;
|
|
+ u32 fifo_mask;
|
|
+ u32 *fifo_rd_i;
|
|
+ u32 *fifo_wr_i;
|
|
+ spinlock_t lock;
|
|
+ u32 is_locked; /* Debug flag */
|
|
+} sh_msg_fifo2;
|
|
+
|
|
+
|
|
+#define NET_ACCEL_MSG_OFFSET_MASK PAGE_MASK
|
|
+
|
|
+/* Modifiers */
|
|
+#define NET_ACCEL_MSG_REPLY (0x80000000)
|
|
+#define NET_ACCEL_MSG_ERROR (0x40000000)
|
|
+
|
|
+/* Dom0 -> DomU and reply. Handshake/version check. */
|
|
+#define NET_ACCEL_MSG_HELLO (0x00000001)
|
|
+/* Dom0 -> DomU : hardware setup (VI info.) */
|
|
+#define NET_ACCEL_MSG_SETHW (0x00000002)
|
|
+/*
|
|
+ * Dom0 -> DomU. Notification of a local mac to add/remove from slow
|
|
+ * path filter
|
|
+ */
|
|
+#define NET_ACCEL_MSG_LOCALMAC (0x00000003)
|
|
+/*
|
|
+ * DomU -> Dom0 and reply. Request for buffer table entries for
|
|
+ * preallocated pages.
|
|
+ */
|
|
+#define NET_ACCEL_MSG_MAPBUF (0x00000004)
|
|
+/*
|
|
+ * Dom0 -> DomU. Notification of a local mac to add/remove from fast
|
|
+ * path filter
|
|
+ */
|
|
+#define NET_ACCEL_MSG_FASTPATH (0x00000005)
|
|
+
|
|
+/*! Initialise a message and set the type
|
|
+ * \param msg : the message
|
|
+ * \param code : the message type
|
|
+ */
|
|
+static inline void net_accel_msg_init(struct net_accel_msg *msg, int code) {
|
|
+ msg->id = (u32)code;
|
|
+}
|
|
+
|
|
+/*! initialise a shared page structure
|
|
+ * \param shared_page : mapped memory in which the structure resides
|
|
+ * \param len : size of the message FIFO area that follows
|
|
+ * \param up : initial up/down state of netdev
|
|
+ * \return 0 or an error code
|
|
+ */
|
|
+extern int net_accel_msg_init_page(void *shared_page, int len, int up);
|
|
+
|
|
+/*! initialise a message queue
|
|
+ * \param queue : the message FIFO to initialise
|
|
+ * \param indices : the read and write indices in shared memory
|
|
+ * \param base : the start of the memory area for the FIFO
|
|
+ * \param size : the size of the FIFO in bytes
|
|
+ */
|
|
+extern void net_accel_msg_init_queue(sh_msg_fifo2 *queue,
|
|
+ struct net_accel_msg_queue *indices,
|
|
+ struct net_accel_msg *base, int size);
|
|
+
|
|
+/* Notify after a batch of messages have been sent */
|
|
+extern void net_accel_msg_notify(int irq);
|
|
+
|
|
+/*! Send a message on the specified FIFO. The message is copied to the
|
|
+ * current slot of the FIFO.
|
|
+ * \param sp : pointer to shared page
|
|
+ * \param q : pointer to message FIFO to use
|
|
+ * \param msg : pointer to message
|
|
+ * \return 0 on success, -errno on failure
|
|
+ */
|
|
+extern int net_accel_msg_send(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *q,
|
|
+ struct net_accel_msg *msg);
|
|
+extern int net_accel_msg_reply(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *q,
|
|
+ struct net_accel_msg *msg);
|
|
+
|
|
+/*! As net_accel_msg_send but also posts a notification to the far end. */
|
|
+extern int net_accel_msg_send_notify(struct net_accel_shared_page *sp,
|
|
+ int irq, sh_msg_fifo2 *q,
|
|
+ struct net_accel_msg *msg);
|
|
+/*! As net_accel_msg_reply but also posts a notification to the far end. */
|
|
+extern int net_accel_msg_reply_notify(struct net_accel_shared_page *sp,
|
|
+ int irq, sh_msg_fifo2 *q,
|
|
+ struct net_accel_msg *msg);
|
|
+
|
|
+/*! Receive a message on the specified FIFO. Returns 0 on success,
|
|
+ * -errno on failure.
|
|
+ */
|
|
+extern int net_accel_msg_recv(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *q,
|
|
+ struct net_accel_msg *msg);
|
|
+
|
|
+/*! Look at a received message, if any, so a decision can be made
|
|
+ * about whether to read it now or not. Cookie is a bit of debug
|
|
+ * which is set here and checked when passed to
|
|
+ * net_accel_msg_recv_next()
|
|
+ */
|
|
+extern int net_accel_msg_peek(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ struct net_accel_msg *msg, int *cookie);
|
|
+/*! Move the queue onto the next element, used after finished with a
|
|
+ * peeked msg
|
|
+ */
|
|
+extern int net_accel_msg_recv_next(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue, int cookie);
|
|
+
|
|
+/*! Start sending a message without copying. returns a pointer to a
|
|
+ * message that will be filled out in place. The queue is locked
|
|
+ * until the message is sent.
|
|
+ */
|
|
+extern
|
|
+struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ unsigned long *flags);
|
|
+
|
|
+
|
|
+/*! Complete the sending of a message started with
|
|
+ * net_accel_msg_start_send. The message is implicit since the queue
|
|
+ * was locked by _start
|
|
+ */
|
|
+extern void net_accel_msg_complete_send(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ unsigned long *flags);
|
|
+
|
|
+/*! As net_accel_msg_complete_send but does the notify. */
|
|
+extern void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue,
|
|
+ unsigned long *flags, int irq);
|
|
+
|
|
+/*! Lock the queue so that multiple "_locked" functions can be called
|
|
+ * without the queue being modified by others
|
|
+ */
|
|
+static inline
|
|
+void net_accel_msg_lock_queue(sh_msg_fifo2 *queue, unsigned long *flags)
|
|
+{
|
|
+ spin_lock_irqsave(&queue->lock, (*flags));
|
|
+ rmb();
|
|
+ BUG_ON(queue->is_locked);
|
|
+ queue->is_locked = 1;
|
|
+}
|
|
+
|
|
+/*! Unlock the queue */
|
|
+static inline
|
|
+void net_accel_msg_unlock_queue(sh_msg_fifo2 *queue, unsigned long *flags)
|
|
+{
|
|
+ BUG_ON(!queue->is_locked);
|
|
+ queue->is_locked = 0;
|
|
+ wmb();
|
|
+ spin_unlock_irqrestore(&queue->lock, (*flags));
|
|
+}
|
|
+
|
|
+/*! Give up without sending a message that was started with
|
|
+ * net_accel_msg_start_send()
|
|
+ */
|
|
+static inline
|
|
+void net_accel_msg_abort_send(struct net_accel_shared_page *sp,
|
|
+ sh_msg_fifo2 *queue, unsigned long *flags)
|
|
+{
|
|
+ net_accel_msg_unlock_queue(queue, flags);
|
|
+}
|
|
+
|
|
+/*! Test the queue to ensure there is sufficient space */
|
|
+static inline
|
|
+int net_accel_msg_check_space(sh_msg_fifo2 *queue, unsigned space)
|
|
+{
|
|
+ return sh_fifo2_space(queue) >= space;
|
|
+}
|
|
+
|
|
+#endif /* NET_ACCEL_MSG_IFACE_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/accel_shared_fifo.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,127 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NET_ACCEL_SHARED_FIFO_H
|
|
+#define NET_ACCEL_SHARED_FIFO_H
|
|
+
|
|
+/*
|
|
+ * This is based on fifo.h, but handles sharing between address spaces
|
|
+ * that don't trust each other, by splitting out the read and write
|
|
+ * indices. This costs at least one pointer indirection more than the
|
|
+ * vanilla version per access.
|
|
+ */
|
|
+
|
|
+/*
+ * Byte FIFO whose read and write indices are reached through pointers
+ * rather than stored in the structure itself.
+ */
+typedef struct {
+	char     *fifo;		/* base of the byte buffer */
+	unsigned  fifo_mask;	/* index mask; buffer size is fifo_mask + 1 */
+	unsigned *fifo_rd_i;	/* location of the read index */
+	unsigned *fifo_wr_i;	/* location of the write index */
+} sh_byte_fifo2;
|
|
+
|
|
+#define SH_FIFO2_M(f, x) ((x) & ((f)->fifo_mask))
|
|
+
|
|
+/*
+ * Return the smallest order >= min_order for which (1ul << order) >= n.
+ *
+ * The search stops at the top bit of unsigned long so the shift count can
+ * never reach the width of the type (undefined behaviour, and previously
+ * a loop that could not terminate).  If n exceeds the largest power of
+ * two that fits, the top-bit order is returned; if min_order is already
+ * at or beyond the top bit it is returned unchanged.
+ */
+static inline unsigned log2_ge(unsigned long n, unsigned min_order)
+{
+	const unsigned top_order = 8 * sizeof(unsigned long) - 1;
+	unsigned order = min_order;
+
+	while (order < top_order && (1ul << order) < n)
+		++order;
+	return order;
+}
|
|
+
|
|
+/* Return 2 raised to 'order' as an unsigned long. */
+static inline unsigned long pow2(unsigned order)
+{
+	unsigned long bit = 1ul;
+
+	bit <<= order;
+	return bit;
+}
|
|
+
|
|
+#define is_pow2(x) (pow2(log2_ge((x), 0)) == (x))
|
|
+
|
|
+#define sh_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \
|
|
+ is_pow2((f)->fifo_mask+1u))
|
|
+
|
|
+/*
+ * Attach the read/write index locations to a FIFO, zero both indices and
+ * set the mask.  'cap' must be one less than a power of two (checked with
+ * BUG_ON).  The buffer pointer (f)->fifo is not touched here.
+ * All macro arguments are parenthesised so expression arguments expand
+ * safely.
+ */
+#define sh_fifo2_init(f, cap, _rptr, _wptr)			\
+	do {							\
+		BUG_ON(!is_pow2((cap) + 1));			\
+		(f)->fifo_rd_i = (_rptr);			\
+		(f)->fifo_wr_i = (_wptr);			\
+		*(f)->fifo_rd_i = *(f)->fifo_wr_i = 0u;		\
+		(f)->fifo_mask = (cap);				\
+	} while(0)
|
|
+
|
|
+#define sh_fifo2_num(f) SH_FIFO2_M((f),*(f)->fifo_wr_i - *(f)->fifo_rd_i)
|
|
+#define sh_fifo2_space(f) SH_FIFO2_M((f),*(f)->fifo_rd_i - *(f)->fifo_wr_i-1u)
|
|
+#define sh_fifo2_is_empty(f) (sh_fifo2_num(f)==0)
|
|
+#define sh_fifo2_not_empty(f) (sh_fifo2_num(f)!=0)
|
|
+#define sh_fifo2_is_full(f) (sh_fifo2_space(f)==0u)
|
|
+#define sh_fifo2_not_full(f) (sh_fifo2_space(f)!=0u)
|
|
+#define sh_fifo2_buf_size(f) ((f)->fifo_mask + 1u)
|
|
+#define sh_fifo2_capacity(f) ((f)->fifo_mask)
|
|
+#define sh_fifo2_end(f) ((f)->fifo + sh_fifo2_buf_size(f))
|
|
+#define sh_fifo2_not_half_full(f) (sh_fifo2_space(f) > (sh_fifo2_capacity(f) >> 1))
|
|
+
|
|
+#define sh_fifo2_peek(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i)])
|
|
+#define sh_fifo2_peekp(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_rd_i))
|
|
+#define sh_fifo2_poke(f) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i)])
|
|
+#define sh_fifo2_pokep(f) ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_wr_i))
|
|
+#define sh_fifo2_peek_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i+(i))])
|
|
+#define sh_fifo2_poke_i(f,i) ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i+(i))])
|
|
+
|
|
+#define sh_fifo2_rd_next(f) \
|
|
+ do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + 1u;} while(0)
|
|
+#define sh_fifo2_wr_next(f) \
|
|
+ do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + 1u;} while(0)
|
|
+#define sh_fifo2_rd_adv(f, n) \
|
|
+ do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + (n);} while(0)
|
|
+#define sh_fifo2_wr_adv(f, n) \
|
|
+ do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + (n);} while(0)
|
|
+
|
|
+#define sh_fifo2_put(f, v) \
|
|
+ do {sh_fifo2_poke(f) = (v); wmb(); sh_fifo2_wr_next(f);} while(0)
|
|
+
|
|
+#define sh_fifo2_get(f, pv) \
|
|
+ do {*(pv) = sh_fifo2_peek(f); mb(); sh_fifo2_rd_next(f);} while(0)
|
|
+
|
|
+/*
+ * Number of bytes that can be read from the FIFO in a single contiguous
+ * run, i.e. without wrapping past the end of the buffer.
+ *
+ * Each index is read once and masked before use.  The wrap case must use
+ * the masked read offset: the raw index is only ever incremented (see
+ * sh_fifo2_rd_next/sh_fifo2_rd_adv), so it can exceed the buffer size and
+ * subtracting it would underflow.
+ */
+static inline unsigned sh_fifo2_contig_num(sh_byte_fifo2 *f)
+{
+	unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i);
+	unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i);
+
+	if (fifo_wr_i >= fifo_rd_i)
+		return fifo_wr_i - fifo_rd_i;
+
+	/* Data wraps: the contiguous part runs from the read offset to the
+	 * end of the buffer. */
+	return f->fifo_mask + 1u - fifo_rd_i;
+}
|
|
+
|
|
+/*
+ * Number of bytes that can be written to the FIFO in a single contiguous
+ * run, i.e. without wrapping past the end of the buffer.
+ */
+static inline unsigned sh_fifo2_contig_space(sh_byte_fifo2 *f)
+{
+	unsigned wr = SH_FIFO2_M(f, *f->fifo_wr_i);
+	unsigned rd = SH_FIFO2_M(f, *f->fifo_rd_i);
+	unsigned room;
+
+	/* Reader is ahead in the buffer: fill up to one slot before it. */
+	if (rd > wr)
+		return rd - wr - 1;
+
+	/* Otherwise fill to the end of the buffer ... */
+	room = f->fifo_mask + 1u - wr;
+	/* ... but the last byte can't be used if the read pointer is at zero. */
+	if (rd == 0)
+		room -= 1;
+	return room;
+}
|
|
+
|
|
+
|
|
+#endif /* NET_ACCEL_SHARED_FIFO_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/accel_util.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,336 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#include <linux/if_ether.h>
|
|
+#include <linux/delay.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/hypercall.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <xen/gnttab.h>
|
|
+
|
|
+#include "accel_util.h"
|
|
+
|
|
+#ifdef EFX_GCOV
|
|
+#include "gcov.h"
|
|
+
|
|
+/* EFX_GCOV builds only: register this module with the gcov provider. */
+static int __init net_accel_init(void)
+{
+	gcov_provider_init(THIS_MODULE);
+
+	return 0;
+}
|
|
+module_init(net_accel_init);
|
|
+
|
|
+/* EFX_GCOV builds only: undo the registration done in net_accel_init(). */
+static void __exit net_accel_exit(void)
+{
+	gcov_provider_fini(THIS_MODULE);
+}
|
|
+module_exit(net_accel_exit);
|
|
+#endif
|
|
+
|
|
+/*
+ * Shutdown remote domain that is misbehaving: ask the hypervisor to shut
+ * 'domain' down with reason SHUTDOWN_crash.  Returns the result of the
+ * SCHEDOP_remote_shutdown hypercall.
+ */
+int net_accel_shutdown_remote(int domain)
+{
+	struct sched_remote_shutdown request = {
+		.domain_id = domain,
+		.reason = SHUTDOWN_crash
+	};
+	int rc;
+
+	EPRINTK("Crashing domain %d\n", domain);
+
+	rc = HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &request);
+	return rc;
+}
|
|
+EXPORT_SYMBOL(net_accel_shutdown_remote);
|
|
+
|
|
+
|
|
+/*
+ * Based on xenbus_backend_client.c:xenbus_map_ring().
+ *
+ * Map grant reference gnt_ref from the other end of 'dev' at vaddr with
+ * the given GNTMAP_* flags.  On success the grant handle is stored in
+ * *handle and, when dev_bus_addr is non-NULL, the bus address reported by
+ * the operation is stored there too.  Returns 0 on success; otherwise the
+ * failure is reported through xenbus_dev_error() and -EINVAL is returned.
+ */
+static int net_accel_map_grant(struct xenbus_device *dev, int gnt_ref,
+			       grant_handle_t *handle, void *vaddr,
+			       u64 *dev_bus_addr, unsigned flags)
+{
+	struct gnttab_map_grant_ref map_op;
+
+	gnttab_set_map_op(&map_op, (unsigned long)vaddr, flags,
+			  gnt_ref, dev->otherend_id);
+
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
+
+	if (map_op.status != GNTST_okay) {
+		xenbus_dev_error
+			(dev, map_op.status,
+			 "failed mapping in shared page %d from domain %d\n",
+			 gnt_ref, dev->otherend_id);
+		return -EINVAL;
+	}
+
+	*handle = map_op.handle;
+	if (dev_bus_addr != NULL)
+		*dev_bus_addr = map_op.dev_bus_addr;
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Based on xenbus_backend_client.c:xenbus_unmap_ring().
+ *
+ * Undo a mapping made by net_accel_map_grant().  A non-zero dev_bus_addr
+ * is copied into the unmap request.  A failing hypercall is fatal
+ * (BUG_ON); a non-okay status is reported through xenbus_dev_error() and
+ * turned into -EINVAL.  Returns 0 on success.
+ */
+static int net_accel_unmap_grant(struct xenbus_device *dev,
+				 grant_handle_t handle,
+				 void *vaddr, u64 dev_bus_addr,
+				 unsigned flags)
+{
+	struct gnttab_unmap_grant_ref unmap_op;
+
+	gnttab_set_unmap_op(&unmap_op, (unsigned long)vaddr, flags, handle);
+
+	if (dev_bus_addr != 0)
+		unmap_op.dev_bus_addr = dev_bus_addr;
+
+	BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1));
+
+	if (unmap_op.status == GNTST_okay)
+		return 0;
+
+	xenbus_dev_error(dev, unmap_op.status,
+			 "failed unmapping page at handle %d error %d\n",
+			 handle, unmap_op.status);
+	return -EINVAL;
+}
|
|
+
|
|
+
|
|
+/*
+ * Map a granted page for device access (GNTMAP_device_map).  No virtual
+ * address is supplied; the grant handle and bus address come back through
+ * *handle and *dev_bus_addr.  Returns 0 or -EINVAL.
+ */
+int net_accel_map_device_page(struct xenbus_device *dev,
+			      int gnt_ref, grant_handle_t *handle,
+			      u64 *dev_bus_addr)
+{
+	int rc;
+
+	rc = net_accel_map_grant(dev, gnt_ref, handle, NULL, dev_bus_addr,
+				 GNTMAP_device_map);
+	return rc;
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_map_device_page);
|
|
+
|
|
+
|
|
+/*
+ * Undo net_accel_map_device_page() for the given handle and bus address.
+ * Returns 0 or -EINVAL.
+ */
+int net_accel_unmap_device_page(struct xenbus_device *dev,
+				grant_handle_t handle, u64 dev_bus_addr)
+{
+	int rc;
+
+	rc = net_accel_unmap_grant(dev, handle, NULL, dev_bus_addr,
+				   GNTMAP_device_map);
+	return rc;
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_unmap_device_page);
|
|
+
|
|
+
|
|
+/* Bookkeeping needed to undo a net_accel_map_grants_valloc() mapping. */
+struct net_accel_valloc_grant_mapping {
+	/* virtual area obtained from alloc_vm_area() */
+	struct vm_struct *vm;
+	/* number of pages mapped, i.e. entries in grant_handles[] */
+	int pages;
+	/* one grant handle per page, allocated together with the struct */
+	grant_handle_t grant_handles[0];
+};
|
|
+
|
|
+/*
+ * Map a series of grants into a contiguous virtual area.
+ *
+ * dev:    xenbus device whose other end owns the grants
+ * grants: array of npages grant references
+ * npages: number of pages to map; must be positive
+ * flags:  GNTMAP_* flags applied to every page
+ * priv:   if non-NULL, receives the bookkeeping structure that
+ *         net_accel_unmap_grants_vfree() needs; if NULL the bookkeeping
+ *         is freed here and the mapping can no longer be undone via
+ *         that function.
+ *
+ * Returns the start of the mapped area, or NULL on failure (in which
+ * case every page mapped so far has been unmapped again).
+ */
+static void *net_accel_map_grants_valloc(struct xenbus_device *dev,
+					 unsigned *grants, int npages,
+					 unsigned flags, void **priv)
+{
+	struct net_accel_valloc_grant_mapping *map;
+	struct vm_struct *vm;
+	void *addr;
+	int i, j, rc;
+
+	/* Keep a missing list or a non-positive count out of the size
+	 * arithmetic used for the two allocations below. */
+	if (grants == NULL || npages <= 0) {
+		EPRINTK("Invalid grant list (%d pages)\n", npages);
+		return NULL;
+	}
+
+	vm = alloc_vm_area(PAGE_SIZE * npages);
+	if (vm == NULL) {
+		EPRINTK("No memory from alloc_vm_area.\n");
+		return NULL;
+	}
+	/*
+	 * Get a structure in which we will record all the info needed
+	 * to undo the mapping.
+	 */
+	map = kzalloc(sizeof(struct net_accel_valloc_grant_mapping) +
+		      npages * sizeof(grant_handle_t), GFP_KERNEL);
+	if (map == NULL) {
+		EPRINTK("No memory for net_accel_valloc_grant_mapping\n");
+		free_vm_area(vm);
+		return NULL;
+	}
+	map->vm = vm;
+	map->pages = npages;
+
+	/* Do the actual mapping, one page at a time */
+	addr = vm->addr;
+
+	for (i = 0; i < npages; i++) {
+		rc = net_accel_map_grant(dev, grants[i], map->grant_handles + i,
+					 addr, NULL, flags);
+		if (rc < 0)
+			goto undo;
+		addr = (void*)((unsigned long)addr + PAGE_SIZE);
+	}
+
+	if (priv)
+		*priv = (void *)map;
+	else
+		kfree(map);
+
+	return vm->addr;
+
+ undo:
+	EPRINTK("Aborting contig map due to single map failure %d (%d of %d)\n",
+		rc, i+1, npages);
+	/* Only pages 0..i-1 were mapped successfully; unmap exactly those. */
+	for (j = 0; j < i; j++) {
+		addr = (void*)((unsigned long)vm->addr + (j * PAGE_SIZE));
+		net_accel_unmap_grant(dev, map->grant_handles[j], addr, 0,
+				      flags);
+	}
+	free_vm_area(vm);
+	kfree(map);
+	return NULL;
+}
|
|
+
|
|
+/*
+ * Undo the result of net_accel_map_grants_valloc().
+ *
+ * priv is the bookkeeping pointer that function stored for the caller.
+ * Every page is unmapped with the same flags used for mapping, then the
+ * virtual area and the bookkeeping structure are released.  A NULL priv
+ * is ignored, since the map side can succeed without handing one out.
+ */
+static void net_accel_unmap_grants_vfree(struct xenbus_device *dev,
+					 unsigned flags, void *priv)
+{
+	struct net_accel_valloc_grant_mapping *map =
+		(struct net_accel_valloc_grant_mapping *)priv;
+	void *addr;
+	int npages;
+	int i;
+
+	if (map == NULL)
+		return;
+
+	addr = map->vm->addr;
+	npages = map->pages;
+
+	for (i = 0; i < npages; i++) {
+		net_accel_unmap_grant(dev, map->grant_handles[i], addr, 0,
+				      flags);
+		addr = (void*)((unsigned long)addr + PAGE_SIZE);
+	}
+	free_vm_area(map->vm);
+	kfree(map);
+}
|
|
+
|
|
+
|
|
+/*
+ * Map npages grants contiguously for host access (GNTMAP_host_map).
+ * Returns the mapped address or NULL; *priv receives the handle needed
+ * by net_accel_unmap_grants_contig().
+ */
+void *net_accel_map_grants_contig(struct xenbus_device *dev,
+				  unsigned *grants, int npages,
+				  void **priv)
+{
+	void *mapped;
+
+	mapped = net_accel_map_grants_valloc(dev, grants, npages,
+					     GNTMAP_host_map, priv);
+	return mapped;
+}
|
|
+EXPORT_SYMBOL(net_accel_map_grants_contig);
|
|
+
|
|
+
|
|
+/* Release a mapping created by net_accel_map_grants_contig(). */
+void net_accel_unmap_grants_contig(struct xenbus_device *dev, void *priv)
+{
+	net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv);
+}
|
|
+EXPORT_SYMBOL(net_accel_unmap_grants_contig);
|
|
+
|
|
+
|
|
+/*
+ * Map a single granted page for host access (GNTMAP_host_map).
+ * Returns the mapped address or NULL; *priv receives the handle needed
+ * by net_accel_unmap_iomem_page().
+ */
+void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref,
+			       void **priv)
+{
+	/* The mapping helper takes an array of unsigned references; use a
+	 * correctly typed local instead of the address of the int
+	 * parameter. */
+	unsigned grant = gnt_ref;
+
+	return net_accel_map_grants_valloc(dev, &grant, 1, GNTMAP_host_map, priv);
+}
|
|
+EXPORT_SYMBOL(net_accel_map_iomem_page);
|
|
+
|
|
+
|
|
+/* Release a mapping created by net_accel_map_iomem_page(). */
+void
+net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv)
+{
+	net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv);
+}
|
|
+EXPORT_SYMBOL(net_accel_unmap_iomem_page);
|
|
+
|
|
+
|
|
+/*
+ * Grant the other end of 'dev' access to frame mfn.  GTF_PCD is passed
+ * as the grant flag when is_iomem is non-zero.  Returns the value from
+ * gnttab_grant_foreign_access(); a negative value is also reported via
+ * xenbus_dev_error().
+ */
+int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn,
+			 int is_iomem)
+{
+	int gnt_flags = is_iomem ? GTF_PCD : 0;
+	int ref = gnttab_grant_foreign_access(dev->otherend_id, mfn, gnt_flags);
+
+	if (ref >= 0)
+		return ref;
+
+	xenbus_dev_error(dev, ref, "failed granting access to page\n");
+	return ref;
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_grant_page);
|
|
+
|
|
+
|
|
+/*
+ * Undo net_accel_grant_page().  The grant is ended only when
+ * gnttab_query_foreign_access() reports it unused; otherwise the grant
+ * is left in place and -EBUSY is returned.  Returns 0 on success.
+ */
+int net_accel_ungrant_page(grant_ref_t gntref)
+{
+	if (likely(gnttab_query_foreign_access(gntref) == 0)) {
+		gnttab_end_foreign_access(gntref, 0);
+		return 0;
+	}
+
+	EPRINTK("%s: remote domain still using grant %d\n", __FUNCTION__,
+		gntref);
+	return -EBUSY;
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_ungrant_page);
|
|
+
|
|
+
|
|
+/*
+ * Read the "mac" node below dev->nodename from xenstore and parse it as
+ * ETH_ALEN colon-separated hexadecimal octets.
+ *
+ * mac[] is written only when the whole string parses; each octet must
+ * fit in one byte.  Returns 0 on success, the xenbus_read() error if the
+ * node cannot be read, or -ENOENT if the string is malformed.
+ */
+int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+	char *s, *e, *macstr;
+	u8 parsed[ETH_ALEN];
+	unsigned long octet;
+	int i, rc = 0;
+
+	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+	if (IS_ERR(macstr))
+		return PTR_ERR(macstr);
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		octet = simple_strtoul(s, &e, 16);
+		/* Need at least one digit, a value that fits a byte, and the
+		 * right separator (':' between octets, NUL after the last). */
+		if ((s == e) || (octet > 0xff) ||
+		    (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+			rc = -ENOENT;
+			goto out;
+		}
+		parsed[i] = (u8)octet;
+		s = e+1;
+	}
+
+	/* Publish the address only once every octet has been validated. */
+	for (i = 0; i < ETH_ALEN; i++)
+		mac[i] = parsed[i];
+
+ out:
+	kfree(macstr);
+	return rc;
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_xen_net_read_mac);
|
|
+
|
|
+
|
|
+/*
+ * Write 'state' to the "accelstate" node below dev->nodename, inside a
+ * xenbus transaction that is retried for as long as committing it
+ * returns -EAGAIN.  Nothing is written if the device node no longer
+ * exists.  Errors are not reported to the caller.
+ */
+void net_accel_update_state(struct xenbus_device *dev, int state)
+{
+	struct xenbus_transaction tr;
+	int err;
+
+	DPRINTK("%s: setting accelstate to %s\n", __FUNCTION__,
+		xenbus_strstate(state));
+
+	if (!xenbus_exists(XBT_NIL, dev->nodename, ""))
+		return;
+
+	VPRINTK("%s: nodename %s\n", __FUNCTION__, dev->nodename);
+
+	do {
+		err = xenbus_transaction_start(&tr);
+		if (err != 0)
+			/* No transaction was opened, so there is nothing to
+			 * end; 'tr' is not valid here. */
+			return;
+
+		err = xenbus_printf(tr, dev->nodename, "accelstate",
+				    "%d", state);
+		if (err != 0) {
+			/* Abort the open transaction and give up. */
+			xenbus_transaction_end(tr, 1);
+			return;
+		}
+
+		err = xenbus_transaction_end(tr, 0);
+	} while (err == -EAGAIN);
+}
|
|
+EXPORT_SYMBOL_GPL(net_accel_update_state);
|
|
+
|
|
+MODULE_LICENSE("GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/sfc_netutil/accel_util.h 2008-02-20 09:32:49.000000000 +0100
|
|
@@ -0,0 +1,127 @@
|
|
+/****************************************************************************
|
|
+ * Solarflare driver for Xen network acceleration
|
|
+ *
|
|
+ * Copyright 2006-2008: Solarflare Communications Inc,
|
|
+ * 9501 Jeronimo Road, Suite 250,
|
|
+ * Irvine, CA 92618, USA
|
|
+ *
|
|
+ * Maintained by Solarflare Communications <linux-xen-drivers@solarflare.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify it
|
|
+ * under the terms of the GNU General Public License version 2 as published
|
|
+ * by the Free Software Foundation, incorporated herein by reference.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
+ ****************************************************************************
|
|
+ */
|
|
+
|
|
+#ifndef NETBACK_ACCEL_UTIL_H
|
|
+#define NETBACK_ACCEL_UTIL_H
|
|
+
|
|
+#ifdef DPRINTK
|
|
+#undef DPRINTK
|
|
+#endif
|
|
+
|
|
+/* Last path component of __FILE__; parenthesised so it expands as one expression. */
+#define FILE_LEAF (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
|
|
+
|
|
+#if 1
|
|
+#define VPRINTK(_f, _a...)
|
|
+#else
|
|
+#define VPRINTK(_f, _a...) \
|
|
+ printk("(file=%s, line=%d) " _f, \
|
|
+ FILE_LEAF , __LINE__ , ## _a )
|
|
+#endif
|
|
+
|
|
+#if 1
|
|
+#define DPRINTK(_f, _a...)
|
|
+#else
|
|
+#define DPRINTK(_f, _a...) \
|
|
+ printk("(file=%s, line=%d) " _f, \
|
|
+ FILE_LEAF , __LINE__ , ## _a )
|
|
+#endif
|
|
+
|
|
+#define EPRINTK(_f, _a...) \
|
|
+ printk("(file=%s, line=%d) " _f, \
|
|
+ FILE_LEAF , __LINE__ , ## _a )
|
|
+
|
|
+#define EPRINTK_ON(exp) \
|
|
+ do { \
|
|
+ if (exp) \
|
|
+ EPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
|
|
+ } while(0)
|
|
+
|
|
+#define DPRINTK_ON(exp) \
|
|
+ do { \
|
|
+ if (exp) \
|
|
+ DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
|
|
+ } while(0)
|
|
+
|
|
+#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"
|
|
+#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5]
|
|
+
|
|
+#include <xen/xenbus.h>
|
|
+
|
|
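+/*! Map a set of granted pages from another domain into one contiguous
+ *  virtual area.
+ * \param dev The xenbus device context
+ * \param grants Array of npages grant references to map
+ * \param npages Number of entries in grants
+ * \param priv Receives the private data that must later be passed to
+ *             net_accel_unmap_grants_contig()
+ * \return The address of the mapped area, or NULL on failure
+ */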
|
|
+extern
|
|
+void *net_accel_map_grants_contig(struct xenbus_device *dev,
|
|
+ unsigned *grants, int npages,
|
|
+ void **priv);
|
|
+
|
|
+/*! Unmap a set of pages mapped using net_accel_map_grants_contig.
|
|
+ * \param dev The xenbus device context
|
|
+ * \param priv The private data returned by the mapping function
|
|
+ */
|
|
+extern
|
|
+void net_accel_unmap_grants_contig(struct xenbus_device *dev, void *priv);
|
|
+
|
|
+/*! Read the MAC address of a device from xenstore */
|
|
+extern
|
|
+int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
|
|
+
|
|
+/*! Update the accelstate field for a device in xenstore */
|
|
+extern
|
|
+void net_accel_update_state(struct xenbus_device *dev, int state);
|
|
+
|
|
+/* These four map/unmap functions are based on
|
|
+ * xenbus_backend_client.c:xenbus_map_ring(). However, they are not
|
|
+ * used for ring buffers, instead just to map pages between domains,
|
|
+ * or to map a page so that it is accessible by a device
|
|
+ */
|
|
+extern
|
|
+int net_accel_map_device_page(struct xenbus_device *dev,
|
|
+ int gnt_ref, grant_handle_t *handle,
|
|
+ u64 *dev_bus_addr);
|
|
+extern
|
|
+int net_accel_unmap_device_page(struct xenbus_device *dev,
|
|
+ grant_handle_t handle, u64 dev_bus_addr);
|
|
+extern
|
|
+void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref,
|
|
+ void **priv);
|
|
+extern
|
|
+void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv);
|
|
+
|
|
+/*! Grant a page to remote domain */
|
|
+extern
|
|
+int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn,
|
|
+ int is_iomem);
|
|
+/*! Undo a net_accel_grant_page */
|
|
+extern
|
|
+int net_accel_ungrant_page(grant_ref_t gntref);
|
|
+
|
|
+
|
|
+/*! Shutdown remote domain that is misbehaving */
|
|
+extern
|
|
+int net_accel_shutdown_remote(int domain);
|
|
+
|
|
+
|
|
+#endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/tpmback/Makefile 2007-06-12 13:13:45.000000000 +0200
|
|
@@ -0,0 +1,4 @@
|
|
+
|
|
+obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmbk.o
|
|
+
|
|
+tpmbk-y += tpmback.o interface.o xenbus.o
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/tpmback/common.h 2007-06-12 13:13:45.000000000 +0200
|
|
@@ -0,0 +1,85 @@
|
|
+/******************************************************************************
|
|
+ * drivers/xen/tpmback/common.h
|
|
+ */
|
|
+
|
|
+#ifndef __TPM__BACKEND__COMMON_H__
|
|
+#define __TPM__BACKEND__COMMON_H__
|
|
+
|
|
+#include <linux/version.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/slab.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+#include <xen/interface/io/tpmif.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/pgalloc.h>
|
|
+
|
|
+#define DPRINTK(_f, _a...) \
|
|
+ pr_debug("(file=%s, line=%d) " _f, \
|
|
+ __FILE__ , __LINE__ , ## _a )
|
|
+
|
|
+struct backend_info;
|
|
+
|
|
+/* Per-frontend state of the TPM backend. */
+typedef struct tpmif_st {
+	/* Entry in the list of all interfaces. */
+	struct list_head tpmif_list;
+
+	/* Unique identifier for this interface. */
+	domid_t domid;
+	unsigned int handle;
+
+	/* Physical parameters of the comms window: IRQ of the bound
+	 * event channel, 0 while not connected. */
+	unsigned int irq;
+
+	/* The shared rings and indexes: mapped frontend page and the
+	 * virtual area that holds the mapping. */
+	tpmif_tx_interface_t *tx;
+	struct vm_struct *tx_area;
+
+	/* Miscellaneous private stuff. */
+	enum { DISCONNECTED, DISCONNECTING, CONNECTED } status;
+	int active;
+
+	struct tpmif_st *hash_next;
+	struct list_head list;		/* scheduling list */
+	atomic_t refcnt;		/* see tpmif_get() / tpmif_put() */
+
+	struct backend_info *bi;
+
+	/* Grant handle and reference of the mapped shared page. */
+	grant_handle_t shmem_handle;
+	grant_ref_t shmem_ref;
+	/* Page vector indexed by idx_to_kaddr(). */
+	struct page **mmap_pages;
+
+	/* "tpmif<domid>", used as the IRQ handler name. */
+	char devname[20];
+} tpmif_t;
|
|
+
|
|
+void tpmif_disconnect_complete(tpmif_t * tpmif);
|
|
+tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi);
|
|
+void tpmif_interface_init(void);
|
|
+void tpmif_interface_exit(void);
|
|
+void tpmif_schedule_work(tpmif_t * tpmif);
|
|
+void tpmif_deschedule_work(tpmif_t * tpmif);
|
|
+void tpmif_xenbus_init(void);
|
|
+void tpmif_xenbus_exit(void);
|
|
+int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
|
|
+irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+
|
|
+long int tpmback_get_instance(struct backend_info *bi);
|
|
+
|
|
+int vtpm_release_packets(tpmif_t * tpmif, int send_msgs);
|
|
+
|
|
+
|
|
+#define tpmif_get(_b) (atomic_inc(&(_b)->refcnt))
|
|
+#define tpmif_put(_b) \
|
|
+ do { \
|
|
+ if (atomic_dec_and_test(&(_b)->refcnt)) \
|
|
+ tpmif_disconnect_complete(_b); \
|
|
+ } while (0)
|
|
+
|
|
+extern int num_frontends;
|
|
+
|
|
+/* Kernel virtual address of entry 'idx' of the interface's page vector. */
+static inline unsigned long idx_to_kaddr(tpmif_t *t, unsigned int idx)
+{
+	struct page *pg = t->mmap_pages[idx];
+
+	return (unsigned long)pfn_to_kaddr(page_to_pfn(pg));
+}
|
|
+
|
|
+#endif /* __TPM__BACKEND__COMMON_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/tpmback/interface.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,170 @@
|
|
+ /*****************************************************************************
|
|
+ * drivers/xen/tpmback/interface.c
|
|
+ *
|
|
+ * Virtual TPM interface management.
|
|
+ *
|
|
+ * Copyright (c) 2005, IBM Corporation
|
|
+ *
|
|
+ * Author: Stefan Berger, stefanb@us.ibm.com
|
|
+ *
|
|
+ * This code has been derived from drivers/xen/netback/interface.c
|
|
+ * Copyright (c) 2004, Keir Fraser
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+#include <linux/delay.h>
|
|
+#include <xen/balloon.h>
|
|
+#include <xen/gnttab.h>
|
|
+
|
|
+static kmem_cache_t *tpmif_cachep;
|
|
+int num_frontends = 0;
|
|
+
|
|
+LIST_HEAD(tpmif_list);
|
|
+
|
|
+/*
+ * Allocate and initialise a tpmif_t for (domid, bi), give it one
+ * reference and its page vector, and link it into tpmif_list.
+ * Returns the new interface or ERR_PTR(-ENOMEM).
+ */
+static tpmif_t *alloc_tpmif(domid_t domid, struct backend_info *bi)
+{
+	tpmif_t *tpmif = kmem_cache_alloc(tpmif_cachep, GFP_KERNEL);
+
+	if (tpmif == NULL)
+		goto fail;
+
+	memset(tpmif, 0, sizeof (*tpmif));
+	tpmif->domid = domid;
+	tpmif->status = DISCONNECTED;
+	tpmif->bi = bi;
+	snprintf(tpmif->devname, sizeof(tpmif->devname), "tpmif%d", domid);
+	atomic_set(&tpmif->refcnt, 1);
+
+	tpmif->mmap_pages = alloc_empty_pages_and_pagevec(TPMIF_TX_RING_SIZE);
+	if (tpmif->mmap_pages == NULL) {
+		kmem_cache_free(tpmif_cachep, tpmif);
+		goto fail;
+	}
+
+	list_add(&tpmif->tpmif_list, &tpmif_list);
+	num_frontends++;
+
+	return tpmif;
+
+ fail:
+	printk("%s: out of memory\n", __FUNCTION__);
+	return ERR_PTR(-ENOMEM);
+}
|
|
+
|
|
+/*
+ * Unlink an interface from tpmif_list and release its page vector and
+ * the object itself.
+ */
+static void free_tpmif(tpmif_t *tpmif)
+{
+	list_del(&tpmif->tpmif_list);
+	--num_frontends;
+
+	free_empty_pages_and_pagevec(tpmif->mmap_pages, TPMIF_TX_RING_SIZE);
+	kmem_cache_free(tpmif_cachep, tpmif);
+}
|
|
+
|
|
+/*
+ * Look up the interface belonging to backend 'bi'.
+ *
+ * If one exists for the same domid, take a reference and return it; if
+ * it exists for a different domid, return ERR_PTR(-EEXIST).  If none
+ * exists, allocate a new one via alloc_tpmif().
+ */
+tpmif_t *tpmif_find(domid_t domid, struct backend_info *bi)
+{
+	tpmif_t *tpmif;
+
+	list_for_each_entry(tpmif, &tpmif_list, tpmif_list) {
+		if (tpmif->bi != bi)
+			continue;
+
+		if (tpmif->domid != domid)
+			return ERR_PTR(-EEXIST);
+
+		tpmif_get(tpmif);
+		return tpmif;
+	}
+
+	return alloc_tpmif(domid, bi);
+}
|
|
+
|
|
+/*
+ * Map the frontend's shared page (grant reference shared_page) at
+ * tpmif->tx_area->addr.  On success the grant reference and handle are
+ * recorded in the interface.  Returns 0 or -EINVAL.
+ */
+static int map_frontend_page(tpmif_t *tpmif, unsigned long shared_page)
+{
+	struct gnttab_map_grant_ref map_op;
+
+	gnttab_set_map_op(&map_op, (unsigned long)tpmif->tx_area->addr,
+			  GNTMAP_host_map, shared_page, tpmif->domid);
+
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
+
+	if (map_op.status != GNTST_okay) {
+		DPRINTK(" Grant table operation failure %d!\n", (int)map_op.status);
+		return -EINVAL;
+	}
+
+	tpmif->shmem_ref = shared_page;
+	tpmif->shmem_handle = map_op.handle;
+	return 0;
+}
|
|
+
|
|
+/*
+ * Unmap the shared page mapped by map_frontend_page().  A failing
+ * hypercall is fatal.
+ */
+static void unmap_frontend_page(tpmif_t *tpmif)
+{
+	struct gnttab_unmap_grant_ref unmap_op;
+	int rc;
+
+	gnttab_set_unmap_op(&unmap_op, (unsigned long)tpmif->tx_area->addr,
+			    GNTMAP_host_map, tpmif->shmem_handle);
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1);
+	if (rc)
+		BUG();
+}
|
|
+
|
|
+/*
+ * Connect an interface to its frontend: allocate a virtual area, map the
+ * frontend's shared page into it, zero the page and bind the event
+ * channel to tpmif_be_int().
+ *
+ * Returns 0 on success, or immediately if the interface already has an
+ * IRQ.  On failure a negative error is returned and tx/tx_area are reset
+ * to NULL, so tpmif_disconnect_complete() will not unmap or free the
+ * area a second time.
+ */
+int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn)
+{
+	int err;
+
+	if (tpmif->irq)
+		return 0;
+
+	if ((tpmif->tx_area = alloc_vm_area(PAGE_SIZE)) == NULL)
+		return -ENOMEM;
+
+	err = map_frontend_page(tpmif, shared_page);
+	if (err)
+		goto err_free_area;
+
+	tpmif->tx = (tpmif_tx_interface_t *)tpmif->tx_area->addr;
+	memset(tpmif->tx, 0, PAGE_SIZE);
+
+	err = bind_interdomain_evtchn_to_irqhandler(
+		tpmif->domid, evtchn, tpmif_be_int, 0, tpmif->devname, tpmif);
+	if (err < 0)
+		goto err_unmap;
+	tpmif->irq = err;
+
+	tpmif->shmem_ref = shared_page;
+	tpmif->active = 1;
+
+	return 0;
+
+ err_unmap:
+	unmap_frontend_page(tpmif);
+	/* tx is what tpmif_disconnect_complete() tests before unmapping. */
+	tpmif->tx = NULL;
+ err_free_area:
+	free_vm_area(tpmif->tx_area);
+	tpmif->tx_area = NULL;
+	return err;
+}
|
|
+
|
|
+/*
+ * Final teardown of an interface (invoked by tpmif_put() when the last
+ * reference is dropped): unbind the IRQ if one was bound, unmap and free
+ * the shared page area if it was mapped, then free the interface.
+ */
+void tpmif_disconnect_complete(tpmif_t *tpmif)
+{
+	if (tpmif->irq != 0)
+		unbind_from_irqhandler(tpmif->irq, tpmif);
+
+	if (tpmif->tx != NULL) {
+		unmap_frontend_page(tpmif);
+		free_vm_area(tpmif->tx_area);
+	}
+
+	free_tpmif(tpmif);
+}
|
|
+
|
|
+/*
+ * Create the slab cache that alloc_tpmif() takes tpmif_t objects from.
+ * NOTE(review): the kmem_cache_create() result is stored unchecked, and
+ * the void return type cannot report a failure to the caller.
+ */
+void __init tpmif_interface_init(void)
+{
+	tpmif_cachep = kmem_cache_create("tpmif_cache",
+					 sizeof (tpmif_t),
+					 0, 0,
+					 NULL, NULL);
+}
|
|
+
|
|
+/* Destroy the slab cache created by tpmif_interface_init(). */
+void __exit
+tpmif_interface_exit(void)
+{
+	kmem_cache_destroy(tpmif_cachep);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/tpmback/tpmback.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,939 @@
|
|
+/******************************************************************************
|
|
+ * drivers/xen/tpmback/tpmback.c
|
|
+ *
|
|
+ * Copyright (c) 2005, IBM Corporation
|
|
+ *
|
|
+ * Author: Stefan Berger, stefanb@us.ibm.com
|
|
+ * Grant table support: Mahadevan Gomathisankaran
|
|
+ *
|
|
+ * This code has been derived from drivers/xen/netback/netback.c
|
|
+ * Copyright (c) 2002-2004, K A Fraser
|
|
+ *
|
|
+ */
|
|
+
|
|
+#include "common.h"
|
|
+#include <xen/evtchn.h>
|
|
+
|
|
+#include <linux/types.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/miscdevice.h>
|
|
+#include <linux/poll.h>
|
|
+#include <linux/delay.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/grant_table.h>
|
|
+#include <xen/gnttab.h>
|
|
+
|
|
+/* local data structures */
|
|
+struct data_exchange {
|
|
+ struct list_head pending_pak;
|
|
+ struct list_head current_pak;
|
|
+ unsigned int copied_so_far;
|
|
+ u8 has_opener:1;
|
|
+ u8 aborted:1;
|
|
+ rwlock_t pak_lock; // protects all of the previous fields
|
|
+ wait_queue_head_t wait_queue;
|
|
+};
|
|
+
|
|
+struct vtpm_resp_hdr {
|
|
+ uint32_t instance_no;
|
|
+ uint16_t tag_no;
|
|
+ uint32_t len_no;
|
|
+ uint32_t ordinal_no;
|
|
+} __attribute__ ((packed));
|
|
+
|
|
+struct packet {
|
|
+ struct list_head next;
|
|
+ unsigned int data_len;
|
|
+ u8 *data_buffer;
|
|
+ tpmif_t *tpmif;
|
|
+ u32 tpm_instance;
|
|
+ u8 req_tag;
|
|
+ u32 last_read;
|
|
+ u8 flags;
|
|
+ struct timer_list processing_timer;
|
|
+};
|
|
+
|
|
+enum {
|
|
+ PACKET_FLAG_DISCARD_RESPONSE = 1,
|
|
+};
|
|
+
|
|
+/* local variables */
|
|
+static struct data_exchange dataex;
|
|
+
|
|
+/* local function prototypes */
|
|
+static int _packet_write(struct packet *pak,
|
|
+ const char *data, size_t size, int userbuffer);
|
|
+static void processing_timeout(unsigned long ptr);
|
|
+static int packet_read_shmem(struct packet *pak,
|
|
+ tpmif_t * tpmif,
|
|
+ u32 offset,
|
|
+ char *buffer, int isuserbuffer, u32 left);
|
|
+static int vtpm_queue_packet(struct packet *pak);
|
|
+
|
|
+/***************************************************************
|
|
+ Buffer copying for user and kernel space buffers.
|
|
+***************************************************************/
|
|
+/*
+ * Copy 'size' bytes from 'from' to the kernel buffer 'to'.  'from' is
+ * treated as a user-space pointer when 'isuserbuffer' is non-zero and
+ * as a kernel pointer otherwise.
+ *
+ * Returns 0 on success, -EFAULT if the user-space copy fails.
+ */
+static inline int copy_from_buffer(void *to,
+				   const void *from, unsigned long size,
+				   int isuserbuffer)
+{
+	if (!isuserbuffer) {
+		memcpy(to, from, size);
+		return 0;
+	}
+
+	return copy_from_user(to, (void __user *)from, size) ? -EFAULT : 0;
+}
|
|
+
|
|
+/*
+ * Copy 'size' bytes from the kernel buffer 'from' to 'to'.  'to' is
+ * treated as a user-space pointer when 'isuserbuffer' is non-zero and
+ * as a kernel pointer otherwise.
+ *
+ * Returns 0 on success, -EFAULT if the user-space copy fails.
+ */
+static inline int copy_to_buffer(void *to,
+				 const void *from, unsigned long size,
+				 int isuserbuffer)
+{
+	if (!isuserbuffer) {
+		memcpy(to, from, size);
+		return 0;
+	}
+
+	return copy_to_user((void __user *)to, from, size) ? -EFAULT : 0;
+}
|
|
+
|
|
+
|
|
+static void dataex_init(struct data_exchange *dataex)
|
|
+{
|
|
+ INIT_LIST_HEAD(&dataex->pending_pak);
|
|
+ INIT_LIST_HEAD(&dataex->current_pak);
|
|
+ dataex->has_opener = 0;
|
|
+ rwlock_init(&dataex->pak_lock);
|
|
+ init_waitqueue_head(&dataex->wait_queue);
|
|
+}
|
|
+
|
|
+/***************************************************************
|
|
+ Packet-related functions
|
|
+***************************************************************/
|
|
+
|
|
+/*
+ * Return the first packet on 'head' whose tpm_instance equals
+ * 'tpm_instance', or NULL if the list holds no such packet.
+ */
+static struct packet *packet_find_instance(struct list_head *head,
+					   u32 tpm_instance)
+{
+	struct list_head *cursor;
+
+	list_for_each(cursor, head) {
+		struct packet *candidate;
+
+		candidate = list_entry(cursor, struct packet, next);
+		if (candidate->tpm_instance == tpm_instance)
+			return candidate;
+	}
+
+	return NULL;
+}
|
|
+
|
|
+/*
+ * Check whether 'packet' is linked on 'head'.  Only the pointer values
+ * are compared; 'packet' itself is never dereferenced.
+ *
+ * Returns the packet if it is on the list, NULL otherwise.
+ */
+static struct packet *packet_find_packet(struct list_head *head, void *packet)
+{
+	struct packet *entry;
+
+	list_for_each_entry(entry, head, next) {
+		if (entry == packet)
+			return entry;
+	}
+
+	return NULL;
+}
|
|
+
|
|
+/*
+ * Allocate (GFP_ATOMIC, zeroed) and initialise a packet of 'size' bytes.
+ *
+ * When 'tpmif' is given, the packet records it together with the
+ * instance number of its backend and takes a reference on it with
+ * tpmif_get().  The processing timer is initialised to call
+ * processing_timeout() with the packet as argument; it is not armed.
+ *
+ * Returns the new packet, or NULL if the allocation fails.
+ */
+static struct packet *packet_alloc(tpmif_t * tpmif,
+				   u32 size, u8 req_tag, u8 flags)
+{
+	struct packet *pak = kzalloc(sizeof (struct packet), GFP_ATOMIC);
+
+	if (pak == NULL)
+		return NULL;
+
+	if (tpmif) {
+		pak->tpmif = tpmif;
+		pak->tpm_instance = tpmback_get_instance(tpmif->bi);
+		tpmif_get(tpmif);
+	}
+	pak->data_len = size;
+	pak->req_tag = req_tag;
+	pak->last_read = 0;
+	pak->flags = flags;
+
+	init_timer(&pak->processing_timer);
+	pak->processing_timer.function = processing_timeout;
+	pak->processing_timer.data = (unsigned long)pak;
+
+	return pak;
+}
|
|
+
|
|
+static void inline packet_reset(struct packet *pak)
|
|
+{
|
|
+ pak->last_read = 0;
|
|
+}
|
|
+
|
|
+/*
+ * Release a packet: drop its reference on the interface (if it has
+ * one), free its data buffer and the packet itself.
+ *
+ * The processing timer must no longer be pending; freeing a packet with
+ * a pending timer is treated as a fatal bug.
+ */
+static void packet_free(struct packet *pak)
+{
+	BUG_ON(timer_pending(&pak->processing_timer));
+
+	if (pak->tpmif)
+		tpmif_put(pak->tpmif);
+
+	kfree(pak->data_buffer);
+	kfree(pak);
+}
|
|
+
|
|
+
|
|
+/*
+ * Deliver a response for 'pak' to the frontend.
+ *
+ * A packet flagged PACKET_FLAG_DISCARD_RESPONSE is only acknowledged:
+ * nothing is written and 'size' is returned.  Otherwise the data is
+ * handed to _packet_write() and its result is returned.
+ */
+static int packet_write(struct packet *pak,
+			const char *data, size_t size, int isuserbuffer)
+{
+	/* Don't send a response to this packet. Just acknowledge it. */
+	if (pak->flags & PACKET_FLAG_DISCARD_RESPONSE)
+		return size;
+
+	return _packet_write(pak, data, size, isuserbuffer);
+}
|
|
+
|
|
+/*
+ * Copy up to 'size' bytes from 'data' into the buffers the frontend has
+ * published in its tx ring, one ring slot per chunk, and then notify
+ * the frontend through the interface's irq.
+ *
+ * Each slot's grant is mapped only for the duration of the copy and is
+ * unmapped again on every path, including a failed copy.
+ *
+ * No interface reference is taken here, so none is dropped either: the
+ * packet's reference is released by packet_free().
+ *
+ * Returns the number of bytes copied; 'size' if the interface is
+ * DISCONNECTED; 0 if a slot has no buffer address or its grant cannot
+ * be mapped; -EFAULT if the packet has no interface or copying from
+ * 'data' fails.
+ */
+static int _packet_write(struct packet *pak,
+			 const char *data, size_t size, int isuserbuffer)
+{
+	tpmif_t *tpmif = pak->tpmif;
+	unsigned int i = 0;
+	unsigned int offset = 0;
+
+	if (tpmif == NULL)
+		return -EFAULT;
+
+	if (tpmif->status == DISCONNECTED)
+		return size;
+
+	while (offset < size && i < TPMIF_TX_RING_SIZE) {
+		struct gnttab_map_grant_ref map_op;
+		struct gnttab_unmap_grant_ref unmap_op;
+		tpmif_tx_request_t *tx = &tpmif->tx->ring[i].req;
+		unsigned long pg_off;
+		unsigned int tocopy;
+		int copy_failed;
+
+		if (0 == tx->addr) {
+			DPRINTK("ERROR: Buffer for outgoing packet NULL?! i=%d\n", i);
+			return 0;
+		}
+
+		gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
+				  GNTMAP_host_map, tx->ref, tpmif->domid);
+
+		gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
+
+		if (map_op.status != GNTST_okay) {
+			DPRINTK(" Grant table operation failure !\n");
+			return 0;
+		}
+
+		/*
+		 * tx->addr comes from the frontend.  Only one page is
+		 * mapped for this slot, so never copy beyond its end when
+		 * the address is not page aligned.
+		 */
+		pg_off = tx->addr & ~PAGE_MASK;
+		tocopy = min_t(size_t, size - offset, PAGE_SIZE - pg_off);
+
+		copy_failed = copy_from_buffer(
+			(void *)(idx_to_kaddr(tpmif, i) | pg_off),
+			&data[offset], tocopy, isuserbuffer);
+		if (!copy_failed)
+			tx->size = tocopy;
+
+		/* Drop the mapping whether or not the copy succeeded. */
+		gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
+				    GNTMAP_host_map, map_op.handle);
+
+		if (unlikely
+		    (HYPERVISOR_grant_table_op
+		     (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
+			BUG();
+		}
+
+		if (copy_failed)
+			return -EFAULT;
+
+		offset += tocopy;
+		i++;
+	}
+
+	DPRINTK("Notifying frontend via irq %d\n", tpmif->irq);
+	notify_remote_via_irq(tpmif->irq);
+
+	return offset;
+}
|
|
+
|
|
+/*
|
|
+ * Read data from the shared memory and copy it directly into the
|
|
+ * provided buffer. Advance the packet's last_read indicator which tells
|
|
+ * how many bytes have already been read.
|
|
+ */
|
|
+static int packet_read(struct packet *pak, size_t numbytes,
|
|
+ char *buffer, size_t buffersize, int isuserbuffer)
|
|
+{
|
|
+ tpmif_t *tpmif = pak->tpmif;
|
|
+
|
|
+ /*
|
|
+ * Read 'numbytes' of data from the buffer. The first 4
|
|
+ * bytes are the instance number in network byte order,
|
|
+ * after that come the data from the shared memory buffer.
|
|
+ */
|
|
+ u32 to_copy;
|
|
+ u32 offset = 0;
|
|
+ u32 room_left = buffersize;
|
|
+
|
|
+ if (pak->last_read < 4) {
|
|
+ /*
|
|
+ * copy the instance number into the buffer
|
|
+ */
|
|
+ u32 instance_no = htonl(pak->tpm_instance);
|
|
+ u32 last_read = pak->last_read;
|
|
+
|
|
+ to_copy = min_t(size_t, 4 - last_read, numbytes);
|
|
+
|
|
+ if (copy_to_buffer(&buffer[0],
|
|
+ &(((u8 *) & instance_no)[last_read]),
|
|
+ to_copy, isuserbuffer)) {
|
|
+ return -EFAULT;
|
|
+ }
|
|
+
|
|
+ pak->last_read += to_copy;
|
|
+ offset += to_copy;
|
|
+ room_left -= to_copy;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * If the packet has a data buffer appended, read from it...
|
|
+ */
|
|
+
|
|
+ if (room_left > 0) {
|
|
+ if (pak->data_buffer) {
|
|
+ u32 to_copy = min_t(u32, pak->data_len - offset, room_left);
|
|
+ u32 last_read = pak->last_read - 4;
|
|
+
|
|
+ if (copy_to_buffer(&buffer[offset],
|
|
+ &pak->data_buffer[last_read],
|
|
+ to_copy, isuserbuffer)) {
|
|
+ return -EFAULT;
|
|
+ }
|
|
+ pak->last_read += to_copy;
|
|
+ offset += to_copy;
|
|
+ } else {
|
|
+ offset = packet_read_shmem(pak,
|
|
+ tpmif,
|
|
+ offset,
|
|
+ buffer,
|
|
+ isuserbuffer, room_left);
|
|
+ }
|
|
+ }
|
|
+ return offset;
|
|
+}
|
|
+
|
|
+/*
+ * Copy packet payload out of the frontend's granted tx buffers into
+ * 'buffer', starting at buffer[offset] and copying at most 'room_left'
+ * bytes.
+ *
+ * The read position is pak->last_read minus the 4 bytes of instance
+ * number that precede the payload; it selects the ring slot (one page
+ * per slot) and the offset inside that slot.  Each slot's grant is
+ * mapped only while it is being copied and is unmapped again on every
+ * path, including a failed copy.
+ *
+ * Copying stops when 'room_left' is used up, when the end of the ring
+ * is reached, or when the current slot has no unread bytes left.  The
+ * amount taken from a slot is limited to the part of tx->size that has
+ * not been read yet and to the end of the single page mapped for it
+ * (tx->addr and tx->size are supplied by the frontend).
+ *
+ * Returns the new offset into 'buffer' and advances pak->last_read, or
+ * -EFAULT if a grant cannot be mapped or the copy fails.
+ */
+static int packet_read_shmem(struct packet *pak,
+			     tpmif_t * tpmif,
+			     u32 offset, char *buffer, int isuserbuffer,
+			     u32 room_left)
+{
+	u32 last_read = pak->last_read - 4;
+	u32 i = (last_read / PAGE_SIZE);
+	u32 pg_offset = last_read & (PAGE_SIZE - 1);
+	u32 to_copy;
+
+	to_copy = min_t(u32, PAGE_SIZE - pg_offset, room_left);
+	while (to_copy > 0 && i < TPMIF_TX_RING_SIZE) {
+		struct gnttab_map_grant_ref map_op;
+		struct gnttab_unmap_grant_ref unmap_op;
+		tpmif_tx_request_t *tx = &tpmif->tx->ring[i].req;
+		unsigned long src_off;
+		u32 avail;
+		int copy_failed = 0;
+
+		gnttab_set_map_op(&map_op, idx_to_kaddr(tpmif, i),
+				  GNTMAP_host_map, tx->ref, tpmif->domid);
+
+		gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
+
+		if (map_op.status != GNTST_okay) {
+			DPRINTK(" Grant table operation failure !\n");
+			return -EFAULT;
+		}
+
+		/*
+		 * Unread bytes of this slot, additionally limited so the
+		 * copy never leaves the one page mapped for it.
+		 */
+		src_off = (tx->addr & ~PAGE_MASK) + pg_offset;
+		avail = (tx->size > pg_offset) ? tx->size - pg_offset : 0;
+		if (src_off >= PAGE_SIZE)
+			avail = 0;
+		else
+			avail = min_t(u32, avail, PAGE_SIZE - src_off);
+		to_copy = min_t(u32, to_copy, avail);
+
+		if (to_copy > 0) {
+			void *src = (void *)(idx_to_kaddr(tpmif, i) | src_off);
+
+			DPRINTK("Copying from mapped memory at %08lx\n",
+				(unsigned long)(idx_to_kaddr(tpmif, i) |
+						(tx->addr & ~PAGE_MASK)));
+
+			copy_failed = copy_to_buffer(&buffer[offset],
+						     src, to_copy,
+						     isuserbuffer);
+			if (!copy_failed) {
+				DPRINTK("Data from TPM-FE of domain %d are %d %d %d %d\n",
+					tpmif->domid, buffer[offset], buffer[offset + 1],
+					buffer[offset + 2], buffer[offset + 3]);
+			}
+		}
+
+		/* Drop the mapping whether or not the copy succeeded. */
+		gnttab_set_unmap_op(&unmap_op, idx_to_kaddr(tpmif, i),
+				    GNTMAP_host_map, map_op.handle);
+
+		if (unlikely
+		    (HYPERVISOR_grant_table_op
+		     (GNTTABOP_unmap_grant_ref, &unmap_op, 1))) {
+			BUG();
+		}
+
+		if (copy_failed)
+			return -EFAULT;
+
+		/* Nothing left in this slot: the packet ends here. */
+		if (to_copy == 0)
+			break;
+
+		offset += to_copy;
+		pg_offset = 0;
+		last_read += to_copy;
+		room_left -= to_copy;
+
+		to_copy = min_t(u32, PAGE_SIZE, room_left);
+		i++;
+	}
+
+	/* Account for the instance number again. */
+	pak->last_read = last_read + 4;
+	return offset;
+}
|
|
+
|
|
+/* ============================================================
|
|
+ * The file layer for reading data from this device
|
|
+ * ============================================================
|
|
+ */
|
|
+/*
+ * open() handler.  The device has a single-opener policy: the first
+ * open marks dataex.has_opener, any further open while that flag is
+ * set fails with -EPERM.
+ */
+static int vtpm_op_open(struct inode *inode, struct file *f)
+{
+	unsigned long irq_state;
+	int busy;
+
+	write_lock_irqsave(&dataex.pak_lock, irq_state);
+	busy = (dataex.has_opener != 0);
+	if (!busy)
+		dataex.has_opener = 1;
+	write_unlock_irqrestore(&dataex.pak_lock, irq_state);
+
+	return busy ? -EPERM : 0;
+}
|
|
+
|
|
+/*
+ * read() handler: hand the first pending packet (4 bytes of instance
+ * number followed by the request data) to the reader.
+ *
+ * If no packet is pending the caller sleeps until one is queued.  A
+ * packet that has not been handed over completely is put back at the
+ * head of the pending list; a completely read packet moves to the
+ * current list and its processing timer is re-armed with the time the
+ * TPM gets to answer.
+ *
+ * Returns the number of bytes copied, -EIO once after pending packets
+ * were released underneath the reader, -ERESTARTSYS if the wait was
+ * interrupted by a signal, -ENODATA if there is still nothing to read
+ * after waking up, or the error from copying the data.
+ *
+ * Locking: processing_timeout() takes dataex.pak_lock, so the timer
+ * must never be deleted synchronously while that lock is held - the
+ * delete would wait for a handler that is itself waiting for the lock.
+ */
+static ssize_t vtpm_op_read(struct file *file,
+			    char __user * data, size_t size, loff_t * offset)
+{
+	int ret_size = -ENODATA;
+	struct packet *pak = NULL;
+	unsigned long flags;
+	unsigned int left;
+
+	write_lock_irqsave(&dataex.pak_lock, flags);
+	if (dataex.aborted) {
+		dataex.aborted = 0;
+		dataex.copied_so_far = 0;
+		write_unlock_irqrestore(&dataex.pak_lock, flags);
+		return -EIO;
+	}
+
+	if (list_empty(&dataex.pending_pak)) {
+		write_unlock_irqrestore(&dataex.pak_lock, flags);
+		if (wait_event_interruptible(dataex.wait_queue,
+					     !list_empty(&dataex.pending_pak)))
+			return -ERESTARTSYS;
+		write_lock_irqsave(&dataex.pak_lock, flags);
+		dataex.copied_so_far = 0;
+	}
+
+	if (list_empty(&dataex.pending_pak)) {
+		/* The packet that woke us was released again. */
+		write_unlock_irqrestore(&dataex.pak_lock, flags);
+		DPRINTK("Returning result from read to app: %d\n", ret_size);
+		return ret_size;
+	}
+
+	pak = list_entry(dataex.pending_pak.next, struct packet, next);
+	left = pak->data_len - dataex.copied_so_far;
+	list_del(&pak->next);
+	write_unlock_irqrestore(&dataex.pak_lock, flags);
+
+	DPRINTK("size given by app: %zu, available: %u\n", size, left);
+
+	ret_size = min_t(size_t, size, left);
+
+	ret_size = packet_read(pak, ret_size, data, size, 1);
+
+	if (ret_size < 0) {
+		/*
+		 * The packet is on no list, so nobody else can reach it
+		 * and a late timer handler will not touch it.  Wait for
+		 * the timer without holding the lock, then free it.
+		 */
+		del_singleshot_timer_sync(&pak->processing_timer);
+		packet_free(pak);
+
+		write_lock_irqsave(&dataex.pak_lock, flags);
+		dataex.copied_so_far = 0;
+		write_unlock_irqrestore(&dataex.pak_lock, flags);
+
+		DPRINTK("Returning result from read to app: %d\n", ret_size);
+		return ret_size;
+	}
+
+	write_lock_irqsave(&dataex.pak_lock, flags);
+
+	DPRINTK("Copied %d bytes to user buffer\n", ret_size);
+
+	dataex.copied_so_far += ret_size;
+	if (dataex.copied_so_far >= pak->data_len + 4) {
+		DPRINTK("All data from this packet given to app.\n");
+		/* All data given to app */
+
+		list_add_tail(&pak->next, &dataex.current_pak);
+		/*
+		 * The more frontends that are handled at the same time,
+		 * the more time we give the TPM to process the request.
+		 * mod_timer() re-arms a timer that is still pending, so
+		 * no separate delete is needed.
+		 */
+		mod_timer(&pak->processing_timer,
+			  jiffies + (num_frontends * 60 * HZ));
+		dataex.copied_so_far = 0;
+	} else {
+		list_add(&pak->next, &dataex.pending_pak);
+	}
+	write_unlock_irqrestore(&dataex.pak_lock, flags);
+
+	DPRINTK("Returning result from read to app: %d\n", ret_size);
+
+	return ret_size;
+}
|
|
+
|
|
+/*
+ * write() handler - only works after a previous read operation!
+ *
+ * The written data is a response: 4 bytes of instance number followed
+ * by the TPM response.  The instance number selects the packet on the
+ * current list that is being answered; the remainder is forwarded to
+ * the frontend with packet_write() and the packet is freed.
+ *
+ * Returns the number of bytes consumed (including the instance number)
+ * or -EFAULT if the data is too short, cannot be read, has an
+ * inconsistent length field or matches no outstanding packet.
+ *
+ * Locking: processing_timeout() takes dataex.pak_lock, so the timer is
+ * deleted synchronously only after the packet has been unlinked and the
+ * lock has been dropped.  Once unlinked, a late timer handler no longer
+ * finds the packet and leaves it alone.
+ */
+static ssize_t vtpm_op_write(struct file *file,
+			     const char __user * data, size_t size,
+			     loff_t * offset)
+{
+	struct packet *pak;
+	int rc = 0;
+	unsigned int off = 4;
+	unsigned long flags;
+	struct vtpm_resp_hdr vrh;
+
+	/*
+	 * Minimum required packet size is:
+	 * 4 bytes for instance number
+	 * 2 bytes for tag
+	 * 4 bytes for paramSize
+	 * 4 bytes for the ordinal
+	 * sum: 14 bytes
+	 */
+	if (size < sizeof (vrh))
+		return -EFAULT;
+
+	if (copy_from_user(&vrh, data, sizeof (vrh)))
+		return -EFAULT;
+
+	/* malformed packet? */
+	if ((off + ntohl(vrh.len_no)) != size)
+		return -EFAULT;
+
+	write_lock_irqsave(&dataex.pak_lock, flags);
+	pak = packet_find_instance(&dataex.current_pak,
+				   ntohl(vrh.instance_no));
+
+	if (pak == NULL) {
+		write_unlock_irqrestore(&dataex.pak_lock, flags);
+		DPRINTK(KERN_ALERT "No associated packet! (inst=%d)\n",
+			ntohl(vrh.instance_no));
+		return -EFAULT;
+	}
+
+	list_del(&pak->next);
+
+	write_unlock_irqrestore(&dataex.pak_lock, flags);
+
+	/* The packet is private now; wait for its timer without the lock. */
+	del_singleshot_timer_sync(&pak->processing_timer);
+
+	/*
+	 * The first 'off' bytes must be the instance number - skip them.
+	 */
+	size -= off;
+
+	rc = packet_write(pak, &data[off], size, 1);
+
+	if (rc > 0) {
+		/* Account for the skipped instance number. */
+		rc += off;
+	}
+	packet_free(pak);
+	return rc;
+}
|
|
+
|
|
+static int vtpm_op_release(struct inode *inode, struct file *file)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ vtpm_release_packets(NULL, 1);
|
|
+ write_lock_irqsave(&dataex.pak_lock, flags);
|
|
+ dataex.has_opener = 0;
|
|
+ write_unlock_irqrestore(&dataex.pak_lock, flags);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static unsigned int vtpm_op_poll(struct file *file,
|
|
+ struct poll_table_struct *pts)
|
|
+{
|
|
+ unsigned int flags = POLLOUT | POLLWRNORM;
|
|
+
|
|
+ poll_wait(file, &dataex.wait_queue, pts);
|
|
+ if (!list_empty(&dataex.pending_pak)) {
|
|
+ flags |= POLLIN | POLLRDNORM;
|
|
+ }
|
|
+ return flags;
|
|
+}
|
|
+
|
|
+static const struct file_operations vtpm_ops = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .llseek = no_llseek,
|
|
+ .open = vtpm_op_open,
|
|
+ .read = vtpm_op_read,
|
|
+ .write = vtpm_op_write,
|
|
+ .release = vtpm_op_release,
|
|
+ .poll = vtpm_op_poll,
|
|
+};
|
|
+
|
|
+static struct miscdevice vtpms_miscdevice = {
|
|
+ .minor = 225,
|
|
+ .name = "vtpm",
|
|
+ .fops = &vtpm_ops,
|
|
+};
|
|
+
|
|
+/***************************************************************
|
|
+ Utility functions
|
|
+***************************************************************/
|
|
+
|
|
+/*
+ * Answer 'pak' with a fixed 10-byte failure response carrying the
+ * TPM_FAIL code.  The second byte of the response is set to
+ * 'req_tag' + 3, since all response tags are '+3' to the request tag.
+ *
+ * Returns the result of packet_write().
+ */
+static int tpm_send_fail_message(struct packet *pak, u8 req_tag)
+{
+	unsigned char reply[] = {
+		0x00, 0x00,
+		0x00, 0x00, 0x00, 0x0a,
+		0x00, 0x00, 0x00, 0x09	/* TPM_FAIL */
+	};
+
+	reply[1] = req_tag + 3;
+
+	/* Write the data to shared memory and notify the front-end. */
+	return packet_write(pak, reply, sizeof (reply), 0);
+}
|
|
+
|
|
+/*
+ * Move every packet on 'head' that belongs to 'tpmif' (every packet if
+ * 'tpmif' is NULL) to the tail of 'reap'.  The caller must hold
+ * dataex.pak_lock.
+ *
+ * Returns 1 if the first packet on 'head' was among the moved ones,
+ * 0 otherwise.
+ */
+static int _vtpm_release_packets(struct list_head *head,
+				 tpmif_t * tpmif, struct list_head *reap)
+{
+	int aborted = 0;
+	int c = 0;
+	struct list_head *pos, *tmp;
+
+	list_for_each_safe(pos, tmp, head) {
+		struct packet *pak = list_entry(pos, struct packet, next);
+
+		c += 1;
+
+		if (tpmif == NULL || pak->tpmif == tpmif) {
+			list_move_tail(&pak->next, reap);
+			if (c == 1)
+				aborted = 1;
+		}
+	}
+	return aborted;
+}
+
+/*
+ * Release all pending and current packets of 'tpmif' (of every
+ * interface if 'tpmif' is NULL).  With 'send_msgs' set, a failure
+ * response is sent for each released packet whose interface is still
+ * CONNECTED.  dataex.aborted records whether the packet at the head of
+ * the pending list was released.
+ *
+ * The packets are only unlinked under dataex.pak_lock.  Deleting their
+ * timers synchronously and sending the failure responses happens after
+ * the lock has been dropped: processing_timeout() takes the same lock,
+ * so waiting for it with the lock held could never finish, and a packet
+ * that is on neither list is ignored by a late timer handler.
+ *
+ * Always returns 0.
+ */
+int vtpm_release_packets(tpmif_t * tpmif, int send_msgs)
+{
+	unsigned long flags;
+	struct list_head *pos, *tmp;
+	LIST_HEAD(reap);
+
+	write_lock_irqsave(&dataex.pak_lock, flags);
+
+	dataex.aborted = _vtpm_release_packets(&dataex.pending_pak,
+					       tpmif, &reap);
+	_vtpm_release_packets(&dataex.current_pak, tpmif, &reap);
+
+	write_unlock_irqrestore(&dataex.pak_lock, flags);
+
+	list_for_each_safe(pos, tmp, &reap) {
+		struct packet *pak = list_entry(pos, struct packet, next);
+
+		del_singleshot_timer_sync(&pak->processing_timer);
+		list_del(&pak->next);
+
+		if (send_msgs &&
+		    pak->tpmif && pak->tpmif->status == CONNECTED)
+			tpm_send_fail_message(pak, pak->req_tag);
+
+		packet_free(pak);
+	}
+	return 0;
+}
|
|
+
|
|
+/*
+ * Append 'pak' to the pending list, arm its processing timer with
+ * 30 seconds for the request to be picked up, and wake up a sleeping
+ * reader.
+ *
+ * Returns 0 on success, -EFAULT if nobody has the device open.
+ */
+static int vtpm_queue_packet(struct packet *pak)
+{
+	unsigned long flags;
+
+	if (!dataex.has_opener)
+		return -EFAULT;
+
+	write_lock_irqsave(&dataex.pak_lock, flags);
+	list_add_tail(&pak->next, &dataex.pending_pak);
+	/* give the TPM some time to pick up the request */
+	mod_timer(&pak->processing_timer, jiffies + (30 * HZ));
+	write_unlock_irqrestore(&dataex.pak_lock, flags);
+
+	wake_up_interruptible(&dataex.wait_queue);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Take over a request of 'size' bytes that a frontend has placed in its
+ * tx buffers: wrap it in a packet, validate its header and queue it for
+ * the reader of the vtpm device.
+ *
+ * The first 10 bytes are read for validation: 4 bytes of instance
+ * number added by packet_read(), then the 2-byte tag and the 4-byte
+ * size indicator of the request.  The low byte of the tag is stored in
+ * the packet so that a failure response generated later (timeout,
+ * release) carries the matching response tag.
+ *
+ * On any failure a failure response is sent to the frontend and the
+ * packet is freed.
+ *
+ * Returns 0 if the packet was queued, -ENOMEM if no packet could be
+ * allocated, the read error (or -EIO for a short read) if the header
+ * could not be read, -EINVAL if the request is malformed, nobody has
+ * the device open or the interface is not CONNECTED, or the error from
+ * vtpm_queue_packet().
+ */
+static int vtpm_receive(tpmif_t * tpmif, u32 size)
+{
+	int rc;
+	/* Zeroed so the failure path never reads indeterminate bytes. */
+	unsigned char buffer[10] = { 0 };
+	__be32 native_size;
+	struct packet *pak = packet_alloc(tpmif, size, 0, 0);
+
+	if (!pak)
+		return -ENOMEM;
+	/*
+	 * Read 10 bytes from the received buffer to test its
+	 * content for validity.
+	 */
+	rc = packet_read(pak, sizeof (buffer), buffer, sizeof (buffer), 0);
+	if (rc != (int)sizeof (buffer)) {
+		if (rc >= 0)
+			rc = -EIO;
+		goto failexit;
+	}
+	/*
+	 * Reset the packet read pointer so we can read all its
+	 * contents again.
+	 */
+	packet_reset(pak);
+
+	pak->req_tag = buffer[4 + 1];
+
+	/* The size field is not naturally aligned inside the buffer. */
+	memcpy(&native_size, &buffer[4 + 2], sizeof (native_size));
+	/*
+	 * Verify that the size of the packet is correct
+	 * as indicated and that there's actually someone reading packets.
+	 * The minimum size of the packet is '10' for tag, size indicator
+	 * and ordinal.
+	 */
+	if (size < 10 ||
+	    be32_to_cpu(native_size) != size ||
+	    0 == dataex.has_opener || tpmif->status != CONNECTED) {
+		rc = -EINVAL;
+		goto failexit;
+	}
+
+	rc = vtpm_queue_packet(pak);
+	if (rc < 0)
+		goto failexit;
+
+	return 0;
+
+ failexit:
+	tpm_send_fail_message(pak, buffer[4 + 1]);
+	packet_free(pak);
+	return rc;
+}
|
|
+
|
|
+/*
+ * Timer callback for a packet that was not processed within its timeout
+ * period; 'ptr' is the packet.
+ *
+ * The packet is only touched if it is still found on the pending or the
+ * current list.  In that case a failure response is sent (unless
+ * responses are already being discarded) and the packet is flagged so
+ * that any later response for it is discarded.  This also means that
+ * once a packet is taken off its list, its timer must be stopped
+ * before the packet is freed.
+ */
+static void processing_timeout(unsigned long ptr)
+{
+	struct packet *pak = (struct packet *)ptr;
+	unsigned long flags;
+	int queued;
+
+	write_lock_irqsave(&dataex.pak_lock, flags);
+
+	/* Search by pointer value; pak is not dereferenced unless found. */
+	queued = packet_find_packet(&dataex.pending_pak, pak) == pak ||
+		 packet_find_packet(&dataex.current_pak, pak) == pak;
+	if (queued) {
+		if (!(pak->flags & PACKET_FLAG_DISCARD_RESPONSE))
+			tpm_send_fail_message(pak, pak->req_tag);
+
+		/* discard future responses */
+		pak->flags |= PACKET_FLAG_DISCARD_RESPONSE;
+	}
+
+	write_unlock_irqrestore(&dataex.pak_lock, flags);
+}
|
|
+
|
|
+static void tpm_tx_action(unsigned long unused);
|
|
+static DECLARE_TASKLET(tpm_tx_tasklet, tpm_tx_action, 0);
|
|
+
|
|
+static struct list_head tpm_schedule_list;
|
|
+static spinlock_t tpm_schedule_list_lock;
|
|
+
|
|
+static inline void maybe_schedule_tx_action(void)
|
|
+{
|
|
+ smp_mb();
|
|
+ tasklet_schedule(&tpm_tx_tasklet);
|
|
+}
|
|
+
|
|
+static inline int __on_tpm_schedule_list(tpmif_t * tpmif)
|
|
+{
|
|
+ return tpmif->list.next != NULL;
|
|
+}
|
|
+
|
|
+static void remove_from_tpm_schedule_list(tpmif_t * tpmif)
|
|
+{
|
|
+ spin_lock_irq(&tpm_schedule_list_lock);
|
|
+ if (likely(__on_tpm_schedule_list(tpmif))) {
|
|
+ list_del(&tpmif->list);
|
|
+ tpmif->list.next = NULL;
|
|
+ tpmif_put(tpmif);
|
|
+ }
|
|
+ spin_unlock_irq(&tpm_schedule_list_lock);
|
|
+}
|
|
+
|
|
+/*
+ * Put 'tpmif' at the tail of the schedule list unless it is already on
+ * it or the interface is not active.  The list holds a reference on
+ * every interface it contains.
+ *
+ * This is called from the interrupt handler tpmif_be_int() as well as
+ * from tpmif_schedule_work(), so the interrupt state is saved and
+ * restored instead of being enabled unconditionally on unlock.
+ */
+static void add_to_tpm_schedule_list_tail(tpmif_t * tpmif)
+{
+	unsigned long flags;
+
+	/* Cheap unlocked check first; repeated under the lock below. */
+	if (__on_tpm_schedule_list(tpmif))
+		return;
+
+	spin_lock_irqsave(&tpm_schedule_list_lock, flags);
+	if (!__on_tpm_schedule_list(tpmif) && tpmif->active) {
+		list_add_tail(&tpmif->list, &tpm_schedule_list);
+		tpmif_get(tpmif);
+	}
+	spin_unlock_irqrestore(&tpm_schedule_list_lock, flags);
+}
|
|
+
|
|
+void tpmif_schedule_work(tpmif_t * tpmif)
|
|
+{
|
|
+ add_to_tpm_schedule_list_tail(tpmif);
|
|
+ maybe_schedule_tx_action();
|
|
+}
|
|
+
|
|
+void tpmif_deschedule_work(tpmif_t * tpmif)
|
|
+{
|
|
+ remove_from_tpm_schedule_list(tpmif);
|
|
+}
|
|
+
|
|
+static void tpm_tx_action(unsigned long unused)
|
|
+{
|
|
+ struct list_head *ent;
|
|
+ tpmif_t *tpmif;
|
|
+ tpmif_tx_request_t *tx;
|
|
+
|
|
+ DPRINTK("%s: Getting data from front-end(s)!\n", __FUNCTION__);
|
|
+
|
|
+ while (!list_empty(&tpm_schedule_list)) {
|
|
+ /* Get a tpmif from the list with work to do. */
|
|
+ ent = tpm_schedule_list.next;
|
|
+ tpmif = list_entry(ent, tpmif_t, list);
|
|
+ tpmif_get(tpmif);
|
|
+ remove_from_tpm_schedule_list(tpmif);
|
|
+
|
|
+ tx = &tpmif->tx->ring[0].req;
|
|
+
|
|
+ /* pass it up */
|
|
+ vtpm_receive(tpmif, tx->size);
|
|
+
|
|
+ tpmif_put(tpmif);
|
|
+ }
|
|
+}
|
|
+
|
|
+irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+{
|
|
+ tpmif_t *tpmif = (tpmif_t *) dev_id;
|
|
+
|
|
+ add_to_tpm_schedule_list_tail(tpmif);
|
|
+ maybe_schedule_tx_action();
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+/*
+ * Module initialisation: set up the packet exchange state and the
+ * schedule list, register the vtpm misc device, then initialise the
+ * interface cache and register the xenbus backend.
+ *
+ * The data structures are initialised before misc_register() because
+ * the device can be opened as soon as it is registered, and its file
+ * operations use dataex's lock, lists and wait queue.
+ *
+ * Returns 0 on success or the error from misc_register().
+ */
+static int __init tpmback_init(void)
+{
+	int rc;
+
+	dataex_init(&dataex);
+
+	spin_lock_init(&tpm_schedule_list_lock);
+	INIT_LIST_HEAD(&tpm_schedule_list);
+
+	if ((rc = misc_register(&vtpms_miscdevice)) != 0) {
+		printk(KERN_ALERT
+		       "Could not register misc device for TPM BE.\n");
+		return rc;
+	}
+
+	tpmif_interface_init();
+	tpmif_xenbus_init();
+
+	printk(KERN_ALERT "Successfully initialized TPM backend driver.\n");
+
+	return 0;
+}
|
|
+
|
|
+module_init(tpmback_init);
|
|
+
|
|
+void __exit tpmback_exit(void)
|
|
+{
|
|
+ vtpm_release_packets(NULL, 0);
|
|
+ tpmif_xenbus_exit();
|
|
+ tpmif_interface_exit();
|
|
+ misc_deregister(&vtpms_miscdevice);
|
|
+}
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/tpmback/xenbus.c 2008-03-06 08:54:32.000000000 +0100
|
|
@@ -0,0 +1,289 @@
|
|
+/* Xenbus code for tpmif backend
|
|
+ Copyright (C) 2005 IBM Corporation
|
|
+ Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
|
|
+
|
|
+ This program is free software; you can redistribute it and/or modify
|
|
+ it under the terms of the GNU General Public License as published by
|
|
+ the Free Software Foundation; either version 2 of the License, or
|
|
+ (at your option) any later version.
|
|
+
|
|
+ This program is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ GNU General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU General Public License
|
|
+ along with this program; if not, write to the Free Software
|
|
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
+*/
|
|
+#include <stdarg.h>
|
|
+#include <linux/module.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include "common.h"
|
|
+
|
|
+struct backend_info
|
|
+{
|
|
+ struct xenbus_device *dev;
|
|
+
|
|
+ /* our communications channel */
|
|
+ tpmif_t *tpmif;
|
|
+
|
|
+ long int frontend_id;
|
|
+ long int instance; // instance of TPM
|
|
+ u8 is_instance_set;// whether instance number has been set
|
|
+
|
|
+ /* watch front end for changes */
|
|
+ struct xenbus_watch backend_watch;
|
|
+};
|
|
+
|
|
+static void maybe_connect(struct backend_info *be);
|
|
+static void connect(struct backend_info *be);
|
|
+static int connect_ring(struct backend_info *be);
|
|
+static void backend_changed(struct xenbus_watch *watch,
|
|
+ const char **vec, unsigned int len);
|
|
+static void frontend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state frontend_state);
|
|
+
|
|
+/*
+ * Return the TPM instance number of 'bi', or -1 if 'bi' is NULL or its
+ * instance number has not been set yet.
+ */
+long int tpmback_get_instance(struct backend_info *bi)
+{
+	if (!bi || !bi->is_instance_set)
+		return -1;
+
+	return bi->instance;
+}
|
|
+
|
|
+static int tpmback_remove(struct xenbus_device *dev)
|
|
+{
|
|
+ struct backend_info *be = dev->dev.driver_data;
|
|
+
|
|
+ if (!be) return 0;
|
|
+
|
|
+ if (be->backend_watch.node) {
|
|
+ unregister_xenbus_watch(&be->backend_watch);
|
|
+ kfree(be->backend_watch.node);
|
|
+ be->backend_watch.node = NULL;
|
|
+ }
|
|
+ if (be->tpmif) {
|
|
+ be->tpmif->bi = NULL;
|
|
+ vtpm_release_packets(be->tpmif, 0);
|
|
+ tpmif_put(be->tpmif);
|
|
+ be->tpmif = NULL;
|
|
+ }
|
|
+ kfree(be);
|
|
+ dev->dev.driver_data = NULL;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int tpmback_probe(struct xenbus_device *dev,
|
|
+ const struct xenbus_device_id *id)
|
|
+{
|
|
+ int err;
|
|
+ struct backend_info *be = kzalloc(sizeof(struct backend_info),
|
|
+ GFP_KERNEL);
|
|
+
|
|
+ if (!be) {
|
|
+ xenbus_dev_fatal(dev, -ENOMEM,
|
|
+ "allocating backend structure");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ be->is_instance_set = 0;
|
|
+ be->dev = dev;
|
|
+ dev->dev.driver_data = be;
|
|
+
|
|
+ err = xenbus_watch_path2(dev, dev->nodename,
|
|
+ "instance", &be->backend_watch,
|
|
+ backend_changed);
|
|
+ if (err) {
|
|
+ goto fail;
|
|
+ }
|
|
+
|
|
+ err = xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+ if (err) {
|
|
+ goto fail;
|
|
+ }
|
|
+ return 0;
|
|
+fail:
|
|
+ tpmback_remove(dev);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+
|
|
+/*
+ * Watch callback for the backend's "instance" node.
+ *
+ * Reads the instance number from xenstore.  A node that does not exist
+ * is ignored; any other read failure is reported as fatal.  The value
+ * is stored only the first time - once an instance number has been
+ * set, later changes are ignored.
+ */
+static void backend_changed(struct xenbus_watch *watch,
+			    const char **vec, unsigned int len)
+{
+	struct backend_info *be =
+		container_of(watch, struct backend_info, backend_watch);
+	struct xenbus_device *dev = be->dev;
+	long instance;
+	int err;
+
+	err = xenbus_scanf(XBT_NIL, dev->nodename,
+			   "instance","%li", &instance);
+	if (XENBUS_EXIST_ERR(err))
+		return;
+
+	if (err != 1) {
+		xenbus_dev_fatal(dev, err, "reading instance");
+		return;
+	}
+
+	if (be->is_instance_set)
+		return;
+
+	be->instance = instance;
+	be->is_instance_set = 1;
+}
|
|
+
|
|
+
|
|
+static void frontend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state frontend_state)
|
|
+{
|
|
+ struct backend_info *be = dev->dev.driver_data;
|
|
+ int err;
|
|
+
|
|
+ switch (frontend_state) {
|
|
+ case XenbusStateInitialising:
|
|
+ case XenbusStateInitialised:
|
|
+ break;
|
|
+
|
|
+ case XenbusStateConnected:
|
|
+ err = connect_ring(be);
|
|
+ if (err) {
|
|
+ return;
|
|
+ }
|
|
+ maybe_connect(be);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ be->instance = -1;
|
|
+ xenbus_switch_state(dev, XenbusStateClosing);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateUnknown: /* keep it here */
|
|
+ case XenbusStateClosed:
|
|
+ xenbus_switch_state(dev, XenbusStateClosed);
|
|
+ device_unregister(&be->dev->dev);
|
|
+ tpmback_remove(dev);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ xenbus_dev_fatal(dev, -EINVAL,
|
|
+ "saw state %d at frontend",
|
|
+ frontend_state);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+
|
|
+
|
|
+static void maybe_connect(struct backend_info *be)
|
|
+{
|
|
+ if (be->tpmif == NULL || be->tpmif->status == CONNECTED)
|
|
+ return;
|
|
+
|
|
+ connect(be);
|
|
+}
|
|
+
|
|
+
|
|
+/*
+ * Announce the backend as ready: write "ready" = 1 to the backend's
+ * xenstore node inside a transaction (retried on -EAGAIN), switch the
+ * device to XenbusStateConnected and, if that succeeds, mark the
+ * interface CONNECTED.
+ *
+ * Any xenstore failure is reported with xenbus_dev_fatal() and ends the
+ * attempt; in particular the device is not switched to Connected when
+ * the transaction could not be committed.
+ */
+static void connect(struct backend_info *be)
+{
+	struct xenbus_transaction xbt;
+	int err;
+	struct xenbus_device *dev = be->dev;
+	unsigned long ready = 1;
+
+again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(be->dev, err, "starting transaction");
+		return;
+	}
+
+	err = xenbus_printf(xbt, be->dev->nodename,
+			    "ready", "%lu", ready);
+	if (err) {
+		xenbus_dev_fatal(be->dev, err, "writing 'ready'");
+		goto abort;
+	}
+
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+	if (err) {
+		xenbus_dev_fatal(be->dev, err, "end of transaction");
+		return;
+	}
+
+	err = xenbus_switch_state(dev, XenbusStateConnected);
+	if (!err)
+		be->tpmif->status = CONNECTED;
+	return;
+abort:
+	xenbus_transaction_end(xbt, 1);
+}
|
|
+
|
|
+
|
|
+static int connect_ring(struct backend_info *be)
|
|
+{
|
|
+ struct xenbus_device *dev = be->dev;
|
|
+ unsigned long ring_ref;
|
|
+ unsigned int evtchn;
|
|
+ int err;
|
|
+
|
|
+ err = xenbus_gather(XBT_NIL, dev->otherend,
|
|
+ "ring-ref", "%lu", &ring_ref,
|
|
+ "event-channel", "%u", &evtchn, NULL);
|
|
+ if (err) {
|
|
+ xenbus_dev_error(dev, err,
|
|
+ "reading %s/ring-ref and event-channel",
|
|
+ dev->otherend);
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ if (!be->tpmif) {
|
|
+ be->tpmif = tpmif_find(dev->otherend_id, be);
|
|
+ if (IS_ERR(be->tpmif)) {
|
|
+ err = PTR_ERR(be->tpmif);
|
|
+ be->tpmif = NULL;
|
|
+ xenbus_dev_fatal(dev,err,"creating vtpm interface");
|
|
+ return err;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (be->tpmif != NULL) {
|
|
+ err = tpmif_map(be->tpmif, ring_ref, evtchn);
|
|
+ if (err) {
|
|
+ xenbus_dev_error(dev, err,
|
|
+ "mapping shared-frame %lu port %u",
|
|
+ ring_ref, evtchn);
|
|
+ return err;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+static const struct xenbus_device_id tpmback_ids[] = {
|
|
+ { "vtpm" },
|
|
+ { "" }
|
|
+};
|
|
+
|
|
+
|
|
+static struct xenbus_driver tpmback = {
|
|
+ .name = "vtpm",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = tpmback_ids,
|
|
+ .probe = tpmback_probe,
|
|
+ .remove = tpmback_remove,
|
|
+ .otherend_changed = frontend_changed,
|
|
+};
|
|
+
|
|
+
|
|
+void tpmif_xenbus_init(void)
|
|
+{
|
|
+ xenbus_register_backend(&tpmback);
|
|
+}
|
|
+
|
|
+void tpmif_xenbus_exit(void)
|
|
+{
|
|
+ xenbus_unregister_driver(&tpmback);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbback/Makefile 2009-04-07 13:58:49.000000000 +0200
|
|
@@ -0,0 +1,4 @@
|
|
+obj-$(CONFIG_XEN_USB_BACKEND) := usbbk.o
|
|
+
|
|
+usbbk-y := usbstub.o xenbus.o interface.o usbback.o
|
|
+
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbback/interface.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,248 @@
|
|
+/*
|
|
+ * interface.c
|
|
+ *
|
|
+ * Xen USB backend interface management.
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/delay.h>
|
|
+#include "usbback.h"
|
|
+
|
|
+static LIST_HEAD(usbif_list);
|
|
+static DEFINE_SPINLOCK(usbif_list_lock);
|
|
+
|
|
+/*
+ * Look up the usbif registered for (domid, handle) on usbif_list.
+ *
+ * The list is walked under usbif_list_lock; the lock is released before
+ * returning, so the caller gets a bare pointer with no reference taken.
+ * Returns NULL when no entry matches.
+ */
+usbif_t *find_usbif(domid_t domid, unsigned int handle)
+{
+	usbif_t *cur;
+	usbif_t *match = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&usbif_list_lock, irqflags);
+	list_for_each_entry(cur, &usbif_list, usbif_list) {
+		if (cur->domid != domid || cur->handle != handle)
+			continue;
+		match = cur;
+		break;
+	}
+	spin_unlock_irqrestore(&usbif_list_lock, irqflags);
+
+	return match;
+}
|
|
+
|
|
+/*
+ * Allocate a zeroed usbif for (domid, handle), initialise its locks, wait
+ * queues, stub list, reference count and address table, and link it into
+ * usbif_list.
+ *
+ * Returns the new usbif, or NULL if the allocation fails.
+ */
+usbif_t *usbif_alloc(domid_t domid, unsigned int handle)
+{
+	usbif_t *nif;
+	unsigned long irqflags;
+	int slot;
+
+	nif = kzalloc(sizeof(*nif), GFP_KERNEL);
+	if (nif == NULL)
+		return NULL;
+
+	nif->domid = domid;
+	nif->handle = handle;
+	atomic_set(&nif->refcnt, 0);
+
+	spin_lock_init(&nif->urb_ring_lock);
+	spin_lock_init(&nif->conn_ring_lock);
+	spin_lock_init(&nif->stub_lock);
+	spin_lock_init(&nif->addr_lock);
+
+	init_waitqueue_head(&nif->wq);
+	init_waitqueue_head(&nif->waiting_to_free);
+
+	INIT_LIST_HEAD(&nif->stub_list);
+	for (slot = 0; slot < USB_DEV_ADDR_SIZE; slot++)
+		nif->addr_table[slot] = NULL;
+
+	/* Publish the fully initialised interface. */
+	spin_lock_irqsave(&usbif_list_lock, irqflags);
+	list_add(&nif->usbif_list, &usbif_list);
+	spin_unlock_irqrestore(&usbif_list_lock, irqflags);
+
+	return nif;
+}
|
|
+
|
|
+/*
+ * Map the two ring pages granted by the frontend (urb ring first, then
+ * conn ring) at the addresses of the vm areas already stored in @usbif,
+ * and remember grant reference and handle for each.
+ *
+ * If the second mapping fails the first one is unmapped again.
+ * Returns 0 on success, -EINVAL if either grant could not be mapped.
+ */
+static int map_frontend_pages(usbif_t *usbif,
+				grant_ref_t urb_ring_ref,
+				grant_ref_t conn_ring_ref)
+{
+	struct gnttab_map_grant_ref map_op;
+	struct gnttab_unmap_grant_ref undo;
+
+	/* urb ring */
+	gnttab_set_map_op(&map_op,
+			  (unsigned long)usbif->urb_ring_area->addr,
+			  GNTMAP_host_map, urb_ring_ref, usbif->domid);
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
+	if (map_op.status != GNTST_okay) {
+		printk(KERN_ERR "grant table failure mapping urb_ring_ref %d\n", (int)map_op.status);
+		return -EINVAL;
+	}
+	usbif->urb_shmem_ref = urb_ring_ref;
+	usbif->urb_shmem_handle = map_op.handle;
+
+	/* conn ring */
+	gnttab_set_map_op(&map_op,
+			  (unsigned long)usbif->conn_ring_area->addr,
+			  GNTMAP_host_map, conn_ring_ref, usbif->domid);
+	gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &map_op);
+	if (map_op.status == GNTST_okay) {
+		usbif->conn_shmem_ref = conn_ring_ref;
+		usbif->conn_shmem_handle = map_op.handle;
+		return 0;
+	}
+
+	/* Roll back the urb ring mapping made above. */
+	gnttab_set_unmap_op(&undo,
+			    (unsigned long)usbif->urb_ring_area->addr,
+			    GNTMAP_host_map, usbif->urb_shmem_handle);
+	VOID(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &undo, 1));
+	printk(KERN_ERR "grant table failure mapping conn_ring_ref %d\n", (int)map_op.status);
+	return -EINVAL;
+}
|
|
+
|
|
+/*
+ * Undo map_frontend_pages(): unmap the urb ring page, then the conn ring
+ * page.  A failing unmap hypercall is treated as fatal (BUG).
+ */
+static void unmap_frontend_pages(usbif_t *usbif)
+{
+	struct gnttab_unmap_grant_ref unmap_op;
+	int rc;
+
+	gnttab_set_unmap_op(&unmap_op,
+			    (unsigned long)usbif->urb_ring_area->addr,
+			    GNTMAP_host_map, usbif->urb_shmem_handle);
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1);
+	BUG_ON(rc);
+
+	gnttab_set_unmap_op(&unmap_op,
+			    (unsigned long)usbif->conn_ring_area->addr,
+			    GNTMAP_host_map, usbif->conn_shmem_handle);
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unmap_op, 1);
+	BUG_ON(rc);
+}
|
|
+
|
|
+/*
+ * Connect @usbif to its frontend: reserve one page of vm area per ring,
+ * map the granted ring pages into them, bind the interdomain event
+ * channel to usbbk_be_int() and initialise both back rings.
+ *
+ * Does nothing and returns 0 if an irq is already bound.  Returns 0 on
+ * success, -ENOMEM if a vm area cannot be allocated, or the error from
+ * map_frontend_pages() / bind_interdomain_evtchn_to_irqhandler().
+ * Everything set up so far is released again on failure.
+ */
+int usbif_map(usbif_t *usbif, unsigned long urb_ring_ref,
+		unsigned long conn_ring_ref, unsigned int evtchn)
+{
+	usbif_urb_sring_t *urb_shared;
+	usbif_conn_sring_t *conn_shared;
+	int rc;
+
+	if (usbif->irq)
+		return 0;
+
+	usbif->urb_ring_area = alloc_vm_area(PAGE_SIZE);
+	if (usbif->urb_ring_area == NULL)
+		return -ENOMEM;
+
+	usbif->conn_ring_area = alloc_vm_area(PAGE_SIZE);
+	if (usbif->conn_ring_area == NULL) {
+		rc = -ENOMEM;
+		goto fail_alloc;
+	}
+
+	rc = map_frontend_pages(usbif, urb_ring_ref, conn_ring_ref);
+	if (rc)
+		goto fail_map;
+
+	/* A non-negative return value is the bound irq number. */
+	rc = bind_interdomain_evtchn_to_irqhandler(
+			usbif->domid, evtchn, usbbk_be_int, 0,
+			"usbif-backend", usbif);
+	if (rc < 0)
+		goto fail_evtchn;
+	usbif->irq = rc;
+
+	urb_shared = (usbif_urb_sring_t *) usbif->urb_ring_area->addr;
+	BACK_RING_INIT(&usbif->urb_ring, urb_shared, PAGE_SIZE);
+
+	conn_shared = (usbif_conn_sring_t *) usbif->conn_ring_area->addr;
+	BACK_RING_INIT(&usbif->conn_ring, conn_shared, PAGE_SIZE);
+
+	return 0;
+
+fail_evtchn:
+	unmap_frontend_pages(usbif);
+fail_map:
+	free_vm_area(usbif->conn_ring_area);
+fail_alloc:
+	free_vm_area(usbif->urb_ring_area);
+
+	return rc;
+}
|
|
+
|
|
+/*
+ * Tear down the connection to the frontend, in this order:
+ *   1. stop the xenusbd kernel thread, if one is running;
+ *   2. under stub_lock, unlink the urbs of every attached stub and
+ *      detach the stub;
+ *   3. sleep until the interface reference count has dropped to zero;
+ *   4. unbind the event-channel irq;
+ *   5. unmap the ring pages and free their vm areas.
+ */
+void usbif_disconnect(usbif_t *usbif)
+{
+	struct usbstub *cur, *next;
+	unsigned long irqflags;
+
+	if (usbif->xenusbd) {
+		kthread_stop(usbif->xenusbd);
+		usbif->xenusbd = NULL;
+	}
+
+	spin_lock_irqsave(&usbif->stub_lock, irqflags);
+	list_for_each_entry_safe(cur, next, &usbif->stub_list, dev_list) {
+		usbbk_unlink_urbs(cur);
+		detach_device_without_lock(usbif, cur);
+	}
+	spin_unlock_irqrestore(&usbif->stub_lock, irqflags);
+
+	wait_event(usbif->waiting_to_free, atomic_read(&usbif->refcnt) == 0);
+
+	if (usbif->irq) {
+		unbind_from_irqhandler(usbif->irq, usbif);
+		usbif->irq = 0;
+	}
+
+	/* A non-NULL sring means usbif_map() completed successfully. */
+	if (usbif->urb_ring.sring) {
+		unmap_frontend_pages(usbif);
+		free_vm_area(usbif->urb_ring_area);
+		free_vm_area(usbif->conn_ring_area);
+		usbif->urb_ring.sring = NULL;
+		usbif->conn_ring.sring = NULL;
+	}
+}
|
|
+
|
|
+/* Unlink @usbif from usbif_list (under usbif_list_lock) and free it. */
+void usbif_free(usbif_t *usbif)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&usbif_list_lock, irqflags);
+	list_del(&usbif->usbif_list);
+	spin_unlock_irqrestore(&usbif_list_lock, irqflags);
+
+	kfree(usbif);
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbback/usbback.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,1159 @@
|
|
+/*
|
|
+ * usbback.c
|
|
+ *
|
|
+ * Xen USB backend driver
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/mm.h>
|
|
+#include <xen/balloon.h>
|
|
+#include "usbback.h"
|
|
+
|
|
+#if 0
|
|
+#include "../../usb/core/hub.h"
|
|
+#endif
|
|
+
|
|
+/* Module parameter "reqs": number of usbback requests to allocate. */
+int usbif_reqs = USBIF_BACK_MAX_PENDING_REQS;
+module_param_named(reqs, usbif_reqs, int, 0);
+MODULE_PARM_DESC(reqs, "Number of usbback requests to allocate");
|
|
+
|
|
+/*
+ * Backend-private copy of one segment's placement inside its granted
+ * page; filled in and range-checked by usbbk_gnttab_map().
+ */
+struct pending_req_segment {
+	uint16_t offset;	/* byte offset into the page */
+	uint16_t length;	/* number of bytes in the page */
+};
|
|
+
|
|
+/* State of one frontend request while the backend is processing it. */
+typedef struct {
+	usbif_t *usbif;			/* interface the request arrived on */
+
+	uint16_t id;			/* request id, echoed in the response */
+
+	struct usbstub *stub;		/* target device */
+	struct list_head urb_list;	/* link in stub->submitting_list */
+
+	/* urb */
+	struct urb *urb;
+	void *buffer;			/* transfer buffer (usb_buffer_alloc) */
+	dma_addr_t transfer_dma;	/* DMA address of buffer */
+	struct usb_ctrlrequest *setup;	/* setup packet, control pipes only */
+	dma_addr_t setup_dma;		/* DMA address of setup */
+
+	/* request segments */
+	uint16_t nr_buffer_segs;	/* number of urb->transfer_buffer segments */
+	uint16_t nr_extra_segs;		/* number of iso_frame_desc segments (ISO) */
+	struct pending_req_segment *seg; /* allocated by usbbk_gnttab_map() */
+
+	struct list_head free_list;	/* link in pending_free */
+} pending_req_t;
|
|
+
|
|
+/* Base of the request array; vaddr_pagenr() indexes relative to it. */
+static pending_req_t *pending_reqs;
+/* Requests currently not in use, protected by pending_free_lock. */
+static struct list_head pending_free;
+static DEFINE_SPINLOCK(pending_free_lock);
+/* Woken by free_req() when pending_free goes from empty to non-empty. */
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+/* Value of a pending_grant_handles[] slot that holds no mapping. */
+#define USBBACK_INVALID_HANDLE (~0)
+
+/* Per-(request, segment) page and grant handle, see vaddr_pagenr(). */
+static struct page **pending_pages;
+static grant_handle_t *pending_grant_handles;
|
|
+
|
|
+/*
+ * Index of segment @seg of request @req in pending_pages[] and
+ * pending_grant_handles[]: every request owns a run of
+ * USBIF_MAX_SEGMENTS_PER_REQUEST consecutive slots.
+ */
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+	return (req - pending_reqs) * USBIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
|
|
+
|
|
+/* Kernel virtual address of the page backing segment @seg of @req. */
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+	struct page *pg = pending_pages[vaddr_pagenr(req, seg)];
+
+	return (unsigned long)pfn_to_kaddr(page_to_pfn(pg));
+}
|
|
+
|
|
+/* Grant handle slot of segment _seg of request _req (usable as lvalue). */
+#define pending_handle(_req, _seg) \
+	(pending_grant_handles[vaddr_pagenr(_req, _seg)])
|
|
+
|
|
+/*
+ * Take the first request off pending_free.
+ * Returns NULL when the free list is empty; never sleeps.
+ */
+static pending_req_t *alloc_req(void)
+{
+	pending_req_t *taken = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&pending_free_lock, irqflags);
+	if (list_empty(&pending_free))
+		goto unlock;
+	taken = list_entry(pending_free.next, pending_req_t, free_list);
+	list_del(&taken->free_list);
+unlock:
+	spin_unlock_irqrestore(&pending_free_lock, irqflags);
+
+	return taken;
+}
|
|
+
|
|
+/*
+ * Return @req to pending_free.  If the list was empty beforehand, wake
+ * the waiters on pending_free_wq.
+ */
+static void free_req(pending_req_t *req)
+{
+	unsigned long irqflags;
+	int list_was_empty;
+
+	spin_lock_irqsave(&pending_free_lock, irqflags);
+	list_was_empty = list_empty(&pending_free);
+	list_add(&req->free_list, &pending_free);
+	spin_unlock_irqrestore(&pending_free_lock, irqflags);
+
+	if (!list_was_empty)
+		return;
+	wake_up(&pending_free_wq);
+}
|
|
+
|
|
+/* Append @pending_req to @stub's submitting_list under submitting_lock. */
+static inline void add_req_to_submitting_list(struct usbstub *stub, pending_req_t *pending_req)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&stub->submitting_lock, irqflags);
+	list_add_tail(&pending_req->urb_list, &stub->submitting_list);
+	spin_unlock_irqrestore(&stub->submitting_lock, irqflags);
+}
|
|
+
|
|
+/*
+ * Remove @pending_req from @stub's submitting_list under submitting_lock
+ * and re-initialise its list node.
+ */
+static inline void remove_req_from_submitting_list(struct usbstub *stub, pending_req_t *pending_req)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&stub->submitting_lock, irqflags);
+	list_del_init(&pending_req->urb_list);
+	spin_unlock_irqrestore(&stub->submitting_lock, irqflags);
+}
|
|
+
|
|
+/*
+ * Call usb_unlink_urb() on every request on @stub's submitting_list.
+ * The list is walked with submitting_lock held; entries are not removed
+ * here (usbbk_urb_complete() does that).
+ */
+void usbbk_unlink_urbs(struct usbstub *stub)
+{
+	pending_req_t *cur, *next;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&stub->submitting_lock, irqflags);
+	list_for_each_entry_safe(cur, next, &stub->submitting_list, urb_list)
+		usb_unlink_urb(cur->urb);
+	spin_unlock_irqrestore(&stub->submitting_lock, irqflags);
+}
|
|
+
|
|
+/*
+ * Release the grant mappings of @pending_req and free its segment array.
+ *
+ * Segments whose handle is USBBACK_INVALID_HANDLE are skipped; all other
+ * handles are invalidated and unmapped in one batched hypercall, whose
+ * failure is fatal (BUG).  Nothing is done for a request without
+ * segments.
+ */
+static void fast_flush_area(pending_req_t *pending_req)
+{
+	struct gnttab_unmap_grant_ref unmap[USBIF_MAX_SEGMENTS_PER_REQUEST];
+	unsigned int seg, total, queued = 0;
+	int rc;
+
+	total = pending_req->nr_buffer_segs + pending_req->nr_extra_segs;
+	if (total == 0)
+		return;
+
+	for (seg = 0; seg < total; seg++) {
+		grant_handle_t h = pending_handle(pending_req, seg);
+
+		if (h != USBBACK_INVALID_HANDLE) {
+			gnttab_set_unmap_op(&unmap[queued],
+					    vaddr(pending_req, seg),
+					    GNTMAP_host_map, h);
+			pending_handle(pending_req, seg) = USBBACK_INVALID_HANDLE;
+			queued++;
+		}
+	}
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, queued);
+	BUG_ON(rc);
+
+	kfree(pending_req->seg);
+}
|
|
+
|
|
+/*
+ * Scatter the linear buffer @buff into the mapped pages of segments
+ * [@start, @start + @nr_pages) of @pending_req, honouring each
+ * segment's offset and length.
+ */
+static void copy_buff_to_pages(void *buff, pending_req_t *pending_req,
+		int start, int nr_pages)
+{
+	const int end = start + nr_pages;
+	unsigned long done = 0;
+	int idx;
+
+	for (idx = start; idx < end; idx++) {
+		const struct pending_req_segment *s = &pending_req->seg[idx];
+		void *dst = (void *) vaddr(pending_req, idx) + s->offset;
+
+		memcpy(dst, buff + done, s->length);
+		done += s->length;
+	}
+}
|
|
+
|
|
+/*
+ * Gather the mapped pages of segments [@start, @start + @nr_pages) of
+ * @pending_req into the linear buffer @buff, honouring each segment's
+ * offset and length.
+ */
+static void copy_pages_to_buff(void *buff, pending_req_t *pending_req,
+		int start, int nr_pages)
+{
+	const int end = start + nr_pages;
+	unsigned long done = 0;
+	int idx;
+
+	for (idx = start; idx < end; idx++) {
+		const struct pending_req_segment *s = &pending_req->seg[idx];
+		void *src = (void *) vaddr(pending_req, idx) + s->offset;
+
+		memcpy(buff + done, src, s->length);
+		done += s->length;
+	}
+}
|
|
+
|
|
+/*
+ * Allocate what is needed to submit @req:
+ *   - the urb itself (with iso packet descriptors for isochronous pipes);
+ *   - a transfer buffer, if the request carries data;
+ *   - a setup packet buffer, for control pipes.
+ *
+ * Returns 0 on success or -ENOMEM, in which case everything allocated
+ * here has been released again.
+ */
+static int usbbk_alloc_urb(usbif_urb_request_t *req, pending_req_t *pending_req)
+{
+	int iso_packets = 0;
+
+	if (usb_pipeisoc(req->pipe))
+		iso_packets = req->u.isoc.number_of_packets;
+
+	pending_req->urb = usb_alloc_urb(iso_packets, GFP_KERNEL);
+	if (pending_req->urb == NULL) {
+		printk(KERN_ERR "usbback: can't alloc urb\n");
+		return -ENOMEM;
+	}
+
+	if (req->buffer_length) {
+		pending_req->buffer = usb_buffer_alloc(pending_req->stub->udev,
+				req->buffer_length, GFP_KERNEL,
+				&pending_req->transfer_dma);
+		if (pending_req->buffer == NULL) {
+			printk(KERN_ERR "usbback: can't alloc urb buffer\n");
+			goto drop_urb;
+		}
+	}
+
+	if (usb_pipecontrol(req->pipe)) {
+		pending_req->setup = usb_buffer_alloc(pending_req->stub->udev,
+				sizeof(struct usb_ctrlrequest), GFP_KERNEL,
+				&pending_req->setup_dma);
+		if (pending_req->setup == NULL) {
+			printk(KERN_ERR "usbback: can't alloc usb_ctrlrequest\n");
+			goto drop_buffer;
+		}
+	}
+
+	return 0;
+
+drop_buffer:
+	if (req->buffer_length)
+		usb_buffer_free(pending_req->stub->udev, req->buffer_length,
+				pending_req->buffer, pending_req->transfer_dma);
+drop_urb:
+	usb_free_urb(pending_req->urb);
+	return -ENOMEM;
+}
|
|
+
|
|
+/*
+ * Release @urb together with the buffers attached to it: the setup
+ * packet for control pipes and the transfer buffer if its length is
+ * non-zero.
+ */
+static void usbbk_free_urb(struct urb *urb)
+{
+	struct usb_device *owner = urb->dev;
+
+	if (usb_pipecontrol(urb->pipe))
+		usb_buffer_free(owner, sizeof(struct usb_ctrlrequest),
+				urb->setup_packet, urb->setup_dma);
+
+	if (urb->transfer_buffer_length)
+		usb_buffer_free(owner, urb->transfer_buffer_length,
+				urb->transfer_buffer, urb->transfer_dma);
+
+	barrier();
+	usb_free_urb(urb);
+}
|
|
+
|
|
+/* Flag that requests are waiting and wake whoever sleeps on usbif->wq. */
+static void usbbk_notify_work(usbif_t *usbif)
+{
+	usbif->waiting_reqs = 1;
+	wake_up(&usbif->wq);
+}
|
|
+
|
|
+/*
+ * Event-channel interrupt handler bound by usbif_map(); @dev_id is the
+ * usbif that was passed at bind time.
+ */
+irqreturn_t usbbk_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+	usbif_t *usbif = dev_id;
+
+	usbbk_notify_work(usbif);
+	return IRQ_HANDLED;
+}
|
|
+
|
|
+/*
+ * Queue one response for @pending_req on the urb ring and, if the ring
+ * macros ask for it, notify the frontend through the event channel.
+ * The ring is updated with urb_ring_lock held.
+ */
+static void usbbk_do_response(pending_req_t *pending_req, int32_t status,
+					int32_t actual_length, int32_t error_count, uint16_t start_frame)
+{
+	usbif_t *usbif = pending_req->usbif;
+	usbif_urb_response_t *rsp;
+	unsigned long irqflags;
+	int kick;
+
+	spin_lock_irqsave(&usbif->urb_ring_lock, irqflags);
+
+	rsp = RING_GET_RESPONSE(&usbif->urb_ring, usbif->urb_ring.rsp_prod_pvt);
+	rsp->id = pending_req->id;
+	rsp->status = status;
+	rsp->actual_length = actual_length;
+	rsp->error_count = error_count;
+	rsp->start_frame = start_frame;
+
+	usbif->urb_ring.rsp_prod_pvt++;
+	barrier();
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&usbif->urb_ring, kick);
+
+	spin_unlock_irqrestore(&usbif->urb_ring_lock, irqflags);
+
+	if (kick)
+		notify_remote_via_irq(usbif->irq);
+}
|
|
+
|
|
+/*
+ * urb completion handler.
+ *
+ * For a successful IN transfer that returned data, the transfer buffer
+ * is copied into the buffer segments; for isochronous pipes the frame
+ * descriptors are copied into the extra segments.  Afterwards the
+ * grants are released, the response is queued, the request is taken off
+ * the stub's submitting list, and the urb, the interface reference and
+ * the request are released.
+ */
+static void usbbk_urb_complete(struct urb *urb, struct pt_regs *regs)
+{
+	pending_req_t *preq = (pending_req_t *)urb->context;
+
+	if (usb_pipein(urb->pipe) && urb->status == 0 && urb->actual_length > 0)
+		copy_buff_to_pages(preq->buffer, preq,
+				   0, preq->nr_buffer_segs);
+
+	if (usb_pipeisoc(urb->pipe))
+		copy_buff_to_pages(&urb->iso_frame_desc[0], preq,
+				   preq->nr_buffer_segs, preq->nr_extra_segs);
+
+	barrier();
+
+	fast_flush_area(preq);
+
+	usbbk_do_response(preq, urb->status, urb->actual_length,
+			  urb->error_count, urb->start_frame);
+
+	remove_req_from_submitting_list(preq->stub, preq);
+
+	barrier();
+	usbbk_free_urb(urb);
+	usbif_put(preq->usbif);
+	free_req(preq);
+}
|
|
+
|
|
+/*
+ * Map the pages granted by the frontend for one request.
+ *
+ * The first pending_req->nr_buffer_segs segments carry the transfer
+ * buffer (mapped read-only for OUT pipes); the following
+ * pending_req->nr_extra_segs segments carry the isochronous frame
+ * descriptors (always mapped writable).
+ *
+ * On success the grant handles are recorded, the p2m entries point at
+ * the foreign frames, and pending_req->seg[] holds a private,
+ * range-checked copy of every segment's offset and length.
+ *
+ * Returns 0 on success (also when the request has no segments), -EINVAL
+ * if the segment count exceeds USBIF_MAX_SEGMENTS_PER_REQUEST, and
+ * -ENOMEM if the segment array cannot be allocated or if any grant
+ * fails to map or describes a range outside its page; in the latter
+ * cases all mappings are released again.
+ */
+static int usbbk_gnttab_map(usbif_t *usbif,
+			usbif_urb_request_t *req, pending_req_t *pending_req)
+{
+	struct gnttab_map_grant_ref map[USBIF_MAX_SEGMENTS_PER_REQUEST];
+	unsigned int nr_segs;
+	uint32_t flags;
+	int i, ret;
+
+	nr_segs = pending_req->nr_buffer_segs + pending_req->nr_extra_segs;
+
+	if (nr_segs > USBIF_MAX_SEGMENTS_PER_REQUEST) {
+		printk(KERN_ERR "Bad number of segments in request\n");
+		return -EINVAL;
+	}
+
+	if (!nr_segs)
+		return 0;
+
+	pending_req->seg = kmalloc(sizeof(struct pending_req_segment)
+				   * nr_segs, GFP_KERNEL);
+	if (!pending_req->seg)
+		return -ENOMEM;
+
+	/*
+	 * Prepare one map op per segment.  The split between buffer and
+	 * extra segments is taken from pending_req only, never from the
+	 * request itself, so that every slot in [0, nr_segs) is
+	 * initialised before the array is handed to the hypervisor.
+	 */
+	for (i = 0; i < nr_segs; i++) {
+		flags = GNTMAP_host_map;
+		if (i < pending_req->nr_buffer_segs && usb_pipeout(req->pipe))
+			flags |= GNTMAP_readonly;
+		gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
+				  req->seg[i].gref, usbif->domid);
+	}
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+					map, nr_segs);
+	BUG_ON(ret);
+
+	for (i = 0; i < nr_segs; i++) {
+		/* Make sure that none of the map ops failed with GNTST_eagain */
+		if (unlikely(map[i].status == GNTST_eagain))
+			gnttab_check_GNTST_eagain_while(GNTTABOP_map_grant_ref, &map[i]);
+
+		if (unlikely(map[i].status != GNTST_okay)) {
+			printk(KERN_ERR "usbback: invalid buffer -- could not remap it\n");
+			map[i].handle = USBBACK_INVALID_HANDLE;
+			ret |= 1;
+		}
+
+		/*
+		 * Record every handle, even after a failure, so that
+		 * fast_flush_area() can undo the mappings that succeeded.
+		 */
+		pending_handle(pending_req, i) = map[i].handle;
+
+		if (ret)
+			continue;
+
+		set_phys_to_machine(__pa(vaddr(
+			pending_req, i)) >> PAGE_SHIFT,
+			FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
+
+		/* Copy first, then validate the private copy. */
+		pending_req->seg[i].offset = req->seg[i].offset;
+		pending_req->seg[i].length = req->seg[i].length;
+
+		barrier();
+
+		if (pending_req->seg[i].offset >= PAGE_SIZE ||
+		    pending_req->seg[i].length > PAGE_SIZE ||
+		    pending_req->seg[i].offset + pending_req->seg[i].length > PAGE_SIZE)
+			ret |= 1;
+	}
+
+	if (ret) {
+		fast_flush_area(pending_req);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Fill in pending_req->urb from the frontend request @req.
+ *
+ * The pipe is rebuilt for the real device (pending_req->stub->udev) from
+ * the type, direction and endpoint encoded in req->pipe; control
+ * transfers always use endpoint 0.  The urb completes through
+ * usbbk_urb_complete() with @pending_req as context.  An unknown pipe
+ * type leaves the urb untouched apart from the DMA setup at the end.
+ */
+static void usbbk_init_urb(usbif_urb_request_t *req, pending_req_t *pending_req)
+{
+	struct urb *urb = pending_req->urb;
+	struct usb_device *udev = pending_req->stub->udev;
+	const int dir_in = usb_pipein(req->pipe);
+	const unsigned int ep = usb_pipeendpoint(req->pipe);
+	unsigned int pipe;
+
+	switch (usb_pipetype(req->pipe)) {
+	case PIPE_ISOCHRONOUS:
+		pipe = dir_in ? usb_rcvisocpipe(udev, ep)
+			      : usb_sndisocpipe(udev, ep);
+
+		/* Isochronous urbs are filled in field by field. */
+		urb->dev = udev;
+		urb->pipe = pipe;
+		urb->transfer_flags = req->transfer_flags;
+		urb->transfer_flags |= URB_ISO_ASAP;
+		urb->transfer_buffer = pending_req->buffer;
+		urb->transfer_buffer_length = req->buffer_length;
+		urb->complete = usbbk_urb_complete;
+		urb->context = pending_req;
+		urb->interval = req->u.isoc.interval;
+		urb->start_frame = req->u.isoc.start_frame;
+		urb->number_of_packets = req->u.isoc.number_of_packets;
+		break;
+
+	case PIPE_INTERRUPT:
+		pipe = dir_in ? usb_rcvintpipe(udev, ep)
+			      : usb_sndintpipe(udev, ep);
+
+		usb_fill_int_urb(urb, udev, pipe,
+				 pending_req->buffer, req->buffer_length,
+				 usbbk_urb_complete,
+				 pending_req, req->u.intr.interval);
+		/*
+		 * High-speed interrupt endpoints encode the interval
+		 * logarithmically and usb_fill_int_urb() applies that
+		 * encoding to its argument.  req->u.intr.interval was
+		 * already encoded by the frontend, so the helper leaves a
+		 * doubly encoded value behind; put the frontend's value
+		 * back.
+		 */
+		urb->interval = req->u.intr.interval;
+		urb->transfer_flags = req->transfer_flags;
+		break;
+
+	case PIPE_CONTROL:
+		pipe = dir_in ? usb_rcvctrlpipe(udev, 0)
+			      : usb_sndctrlpipe(udev, 0);
+
+		usb_fill_control_urb(urb, udev, pipe,
+				     (unsigned char *) pending_req->setup,
+				     pending_req->buffer, req->buffer_length,
+				     usbbk_urb_complete, pending_req);
+		memcpy(pending_req->setup, req->u.ctrl, 8);
+		urb->setup_dma = pending_req->setup_dma;
+		urb->transfer_flags = req->transfer_flags;
+		urb->transfer_flags |= URB_NO_SETUP_DMA_MAP;
+		break;
+
+	case PIPE_BULK:
+		pipe = dir_in ? usb_rcvbulkpipe(udev, ep)
+			      : usb_sndbulkpipe(udev, ep);
+
+		usb_fill_bulk_urb(urb, udev, pipe,
+				  pending_req->buffer, req->buffer_length,
+				  usbbk_urb_complete, pending_req);
+		urb->transfer_flags = req->transfer_flags;
+		break;
+
+	default:
+		break;
+	}
+
+	/* The transfer buffer came from usb_buffer_alloc(): reuse its DMA address. */
+	if (req->buffer_length) {
+		urb->transfer_dma = pending_req->transfer_dma;
+		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+	}
+}
|
|
+
|
|
+/* Deferred SET_INTERFACE, executed by usbbk_set_interface_work(). */
+struct set_interface_request {
+	pending_req_t *pending_req;	/* request to answer when done */
+	int interface;			/* passed to usb_set_interface() */
+	int alternate;			/* passed to usb_set_interface() */
+	struct work_struct work;
+};
|
|
+
|
|
+/*
+ * Work handler for usbbk_set_interface(): run usb_set_interface() with
+ * the device locked, drop the device reference taken when the work was
+ * queued, send the result to the frontend and release the request.
+ */
+static void usbbk_set_interface_work(void *data)
+{
+	struct set_interface_request *job = (struct set_interface_request *) data;
+	pending_req_t *preq = job->pending_req;
+	struct usb_device *udev = preq->stub->udev;
+	int rc;
+
+	usb_lock_device(udev);
+	rc = usb_set_interface(udev, job->interface, job->alternate);
+	usb_unlock_device(udev);
+	usb_put_dev(udev);
+
+	usbbk_do_response(preq, rc, 0, 0, 0);
+	usbif_put(preq->usbif);
+	free_req(preq);
+	kfree(job);
+}
|
|
+
|
|
+/*
+ * Queue a SET_INTERFACE for @pending_req's device on the shared
+ * workqueue.  A reference on the usb device is held until the work
+ * handler has run.
+ *
+ * Returns 0 once the work is queued, -ENOMEM if the job cannot be
+ * allocated.
+ */
+static int usbbk_set_interface(pending_req_t *pending_req, int interface, int alternate)
+{
+	struct usb_device *udev = pending_req->stub->udev;
+	struct set_interface_request *job;
+
+	job = kmalloc(sizeof(*job), GFP_KERNEL);
+	if (job == NULL)
+		return -ENOMEM;
+
+	job->pending_req = pending_req;
+	job->interface = interface;
+	job->alternate = alternate;
+	INIT_WORK(&job->work, usbbk_set_interface_work, job);
+
+	usb_get_dev(udev);
+	schedule_work(&job->work);
+	return 0;
+}
|
|
+
|
|
+/* Deferred CLEAR_HALT, executed by usbbk_clear_halt_work(). */
+struct clear_halt_request {
+	pending_req_t *pending_req;	/* request to answer when done */
+	int pipe;			/* passed to usb_clear_halt() */
+	struct work_struct work;
+};
|
|
+
|
|
+/*
+ * Work handler for usbbk_clear_halt(): run usb_clear_halt() with the
+ * device locked, drop the device reference taken when the work was
+ * queued, send the result to the frontend and release the request.
+ */
+static void usbbk_clear_halt_work(void *data)
+{
+	struct clear_halt_request *job = (struct clear_halt_request *) data;
+	pending_req_t *preq = job->pending_req;
+	struct usb_device *udev = preq->stub->udev;
+	int rc;
+
+	usb_lock_device(udev);
+	rc = usb_clear_halt(udev, job->pipe);
+	usb_unlock_device(udev);
+	usb_put_dev(udev);
+
+	usbbk_do_response(preq, rc, 0, 0, 0);
+	usbif_put(preq->usbif);
+	free_req(preq);
+	kfree(job);
+}
|
|
+
|
|
+/*
+ * Queue a CLEAR_HALT for @pipe of @pending_req's device on the shared
+ * workqueue.  A reference on the usb device is held until the work
+ * handler has run.
+ *
+ * Returns 0 once the work is queued, -ENOMEM if the job cannot be
+ * allocated.
+ */
+static int usbbk_clear_halt(pending_req_t *pending_req, int pipe)
+{
+	struct usb_device *udev = pending_req->stub->udev;
+	struct clear_halt_request *job;
+
+	job = kmalloc(sizeof(*job), GFP_KERNEL);
+	if (job == NULL)
+		return -ENOMEM;
+
+	job->pending_req = pending_req;
+	job->pipe = pipe;
+	INIT_WORK(&job->work, usbbk_clear_halt_work, job);
+
+	usb_get_dev(udev);
+	schedule_work(&job->work);
+	return 0;
+}
|
|
+
|
|
+#if 0
+/* Compiled out: deferred port reset, same scheme as set_interface/clear_halt. */
+struct port_reset_request {
+	pending_req_t *pending_req;	/* request to answer when done */
+	struct work_struct work;
+};
+
+/*
+ * Work handler: reset the device if it could be locked for reset, then
+ * answer the frontend and release the request.
+ */
+static void usbbk_port_reset_work(void *data)
+{
+	struct port_reset_request *job = (struct port_reset_request *) data;
+	pending_req_t *preq = job->pending_req;
+	struct usb_device *udev = preq->stub->udev;
+	int rc, locked;
+
+	rc = locked = usb_lock_device_for_reset(udev, NULL);
+	if (locked >= 0) {
+		rc = usb_reset_device(udev);
+		/* Only a non-zero lock result is followed by an unlock. */
+		if (locked)
+			usb_unlock_device(udev);
+	}
+	usb_put_dev(udev);
+
+	usbbk_do_response(preq, rc, 0, 0, 0);
+	usbif_put(preq->usbif);
+	free_req(preq);
+	kfree(job);
+}
+
+/* Queue a port reset; returns 0, or -ENOMEM if the job cannot be allocated. */
+static int usbbk_port_reset(pending_req_t *pending_req)
+{
+	struct usb_device *udev = pending_req->stub->udev;
+	struct port_reset_request *job;
+
+	job = kmalloc(sizeof(*job), GFP_KERNEL);
+	if (job == NULL)
+		return -ENOMEM;
+
+	job->pending_req = pending_req;
+	INIT_WORK(&job->work, usbbk_port_reset_work, job);
+
+	usb_get_dev(udev);
+	schedule_work(&job->work);
+	return 0;
+}
+#endif
|
|
+
|
|
+/*
+ * Move a device within usbif->addr_table[].
+ *
+ * @cur_addr: address the device currently answers to (0 = none yet)
+ * @new_addr: address requested by the guest (0 = remove from the table)
+ * @stub:     device to move; if NULL, the device currently registered at
+ *            @cur_addr is used
+ *
+ * @new_addr is taken from the wValue of a guest-supplied SET_ADDRESS
+ * request, i.e. any 16-bit value, so both addresses are checked against
+ * the size of addr_table[] before being used as an index.  Out-of-range
+ * requests, and requests for which no device can be determined, leave
+ * the table unchanged.
+ */
+static void usbbk_set_address(usbif_t *usbif, struct usbstub *stub, int cur_addr, int new_addr)
+{
+	unsigned long flags;
+
+	if (cur_addr < 0 || cur_addr >= USB_DEV_ADDR_SIZE ||
+	    new_addr < 0 || new_addr >= USB_DEV_ADDR_SIZE) {
+		if (printk_ratelimit())
+			printk(KERN_ERR "usbback: invalid device address %d -> %d\n",
+			       cur_addr, new_addr);
+		return;
+	}
+
+	spin_lock_irqsave(&usbif->addr_lock, flags);
+	/* Callers handling an already addressed device may not pass a stub. */
+	if (!stub && cur_addr)
+		stub = usbif->addr_table[cur_addr];
+	if (stub) {
+		if (cur_addr)
+			usbif->addr_table[cur_addr] = NULL;
+		if (new_addr)
+			usbif->addr_table[new_addr] = stub;
+		stub->addr = new_addr;
+	}
+	spin_unlock_irqrestore(&usbif->addr_lock, flags);
+}
|
|
+
|
|
+/*
+ * Find the stub attached to virtual port @portnum of @usbif.
+ *
+ * The stub list is walked under stub_lock; the lock is released before
+ * returning.  Returns NULL if no stub is attached to that port.
+ */
+struct usbstub *find_attached_device(usbif_t *usbif, int portnum)
+{
+	struct usbstub *cur;
+	struct usbstub *match = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&usbif->stub_lock, irqflags);
+	list_for_each_entry(cur, &usbif->stub_list, dev_list) {
+		if (cur->portid->portnum != portnum)
+			continue;
+		match = cur;
+		break;
+	}
+	spin_unlock_irqrestore(&usbif->stub_lock, irqflags);
+
+	return match;
+}
|
|
+
|
|
+/*
+ * Handle an unlink request from the frontend.
+ *
+ * The target device is found by port number when the pipe carries
+ * device address 0, otherwise through addr_table[].  The device's
+ * submitting list is then searched for the request whose id equals
+ * req->u.unlink.unlink_id and its urb is unlinked.
+ *
+ * A response is always sent: -ENODEV if the device is unknown, the
+ * result of usb_unlink_urb() if the request was found, 0 otherwise.
+ * The interface reference and @pending_req are released before
+ * returning.
+ */
+static void process_unlink_req(usbif_t *usbif,
+		usbif_urb_request_t *req, pending_req_t *pending_req)
+{
+	pending_req_t *cand = NULL;
+	struct usbstub *target;
+	unsigned long irqflags;
+	int status = 0;
+	int devnum = usb_pipedevice(req->pipe);
+
+	if (unlikely(devnum == 0)) {
+		target = find_attached_device(usbif, usbif_pipeportnum(req->pipe));
+		pending_req->stub = target;
+		if (unlikely(!target)) {
+			status = -ENODEV;
+			goto respond;
+		}
+	} else {
+		target = usbif->addr_table[devnum];
+		if (unlikely(!target)) {
+			status = -ENODEV;
+			goto respond;
+		}
+		pending_req->stub = target;
+	}
+
+	spin_lock_irqsave(&target->submitting_lock, irqflags);
+	list_for_each_entry(cand, &target->submitting_list, urb_list) {
+		if (cand->id != req->u.unlink.unlink_id)
+			continue;
+		status = usb_unlink_urb(cand->urb);
+		break;
+	}
+	spin_unlock_irqrestore(&target->submitting_lock, irqflags);
+
+respond:
+	usbbk_do_response(pending_req, status, 0, 0, 0);
+	usbif_put(usbif);
+	free_req(pending_req);
+}
|
|
+
|
|
+/*
+ * Intercept control requests that the backend has to treat specially.
+ *
+ * Resolves the target stub and then handles SET_ADDRESS, SET_INTERFACE
+ * and CLEAR_FEATURE(ENDPOINT_HALT) here instead of forwarding them as
+ * plain URBs.
+ *
+ * Returns 0 when the caller should continue with normal URB submission
+ * (pending_req->stub has been set).  Returns non-zero when the request
+ * was taken over here and the caller must not process it any further; on
+ * the fail_response path the response has been sent, the usbif reference
+ * dropped and @pending_req freed.
+ */
+static int check_and_submit_special_ctrlreq(usbif_t *usbif,
+		usbif_urb_request_t *req, pending_req_t *pending_req)
+{
+	int devnum;
+	struct usbstub *stub = NULL;
+	struct usb_ctrlrequest *ctrl = (struct usb_ctrlrequest *) req->u.ctrl;
+	int ret;
+	int done = 0;
+
+	devnum = usb_pipedevice(req->pipe);
+
+	/*
+	 * When a device is first connected or has been reset, it has no
+	 * address.  In this initial state the following requests are sent
+	 * to device address #0:
+	 *
+	 *  1. GET_DESCRIPTOR (descriptor type "DEVICE") is sent, so the OS
+	 *     learns which device is connected.
+	 *
+	 *  2. SET_ADDRESS is sent, after which the device has its address.
+	 *
+	 * In the next step SET_CONFIGURATION is sent to the addressed
+	 * device, and the device is finally ready to use.
+	 */
+	if (unlikely(devnum == 0)) {
+		stub = find_attached_device(usbif, usbif_pipeportnum(req->pipe));
+		if (unlikely(!stub)) {
+			ret = -ENODEV;
+			goto fail_response;
+		}
+
+		switch (ctrl->bRequest) {
+		case USB_REQ_GET_DESCRIPTOR:
+			/*
+			 * GET_DESCRIPTOR request to device #0:
+			 * pass through to the normal urb transfer.
+			 */
+			pending_req->stub = stub;
+			return 0;
+		case USB_REQ_SET_ADDRESS:
+			/*
+			 * SET_ADDRESS request to device #0:
+			 * add the attached device to addr_table.
+			 */
+			{
+				__u16 addr = le16_to_cpu(ctrl->wValue);
+
+				/*
+				 * wValue comes from the guest; addr_table has
+				 * only USB_DEV_ADDR_SIZE slots, so refuse
+				 * anything that could not index it.
+				 */
+				if (unlikely(addr >= USB_DEV_ADDR_SIZE)) {
+					ret = -EINVAL;
+					goto fail_response;
+				}
+				usbbk_set_address(usbif, stub, 0, addr);
+			}
+			ret = 0;
+			goto fail_response;
+		default:
+			ret = -EINVAL;
+			goto fail_response;
+		}
+	} else {
+		/*
+		 * Read the table slot once: the entry may be cleared
+		 * concurrently by a detach, and the value that was checked
+		 * must be the value that is used.
+		 */
+		stub = usbif->addr_table[devnum];
+		if (unlikely(!stub)) {
+			ret = -ENODEV;
+			goto fail_response;
+		}
+		pending_req->stub = stub;
+	}
+
+	/*
+	 * Check special request
+	 */
+	switch (ctrl->bRequest) {
+	case USB_REQ_SET_ADDRESS:
+		/*
+		 * SET_ADDRESS request to an addressed device:
+		 * change its addr or remove it from addr_table.
+		 */
+		{
+			__u16 addr = le16_to_cpu(ctrl->wValue);
+
+			if (unlikely(addr >= USB_DEV_ADDR_SIZE)) {
+				ret = -EINVAL;
+				goto fail_response;
+			}
+			/* stub is the addressed device resolved above */
+			usbbk_set_address(usbif, stub, devnum, addr);
+		}
+		ret = 0;
+		goto fail_response;
+#if 0
+	case USB_REQ_SET_CONFIGURATION:
+		/*
+		 * linux 2.6.27 or later version only!
+		 */
+		if (ctrl->RequestType == USB_RECIP_DEVICE) {
+			__u16 config = le16_to_cpu(ctrl->wValue);
+			usb_driver_set_configuration(pending_req->stub->udev, config);
+			done = 1;
+		}
+		break;
+#endif
+	case USB_REQ_SET_INTERFACE:
+		if (ctrl->bRequestType == USB_RECIP_INTERFACE) {
+			__u16 alt = le16_to_cpu(ctrl->wValue);
+			__u16 intf = le16_to_cpu(ctrl->wIndex);
+			usbbk_set_interface(pending_req, intf, alt);
+			done = 1;
+		}
+		break;
+	case USB_REQ_CLEAR_FEATURE:
+		if (ctrl->bRequestType == USB_RECIP_ENDPOINT
+			&& ctrl->wValue == USB_ENDPOINT_HALT) {
+			int pipe;
+			int ep = le16_to_cpu(ctrl->wIndex) & 0x0f;
+			int dir = le16_to_cpu(ctrl->wIndex)
+					& USB_DIR_IN;
+			if (dir)
+				pipe = usb_rcvctrlpipe(pending_req->stub->udev, ep);
+			else
+				pipe = usb_sndctrlpipe(pending_req->stub->udev, ep);
+			usbbk_clear_halt(pending_req, pipe);
+			done = 1;
+		}
+		break;
+#if 0 /* not tested yet */
+	case USB_REQ_SET_FEATURE:
+		if (ctrl->bRequestType == USB_RT_PORT) {
+			__u16 feat = le16_to_cpu(ctrl->wValue);
+			if (feat == USB_PORT_FEAT_RESET) {
+				usbbk_port_reset(pending_req);
+				done = 1;
+			}
+		}
+		break;
+#endif
+	default:
+		break;
+	}
+
+	return done;
+
+fail_response:
+	/* also used for the successful SET_ADDRESS case (ret == 0) */
+	usbbk_do_response(pending_req, ret, 0, 0, 0);
+	usbif_put(usbif);
+	free_req(pending_req);
+	return 1;
+}
|
|
+
|
|
+/*
+ * Turn one ring request into a submitted URB.
+ *
+ * Takes a reference on @usbif, routes unlink requests and special control
+ * requests to their handlers, and otherwise allocates and initialises an
+ * URB, maps the granted buffer pages, copies OUT/isochronous data in and
+ * submits the URB.
+ *
+ * On any failure in this function a response carrying the error is sent,
+ * the usbif reference is dropped and @pending_req is freed.  On success
+ * the function simply returns after usb_submit_urb(); completion handling
+ * is not visible here.
+ */
+static void dispatch_request_to_pending_reqs(usbif_t *usbif,
+		usbif_urb_request_t *req,
+		pending_req_t *pending_req)
+{
+	int ret;
+
+	pending_req->id = req->id;
+	pending_req->usbif = usbif;
+
+	barrier();
+
+	usbif_get(usbif);
+
+	/* unlink request */
+	if (unlikely(usbif_pipeunlink(req->pipe))) {
+		process_unlink_req(usbif, req, pending_req);
+		return;
+	}
+
+	if (usb_pipecontrol(req->pipe)) {
+		/* non-zero means the request was fully handled there */
+		if (check_and_submit_special_ctrlreq(usbif, req, pending_req))
+			return;
+	} else {
+		int devnum = usb_pipedevice(req->pipe);
+		/*
+		 * Read the table slot once so that the pointer that was
+		 * NULL-checked is the one stored; the entry may be cleared
+		 * concurrently when the device is detached.
+		 */
+		struct usbstub *stub = usbif->addr_table[devnum];
+
+		if (unlikely(!stub)) {
+			ret = -ENODEV;
+			goto fail_response;
+		}
+		pending_req->stub = stub;
+	}
+
+	barrier();
+
+	ret = usbbk_alloc_urb(req, pending_req);
+	if (ret) {
+		ret = -ESHUTDOWN;
+		goto fail_response;
+	}
+
+	add_req_to_submitting_list(pending_req->stub, pending_req);
+
+	barrier();
+
+	usbbk_init_urb(req, pending_req);
+
+	barrier();
+
+	pending_req->nr_buffer_segs = req->nr_buffer_segs;
+	if (usb_pipeisoc(req->pipe))
+		pending_req->nr_extra_segs = req->u.isoc.nr_frame_desc_segs;
+	else
+		pending_req->nr_extra_segs = 0;
+
+	barrier();
+
+	ret = usbbk_gnttab_map(usbif, req, pending_req);
+	if (ret) {
+		printk(KERN_ERR "usbback: invalid buffer\n");
+		ret = -ESHUTDOWN;
+		goto fail_free_urb;
+	}
+
+	barrier();
+
+	/* OUT transfers: copy the payload from the mapped pages */
+	if (usb_pipeout(req->pipe) && req->buffer_length)
+		copy_pages_to_buff(pending_req->buffer,
+					pending_req,
+					0,
+					pending_req->nr_buffer_segs);
+	/* isochronous: frame descriptors follow the buffer segments */
+	if (usb_pipeisoc(req->pipe)) {
+		copy_pages_to_buff(&pending_req->urb->iso_frame_desc[0],
+			pending_req,
+			pending_req->nr_buffer_segs,
+			pending_req->nr_extra_segs);
+	}
+
+	barrier();
+
+	ret = usb_submit_urb(pending_req->urb, GFP_KERNEL);
+	if (ret) {
+		printk(KERN_ERR "usbback: failed submitting urb, error %d\n", ret);
+		ret = -ESHUTDOWN;
+		goto fail_flush_area;
+	}
+	return;
+
+fail_flush_area:
+	fast_flush_area(pending_req);
+fail_free_urb:
+	remove_req_from_submitting_list(pending_req->stub, pending_req);
+	barrier();
+	usbbk_free_urb(pending_req->urb);
+fail_response:
+	usbbk_do_response(pending_req, ret, 0, 0, 0);
+	usbif_put(usbif);
+	free_req(pending_req);
+}
|
|
+
|
|
+/*
+ * Drain the URB request ring of @usbif.
+ *
+ * Consumes requests between the private consumer index and the producer
+ * index sampled on entry, handing each one with a freshly allocated
+ * pending_req to dispatch_request_to_pending_reqs().
+ *
+ * Returns non-zero when work remains: either no pending_req could be
+ * allocated, or RING_FINAL_CHECK_FOR_REQUESTS() reported more requests.
+ */
+static int usbbk_start_submit_urb(usbif_t *usbif)
+{
+	usbif_urb_back_ring_t *ring = &usbif->urb_ring;
+	RING_IDX cons = ring->req_cons;
+	RING_IDX prod = ring->sring->req_prod;
+	int more_to_do = 0;
+
+	/* read the producer index before reading the requests behind it */
+	rmb();
+
+	while (cons != prod) {
+		usbif_urb_request_t *ring_req;
+		pending_req_t *preq;
+
+		if (RING_REQUEST_CONS_OVERFLOW(ring, cons)) {
+			printk(KERN_WARNING "RING_REQUEST_CONS_OVERFLOW\n");
+			break;
+		}
+
+		preq = alloc_req();
+		if (preq == NULL) {
+			/* out of pending_reqs: ask to be called again */
+			more_to_do = 1;
+			break;
+		}
+
+		ring_req = RING_GET_REQUEST(ring, cons);
+		cons++;
+		ring->req_cons = cons;
+
+		dispatch_request_to_pending_reqs(usbif, ring_req, preq);
+	}
+
+	RING_FINAL_CHECK_FOR_REQUESTS(&usbif->urb_ring, more_to_do);
+
+	cond_resched();
+
+	return more_to_do;
+}
|
|
+
|
|
+/*
+ * Report a port state change on the connection ring.
+ *
+ * Consumes one request from usbif->conn_ring, answers it with a response
+ * carrying the request's id plus @portnum and @speed, and notifies the
+ * remote end through usbif->irq when the ring macros ask for it.  Ring
+ * access is serialised by usbif->conn_ring_lock.
+ *
+ * NOTE(review): no check is made that an unconsumed request is actually
+ * available before one is taken.
+ */
+void usbbk_hotplug_notify(usbif_t *usbif, int portnum, int speed)
+{
+	usbif_conn_back_ring_t *ring = &usbif->conn_ring;
+	usbif_conn_response_t *rsp;
+	unsigned long irqflags;
+	u16 req_id;
+	int kick;
+
+	spin_lock_irqsave(&usbif->conn_ring_lock, irqflags);
+
+	/* take the next request; only its id is needed */
+	req_id = RING_GET_REQUEST(ring, ring->req_cons)->id;
+	ring->req_cons++;
+	ring->sring->req_event = ring->req_cons + 1;
+
+	rsp = RING_GET_RESPONSE(ring, ring->rsp_prod_pvt);
+	rsp->id = req_id;
+	rsp->portnum = portnum;
+	rsp->speed = speed;
+	ring->rsp_prod_pvt++;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(ring, kick);
+
+	spin_unlock_irqrestore(&usbif->conn_ring_lock, irqflags);
+
+	if (kick)
+		notify_remote_via_irq(usbif->irq);
+}
|
|
+
|
|
+/*
+ * Kernel-thread main loop for one usbif (passed as @arg).
+ *
+ * Holds a usbif reference for its whole lifetime.  Each iteration sleeps
+ * until there are waiting requests and until the pending_free list is
+ * non-empty (or the thread is asked to stop), then processes the ring via
+ * usbbk_start_submit_urb(), re-arming waiting_reqs when that reports more
+ * work.  On exit usbif->xenusbd is cleared.  Always returns 0.
+ */
+int usbbk_schedule(void *arg)
+{
+	usbif_t *usbif = arg;
+
+	usbif_get(usbif);
+
+	for (;;) {
+		if (kthread_should_stop())
+			break;
+
+		wait_event_interruptible(
+			usbif->wq,
+			usbif->waiting_reqs || kthread_should_stop());
+		wait_event_interruptible(
+			pending_free_wq,
+			!list_empty(&pending_free) || kthread_should_stop());
+
+		/* clear the flag before looking at the ring */
+		usbif->waiting_reqs = 0;
+		smp_mb();
+
+		if (usbbk_start_submit_urb(usbif))
+			usbif->waiting_reqs = 1;
+	}
+
+	usbif->xenusbd = NULL;
+	usbif_put(usbif);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Attach @stub to @usbif: link it onto usbif->stub_list while holding
+ * usbif->stub_lock, then record @usbif as the stub's owner.
+ */
+void usbbk_attach_device(usbif_t *usbif, struct usbstub *stub)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&usbif->stub_lock, irqflags);
+	list_add(&stub->dev_list, &usbif->stub_list);
+	spin_unlock_irqrestore(&usbif->stub_lock, irqflags);
+
+	stub->usbif = usbif;
+}
|
|
+
|
|
+/*
+ * Detach @stub from @usbif: give up the stub's device address if it has
+ * one, unlink it from usbif->stub_list while holding usbif->stub_lock,
+ * and clear the stub's owner pointer.
+ */
+void usbbk_detach_device(usbif_t *usbif, struct usbstub *stub)
+{
+	unsigned long irqflags;
+
+	if (stub->addr != 0)
+		usbbk_set_address(usbif, stub, stub->addr, 0);
+
+	spin_lock_irqsave(&usbif->stub_lock, irqflags);
+	list_del(&stub->dev_list);
+	spin_unlock_irqrestore(&usbif->stub_lock, irqflags);
+
+	stub->usbif = NULL;
+}
|
|
+
|
|
+/*
+ * Same steps as usbbk_detach_device() but without taking
+ * usbif->stub_lock around the list removal.
+ * NOTE(review): presumably the caller already holds stub_lock - confirm
+ * against the call sites.
+ */
+void detach_device_without_lock(usbif_t *usbif, struct usbstub *stub)
+{
+	if (stub->addr != 0)
+		usbbk_set_address(usbif, stub, stub->addr, 0);
+
+	list_del(&stub->dev_list);
+	stub->usbif = NULL;
+}
|
|
+
|
|
+/*
+ * Module initialisation.
+ *
+ * Allocates the global pending-request array, the grant-handle array and
+ * the empty page vector (usbif_reqs * USBIF_MAX_SEGMENTS_PER_REQUEST
+ * pages), puts every pending request on the pending_free list, and then
+ * registers the USB stub driver and the xenbus backend.
+ *
+ * Returns 0 on success, -ENODEV when not running on Xen, -ENOMEM when an
+ * allocation fails, or the error from usbstub_init() /
+ * usbback_xenbus_init().  All allocations are released on failure.
+ */
+static int __init usbback_init(void)
+{
+	int i, mmap_pages;
+	int err = 0;
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	mmap_pages = usbif_reqs * USBIF_MAX_SEGMENTS_PER_REQUEST;
+	pending_reqs = kmalloc(sizeof(pending_reqs[0]) *
+			usbif_reqs, GFP_KERNEL);
+	pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
+			mmap_pages, GFP_KERNEL);
+	pending_pages = alloc_empty_pages_and_pagevec(mmap_pages);
+
+	if (!pending_reqs || !pending_grant_handles || !pending_pages) {
+		err = -ENOMEM;
+		goto out_mem;
+	}
+
+	for (i = 0; i < mmap_pages; i++)
+		pending_grant_handles[i] = USBBACK_INVALID_HANDLE;
+
+	/*
+	 * pending_reqs is a pointer, so sizeof(pending_reqs) would only
+	 * cover the pointer itself; clear the whole array instead.
+	 */
+	memset(pending_reqs, 0, sizeof(pending_reqs[0]) * usbif_reqs);
+	INIT_LIST_HEAD(&pending_free);
+
+	for (i = 0; i < usbif_reqs; i++)
+		list_add_tail(&pending_reqs[i].free_list, &pending_free);
+
+	err = usbstub_init();
+	if (err)
+		goto out_mem;
+
+	err = usbback_xenbus_init();
+	if (err)
+		goto out_xenbus;
+
+	return 0;
+
+out_xenbus:
+	usbstub_exit();
+out_mem:
+	kfree(pending_reqs);
+	kfree(pending_grant_handles);
+	free_empty_pages_and_pagevec(pending_pages, mmap_pages);
+	return err;
+}
|
|
+
|
|
+/*
+ * Module teardown: unregister the xenbus backend and the USB stub driver,
+ * then free the arrays and the page vector allocated by usbback_init().
+ */
+static void __exit usbback_exit(void)
+{
+	int nr_pages = usbif_reqs * USBIF_MAX_SEGMENTS_PER_REQUEST;
+
+	usbback_xenbus_exit();
+	usbstub_exit();
+
+	kfree(pending_reqs);
+	kfree(pending_grant_handles);
+	free_empty_pages_and_pagevec(pending_pages, nr_pages);
+}
|
|
+
|
|
+module_init(usbback_init);
|
|
+module_exit(usbback_exit);
|
|
+
|
|
+MODULE_AUTHOR("");
|
|
+MODULE_DESCRIPTION("Xen USB backend driver (usbback)");
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbback/usbback.h 2009-11-06 10:23:23.000000000 +0100
|
|
@@ -0,0 +1,173 @@
|
|
+/*
|
|
+ * usbback.h
|
|
+ *
|
|
+ * This file is part of Xen USB backend driver.
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __XEN_USBBACK_H__
|
|
+#define __XEN_USBBACK_H__
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/usb.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/wait.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/kref.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/interface/io/usbif.h>
|
|
+
|
|
+struct usbstub;
|
|
+
|
|
+#define USB_DEV_ADDR_SIZE 128
|
|
+
|
|
+/* State of one usbback interface; find_usbif() looks one up by (domid, handle). */
|
|
+typedef struct usbif_st {
|
|
+ domid_t domid; /* lookup key for find_usbif(), together with handle */
|
|
+ unsigned int handle;
|
|
+ int num_ports;
|
|
+ enum usb_spec_version usb_ver; /* compared against USB_VER_USB20 in usbstub_probe() */
|
|
+
|
|
+ struct xenbus_device *xbdev;
|
|
+ struct list_head usbif_list;
|
|
+
|
|
+ unsigned int irq; /* passed to notify_remote_via_irq() */
|
|
+
|
|
+ usbif_urb_back_ring_t urb_ring; /* drained by usbbk_start_submit_urb() */
|
|
+ usbif_conn_back_ring_t conn_ring; /* used by usbbk_hotplug_notify() */
|
|
+ struct vm_struct *urb_ring_area;
|
|
+ struct vm_struct *conn_ring_area;
|
|
+
|
|
+ spinlock_t urb_ring_lock;
|
|
+ spinlock_t conn_ring_lock; /* held around conn_ring access in usbbk_hotplug_notify() */
|
|
+ atomic_t refcnt; /* taken by usbif_get(), dropped by usbif_put() */
|
|
+
|
|
+ grant_handle_t urb_shmem_handle;
|
|
+ grant_ref_t urb_shmem_ref;
|
|
+ grant_handle_t conn_shmem_handle;
|
|
+ grant_ref_t conn_shmem_ref;
|
|
+
|
|
+ struct xenbus_watch backend_watch;
|
|
+
|
|
+ /* device address lookup table, indexed by the device number taken from the request pipe */
|
|
+ struct usbstub *addr_table[USB_DEV_ADDR_SIZE];
|
|
+ spinlock_t addr_lock;
|
|
+
|
|
+ /* connected device list: usbstub entries linked through dev_list, guarded by stub_lock */
|
|
+ struct list_head stub_list;
|
|
+ spinlock_t stub_lock;
|
|
+
|
|
+ /* request schedule */
|
|
+ struct task_struct *xenusbd; /* set to NULL by usbbk_schedule() when it exits */
|
|
+ unsigned int waiting_reqs; /* non-zero while usbbk_schedule() has ring work to do */
|
|
+ wait_queue_head_t waiting_to_free; /* woken by usbif_put() when refcnt reaches zero */
|
|
+ wait_queue_head_t wq; /* usbbk_schedule() sleeps here until waiting_reqs is set */
|
|
+} usbif_t;
|
|
+
|
|
+/* Maps a physical bus id string to a (domid, handle, portnum) triple; see portid_add(). */
|
|
+struct vusb_port_id {
|
|
+ struct list_head id_list; /* link on the port list kept in usbstub.c */
|
|
+
|
|
+ char phys_bus[BUS_ID_SIZE]; /* bus id matched by find_portid_by_busid() */
|
|
+ domid_t domid;
|
|
+ unsigned int handle;
|
|
+ int portnum;
|
|
+ unsigned is_connected:1;
|
|
+};
|
|
+
|
|
+/* One grabbed USB device; allocated by usbstub_alloc(), freed by usbstub_release(). */
|
|
+struct usbstub {
|
|
+ struct kref kref; /* lifetime refcount, see usbstub_get()/usbstub_put() */
|
|
+ struct list_head dev_list; /* link on usbif->stub_list */
|
|
+
|
|
+ struct vusb_port_id *portid;
|
|
+ struct usb_device *udev; /* reference taken with usb_get_dev() */
|
|
+ usbif_t *usbif; /* owning interface, NULL while detached */
|
|
+ int addr; /* non-zero once a device address is assigned */
|
|
+
|
|
+ struct list_head submitting_list; /* pending requests, linked through urb_list */
|
|
+ spinlock_t submitting_lock; /* guards submitting_list */
|
|
+};
|
|
+
|
|
+usbif_t *usbif_alloc(domid_t domid, unsigned int handle);
|
|
+void usbif_disconnect(usbif_t *usbif);
|
|
+void usbif_free(usbif_t *usbif);
|
|
+int usbif_map(usbif_t *usbif, unsigned long urb_ring_ref,
|
|
+ unsigned long conn_ring_ref, unsigned int evtchn);
|
|
+
|
|
+/* Take a reference on a usbif_t. */
|
|
+#define usbif_get(_b) (atomic_inc(&(_b)->refcnt))
|
|
+/* Drop a reference; when the count reaches zero, wake sleepers on waiting_to_free. */
|
|
+#define usbif_put(_b) \
|
|
+ do { \
|
|
+ if (atomic_dec_and_test(&(_b)->refcnt)) \
|
|
+ wake_up(&(_b)->waiting_to_free); \
|
|
+ } while (0)
|
|
+
|
|
+usbif_t *find_usbif(domid_t domid, unsigned int handle);
|
|
+int usbback_xenbus_init(void);
|
|
+void usbback_xenbus_exit(void);
|
|
+struct vusb_port_id *find_portid_by_busid(const char *busid);
|
|
+struct vusb_port_id *find_portid(const domid_t domid,
|
|
+ const unsigned int handle,
|
|
+ const int portnum);
|
|
+int portid_add(const char *busid,
|
|
+ const domid_t domid,
|
|
+ const unsigned int handle,
|
|
+ const int portnum);
|
|
+int portid_remove(const domid_t domid,
|
|
+ const unsigned int handle,
|
|
+ const int portnum);
|
|
+irqreturn_t usbbk_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+int usbbk_schedule(void *arg);
|
|
+struct usbstub *find_attached_device(usbif_t *usbif, int port);
|
|
+void usbbk_attach_device(usbif_t *usbif, struct usbstub *stub);
|
|
+void usbbk_detach_device(usbif_t *usbif, struct usbstub *stub);
|
|
+void usbbk_hotplug_notify(usbif_t *usbif, int portnum, int speed);
|
|
+void detach_device_without_lock(usbif_t *usbif, struct usbstub *stub);
|
|
+void usbbk_unlink_urbs(struct usbstub *stub);
|
|
+
|
|
+int usbstub_init(void);
|
|
+void usbstub_exit(void);
|
|
+
|
|
+#endif /* __XEN_USBBACK_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbback/usbstub.c 2011-03-02 12:00:16.000000000 +0100
|
|
@@ -0,0 +1,324 @@
|
|
+/*
|
|
+ * usbstub.c
|
|
+ *
|
|
+ * USB stub driver - grabbing and managing USB devices.
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "usbback.h"
|
|
+
|
|
+static LIST_HEAD(port_list);
|
|
+static DEFINE_SPINLOCK(port_list_lock);
|
|
+
|
|
+/*
+ * Look up a port id by physical bus id string.
+ *
+ * Compares at most BUS_ID_SIZE characters of @busid against each entry of
+ * port_list while holding port_list_lock.  Returns the first matching
+ * entry, or NULL when there is none.
+ */
+struct vusb_port_id *find_portid_by_busid(const char *busid)
+{
+	struct vusb_port_id *cur;
+	struct vusb_port_id *match = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&port_list_lock, irqflags);
+	list_for_each_entry(cur, &port_list, id_list) {
+		if (strncmp(cur->phys_bus, busid, BUS_ID_SIZE) != 0)
+			continue;
+		match = cur;
+		break;
+	}
+	spin_unlock_irqrestore(&port_list_lock, irqflags);
+
+	return match;
+}
|
|
+
|
|
+/*
+ * Look up a port id by its (domid, handle, portnum) triple.
+ *
+ * Walks port_list under port_list_lock and returns the first entry whose
+ * three fields all match, or NULL when there is none.
+ */
+struct vusb_port_id *find_portid(const domid_t domid,
+					const unsigned int handle,
+					const int portnum)
+{
+	struct vusb_port_id *cur;
+	struct vusb_port_id *match = NULL;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&port_list_lock, irqflags);
+	list_for_each_entry(cur, &port_list, id_list) {
+		if (cur->domid != domid)
+			continue;
+		if (cur->handle != handle)
+			continue;
+		if (cur->portnum != portnum)
+			continue;
+		match = cur;
+		break;
+	}
+	spin_unlock_irqrestore(&port_list_lock, irqflags);
+
+	return match;
+}
|
|
+
|
|
+/*
+ * Register a new bus id -> (domid, handle, portnum) mapping.
+ *
+ * Allocates a zeroed vusb_port_id, fills it in and adds it to port_list
+ * under port_list_lock.  @busid is truncated if it does not fit into
+ * phys_bus.
+ *
+ * Returns 0 on success or -ENOMEM when the allocation fails.
+ */
+int portid_add(const char *busid,
+					const domid_t domid,
+					const unsigned int handle,
+					const int portnum)
+{
+	struct vusb_port_id *portid;
+	unsigned long flags;
+
+	portid = kzalloc(sizeof(*portid), GFP_KERNEL);
+	if (!portid)
+		return -ENOMEM;
+
+	portid->domid = domid;
+	portid->handle = handle;
+	portid->portnum = portnum;
+
+	/*
+	 * Copy at most BUS_ID_SIZE - 1 characters: the buffer comes from
+	 * kzalloc(), so the last byte stays NUL and phys_bus is always a
+	 * terminated string (it is printed with "%s" elsewhere).
+	 */
+	strncpy(portid->phys_bus, busid, BUS_ID_SIZE - 1);
+
+	spin_lock_irqsave(&port_list_lock, flags);
+	list_add(&portid->id_list, &port_list);
+	spin_unlock_irqrestore(&port_list_lock, flags);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * Remove every port id matching (domid, handle, portnum) from port_list
+ * and free it.  The whole list is scanned under port_list_lock.
+ *
+ * Returns 0 when at least one entry was removed, -ENOENT otherwise.
+ */
+int portid_remove(const domid_t domid,
+					const unsigned int handle,
+					const int portnum)
+{
+	struct vusb_port_id *cur, *next;
+	unsigned long irqflags;
+	int status = -ENOENT;
+
+	spin_lock_irqsave(&port_list_lock, irqflags);
+	list_for_each_entry_safe(cur, next, &port_list, id_list) {
+		if (cur->domid != domid || cur->handle != handle
+				|| cur->portnum != portnum)
+			continue;
+
+		list_del(&cur->id_list);
+		kfree(cur);
+		status = 0;
+	}
+	spin_unlock_irqrestore(&port_list_lock, irqflags);
+
+	return status;
+}
|
|
+
|
|
+/*
+ * Allocate and initialise a stub for @udev bound to @portid.
+ *
+ * The stub starts with one kref and holds a reference on @udev obtained
+ * with usb_get_dev().  Returns the new stub, or NULL (after logging) when
+ * the allocation fails.
+ */
+static struct usbstub *usbstub_alloc(struct usb_device *udev,
+						struct vusb_port_id *portid)
+{
+	struct usbstub *new_stub = kzalloc(sizeof(*new_stub), GFP_KERNEL);
+
+	if (new_stub == NULL) {
+		printk(KERN_ERR "no memory for alloc usbstub\n");
+		return NULL;
+	}
+
+	kref_init(&new_stub->kref);
+	new_stub->udev = usb_get_dev(udev);
+	new_stub->portid = portid;
+	spin_lock_init(&new_stub->submitting_lock);
+	INIT_LIST_HEAD(&new_stub->submitting_list);
+
+	return new_stub;
+}
|
|
+
|
|
+/*
+ * kref release callback: drop the usb_device reference held by the stub
+ * that embeds @kref and free the stub.
+ */
+static void usbstub_release(struct kref *kref)
+{
+	struct usbstub *dying = container_of(kref, struct usbstub, kref);
+
+	usb_put_dev(dying->udev);
+	dying->udev = NULL;
+	dying->portid = NULL;
+
+	kfree(dying);
+}
|
|
+
|
|
+/* Take an additional kref on @stub. */
|
|
+static inline void usbstub_get(struct usbstub *stub)
|
|
+{
|
|
+ kref_get(&stub->kref);
|
|
+}
|
|
+
|
|
+/* Drop a kref on @stub; usbstub_release() runs when the last one goes away. */
|
|
+static inline void usbstub_put(struct usbstub *stub)
|
|
+{
|
|
+ kref_put(&stub->kref, usbstub_release);
|
|
+}
|
|
+
|
|
+/*
+ * USB driver probe callback.
+ *
+ * Claims @intf when its parent device's bus id has a registered port id
+ * whose (domid, handle) resolves to a usbif, the device is not a hub, and
+ * its speed is acceptable (low/full always, high only when the usbif's
+ * usb_ver is at least USB_VER_USB20).
+ *
+ * For the first interface of a device a stub is allocated, attached to
+ * the usbif and announced via usbbk_hotplug_notify().  Later interfaces
+ * reuse the already attached stub, provided its bus id matches.  In both
+ * cases a stub reference is taken and stored as the interface data.
+ *
+ * Returns 0 on success, -ENOMEM when the stub cannot be allocated and
+ * -ENODEV in every other rejection case.
+ */
+static int usbstub_probe(struct usb_interface *intf,
+				const struct usb_device_id *id)
+{
+	struct usb_device *udev = interface_to_usbdev(intf);
+	char *busid = intf->dev.parent->bus_id;
+	struct vusb_port_id *portid;
+	struct usbstub *stub;
+	usbif_t *usbif;
+
+	/* hub currently not supported, so skip. */
+	if (udev->descriptor.bDeviceClass == USB_CLASS_HUB)
+		return -ENODEV;
+
+	portid = find_portid_by_busid(busid);
+	if (!portid)
+		return -ENODEV;
+
+	usbif = find_usbif(portid->domid, portid->handle);
+	if (!usbif)
+		return -ENODEV;
+
+	if (udev->speed != USB_SPEED_LOW && udev->speed != USB_SPEED_FULL) {
+		/* only high speed remains acceptable, and only on USB 2.0+ */
+		if (udev->speed != USB_SPEED_HIGH
+				|| usbif->usb_ver < USB_VER_USB20)
+			return -ENODEV;
+	}
+
+	stub = find_attached_device(usbif, portid->portnum);
+	if (stub) {
+		/* maybe already called and connected by other intf */
+		if (strncmp(stub->portid->phys_bus, busid, BUS_ID_SIZE))
+			return -ENODEV; /* invalid call */
+	} else {
+		/* new connection */
+		stub = usbstub_alloc(udev, portid);
+		if (!stub)
+			return -ENOMEM;
+		usbbk_attach_device(usbif, stub);
+		usbbk_hotplug_notify(usbif, portid->portnum, udev->speed);
+	}
+
+	usbstub_get(stub);
+	usb_set_intfdata(intf, stub);
+
+	return 0;
+}
|
|
+
|
|
+/*
+ * USB driver disconnect callback.
+ *
+ * Clears the interface data and, when a stub was bound to @intf, reports
+ * the disconnect (speed 0) and detaches the stub if it is still attached
+ * to a usbif, unlinks its outstanding URBs and drops one stub reference.
+ */
+static void usbstub_disconnect(struct usb_interface *intf)
+{
+	struct usbstub *stub = usb_get_intfdata(intf);
+
+	usb_set_intfdata(intf, NULL);
+
+	if (stub == NULL)
+		return;
+
+	if (stub->usbif != NULL) {
+		usbbk_hotplug_notify(stub->usbif, stub->portid->portnum, 0);
+		usbbk_detach_device(stub->usbif, stub);
+	}
+
+	usbbk_unlink_urbs(stub);
+	usbstub_put(stub);
+}
|
|
+
|
|
+/*
+ * Driver attribute "show" handler: writes one "busid:domid:handle:portnum"
+ * line per entry of port_list into @buf, stopping once PAGE_SIZE bytes
+ * have been produced.  The list is read under port_list_lock.
+ *
+ * Returns the number of bytes written.
+ */
+static ssize_t usbstub_show_portids(struct device_driver *driver,
+		char *buf)
+{
+	struct vusb_port_id *entry;
+	unsigned long irqflags;
+	size_t written = 0;
+
+	spin_lock_irqsave(&port_list_lock, irqflags);
+	list_for_each_entry(entry, &port_list, id_list) {
+		if (written >= PAGE_SIZE)
+			break;
+		written += scnprintf(buf + written, PAGE_SIZE - written,
+				"%s:%d:%d:%d\n",
+				&entry->phys_bus[0],
+				entry->domid,
+				entry->handle,
+				entry->portnum);
+	}
+	spin_unlock_irqrestore(&port_list_lock, irqflags);
+
+	return written;
+}
|
|
+static DRIVER_ATTR(port_ids, S_IRUSR, usbstub_show_portids, NULL);
|
|
+
|
|
+/* Device id table that matches any USB device; filtering is done in usbstub_probe(). */
|
|
+static struct usb_device_id usbstub_table[] = {
|
|
+ { .driver_info = 1 }, /* wildcard, see usb_match_id() */
|
|
+ { } /* Terminating entry */
|
|
+};
|
|
+MODULE_DEVICE_TABLE(usb, usbstub_table);
|
|
+
|
|
+/* USB driver registered by usbstub_init() and removed by usbstub_exit(). */
|
|
+static struct usb_driver usbback_usb_driver = {
|
|
+ .name = "usbback",
|
|
+ .probe = usbstub_probe,
|
|
+ .disconnect = usbstub_disconnect,
|
|
+ .id_table = usbstub_table,
|
|
+ .no_dynamic_id = 1,
|
|
+};
|
|
+
|
|
+/*
+ * Register the usbback USB driver and create its "port_ids" driver
+ * attribute file.  When the attribute cannot be created the driver is
+ * deregistered again.
+ *
+ * Returns 0 on success or the error from usb_register() /
+ * driver_create_file().
+ */
+int __init usbstub_init(void)
+{
+	int rc = usb_register(&usbback_usb_driver);
+
+	if (rc < 0) {
+		printk(KERN_ERR "usbback: usb_register failed (error %d)\n", rc);
+		return rc;
+	}
+
+	rc = driver_create_file(&usbback_usb_driver.driver,
+				&driver_attr_port_ids);
+	if (rc != 0)
+		usb_deregister(&usbback_usb_driver);
+
+	return rc;
+}
|
|
+
|
|
+/* Undo usbstub_init(): remove the "port_ids" attribute file, then deregister the driver. */
|
|
+void usbstub_exit(void)
|
|
+{
|
|
+ driver_remove_file(&usbback_usb_driver.driver,
|
|
+ &driver_attr_port_ids);
|
|
+ usb_deregister(&usbback_usb_driver);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbback/xenbus.c 2009-11-06 10:23:23.000000000 +0100
|
|
@@ -0,0 +1,338 @@
|
|
+/*
|
|
+ * xenbus.c
|
|
+ *
|
|
+ * Xenbus interface for USB backend driver.
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "usbback.h"
|
|
+
|
|
+/*
|
|
+ * Start the per-interface worker thread running usbbk_schedule().
|
|
+ *
|
|
+ * The thread is named "usbback.<domid>.<handle>".  The format string is
|
|
+ * handed to kthread_run() directly instead of passing a pre-formatted
|
|
+ * buffer as the format argument, so the name can never be reinterpreted
|
|
+ * as a format.
|
|
+ *
|
|
+ * Returns 0 on success.  On failure usbif->xenusbd is reset to NULL, the
|
|
+ * error is reported through xenbus_dev_error() and the negative errno
|
|
+ * from kthread_run() is returned.
|
|
+ */
|
|
+static int start_xenusbd(usbif_t *usbif)
|
|
+{
|
|
+	int err = 0;
|
|
+
|
|
+	usbif->xenusbd = kthread_run(usbbk_schedule, usbif, "usbback.%d.%d",
|
|
+				     usbif->domid, usbif->handle);
|
|
+	if (IS_ERR(usbif->xenusbd)) {
|
|
+		err = PTR_ERR(usbif->xenusbd);
|
|
+		usbif->xenusbd = NULL;
|
|
+		xenbus_dev_error(usbif->xbdev, err, "start xenusbd");
|
|
+	}
|
|
+
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Xenbus watch callback for the backend "port" directory.
|
|
+ *
|
|
+ * Inside one xenbus transaction, reads "port/<n>" for every virtual port
|
|
+ * of the interface and brings the port-id list in line with it:
|
|
+ *  - empty bus id: the port-id is removed, unless a device is still
|
|
+ *    connected to it (reported as -EBUSY);
|
|
+ *  - bus id set, port already known: nothing to do if the bus id is the
|
|
+ *    same, otherwise -EBUSY ("remove first");
|
|
+ *  - bus id set, port unknown: the port-id is added, unless the bus id
|
|
+ *    is already in use (-EEXIST).
|
|
+ * The transaction is restarted when xenbus_transaction_end() returns
|
|
+ * -EAGAIN.
|
|
+ *
|
|
+ * The string returned by xenbus_read() is freed after each port; it used
|
|
+ * to be leaked on every iteration.
|
|
+ * NOTE(review): this assumes portid_add() copies the bus id rather than
|
|
+ * keeping the pointer (phys_bus is used as an embedded string elsewhere
|
|
+ * in this driver) - confirm against portid_add().
|
|
+ */
|
|
+static void backend_changed(struct xenbus_watch *watch,
|
|
+			    const char **vec, unsigned int len)
|
|
+{
|
|
+	struct xenbus_transaction xbt;
|
|
+	int err;
|
|
+	int i;
|
|
+	char node[16];
|
|
+	char *busid;
|
|
+	struct vusb_port_id *portid = NULL;
|
|
+
|
|
+	usbif_t *usbif = container_of(watch, usbif_t, backend_watch);
|
|
+	struct xenbus_device *dev = usbif->xbdev;
|
|
+
|
|
+again:
|
|
+	err = xenbus_transaction_start(&xbt);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(dev, err, "starting transaction");
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	for (i = 1; i <= usbif->num_ports; i++) {
|
|
+		snprintf(node, sizeof(node), "port/%d", i);
|
|
+		busid = xenbus_read(xbt, dev->nodename, node, NULL);
|
|
+		if (IS_ERR(busid)) {
|
|
+			err = PTR_ERR(busid);
|
|
+			xenbus_dev_fatal(dev, err, "reading port/%d", i);
|
|
+			goto abort;
|
|
+		}
|
|
+
|
|
+		portid = find_portid(usbif->domid, usbif->handle, i);
|
|
+
|
|
+		if (strlen(busid) == 0) {
|
|
+			/*
|
|
+			 * remove portid, if the port is not connected;
|
|
+			 * a port that was never configured is ignored.
|
|
+			 */
|
|
+			if (portid) {
|
|
+				if (portid->is_connected)
|
|
+					xenbus_dev_fatal(dev, -EBUSY,
|
|
+						"can't remove port/%d, unbind first", i);
|
|
+				else
|
|
+					portid_remove(usbif->domid, usbif->handle, i);
|
|
+			}
|
|
+		} else if (portid) {
|
|
+			/* already configured: only the same bus id is acceptable */
|
|
+			if (strncmp(portid->phys_bus, busid, BUS_ID_SIZE))
|
|
+				xenbus_dev_fatal(dev, -EBUSY,
|
|
+					"can't add port/%d, remove first", i);
|
|
+		} else {
|
|
+			/*
|
|
+			 * add portid,
|
|
+			 * if the bus id is not used by another port.
|
|
+			 */
|
|
+			if (find_portid_by_busid(busid))
|
|
+				xenbus_dev_fatal(dev, -EEXIST,
|
|
+					"can't add port/%d, busid already used", i);
|
|
+			else
|
|
+				portid_add(busid, usbif->domid, usbif->handle, i);
|
|
+		}
|
|
+
|
|
+		/* xenbus_read() hands back an allocated string owned by us */
|
|
+		kfree(busid);
|
|
+	}
|
|
+
|
|
+	err = xenbus_transaction_end(xbt, 0);
|
|
+	if (err == -EAGAIN)
|
|
+		goto again;
|
|
+	if (err)
|
|
+		xenbus_dev_fatal(dev, err, "completing transaction");
|
|
+
|
|
+	return;
|
|
+
|
|
+abort:
|
|
+	xenbus_transaction_end(xbt, 1);
|
|
+
|
|
+	return;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Tear down the backend interface attached to @dev.
|
|
+ *
|
|
+ * Unregisters and frees the "port" watch if one was installed, removes
|
|
+ * the port-ids of all virtual ports, then disconnects and frees the
|
|
+ * interface.  Also used by usbback_probe() as its failure path.
|
|
+ *
|
|
+ * The watch teardown now sits inside the NULL check: previously usbif
|
|
+ * was dereferenced before being tested.
|
|
+ *
|
|
+ * Always returns 0.
|
|
+ */
|
|
+static int usbback_remove(struct xenbus_device *dev)
|
|
+{
|
|
+	usbif_t *usbif = dev->dev.driver_data;
|
|
+	int i;
|
|
+
|
|
+	if (usbif) {
|
|
+		if (usbif->backend_watch.node) {
|
|
+			unregister_xenbus_watch(&usbif->backend_watch);
|
|
+			kfree(usbif->backend_watch.node);
|
|
+			usbif->backend_watch.node = NULL;
|
|
+		}
|
|
+
|
|
+		/* remove all ports */
|
|
+		for (i = 1; i <= usbif->num_ports; i++)
|
|
+			portid_remove(usbif->domid, usbif->handle, i);
|
|
+		usbif_disconnect(usbif);
|
|
+		usbif_free(usbif);
|
|
+	}
|
|
+	dev->dev.driver_data = NULL;
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Probe a new "vusb" backend device.
|
|
+ *
|
|
+ * Allocates the interface for the frontend domain (the handle is the
|
|
+ * last path component of dev->otherend), reads and validates the
|
|
+ * "num-ports" and "usb-ver" nodes and switches to InitWait.
|
|
+ *
|
|
+ * Returns 0 on success or a negative errno; on failure the interface is
|
|
+ * released through usbback_remove().  Out-of-range "num-ports" and
|
|
+ * unknown "usb-ver" values now fail with -EINVAL; before, the positive
|
|
+ * xenbus_scanf() result (1) was reported and returned as the error.
|
|
+ */
|
|
+static int usbback_probe(struct xenbus_device *dev,
|
|
+			 const struct xenbus_device_id *id)
|
|
+{
|
|
+	usbif_t *usbif;
|
|
+	unsigned int handle;
|
|
+	int num_ports;
|
|
+	int usb_ver;
|
|
+	int err;
|
|
+
|
|
+	if (usb_disabled())
|
|
+		return -ENODEV;
|
|
+
|
|
+	handle = simple_strtoul(strrchr(dev->otherend, '/') + 1, NULL, 0);
|
|
+	usbif = usbif_alloc(dev->otherend_id, handle);
|
|
+	if (!usbif) {
|
|
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating backend interface");
|
|
+		return -ENOMEM;
|
|
+	}
|
|
+	usbif->xbdev = dev;
|
|
+	dev->dev.driver_data = usbif;
|
|
+
|
|
+	err = xenbus_scanf(XBT_NIL, dev->nodename,
|
|
+			   "num-ports", "%d", &num_ports);
|
|
+	if (err != 1) {
|
|
+		xenbus_dev_fatal(dev, err, "reading num-ports");
|
|
+		goto fail;
|
|
+	}
|
|
+	if (num_ports < 1 || num_ports > USB_MAXCHILDREN) {
|
|
+		/* err still holds the scanf count here, not an errno */
|
|
+		err = -EINVAL;
|
|
+		xenbus_dev_fatal(dev, err, "invalid num-ports");
|
|
+		goto fail;
|
|
+	}
|
|
+	usbif->num_ports = num_ports;
|
|
+
|
|
+	err = xenbus_scanf(XBT_NIL, dev->nodename,
|
|
+			   "usb-ver", "%d", &usb_ver);
|
|
+	if (err != 1) {
|
|
+		xenbus_dev_fatal(dev, err, "reading usb-ver");
|
|
+		goto fail;
|
|
+	}
|
|
+	switch (usb_ver) {
|
|
+	case USB_VER_USB11:
|
|
+	case USB_VER_USB20:
|
|
+		usbif->usb_ver = usb_ver;
|
|
+		break;
|
|
+	default:
|
|
+		err = -EINVAL;
|
|
+		xenbus_dev_fatal(dev, err, "invalid usb-ver");
|
|
+		goto fail;
|
|
+	}
|
|
+
|
|
+	err = xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+	if (err)
|
|
+		goto fail;
|
|
+
|
|
+	return 0;
|
|
+
|
|
+fail:
|
|
+	usbback_remove(dev);
|
|
+	return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Read the frontend's ring references and event channel from xenstore
|
|
+ * and map them into the interface with usbif_map().
|
|
+ *
|
|
+ * Returns 0 on success; on failure the error is reported with
|
|
+ * xenbus_dev_fatal() and returned.
|
|
+ */
|
|
+static int connect_rings(usbif_t *usbif)
|
|
+{
|
|
+	struct xenbus_device *dev = usbif->xbdev;
|
|
+	unsigned long urb_ring_ref;
|
|
+	unsigned long conn_ring_ref;
|
|
+	unsigned int evtchn;
|
|
+	int err;
|
|
+
|
|
+	err = xenbus_gather(XBT_NIL, dev->otherend,
|
|
+			    "urb-ring-ref", "%lu", &urb_ring_ref,
|
|
+			    "conn-ring-ref", "%lu", &conn_ring_ref,
|
|
+			    "event-channel", "%u", &evtchn, NULL);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(dev, err,
|
|
+				 "reading %s/ring-ref and event-channel",
|
|
+				 dev->otherend);
|
|
+		return err;
|
|
+	}
|
|
+
|
|
+	/* all three values are unsigned; print them with matching specifiers */
|
|
+	printk("usbback: urb-ring-ref %lu, conn-ring-ref %lu, event-channel %u\n",
|
|
+	       urb_ring_ref, conn_ring_ref, evtchn);
|
|
+
|
|
+	err = usbif_map(usbif, urb_ring_ref, conn_ring_ref, evtchn);
|
|
+	if (err) {
|
|
+		xenbus_dev_fatal(dev, err,
|
|
+				 "mapping urb-ring-ref %lu conn-ring-ref %lu port %u",
|
|
+				 urb_ring_ref, conn_ring_ref, evtchn);
|
|
+		return err;
|
|
+	}
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Xenbus callback invoked when the frontend changes state; drives the
|
|
+ * backend state machine.
|
|
+ *
|
|
+ * dev:            the backend xenbus device
|
|
+ * frontend_state: the state the frontend has just entered
|
|
+ */
|
|
+static void frontend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state frontend_state)
|
|
+{
|
|
+ usbif_t *usbif = dev->dev.driver_data;
|
|
+ int err;
|
|
+
|
|
+ switch (frontend_state) {
|
|
+ /* nothing to do for these frontend states */
|
|
+ case XenbusStateInitialised:
|
|
+ case XenbusStateReconfiguring:
|
|
+ case XenbusStateReconfigured:
|
|
+ break;
|
|
+
|
|
+ case XenbusStateInitialising:
|
|
+ /* frontend restarts while we are Closed: go back to InitWait */
|
|
+ if (dev->state == XenbusStateClosed) {
|
|
+ printk("%s: %s: prepare for reconnect\n",
|
|
+ __FUNCTION__, dev->nodename);
|
|
+ xenbus_switch_state(dev, XenbusStateInitWait);
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ case XenbusStateConnected:
|
|
+ /* already connected, nothing to set up again */
|
|
+ if (dev->state == XenbusStateConnected)
|
|
+ break;
|
|
+ /*
|
|
+ * Map the rings, start the worker thread and watch the "port"
|
|
+ * directory; only when all three succeed do we switch to
|
|
+ * Connected.  A failing step just leaves the state unchanged.
|
|
+ */
|
|
+ err = connect_rings(usbif);
|
|
+ if (err)
|
|
+ break;
|
|
+ err = start_xenusbd(usbif);
|
|
+ if (err)
|
|
+ break;
|
|
+ err = xenbus_watch_path2(dev, dev->nodename, "port",
|
|
+ &usbif->backend_watch, backend_changed);
|
|
+ if (err)
|
|
+ break;
|
|
+ xenbus_switch_state(dev, XenbusStateConnected);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ usbif_disconnect(usbif);
|
|
+ xenbus_switch_state(dev, XenbusStateClosing);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosed:
|
|
+ xenbus_switch_state(dev, XenbusStateClosed);
|
|
+ /* keep the device around while it is still marked online */
|
|
+ if (xenbus_dev_is_online(dev))
|
|
+ break;
|
|
+ /* fall through if not online */
|
|
+ case XenbusStateUnknown:
|
|
+ device_unregister(&dev->dev);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
|
|
+ frontend_state);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Xenbus device types handled by this backend; the empty string ends the list. */
|
|
+static const struct xenbus_device_id usbback_ids[] = {
|
|
+ { "vusb" },
|
|
+ { "" },
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Xenbus backend driver description, registered by usbback_xenbus_init()
|
|
+ * and unregistered by usbback_xenbus_exit().
|
|
+ */
|
|
+static struct xenbus_driver usbback_driver = {
|
|
+ .name = "vusb",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = usbback_ids,
|
|
+ .probe = usbback_probe,
|
|
+ .otherend_changed = frontend_changed,
|
|
+ .remove = usbback_remove,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Register usbback_driver as a xenbus backend.
|
|
+ * Returns the result of xenbus_register_backend().
|
|
+ */
|
|
+int __init usbback_xenbus_init(void)
|
|
+{
|
|
+	int rc = xenbus_register_backend(&usbback_driver);
|
|
+
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+/* Unregister the xenbus backend driver registered by usbback_xenbus_init(). */
|
|
+void __exit usbback_xenbus_exit(void)
|
|
+{
|
|
+	struct xenbus_driver *drv = &usbback_driver;
|
|
+
|
|
+	xenbus_unregister_driver(drv);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbfront/Makefile 2009-10-15 11:45:41.000000000 +0200
|
|
@@ -0,0 +1,11 @@
|
|
+obj-$(CONFIG_XEN_USB_FRONTEND) := xen-hcd.o
|
|
+
|
|
+xen-hcd-y := usbfront-hcd.o xenbus.o
|
|
+
|
|
+ifeq ($(CONFIG_XEN_USB_FRONTEND_HCD_STATS),y)
|
|
+EXTRA_CFLAGS += -DXENHCD_STATS
|
|
+endif
|
|
+
|
|
+ifeq ($(CONFIG_XEN_USB_FRONTEND_HCD_PM),y)
|
|
+EXTRA_CFLAGS += -DXENHCD_PM
|
|
+endif
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbfront/usbfront-dbg.c 2009-10-15 11:45:41.000000000 +0200
|
|
@@ -0,0 +1,100 @@
|
|
+/*
|
|
+ * usbfront-dbg.c
|
|
+ *
|
|
+ * Xen USB Virtual Host Controller - debugging
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * sysfs show handler for the "statistics" class-device attribute.
|
|
+ *
|
|
+ * Under info->lock, prints the bus name, device id, product description
|
|
+ * and HCD state and, when XENHCD_STATS is defined, the complete/unlink/
|
|
+ * ring_full counters.  Returns the number of characters written.
|
|
+ */
|
|
+static ssize_t show_statistics(struct class_device *class_dev, char *buf)
|
|
+{
|
|
+	struct usb_bus *bus = class_get_devdata(class_dev);
|
|
+	struct usb_hcd *hcd = bus->hcpriv;
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+	unsigned long irqflags;
|
|
+	unsigned written = 0;
|
|
+
|
|
+	spin_lock_irqsave(&info->lock, irqflags);
|
|
+
|
|
+	written += scnprintf(buf + written, PAGE_SIZE - written,
|
|
+			"bus %s, device %s\n"
|
|
+			"%s\n"
|
|
+			"xenhcd, hcd state %d\n",
|
|
+			hcd->self.controller->bus->name,
|
|
+			hcd->self.controller->bus_id,
|
|
+			hcd->product_desc,
|
|
+			hcd->state);
|
|
+
|
|
+#ifdef XENHCD_STATS
|
|
+	written += scnprintf(buf + written, PAGE_SIZE - written,
|
|
+			"complete %ld unlink %ld ring_full %ld\n",
|
|
+			info->stats.complete, info->stats.unlink,
|
|
+			info->stats.ring_full);
|
|
+#endif
|
|
+
|
|
+	spin_unlock_irqrestore(&info->lock, irqflags);
|
|
+
|
|
+	return written;
|
|
+}
|
|
+
|
|
+static CLASS_DEVICE_ATTR(statistics, S_IRUGO, show_statistics, NULL);
|
|
+
|
|
+/*
|
|
+ * Create the "statistics" attribute on the class device of the HCD's bus.
|
|
+ * The result of class_device_create_file() is not checked.
|
|
+ */
|
|
+static inline void create_debug_file(struct usbfront_info *info)
|
|
+{
|
|
+	class_device_create_file(info_to_hcd(info)->self.class_dev,
|
|
+				 &class_device_attr_statistics);
|
|
+}
|
|
+
|
|
+/* Remove the "statistics" attribute created by create_debug_file(). */
|
|
+static inline void remove_debug_file(struct usbfront_info *info)
|
|
+{
|
|
+	class_device_remove_file(info_to_hcd(info)->self.class_dev,
|
|
+				 &class_device_attr_statistics);
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbfront/usbfront-hcd.c 2009-10-15 11:45:41.000000000 +0200
|
|
@@ -0,0 +1,231 @@
|
|
+/*
|
|
+ * usbfront-hcd.c
|
|
+ *
|
|
+ * Xen USB Virtual Host Controller driver
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "usbfront.h"
|
|
+#include "usbfront-dbg.c"
|
|
+#include "usbfront-hub.c"
|
|
+#include "usbfront-q.c"
|
|
+
|
|
+/*
|
|
+ * Watchdog timer callback; @param is the struct usbfront_info pointer
|
|
+ * stored in the timer by xenhcd_setup().
|
|
+ *
|
|
+ * While the HC is running, marks the ring-watchdog action as done, gives
|
|
+ * back unlinked URBs and kicks the pending ones, all under info->lock.
|
|
+ */
|
|
+static void xenhcd_watchdog(unsigned long param)
|
|
+{
|
|
+	struct usbfront_info *info = (struct usbfront_info *) param;
|
|
+	struct usb_hcd *hcd = info_to_hcd(info);
|
|
+	unsigned long irqflags;
|
|
+
|
|
+	spin_lock_irqsave(&info->lock, irqflags);
|
|
+	if (likely(HC_IS_RUNNING(hcd->state))) {
|
|
+		timer_action_done(info, TIMER_RING_WATCHDOG);
|
|
+		xenhcd_giveback_unlinked_urbs(info);
|
|
+		xenhcd_kick_pending_urbs(info);
|
|
+	}
|
|
+	spin_unlock_irqrestore(&info->lock, irqflags);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * One-time HC initialisation (the hc_driver .reset hook): sets up the
|
|
+ * lock, the four URB lists and the watchdog timer.  Always returns 0.
|
|
+ */
|
|
+static int xenhcd_setup(struct usb_hcd *hcd)
|
|
+{
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+	struct timer_list *wd = &info->watchdog;
|
|
+
|
|
+	spin_lock_init(&info->lock);
|
|
+
|
|
+	INIT_LIST_HEAD(&info->pending_submit_list);
|
|
+	INIT_LIST_HEAD(&info->pending_unlink_list);
|
|
+	INIT_LIST_HEAD(&info->in_progress_list);
|
|
+	INIT_LIST_HEAD(&info->giveback_waiting_list);
|
|
+
|
|
+	/* the watchdog gets the info pointer back as its argument */
|
|
+	init_timer(wd);
|
|
+	wd->function = xenhcd_watchdog;
|
|
+	wd->data = (unsigned long) info;
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Start the HC (the hc_driver .start hook): selects the new polling
|
|
+ * scheme with root-hub polling off, marks the HC running and creates
|
|
+ * the debug attribute.  Always returns 0.
|
|
+ */
|
|
+static int xenhcd_run(struct usb_hcd *hcd)
|
|
+{
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+
|
|
+	hcd->uses_new_polling = 1;
|
|
+	hcd->poll_rh = 0;
|
|
+	hcd->state = HC_STATE_RUNNING;
|
|
+
|
|
+	create_debug_file(info);
|
|
+
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Stop the running HC (the hc_driver .stop hook): stops the watchdog,
|
|
+ * removes the debug attribute, then halts the HC and cancels and gives
|
|
+ * back every queued URB under info->lock.
|
|
+ */
|
|
+static void xenhcd_stop(struct usb_hcd *hcd)
|
|
+{
|
|
+	struct usbfront_info *priv = hcd_to_info(hcd);
|
|
+
|
|
+	del_timer_sync(&priv->watchdog);
|
|
+	remove_debug_file(priv);
|
|
+
|
|
+	spin_lock_irq(&priv->lock);
|
|
+	hcd->state = HC_STATE_HALT;
|
|
+	/* cancel all urbs, then hand the unlinked ones back */
|
|
+	xenhcd_cancel_all_enqueued_urbs(priv);
|
|
+	xenhcd_giveback_unlinked_urbs(priv);
|
|
+	spin_unlock_irq(&priv->lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called as .urb_enqueue().
|
|
+ *
|
|
+ * Allocates the private data for @urb and submits it under info->lock.
|
|
+ * A non-error return is a promise to give the URB back later.  Returns
|
|
+ * -ENOMEM if the private data cannot be allocated, otherwise the result
|
|
+ * of xenhcd_submit_urb(); the private data is freed again if the
|
|
+ * submission fails.
|
|
+ */
|
|
+static int xenhcd_urb_enqueue(struct usb_hcd *hcd,
|
|
+			      struct usb_host_endpoint *ep,
|
|
+			      struct urb *urb,
|
|
+			      gfp_t mem_flags)
|
|
+{
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+	struct urb_priv *priv;
|
|
+	unsigned long irqflags;
|
|
+	int rc;
|
|
+
|
|
+	spin_lock_irqsave(&info->lock, irqflags);
|
|
+
|
|
+	priv = alloc_urb_priv(urb);
|
|
+	if (priv) {
|
|
+		rc = xenhcd_submit_urb(info, priv);
|
|
+		if (rc != 0)
|
|
+			free_urb_priv(priv);
|
|
+	} else {
|
|
+		rc = -ENOMEM;
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&info->lock, irqflags);
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called as .urb_dequeue().
|
|
+ *
|
|
+ * Unlinks @urb under info->lock.  Returns 0 when the URB carries no
|
|
+ * private data (urb->hcpriv is NULL), otherwise the result of
|
|
+ * xenhcd_unlink_urb().
|
|
+ */
|
|
+static int xenhcd_urb_dequeue(struct usb_hcd *hcd,
|
|
+			      struct urb *urb)
|
|
+{
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+	struct urb_priv *priv;
|
|
+	unsigned long irqflags;
|
|
+	int rc = 0;
|
|
+
|
|
+	spin_lock_irqsave(&info->lock, irqflags);
|
|
+
|
|
+	priv = urb->hcpriv;
|
|
+	if (priv)
|
|
+		rc = xenhcd_unlink_urb(info, priv);
|
|
+
|
|
+	spin_unlock_irqrestore(&info->lock, irqflags);
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called from usb_get_current_frame_number(); almost no driver uses
|
|
+ * that function.
|
|
+ */
|
|
+static int xenhcd_get_frame(struct usb_hcd *hcd)
|
|
+{
|
|
+	/* no real frame counter is kept; always report frame 0 */
|
|
+	return 0;
|
|
+}
|
|
+
|
|
+static const char hcd_name[] = "xen_hcd";
|
|
+
|
|
+/*
|
|
+ * hc_driver operations for the USB 2.0 flavour of the virtual host
|
|
+ * controller (flags = HCD_USB2).  The bus suspend/resume hooks are only
|
|
+ * present when both XENHCD_PM and CONFIG_PM are defined.
|
|
+ */
|
|
+struct hc_driver xen_usb20_hc_driver = {
|
|
+ .description = hcd_name,
|
|
+ .product_desc = "Xen USB2.0 Virtual Host Controller",
|
|
+ .hcd_priv_size = sizeof(struct usbfront_info),
|
|
+ .flags = HCD_USB2,
|
|
+
|
|
+ /* basic HC lifecycle operations */
|
|
+ .reset = xenhcd_setup,
|
|
+ .start = xenhcd_run,
|
|
+ .stop = xenhcd_stop,
|
|
+
|
|
+ /* managing urb I/O */
|
|
+ .urb_enqueue = xenhcd_urb_enqueue,
|
|
+ .urb_dequeue = xenhcd_urb_dequeue,
|
|
+ .get_frame_number = xenhcd_get_frame,
|
|
+
|
|
+ /* root hub operations */
|
|
+ .hub_status_data = xenhcd_hub_status_data,
|
|
+ .hub_control = xenhcd_hub_control,
|
|
+#ifdef XENHCD_PM
|
|
+#ifdef CONFIG_PM
|
|
+ .bus_suspend = xenhcd_bus_suspend,
|
|
+ .bus_resume = xenhcd_bus_resume,
|
|
+#endif
|
|
+#endif
|
|
+};
|
|
+
|
|
+/*
|
|
+ * hc_driver operations for the USB 1.1 flavour of the virtual host
|
|
+ * controller (flags = HCD_USB11).  It uses the same callbacks as
|
|
+ * xen_usb20_hc_driver; only product_desc and flags differ.
|
|
+ */
|
|
+struct hc_driver xen_usb11_hc_driver = {
|
|
+ .description = hcd_name,
|
|
+ .product_desc = "Xen USB1.1 Virtual Host Controller",
|
|
+ .hcd_priv_size = sizeof(struct usbfront_info),
|
|
+ .flags = HCD_USB11,
|
|
+
|
|
+ /* basic HC lifecycle operations */
|
|
+ .reset = xenhcd_setup,
|
|
+ .start = xenhcd_run,
|
|
+ .stop = xenhcd_stop,
|
|
+
|
|
+ /* managing urb I/O */
|
|
+ .urb_enqueue = xenhcd_urb_enqueue,
|
|
+ .urb_dequeue = xenhcd_urb_dequeue,
|
|
+ .get_frame_number = xenhcd_get_frame,
|
|
+
|
|
+ /* root hub operations */
|
|
+ .hub_status_data = xenhcd_hub_status_data,
|
|
+ .hub_control = xenhcd_hub_control,
|
|
+#ifdef XENHCD_PM
|
|
+#ifdef CONFIG_PM
|
|
+ .bus_suspend = xenhcd_bus_suspend,
|
|
+ .bus_resume = xenhcd_bus_resume,
|
|
+#endif
|
|
+#endif
|
|
+};
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbfront/usbfront-hub.c 2009-10-15 11:45:41.000000000 +0200
|
|
@@ -0,0 +1,471 @@
|
|
+/*
|
|
+ * usbfront-hub.c
|
|
+ *
|
|
+ * Xen USB Virtual Host Controller - Root Hub Emulations
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * Set the virtual port connection status.
|
|
+ *
|
|
+ * For a powered port, derives the connection and speed bits of the port
|
|
+ * status from the speed of the attached virtual device and flags a
|
|
+ * connection change.  Unpowered ports and unexpected speed values are
|
|
+ * left untouched.  @portnum is 1-based and is not range-checked here.
|
|
+ */
|
|
+void set_connect_state(struct usbfront_info *info, int portnum)
|
|
+{
|
|
+	const int idx = portnum - 1;
|
|
+
|
|
+	if (!(info->ports[idx].status & USB_PORT_STAT_POWER))
|
|
+		return;
|
|
+
|
|
+	switch (info->devices[idx].speed) {
|
|
+	case USB_SPEED_UNKNOWN:
|
|
+		/* nothing attached: drop connection, enable, speed, suspend */
|
|
+		info->ports[idx].status &=
|
|
+			~(USB_PORT_STAT_CONNECTION |
|
|
+			  USB_PORT_STAT_ENABLE |
|
|
+			  USB_PORT_STAT_LOW_SPEED |
|
|
+			  USB_PORT_STAT_HIGH_SPEED |
|
|
+			  USB_PORT_STAT_SUSPEND);
|
|
+		break;
|
|
+	case USB_SPEED_LOW:
|
|
+		info->ports[idx].status |=
|
|
+			USB_PORT_STAT_CONNECTION | USB_PORT_STAT_LOW_SPEED;
|
|
+		break;
|
|
+	case USB_SPEED_FULL:
|
|
+		info->ports[idx].status |= USB_PORT_STAT_CONNECTION;
|
|
+		break;
|
|
+	case USB_SPEED_HIGH:
|
|
+		info->ports[idx].status |=
|
|
+			USB_PORT_STAT_CONNECTION | USB_PORT_STAT_HIGH_SPEED;
|
|
+		break;
|
|
+	default: /* error */
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	/* change bits live in the upper half of the status word */
|
|
+	info->ports[idx].status |= (USB_PORT_STAT_C_CONNECTION << 16);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Set the virtual device connection status.
|
|
+ *
|
|
+ * Records @speed for the device on root hub port @portnum (1-based) and
|
|
+ * updates the port status through set_connect_state().
|
|
+ * USB_SPEED_UNKNOWN means the device was disconnected.  Nothing happens
|
|
+ * for an out-of-range port, an unchanged speed or an unexpected speed
|
|
+ * value.
|
|
+ */
|
|
+void rhport_connect(struct usbfront_info *info,
|
|
+		    int portnum, enum usb_device_speed speed)
|
|
+{
|
|
+	int idx;
|
|
+
|
|
+	if (portnum < 1 || portnum > info->rh_numports)
|
|
+		return; /* invalid port number */
|
|
+
|
|
+	idx = portnum - 1;
|
|
+	if (info->devices[idx].speed == speed)
|
|
+		return;
|
|
+
|
|
+	switch (speed) {
|
|
+	case USB_SPEED_UNKNOWN: /* disconnect */
|
|
+		info->devices[idx].status = USB_STATE_NOTATTACHED;
|
|
+		break;
|
|
+	case USB_SPEED_LOW:
|
|
+	case USB_SPEED_FULL:
|
|
+	case USB_SPEED_HIGH:
|
|
+		info->devices[idx].status = USB_STATE_ATTACHED;
|
|
+		break;
|
|
+	default: /* error */
|
|
+		return;
|
|
+	}
|
|
+
|
|
+	info->devices[idx].speed = speed;
|
|
+	info->ports[idx].c_connection = 1;
|
|
+
|
|
+	set_connect_state(info, portnum);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * SetPortFeature(PORT_SUSPENDED)
|
|
+ *
|
|
+ * Marks port @portnum (1-based) and its device as suspended.
|
|
+ */
|
|
+void rhport_suspend(struct usbfront_info *info, int portnum)
|
|
+{
|
|
+	const int idx = portnum - 1;
|
|
+
|
|
+	info->ports[idx].status |= USB_PORT_STAT_SUSPEND;
|
|
+	info->devices[idx].status = USB_STATE_SUSPENDED;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * ClearPortFeature(PORT_SUSPENDED)
|
|
+ *
|
|
+ * If port @portnum (1-based) is suspended, flags it as resuming and
|
|
+ * arms a 20 ms timeout.
|
|
+ */
|
|
+void rhport_resume(struct usbfront_info *info, int portnum)
|
|
+{
|
|
+	const int idx = portnum - 1;
|
|
+
|
|
+	if (!(info->ports[idx].status & USB_PORT_STAT_SUSPEND))
|
|
+		return;
|
|
+
|
|
+	info->ports[idx].resuming = 1;
|
|
+	info->ports[idx].timeout = jiffies + msecs_to_jiffies(20);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * SetPortFeature(PORT_POWER)
|
|
+ *
|
|
+ * Powers port @portnum (1-based) if it is not powered yet; an attached
|
|
+ * device becomes POWERED and a pending connection change is applied to
|
|
+ * the port status.
|
|
+ */
|
|
+void rhport_power_on(struct usbfront_info *info, int portnum)
|
|
+{
|
|
+	const int idx = portnum - 1;
|
|
+
|
|
+	if (info->ports[idx].status & USB_PORT_STAT_POWER)
|
|
+		return; /* already powered */
|
|
+
|
|
+	info->ports[idx].status |= USB_PORT_STAT_POWER;
|
|
+	if (info->devices[idx].status != USB_STATE_NOTATTACHED)
|
|
+		info->devices[idx].status = USB_STATE_POWERED;
|
|
+	if (info->ports[idx].c_connection)
|
|
+		set_connect_state(info, portnum);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * ClearPortFeature(PORT_POWER)
|
|
+ * SetConfiguration(non-zero)
|
|
+ * Power_Source_Off
|
|
+ * Over-current
|
|
+ *
|
|
+ * Clears the whole status of a powered port @portnum (1-based); an
|
|
+ * attached device falls back to ATTACHED.
|
|
+ */
|
|
+void rhport_power_off(struct usbfront_info *info, int portnum)
|
|
+{
|
|
+	const int idx = portnum - 1;
|
|
+
|
|
+	if (!(info->ports[idx].status & USB_PORT_STAT_POWER))
|
|
+		return;
|
|
+
|
|
+	info->ports[idx].status = 0;
|
|
+	if (info->devices[idx].status != USB_STATE_NOTATTACHED)
|
|
+		info->devices[idx].status = USB_STATE_ATTACHED;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * ClearPortFeature(PORT_ENABLE)
|
|
+ *
|
|
+ * Clears the enable and suspend bits of port @portnum (1-based), stops
|
|
+ * any resume in progress and puts an attached device back to POWERED.
|
|
+ */
|
|
+void rhport_disable(struct usbfront_info *info, int portnum)
|
|
+{
|
|
+	const int idx = portnum - 1;
|
|
+
|
|
+	info->ports[idx].status &=
|
|
+		~(USB_PORT_STAT_ENABLE | USB_PORT_STAT_SUSPEND);
|
|
+	info->ports[idx].resuming = 0;
|
|
+
|
|
+	if (info->devices[idx].status != USB_STATE_NOTATTACHED)
|
|
+		info->devices[idx].status = USB_STATE_POWERED;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * SetPortFeature(PORT_RESET)
|
|
+ *
|
|
+ * Puts port @portnum (1-based) into reset: the enable and speed bits are
|
|
+ * cleared, the reset bit is set, an attached device goes back to
|
|
+ * ATTACHED and a 10 ms timeout is armed.
|
|
+ */
|
|
+void rhport_reset(struct usbfront_info *info, int portnum)
|
|
+{
|
|
+	const int idx = portnum - 1;
|
|
+
|
|
+	info->ports[idx].status &= ~(USB_PORT_STAT_ENABLE
|
|
+				     | USB_PORT_STAT_LOW_SPEED
|
|
+				     | USB_PORT_STAT_HIGH_SPEED);
|
|
+	info->ports[idx].status |= USB_PORT_STAT_RESET;
|
|
+
|
|
+	if (info->devices[idx].status != USB_STATE_NOTATTACHED)
|
|
+		info->devices[idx].status = USB_STATE_ATTACHED;
|
|
+
|
|
+	/* 10msec reset signaling */
|
|
+	info->ports[idx].timeout = jiffies + msecs_to_jiffies(10);
|
|
+}
|
|
+
|
|
+#ifdef XENHCD_PM
|
|
+#ifdef CONFIG_PM
|
|
+/*
|
|
+ * The hc_driver .bus_suspend hook: suspends every root hub port under
|
|
+ * info->lock, then stops the watchdog timer.
|
|
+ * Returns 0, or -ESHUTDOWN when HCD_FLAG_HW_ACCESSIBLE is not set (the
|
|
+ * timer is stopped in that case as well).
|
|
+ */
|
|
+static int xenhcd_bus_suspend(struct usb_hcd *hcd)
|
|
+{
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+	const int nports = info->rh_numports;
|
|
+	int port;
|
|
+	int rc = 0;
|
|
+
|
|
+	spin_lock_irq(&info->lock);
|
|
+	if (test_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags)) {
|
|
+		for (port = 1; port <= nports; port++)
|
|
+			rhport_suspend(info, port);
|
|
+	} else {
|
|
+		rc = -ESHUTDOWN;
|
|
+	}
|
|
+	spin_unlock_irq(&info->lock);
|
|
+
|
|
+	del_timer_sync(&info->watchdog);
|
|
+
|
|
+	return rc;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * The hc_driver .bus_resume hook: calls rhport_resume() for every root
|
|
+ * hub port under info->lock (ports that are not suspended are left
|
|
+ * alone by it).
|
|
+ * Returns 0, or -ESHUTDOWN when HCD_FLAG_HW_ACCESSIBLE is not set.
|
|
+ */
|
|
+static int xenhcd_bus_resume(struct usb_hcd *hcd)
|
|
+{
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+	const int nports = info->rh_numports;
|
|
+	int port;
|
|
+	int rc = 0;
|
|
+
|
|
+	spin_lock_irq(&info->lock);
|
|
+	if (test_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags)) {
|
|
+		for (port = 1; port <= nports; port++)
|
|
+			rhport_resume(info, port);
|
|
+	} else {
|
|
+		rc = -ESHUTDOWN;
|
|
+	}
|
|
+	spin_unlock_irq(&info->lock);
|
|
+
|
|
+	return rc;
|
|
+}
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Fill in the hub descriptor of the virtual root hub (used for the
|
|
+ * GetHubDescriptor request), sized for info->rh_numports ports.
|
|
+ */
|
|
+static void xenhcd_hub_descriptor(struct usbfront_info *info,
|
|
+				  struct usb_hub_descriptor *desc)
|
|
+{
|
|
+	const int nports = info->rh_numports;
|
|
+	/* size of each of the DeviceRemovable and PortPwrCtrlMask fields */
|
|
+	const u16 map_len = 1 + (nports / 8);
|
|
+
|
|
+	desc->bDescriptorType = 0x29;
|
|
+	desc->bDescLength = 7 + 2 * map_len;
|
|
+	desc->bNbrPorts = nports;
|
|
+	desc->bPwrOn2PwrGood = 10; /* EHCI says 20ms max */
|
|
+	desc->bHubContrCurrent = 0;
|
|
+
|
|
+	/* per-port over current reporting and no power switching */
|
|
+	desc->wHubCharacteristics = cpu_to_le16(0x000a);
|
|
+
|
|
+	/* bitmaps: DeviceRemovable all clear, PortPwrCtrlMask all set */
|
|
+	memset(&desc->bitmap[0], 0, map_len);
|
|
+	memset(&desc->bitmap[map_len], 0xff, map_len);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Port status change mask for hub_status_data: the USB_PORT_STAT_C_*
|
|
+ * change bits, shifted into the upper 16 bits of the port status word.
|
|
+ */
|
|
+#define PORT_C_MASK \
|
|
+ ((USB_PORT_STAT_C_CONNECTION \
|
|
+ | USB_PORT_STAT_C_ENABLE \
|
|
+ | USB_PORT_STAT_C_SUSPEND \
|
|
+ | USB_PORT_STAT_C_OVERCURRENT \
|
|
+ | USB_PORT_STAT_C_RESET) << 16)
|
|
+
|
|
+/*
|
|
+ * See USB 2.0 Spec, 11.12.4 Hub and Port Status Change Bitmap.
|
|
+ *
|
|
+ * Builds the change bitmap in @buf: bit 0 of the first byte is left
|
|
+ * clear and port N (1-based) is reported in bit N.  Returns the bitmap
|
|
+ * length in bytes if any port has a change bit set, and 0 if nothing
|
|
+ * changed or the HC is not running.
|
|
+ */
|
|
+static int xenhcd_hub_status_data(struct usb_hcd *hcd, char *buf)
|
|
+{
|
|
+	struct usbfront_info *info = hcd_to_info(hcd);
|
|
+	unsigned long irqflags;
|
|
+	int nports, nbytes, idx;
|
|
+	int changed = 0;
|
|
+
|
|
+	if (!HC_IS_RUNNING(hcd->state))
|
|
+		return 0;
|
|
+
|
|
+	/* initialize the status to no-changes */
|
|
+	nports = info->rh_numports;
|
|
+	nbytes = 1 + (nports / 8);
|
|
+	for (idx = 0; idx < nbytes; idx++)
|
|
+		buf[idx] = 0;
|
|
+
|
|
+	spin_lock_irqsave(&info->lock, irqflags);
|
|
+
|
|
+	/* check status for each port */
|
|
+	for (idx = 0; idx < nports; idx++) {
|
|
+		if (!(info->ports[idx].status & PORT_C_MASK))
|
|
+			continue;
|
|
+
|
|
+		if (idx < 7)
|
|
+			buf[0] |= 1 << (idx + 1);
|
|
+		else if (idx < 15)
|
|
+			buf[1] |= 1 << (idx - 7);
|
|
+		else if (idx < 23)
|
|
+			buf[2] |= 1 << (idx - 15);
|
|
+		else
|
|
+			buf[3] |= 1 << (idx - 23);
|
|
+		changed = 1;
|
|
+	}
|
|
+
|
|
+	spin_unlock_irqrestore(&info->lock, irqflags);
|
|
+
|
|
+	return changed ? nbytes : 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Handle a control request addressed to the virtual root hub.
|
|
+ *
|
|
+ * @hcd: host controller owning the root hub
|
|
+ * @typeReq: hub class request (ClearPortFeature, GetPortStatus, ...)
|
|
+ * @wValue: feature selector for the Set/Clear feature requests
|
|
+ * @wIndex: 1-based port number for the port requests
|
|
+ * @buf: reply buffer for the Get* requests
|
|
+ * @wLength: requested reply length (not consulted here)
|
|
+ *
|
|
+ * Returns 0 on success, -EPIPE for unsupported or malformed requests.
|
|
+ * If any port has a change bit set afterwards, the root hub status is
|
|
+ * polled via usb_hcd_poll_rh_status().
|
|
+ */
|
|
+static int xenhcd_hub_control(struct usb_hcd *hcd,
|
|
+ u16 typeReq,
|
|
+ u16 wValue,
|
|
+ u16 wIndex,
|
|
+ char *buf,
|
|
+ u16 wLength)
|
|
+{
|
|
+ struct usbfront_info *info = hcd_to_info(hcd);
|
|
+ int ports = info->rh_numports;
|
|
+ unsigned long flags;
|
|
+ int ret = 0;
|
|
+ int i;
|
|
+ int changed = 0;
|
|
+
|
|
+ spin_lock_irqsave(&info->lock, flags);
|
|
+ switch (typeReq) {
|
|
+ case ClearHubFeature:
|
|
+ /* ignore this request */
|
|
+ break;
|
|
+ case ClearPortFeature:
|
|
+ if (!wIndex || wIndex > ports)
|
|
+ goto error;
|
|
+
|
|
+ switch (wValue) {
|
|
+ case USB_PORT_FEAT_SUSPEND:
|
|
+ rhport_resume(info, wIndex);
|
|
+ break;
|
|
+ case USB_PORT_FEAT_POWER:
|
|
+ rhport_power_off(info, wIndex);
|
|
+ break;
|
|
+ case USB_PORT_FEAT_ENABLE:
|
|
+ rhport_disable(info, wIndex);
|
|
+ break;
|
|
+ case USB_PORT_FEAT_C_CONNECTION:
|
|
+ info->ports[wIndex-1].c_connection = 0;
|
|
+ /* falling through */
|
|
+ default:
|
|
+ /* status has 32 bits; a larger selector is an invalid shift */
|
|
+ if (wValue >= 32)
|
|
+ goto error;
|
|
+ info->ports[wIndex-1].status &= ~(1U << wValue);
|
|
+ break;
|
|
+ }
|
|
+ break;
|
|
+ case GetHubDescriptor:
|
|
+ xenhcd_hub_descriptor(info,
|
|
+ (struct usb_hub_descriptor *) buf);
|
|
+ break;
|
|
+ case GetHubStatus:
|
|
+ /* always local power supply good and no over-current exists. */
|
|
+ *(__le32 *)buf = cpu_to_le32(0);
|
|
+ break;
|
|
+ case GetPortStatus:
|
|
+ if (!wIndex || wIndex > ports)
|
|
+ goto error;
|
|
+
|
|
+ /* from here on wIndex is the 0-based index into ports[]/devices[] */
|
|
+ wIndex--;
|
|
+
|
|
+ /* resume completion */
|
|
+ if (info->ports[wIndex].resuming &&
|
|
+ time_after_eq(jiffies, info->ports[wIndex].timeout)) {
|
|
+ info->ports[wIndex].status |= (USB_PORT_STAT_C_SUSPEND << 16);
|
|
+ info->ports[wIndex].status &= ~USB_PORT_STAT_SUSPEND;
|
|
+ }
|
|
+
|
|
+ /* reset completion */
|
|
+ if ((info->ports[wIndex].status & USB_PORT_STAT_RESET) != 0 &&
|
|
+ time_after_eq(jiffies, info->ports[wIndex].timeout)) {
|
|
+ info->ports[wIndex].status |= (USB_PORT_STAT_C_RESET << 16);
|
|
+ info->ports[wIndex].status &= ~USB_PORT_STAT_RESET;
|
|
+
|
|
+ if (info->devices[wIndex].status != USB_STATE_NOTATTACHED) {
|
|
+ info->ports[wIndex].status |= USB_PORT_STAT_ENABLE;
|
|
+ info->devices[wIndex].status = USB_STATE_DEFAULT;
|
|
+ }
|
|
+
|
|
+ switch (info->devices[wIndex].speed) {
|
|
+ case USB_SPEED_LOW:
|
|
+ info->ports[wIndex].status |= USB_PORT_STAT_LOW_SPEED;
|
|
+ break;
|
|
+ case USB_SPEED_HIGH:
|
|
+ info->ports[wIndex].status |= USB_PORT_STAT_HIGH_SPEED;
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* low word: port status, high word: port change bits */
|
|
+ ((u16 *) buf)[0] = cpu_to_le16 (info->ports[wIndex].status);
|
|
+ ((u16 *) buf)[1] = cpu_to_le16 (info->ports[wIndex].status >> 16);
|
|
+ break;
|
|
+ case SetHubFeature:
|
|
+ /* not supported */
|
|
+ goto error;
|
|
+ case SetPortFeature:
|
|
+ if (!wIndex || wIndex > ports)
|
|
+ goto error;
|
|
+
|
|
+ switch (wValue) {
|
|
+ case USB_PORT_FEAT_POWER:
|
|
+ rhport_power_on(info, wIndex);
|
|
+ break;
|
|
+ case USB_PORT_FEAT_RESET:
|
|
+ rhport_reset(info, wIndex);
|
|
+ break;
|
|
+ case USB_PORT_FEAT_SUSPEND:
|
|
+ rhport_suspend(info, wIndex);
|
|
+ break;
|
|
+ default:
|
|
+ /* status has 32 bits; a larger selector is an invalid shift */
|
|
+ if (wValue >= 32)
|
|
+ goto error;
|
|
+ /* other features can only be set on a powered port */
|
|
+ if ((info->ports[wIndex-1].status & USB_PORT_STAT_POWER) != 0)
|
|
+ info->ports[wIndex-1].status |= (1U << wValue);
|
|
+ break;
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+error:
|
|
+ ret = -EPIPE;
|
|
+ }
|
|
+
|
|
+ /* check status for each port while info->lock is still held */
|
|
+ for (i = 0; i < ports; i++) {
|
|
+ if (info->ports[i].status & PORT_C_MASK)
|
|
+ changed = 1;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&info->lock, flags);
|
|
+
|
|
+ /*
|
|
+ * Polled only after dropping info->lock; presumably this re-enters
|
|
+ * xenhcd_hub_status_data(), which takes the lock itself.
|
|
+ */
|
|
+ if (changed)
|
|
+ usb_hcd_poll_rh_status(hcd);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbfront/usbfront-q.c 2009-10-15 11:45:41.000000000 +0200
|
|
@@ -0,0 +1,541 @@
|
|
+/*
|
|
+ * usbfront-q.c
|
|
+ *
|
|
+ * Xen USB Virtual Host Controller - RING operations.
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+/* kmem cache that alloc_urb_priv()/free_urb_priv() take struct urb_priv from */
|
|
+struct kmem_cache *xenhcd_urbp_cachep;
|
|
+
|
|
+/*
|
|
+ * Allocate zeroed private data for urb and link the two to each other.
|
|
+ * Both request ids start out as ~0 (no ring request issued yet).
|
|
+ * Allocates with GFP_ATOMIC; returns NULL if the allocation fails.
|
|
+ */
|
|
+static struct urb_priv *alloc_urb_priv(struct urb *urb)
|
|
+{
|
|
+ struct urb_priv *priv = kmem_cache_zalloc(xenhcd_urbp_cachep, GFP_ATOMIC);
|
|
+
|
|
+ if (priv) {
|
|
+ INIT_LIST_HEAD(&priv->list);
|
|
+ priv->req_id = ~0;
|
|
+ priv->unlink_req_id = ~0;
|
|
+ priv->urb = urb;
|
|
+ urb->hcpriv = priv;
|
|
+ }
|
|
+
|
|
+ return priv;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Detach urbp from its urb (clears urb->hcpriv) and return it to
|
|
+ * xenhcd_urbp_cachep. urbp must not be touched afterwards.
|
|
+ */
|
|
+static void free_urb_priv(struct urb_priv *urbp)
|
|
+{
|
|
+ urbp->urb->hcpriv = NULL;
|
|
+ kmem_cache_free(xenhcd_urbp_cachep, urbp);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Pop the first free shadow slot off the free list and return its index.
|
|
+ * Free slots are chained through their req.id field; BUGs if the list
|
|
+ * head is not a valid slot index.
|
|
+ */
|
|
+static inline int get_id_from_freelist(
|
|
+ struct usbfront_info *info)
|
|
+{
|
|
+ unsigned long slot = info->shadow_free;
|
|
+
|
|
+ BUG_ON(slot >= USB_URB_RING_SIZE);
|
|
+ /* the popped slot's link becomes the new list head */
|
|
+ info->shadow_free = info->shadow[slot].req.id;
|
|
+ info->shadow[slot].req.id = (unsigned int)0x0fff; /* debug */
|
|
+ return slot;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Push shadow slot id back onto the head of the free list and drop the
|
|
+ * slot's urb pointer. The link to the old head is stored in req.id.
|
|
+ */
|
|
+static inline void add_id_to_freelist(
|
|
+ struct usbfront_info *info, unsigned long id)
|
|
+{
|
|
+ info->shadow[id].req.id = info->shadow_free;
|
|
+ info->shadow[id].urb = NULL;
|
|
+ info->shadow_free = id;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Number of pages spanned by the length bytes starting at addr,
|
|
+ * counting partially covered first and last pages.
|
|
+ */
|
|
+static inline int count_pages(void *addr, int length)
|
|
+{
|
|
+ unsigned long first_byte = (unsigned long) addr;
|
|
+ /* round the end of the buffer up to the next page boundary */
|
|
+ unsigned long rounded_end = first_byte + length + PAGE_SIZE - 1;
|
|
+
|
|
+ return (rounded_end >> PAGE_SHIFT) - (first_byte >> PAGE_SHIFT);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Grant the backend access to the nr_pages pages spanned by the buffer
|
|
+ * of length bytes at addr, and describe each piece in seg[].
|
|
+ *
|
|
+ * One grant reference is claimed from gref_head per page; flags is handed
|
|
+ * to gnttab_grant_foreign_access_ref() unchanged. BUGs if the buffer is
|
|
+ * exhausted before nr_pages segments are filled, or if claiming a grant
|
|
+ * reference yields -ENOSPC.
|
|
+ */
|
|
+static inline void xenhcd_gnttab_map(struct usbfront_info *info,
|
|
+ void *addr, int length, grant_ref_t *gref_head,
|
|
+ struct usbif_request_segment *seg, int nr_pages, int flags)
|
|
+{
|
|
+ unsigned int remaining = length;
|
|
+ int n;
|
|
+
|
|
+ for (n = 0; n < nr_pages; n++) {
|
|
+ unsigned int page_off, chunk;
|
|
+ unsigned long pfn;
|
|
+ grant_ref_t gref;
|
|
+
|
|
+ BUG_ON(!remaining);
|
|
+
|
|
+ pfn = page_to_phys(virt_to_page(addr)) >> PAGE_SHIFT;
|
|
+ page_off = offset_in_page(addr);
|
|
+
|
|
+ /* a segment ends at the page boundary or with the buffer */
|
|
+ chunk = PAGE_SIZE - page_off;
|
|
+ if (chunk > remaining)
|
|
+ chunk = remaining;
|
|
+
|
|
+ gref = gnttab_claim_grant_reference(gref_head);
|
|
+ BUG_ON(gref == -ENOSPC);
|
|
+ gnttab_grant_foreign_access_ref(gref, info->xbdev->otherend_id, pfn, flags);
|
|
+
|
|
+ seg[n].gref = gref;
|
|
+ seg[n].offset = (uint16_t)page_off;
|
|
+ seg[n].length = (uint16_t)chunk;
|
|
+
|
|
+ addr += chunk;
|
|
+ remaining -= chunk;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Fill in the ring request req for urb and grant the backend access to
|
|
+ * the urb's buffers.
|
|
+ *
|
|
+ * Segments of the transfer buffer come first in req->seg[], followed (for
|
|
+ * isochronous pipes) by the segments of the iso frame descriptor array.
|
|
+ *
|
|
+ * Returns 0 on success, -E2BIG if more than USBIF_MAX_SEGMENTS_PER_REQUEST
|
|
+ * segments would be needed, -ENOMEM if no grant references could be
|
|
+ * allocated, -EINVAL for an unknown pipe type.
|
|
+ */
|
|
+static int map_urb_for_request(struct usbfront_info *info, struct urb *urb,
|
|
+ usbif_urb_request_t *req)
|
|
+{
|
|
+ grant_ref_t gref_head;
|
|
+ int have_grefs = 0; /* gref_head is only valid once this is set */
|
|
+ int nr_buff_pages = 0;
|
|
+ int nr_isodesc_pages = 0;
|
|
+ int ret = 0;
|
|
+
|
|
+ if (urb->transfer_buffer_length) {
|
|
+ nr_buff_pages = count_pages(urb->transfer_buffer, urb->transfer_buffer_length);
|
|
+
|
|
+ if (usb_pipeisoc(urb->pipe))
|
|
+ nr_isodesc_pages = count_pages(&urb->iso_frame_desc[0],
|
|
+ sizeof(struct usb_iso_packet_descriptor) * urb->number_of_packets);
|
|
+
|
|
+ if (nr_buff_pages + nr_isodesc_pages > USBIF_MAX_SEGMENTS_PER_REQUEST)
|
|
+ return -E2BIG;
|
|
+
|
|
+ ret = gnttab_alloc_grant_references(USBIF_MAX_SEGMENTS_PER_REQUEST, &gref_head);
|
|
+ if (ret) {
|
|
+ printk(KERN_ERR "usbfront: gnttab_alloc_grant_references() error\n");
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+ have_grefs = 1;
|
|
+
|
|
+ /* the backend only needs write access for IN transfers */
|
|
+ xenhcd_gnttab_map(info, urb->transfer_buffer,
|
|
+ urb->transfer_buffer_length,
|
|
+ &gref_head, &req->seg[0], nr_buff_pages,
|
|
+ usb_pipein(urb->pipe) ? 0 : GTF_readonly);
|
|
+ }
|
|
+
|
|
+ req->pipe = usbif_setportnum_pipe(urb->pipe, urb->dev->portnum);
|
|
+ req->transfer_flags = urb->transfer_flags;
|
|
+ req->buffer_length = urb->transfer_buffer_length;
|
|
+ req->nr_buffer_segs = nr_buff_pages;
|
|
+
|
|
+ switch (usb_pipetype(urb->pipe)) {
|
|
+ case PIPE_ISOCHRONOUS:
|
|
+ req->u.isoc.interval = urb->interval;
|
|
+ req->u.isoc.start_frame = urb->start_frame;
|
|
+ req->u.isoc.number_of_packets = urb->number_of_packets;
|
|
+ req->u.isoc.nr_frame_desc_segs = nr_isodesc_pages;
|
|
+ /* urb->number_of_packets must be > 0 */
|
|
+ if (unlikely(urb->number_of_packets <= 0))
|
|
+ BUG();
|
|
+ /*
|
|
+ * With an empty transfer buffer no references were allocated
|
|
+ * and nr_isodesc_pages is 0, so there is nothing to map and
|
|
+ * gref_head must not be touched.
|
|
+ */
|
|
+ if (have_grefs)
|
|
+ xenhcd_gnttab_map(info, &urb->iso_frame_desc[0],
|
|
+ sizeof(struct usb_iso_packet_descriptor) * urb->number_of_packets,
|
|
+ &gref_head, &req->seg[nr_buff_pages], nr_isodesc_pages, 0);
|
|
+ break;
|
|
+ case PIPE_INTERRUPT:
|
|
+ req->u.intr.interval = urb->interval;
|
|
+ break;
|
|
+ case PIPE_CONTROL:
|
|
+ if (urb->setup_packet)
|
|
+ memcpy(req->u.ctrl, urb->setup_packet, 8);
|
|
+ break;
|
|
+ case PIPE_BULK:
|
|
+ break;
|
|
+ default:
|
|
+ ret = -EINVAL;
|
|
+ }
|
|
+
|
|
+ /* hand back the references that were allocated but not claimed */
|
|
+ if (have_grefs)
|
|
+ gnttab_free_grant_references(gref_head);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * End the backend's access to every segment recorded in shadow's request
|
|
+ * (buffer segments plus, for isochronous pipes, the frame descriptor
|
|
+ * segments), then reset both segment counts to 0.
|
|
+ */
|
|
+static void xenhcd_gnttab_done(struct usb_shadow *shadow)
|
|
+{
|
|
+ int total = shadow->req.nr_buffer_segs;
|
|
+ int n;
|
|
+
|
|
+ if (usb_pipeisoc(shadow->req.pipe))
|
|
+ total += shadow->req.u.isoc.nr_frame_desc_segs;
|
|
+
|
|
+ for (n = 0; n < total; n++)
|
|
+ gnttab_end_foreign_access(shadow->req.seg[n].gref, 0UL);
|
|
+
|
|
+ shadow->req.nr_buffer_segs = 0;
|
|
+ shadow->req.u.isoc.nr_frame_desc_segs = 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Hand urb back to the USB core.
|
|
+ *
|
|
+ * The urb's private data is taken off its queue and freed first. status
|
|
+ * is stored into urb->status only while the urb is still -EINPROGRESS;
|
|
+ * an urb already marked -ECONNRESET or -ENOENT keeps its status and is
|
|
+ * counted as unlinked instead of completed.
|
|
+ *
|
|
+ * Called with info->lock held; the lock is dropped around
|
|
+ * usb_hcd_giveback_urb() and re-taken before returning.
|
|
+ */
|
|
+static void xenhcd_giveback_urb(struct usbfront_info *info, struct urb *urb, int status)
|
|
+__releases(info->lock)
|
|
+__acquires(info->lock)
|
|
+{
|
|
+ struct urb_priv *priv = (struct urb_priv *) urb->hcpriv;
|
|
+
|
|
+ list_del_init(&priv->list);
|
|
+ free_urb_priv(priv);
|
|
+
|
|
+ if (urb->status == -ECONNRESET || urb->status == -ENOENT) {
|
|
+ COUNT(info->stats.unlink);
|
|
+ } else {
|
|
+ if (urb->status == -EINPROGRESS)
|
|
+ urb->status = status;
|
|
+ COUNT(info->stats.complete);
|
|
+ }
|
|
+
|
|
+ spin_unlock(&info->lock);
|
|
+ usb_hcd_giveback_urb(info_to_hcd(info), urb, NULL);
|
|
+ spin_lock(&info->lock);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Put one request for urbp on the urb ring and notify the backend if
|
|
+ * the ring macros ask for it.
|
|
+ *
|
|
+ * If urbp->unlinked is set an unlink request referring to the original
|
|
+ * submit id is built, otherwise a submit request is built by
|
|
+ * map_urb_for_request(). The request is copied into the shadow slot
|
|
+ * whose index is used as the request id.
|
|
+ *
|
|
+ * Does not check for ring space itself; the callers in this file test
|
|
+ * RING_FULL() first.
|
|
+ *
|
|
+ * Returns 0 on success or the negative error of map_urb_for_request();
|
|
+ * in the error case the id is returned to the free list and nothing is
|
|
+ * pushed.
|
|
+ */
|
|
+static inline int xenhcd_do_request(struct usbfront_info *info, struct urb_priv *urbp)
|
|
+{
|
|
+ usbif_urb_request_t *req;
|
|
+ struct urb *urb = urbp->urb;
|
|
+ uint16_t id;
|
|
+ int notify;
|
|
+ int ret = 0;
|
|
+
|
|
+ req = RING_GET_REQUEST(&info->urb_ring, info->urb_ring.req_prod_pvt);
|
|
+ id = get_id_from_freelist(info);
|
|
+ req->id = id;
|
|
+
|
|
+ if (unlikely(urbp->unlinked)) {
|
|
+ /* tell the backend which earlier submit request to cancel */
|
|
+ req->u.unlink.unlink_id = urbp->req_id;
|
|
+ req->pipe = usbif_setunlink_pipe(usbif_setportnum_pipe(
|
|
+ urb->pipe, urb->dev->portnum));
|
|
+ urbp->unlink_req_id = id;
|
|
+ } else {
|
|
+ ret = map_urb_for_request(info, urb, req);
|
|
+ if (ret < 0) {
|
|
+ /* req_prod_pvt was not advanced, so the ring slot is simply reused */
|
|
+ add_id_to_freelist(info, id);
|
|
+ return ret;
|
|
+ }
|
|
+ urbp->req_id = id;
|
|
+ }
|
|
+
|
|
+ info->urb_ring.req_prod_pvt++;
|
|
+ /* keep a private copy: the response handler works from the shadow */
|
|
+ info->shadow[id].urb = urb;
|
|
+ info->shadow[id].req = *req;
|
|
+
|
|
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->urb_ring, notify);
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(info->irq);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Issue ring requests for the urbs on pending_submit_list, oldest first.
|
|
+ *
|
|
+ * Stops and arms the ring watchdog as soon as the ring is full. An urb
|
|
+ * whose request cannot be built is given back with -ESHUTDOWN. Once the
|
|
+ * list is empty the TIMER_SCAN_PENDING_URBS action is marked done.
|
|
+ */
|
|
+static void xenhcd_kick_pending_urbs(struct usbfront_info *info)
|
|
+{
|
|
+ struct urb_priv *next;
|
|
+
|
|
+ for (;;) {
|
|
+ if (list_empty(&info->pending_submit_list))
|
|
+ break;
|
|
+
|
|
+ if (RING_FULL(&info->urb_ring)) {
|
|
+ COUNT(info->stats.ring_full);
|
|
+ timer_action(info, TIMER_RING_WATCHDOG);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ next = list_entry(info->pending_submit_list.next, struct urb_priv, list);
|
|
+ if (xenhcd_do_request(info, next) == 0)
|
|
+ list_move_tail(&next->list, &info->in_progress_list);
|
|
+ else
|
|
+ xenhcd_giveback_urb(info, next->urb, -ESHUTDOWN);
|
|
+ }
|
|
+
|
|
+ timer_action_done(info, TIMER_SCAN_PENDING_URBS);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Give back every urb on in_progress_list and pending_submit_list.
|
|
+ *
|
|
+ * In-progress urbs that were not unlinked get their grants ended and are
|
|
+ * given back with -ESHUTDOWN (or their own status if already dequeued).
|
|
+ * The shadow slot of every in-progress entry has its urb pointer cleared.
|
|
+ *
|
|
+ * caller must lock info->lock
|
|
+ */
|
|
+static void xenhcd_cancel_all_enqueued_urbs(struct usbfront_info *info)
|
|
+{
|
|
+ struct urb_priv *urbp, *tmp;
|
|
+ int req_id;
|
|
+
|
|
+ list_for_each_entry_safe(urbp, tmp, &info->in_progress_list, list) {
|
|
+ /*
|
|
+ * xenhcd_giveback_urb() frees urbp, so the shadow index has to
|
|
+ * be read before the urb is given back.
|
|
+ */
|
|
+ req_id = urbp->req_id;
|
|
+ if (!urbp->unlinked) {
|
|
+ xenhcd_gnttab_done(&info->shadow[req_id]);
|
|
+ barrier();
|
|
+ if (urbp->urb->status == -EINPROGRESS) /* not dequeued */
|
|
+ xenhcd_giveback_urb(info, urbp->urb, -ESHUTDOWN);
|
|
+ else /* dequeued */
|
|
+ xenhcd_giveback_urb(info, urbp->urb, urbp->urb->status);
|
|
+ }
|
|
+ info->shadow[req_id].urb = NULL;
|
|
+ }
|
|
+
|
|
+ list_for_each_entry_safe(urbp, tmp, &info->pending_submit_list, list) {
|
|
+ xenhcd_giveback_urb(info, urbp->urb, -ESHUTDOWN);
|
|
+ }
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Give back every urb on giveback_waiting_list with its current status.
|
|
+ * Entries are removed from the list by xenhcd_giveback_urb(), which also
|
|
+ * temporarily drops info->lock.
|
|
+ *
|
|
+ * caller must lock info->lock
|
|
+ */
|
|
+static void xenhcd_giveback_unlinked_urbs(struct usbfront_info *info)
|
|
+{
|
|
+ struct urb_priv *urbp, *tmp;
|
|
+
|
|
+ list_for_each_entry_safe(urbp, tmp, &info->giveback_waiting_list, list) {
|
|
+ xenhcd_giveback_urb(info, urbp->urb, urbp->urb->status);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Submit urbp to the backend, or queue it on pending_submit_list when it
|
|
+ * cannot go onto the ring right now.
|
|
+ *
|
|
+ * Returns 0 if the urb was issued or queued, otherwise the negative error
|
|
+ * of xenhcd_do_request() (the urb is then on no list).
|
|
+ */
|
|
+static int xenhcd_submit_urb(struct usbfront_info *info, struct urb_priv *urbp)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ /* no room: park the urb and let the ring watchdog retry */
|
|
+ if (RING_FULL(&info->urb_ring)) {
|
|
+ list_add_tail(&urbp->list, &info->pending_submit_list);
|
|
+ COUNT(info->stats.ring_full);
|
|
+ timer_action(info, TIMER_RING_WATCHDOG);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* other urbs are already waiting: queue behind them */
|
|
+ if (!list_empty(&info->pending_submit_list)) {
|
|
+ list_add_tail(&urbp->list, &info->pending_submit_list);
|
|
+ timer_action(info, TIMER_SCAN_PENDING_URBS);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ err = xenhcd_do_request(info, urbp);
|
|
+ if (!err)
|
|
+ list_add_tail(&urbp->list, &info->in_progress_list);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Start unlinking urbp.
|
|
+ *
|
|
+ * An urb that never reached the ring is moved to giveback_waiting_list.
|
|
+ * Otherwise an unlink request is sent to the backend, or the urb is
|
|
+ * queued on pending_unlink_list if that is not possible right now.
|
|
+ *
|
|
+ * Returns -EBUSY if urbp is already marked unlinked, 0 if the unlink was
|
|
+ * issued or queued, otherwise the error of xenhcd_do_request().
|
|
+ */
|
|
+static int xenhcd_unlink_urb(struct usbfront_info *info, struct urb_priv *urbp)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ /* already unlinked? */
|
|
+ if (urbp->unlinked)
|
|
+ return -EBUSY;
|
|
+
|
|
+ urbp->unlinked = 1;
|
|
+
|
|
+ /* the urb is still in pending_submit queue */
|
|
+ if (urbp->req_id == ~0) {
|
|
+ list_move_tail(&urbp->list, &info->giveback_waiting_list);
|
|
+ timer_action(info, TIMER_SCAN_PENDING_URBS);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* send unlink request to backend */
|
|
+ if (RING_FULL(&info->urb_ring)) {
|
|
+ list_move_tail(&urbp->list, &info->pending_unlink_list);
|
|
+ COUNT(info->stats.ring_full);
|
|
+ timer_action(info, TIMER_RING_WATCHDOG);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ /* other unlinks are already waiting: queue behind them */
|
|
+ if (!list_empty(&info->pending_unlink_list)) {
|
|
+ list_move_tail(&urbp->list, &info->pending_unlink_list);
|
|
+ timer_action(info, TIMER_SCAN_PENDING_URBS);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ err = xenhcd_do_request(info, urbp);
|
|
+ if (!err)
|
|
+ list_move_tail(&urbp->list, &info->in_progress_list);
|
|
+
|
|
+ return err;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Consume the responses the backend has queued on the urb ring.
|
|
+ *
|
|
+ * For a response to a submit request the grants are ended and, if the
|
|
+ * shadow slot still refers to an urb, the result fields are copied into
|
|
+ * the urb and it is given back. Every response returns its shadow slot
|
|
+ * to the free list.
|
|
+ *
|
|
+ * Takes info->lock itself. Returns non-zero if
|
|
+ * RING_FINAL_CHECK_FOR_RESPONSES() reported more work.
|
|
+ *
|
|
+ * NOTE(review): res->id is used to index info->shadow[] without a range
|
|
+ * check - confirm the backend is trusted to send valid ids.
|
|
+ */
|
|
+static int xenhcd_urb_request_done(struct usbfront_info *info)
|
|
+{
|
|
+ usbif_urb_response_t *res;
|
|
+ struct urb *urb;
|
|
+
|
|
+ RING_IDX i, rp;
|
|
+ uint16_t id;
|
|
+ int more_to_do = 0;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&info->lock, flags);
|
|
+
|
|
+ rp = info->urb_ring.sring->rsp_prod;
|
|
+ rmb(); /* ensure we see queued responses up to "rp" */
|
|
+
|
|
+ for (i = info->urb_ring.rsp_cons; i != rp; i++) {
|
|
+ res = RING_GET_RESPONSE(&info->urb_ring, i);
|
|
+ id = res->id;
|
|
+
|
|
+ /* responses to unlink requests only release the shadow slot */
|
|
+ if (likely(usbif_pipesubmit(info->shadow[id].req.pipe))) {
|
|
+ xenhcd_gnttab_done(&info->shadow[id]);
|
|
+ urb = info->shadow[id].urb;
|
|
+ barrier();
|
|
+ /* urb is NULL if xenhcd_cancel_all_enqueued_urbs() cleared the slot */
|
|
+ if (likely(urb)) {
|
|
+ urb->actual_length = res->actual_length;
|
|
+ urb->error_count = res->error_count;
|
|
+ urb->start_frame = res->start_frame;
|
|
+ barrier();
|
|
+ xenhcd_giveback_urb(info, urb, res->status);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ add_id_to_freelist(info, id);
|
|
+ }
|
|
+ info->urb_ring.rsp_cons = i;
|
|
+
|
|
+ /* requests still outstanding: re-check; otherwise ask for an event on the next response */
|
|
+ if (i != info->urb_ring.req_prod_pvt)
|
|
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->urb_ring, more_to_do);
|
|
+ else
|
|
+ info->urb_ring.sring->rsp_event = i + 1;
|
|
+
|
|
+ spin_unlock_irqrestore(&info->lock, flags);
|
|
+
|
|
+ cond_resched();
|
|
+
|
|
+ return more_to_do;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Consume the connection-state responses queued on the conn ring.
|
|
+ *
|
|
+ * Each response reports a port number and speed, which are passed to
|
|
+ * rhport_connect(). The response's id is then put back on the ring as a
|
|
+ * fresh request. If any affected port has c_connection set, the root hub
|
|
+ * status is polled after info->lock has been dropped.
|
|
+ *
|
|
+ * Takes info->lock itself. Returns non-zero if
|
|
+ * RING_FINAL_CHECK_FOR_RESPONSES() reported more work.
|
|
+ */
|
|
+static int xenhcd_conn_notify(struct usbfront_info *info)
|
|
+{
|
|
+ usbif_conn_response_t *res;
|
|
+ usbif_conn_request_t *req;
|
|
+ RING_IDX rc, rp;
|
|
+ uint16_t id;
|
|
+ uint8_t portnum, speed;
|
|
+ int more_to_do = 0;
|
|
+ int notify;
|
|
+ int port_changed = 0;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&info->lock, flags);
|
|
+
|
|
+ rc = info->conn_ring.rsp_cons;
|
|
+ rp = info->conn_ring.sring->rsp_prod;
|
|
+ rmb(); /* ensure we see queued responses up to "rp" */
|
|
+
|
|
+ while (rc != rp) {
|
|
+ res = RING_GET_RESPONSE(&info->conn_ring, rc);
|
|
+ id = res->id;
|
|
+ portnum = res->portnum;
|
|
+ speed = res->speed;
|
|
+ info->conn_ring.rsp_cons = ++rc;
|
|
+
|
|
+ rhport_connect(info, portnum, speed);
|
|
+ /*
|
|
+ * portnum comes from the backend: only index ports[] when it
|
|
+ * names one of the root hub's ports (1..rh_numports).
|
|
+ */
|
|
+ if (portnum >= 1 && portnum <= info->rh_numports &&
|
|
+ info->ports[portnum-1].c_connection)
|
|
+ port_changed = 1;
|
|
+
|
|
+ barrier();
|
|
+
|
|
+ /* recycle the id as a new request so the backend can report again */
|
|
+ req = RING_GET_REQUEST(&info->conn_ring, info->conn_ring.req_prod_pvt);
|
|
+ req->id = id;
|
|
+ info->conn_ring.req_prod_pvt++;
|
|
+ }
|
|
+
|
|
+ if (rc != info->conn_ring.req_prod_pvt)
|
|
+ RING_FINAL_CHECK_FOR_RESPONSES(&info->conn_ring, more_to_do);
|
|
+ else
|
|
+ info->conn_ring.sring->rsp_event = rc + 1;
|
|
+
|
|
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->conn_ring, notify);
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(info->irq);
|
|
+
|
|
+ spin_unlock_irqrestore(&info->lock, flags);
|
|
+
|
|
+ if (port_changed)
|
|
+ usb_hcd_poll_rh_status(info_to_hcd(info));
|
|
+
|
|
+ cond_resched();
|
|
+
|
|
+ return more_to_do;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Thread function that processes ring responses.
|
|
+ *
|
|
+ * Sleeps on info->wq until waiting_resp is set (or the thread is asked
|
|
+ * to stop), clears the flag, then handles both the urb ring and the conn
|
|
+ * ring. The flag is set again when either handler reports more work, so
|
|
+ * the next wait returns immediately.
|
|
+ *
|
|
+ * @arg: the struct usbfront_info of the controller
|
|
+ * Returns 0 once kthread_should_stop() is true.
|
|
+ */
|
|
+int xenhcd_schedule(void *arg)
|
|
+{
|
|
+ struct usbfront_info *info = (struct usbfront_info *) arg;
|
|
+
|
|
+ while (!kthread_should_stop()) {
|
|
+ wait_event_interruptible(
|
|
+ info->wq,
|
|
+ info->waiting_resp || kthread_should_stop());
|
|
+ /* clear the flag before looking at the rings */
|
|
+ info->waiting_resp = 0;
|
|
+ smp_mb();
|
|
+
|
|
+ if (xenhcd_urb_request_done(info))
|
|
+ info->waiting_resp = 1;
|
|
+
|
|
+ if (xenhcd_conn_notify(info))
|
|
+ info->waiting_resp = 1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Flag that responses are waiting and wake the xenhcd_schedule() thread. */
|
|
+static void xenhcd_notify_work(struct usbfront_info *info)
|
|
+{
|
|
+ info->waiting_resp = 1;
|
|
+ wake_up(&info->wq);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Interrupt handler: does no ring processing itself, it only wakes the
|
|
+ * response thread. dev_id is the controller's struct usbfront_info.
|
|
+ * Always returns IRQ_HANDLED.
|
|
+ */
|
|
+irqreturn_t xenhcd_int(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+{
|
|
+ xenhcd_notify_work((struct usbfront_info *) dev_id);
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbfront/usbfront.h 2009-10-15 11:45:41.000000000 +0200
|
|
@@ -0,0 +1,203 @@
|
|
+/*
|
|
+ * usbfront.h
|
|
+ *
|
|
+ * This file is part of Xen USB Virtual Host Controller driver.
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#ifndef __XEN_USBFRONT_H__
|
|
+#define __XEN_USBFRONT_H__
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/usb.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/wait.h>
|
|
+#include <asm/io.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/io/usbif.h>
|
|
+
|
|
+/*
|
|
+ * usbfront needs USB HCD headers,
|
|
+ * drivers/usb/core/hcd.h and drivers/usb/core/hub.h,
|
|
+ * but, they are not in public include path.
|
|
+ */
|
|
+#include "../../usb/core/hcd.h"
|
|
+#include "../../usb/core/hub.h"
|
|
+
|
|
+/* The driver's usbfront_info is stored in the hcd_priv area of the usb_hcd. */
|
|
+static inline struct usbfront_info *hcd_to_info(struct usb_hcd *hcd)
|
|
+{
|
|
+ return (struct usbfront_info *) (hcd->hcd_priv);
|
|
+}
|
|
+
|
|
+/* Inverse of hcd_to_info(): recover the usb_hcd that embeds info as hcd_priv. */
|
|
+static inline struct usb_hcd *info_to_hcd(struct usbfront_info *info)
|
|
+{
|
|
+ return container_of((void *) info, struct usb_hcd, hcd_priv);
|
|
+}
|
|
+
|
|
+/* Private per-URB data */
|
|
+struct urb_priv {
|
|
+ struct list_head list; /* entry in one of the usbfront_info urb queues */
|
|
+ struct urb *urb; /* the urb this data belongs to */
|
|
+ int req_id; /* RING_REQUEST id for submitting */
|
|
+ int unlink_req_id; /* RING_REQUEST id for unlinking */
|
|
+ unsigned unlinked:1; /* dequeued marker */
|
|
+};
|
|
+
|
|
+/* virtual roothub port status */
|
|
+struct rhport_status {
|
|
+ u32 status; /* low 16 bits: port status, high 16 bits: change bits */
|
|
+ unsigned resuming:1; /* in resuming */
|
|
+ unsigned c_connection:1; /* connection changed */
|
|
+ unsigned long timeout; /* jiffies value compared against for resume/reset completion */
|
|
+};
|
|
+
|
|
+/* status of attached device (one entry per root hub port) */
|
|
+struct vdevice_status {
|
|
+ int devnum;
|
|
+ enum usb_device_state status;
|
|
+ enum usb_device_speed speed;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * RING request shadow: private copy of a request placed on the urb ring
|
|
+ * together with the urb it was issued for. While a slot is on the free
|
|
+ * list, req.id holds the index of the next free slot.
|
|
+ */
|
|
+struct usb_shadow {
|
|
+ usbif_urb_request_t req;
|
|
+ struct urb *urb;
|
|
+};
|
|
+
|
|
+/*
|
|
+ * statistics for tuning, monitoring, ...
|
|
+ * Updated through COUNT(), which only counts when XENHCD_STATS is defined.
|
|
+ */
|
|
+struct xenhcd_stats {
|
|
+ unsigned long ring_full; /* RING_FULL conditions */
|
|
+ unsigned long complete; /* normal givebacked urbs */
|
|
+ unsigned long unlink; /* unlinked urbs */
|
|
+};
|
|
+
|
|
+/* Per-controller state of the virtual host controller frontend. */
|
|
+struct usbfront_info {
|
|
+ /* Virtual Host Controller has 4 urb queues */
|
|
+ struct list_head pending_submit_list;
|
|
+ struct list_head pending_unlink_list;
|
|
+ struct list_head in_progress_list;
|
|
+ struct list_head giveback_waiting_list;
|
|
+
|
|
+ spinlock_t lock;
|
|
+
|
|
+ /* timer that kick pending and giveback waiting urbs */
|
|
+ struct timer_list watchdog;
|
|
+ unsigned long actions; /* bitmask of enum xenhcd_timer_action */
|
|
+
|
|
+ /* virtual root hub */
|
|
+ int rh_numports;
|
|
+ struct rhport_status ports[USB_MAXCHILDREN];
|
|
+ struct vdevice_status devices[USB_MAXCHILDREN];
|
|
+
|
|
+ /* Xen related stuff */
|
|
+ struct xenbus_device *xbdev;
|
|
+ int urb_ring_ref;
|
|
+ int conn_ring_ref;
|
|
+ usbif_urb_front_ring_t urb_ring;
|
|
+ usbif_conn_front_ring_t conn_ring;
|
|
+
|
|
+ unsigned int irq; /* event channel */
|
|
+ struct usb_shadow shadow[USB_URB_RING_SIZE];
|
|
+ unsigned long shadow_free; /* index of the first free shadow slot */
|
|
+
|
|
+ /* RING_RESPONSE thread */
|
|
+ struct task_struct *kthread;
|
|
+ wait_queue_head_t wq;
|
|
+ unsigned int waiting_resp;
|
|
+
|
|
+ /* xmit statistics */
|
|
+#ifdef XENHCD_STATS
|
|
+ struct xenhcd_stats stats;
|
|
+#define COUNT(x) do { (x)++; } while (0)
|
|
+#else
|
|
+#define COUNT(x) do {} while (0)
|
|
+#endif
|
|
+};
|
|
+
|
|
+/* watchdog delays, in jiffies, used by timer_action() for the two actions */
|
|
+#define XENHCD_RING_JIFFIES (HZ/200)
|
|
+#define XENHCD_SCAN_JIFFIES 1
|
|
+
|
|
+/* bit numbers in usbfront_info.actions */
|
|
+enum xenhcd_timer_action {
|
|
+ TIMER_RING_WATCHDOG,
|
|
+ TIMER_SCAN_PENDING_URBS,
|
|
+};
|
|
+
|
|
+/* Clear action's bit in info->actions so timer_action() can arm it again. */
|
|
+static inline void
|
|
+timer_action_done(struct usbfront_info *info, enum xenhcd_timer_action action)
|
|
+{
|
|
+ clear_bit(action, &info->actions);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Request that the watchdog timer runs for action.
|
|
+ *
|
|
+ * Does nothing if the timer is already pending with a
|
|
+ * TIMER_SCAN_PENDING_URBS action recorded. Otherwise, if action was not
|
|
+ * recorded yet, it is recorded and the timer is (re)armed with the delay
|
|
+ * belonging to that action.
|
|
+ */
|
|
+static inline void
|
|
+timer_action(struct usbfront_info *info, enum xenhcd_timer_action action)
|
|
+{
|
|
+ if (timer_pending(&info->watchdog)
|
|
+ && test_bit(TIMER_SCAN_PENDING_URBS, &info->actions))
|
|
+ return;
|
|
+
|
|
+ if (!test_and_set_bit(action, &info->actions)) {
|
|
+ unsigned long t;
|
|
+
|
|
+ switch (action) {
|
|
+ case TIMER_RING_WATCHDOG:
|
|
+ t = XENHCD_RING_JIFFIES;
|
|
+ break;
|
|
+ default:
|
|
+ t = XENHCD_SCAN_JIFFIES;
|
|
+ break;
|
|
+ }
|
|
+ mod_timer(&info->watchdog, t + jiffies);
|
|
+ }
|
|
+}
|
|
+
|
|
+extern struct kmem_cache *xenhcd_urbp_cachep;
|
|
+extern struct hc_driver xen_usb20_hc_driver;
|
|
+extern struct hc_driver xen_usb11_hc_driver;
|
|
+irqreturn_t xenhcd_int(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+void xenhcd_rhport_state_change(struct usbfront_info *info,
|
|
+ int port, enum usb_device_speed speed);
|
|
+int xenhcd_schedule(void *arg);
|
|
+
|
|
+#endif /* __XEN_USBFRONT_H__ */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/usbfront/xenbus.c 2010-03-31 09:56:02.000000000 +0200
|
|
@@ -0,0 +1,417 @@
|
|
+/*
|
|
+ * xenbus.c
|
|
+ *
|
|
+ * Xenbus interface for Xen USB Virtual Host Controller
|
|
+ *
|
|
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
|
|
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
+ *
|
|
+ * or, by your choice,
|
|
+ *
|
|
+ * When distributed separately from the Linux kernel or incorporated into
|
|
+ * other software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this software and associated documentation files (the "Software"), to
|
|
+ * deal in the Software without restriction, including without limitation the
|
|
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
+ * sell copies of the Software, and to permit persons to whom the Software is
|
|
+ * furnished to do so, subject to the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
+ * DEALINGS IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include "usbfront.h"
|
|
+
|
|
+#define GRANT_INVALID_REF 0
|
|
+
|
|
+static void destroy_rings(struct usbfront_info *info)
|
|
+{
|
|
+ if (info->irq)
|
|
+ unbind_from_irqhandler(info->irq, info);
|
|
+ info->irq = 0;
|
|
+
|
|
+ if (info->urb_ring_ref != GRANT_INVALID_REF) {
|
|
+ gnttab_end_foreign_access(info->urb_ring_ref,
|
|
+ (unsigned long)info->urb_ring.sring);
|
|
+ info->urb_ring_ref = GRANT_INVALID_REF;
|
|
+ }
|
|
+ info->urb_ring.sring = NULL;
|
|
+
|
|
+ if (info->conn_ring_ref != GRANT_INVALID_REF) {
|
|
+ gnttab_end_foreign_access(info->conn_ring_ref,
|
|
+ (unsigned long)info->conn_ring.sring);
|
|
+ info->conn_ring_ref = GRANT_INVALID_REF;
|
|
+ }
|
|
+ info->conn_ring.sring = NULL;
|
|
+}
|
|
+
|
|
+/*
+ * Allocate and grant the two shared ring pages (URB ring and connection
+ * ring) and bind the event channel towards the backend.
+ *
+ * On success returns 0 with info->urb_ring, info->conn_ring, both grant
+ * references and info->irq filled in.  On failure returns a negative errno;
+ * everything acquired up to that point is released through destroy_rings().
+ */
+static int setup_rings(struct xenbus_device *dev,
+		       struct usbfront_info *info)
+{
+	usbif_urb_sring_t *urb_sring;
+	usbif_conn_sring_t *conn_sring;
+	int err;
+
+	info->urb_ring_ref = GRANT_INVALID_REF;
+	info->conn_ring_ref = GRANT_INVALID_REF;
+
+	urb_sring = (usbif_urb_sring_t *)get_zeroed_page(GFP_NOIO|__GFP_HIGH);
+	if (!urb_sring) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating urb ring");
+		return -ENOMEM;
+	}
+	SHARED_RING_INIT(urb_sring);
+	FRONT_RING_INIT(&info->urb_ring, urb_sring, PAGE_SIZE);
+
+	err = xenbus_grant_ring(dev, virt_to_mfn(info->urb_ring.sring));
+	if (err < 0) {
+		/*
+		 * No grant reference was recorded, so destroy_rings() will
+		 * not touch this page: free it here.
+		 */
+		free_page((unsigned long)urb_sring);
+		info->urb_ring.sring = NULL;
+		goto fail;
+	}
+	info->urb_ring_ref = err;
+
+	conn_sring = (usbif_conn_sring_t *)get_zeroed_page(GFP_NOIO|__GFP_HIGH);
+	if (!conn_sring) {
+		xenbus_dev_fatal(dev, -ENOMEM, "allocating conn ring");
+		/* The urb ring is already granted: unwind it, do not leak it. */
+		err = -ENOMEM;
+		goto fail;
+	}
+	SHARED_RING_INIT(conn_sring);
+	FRONT_RING_INIT(&info->conn_ring, conn_sring, PAGE_SIZE);
+
+	err = xenbus_grant_ring(dev, virt_to_mfn(info->conn_ring.sring));
+	if (err < 0) {
+		free_page((unsigned long)conn_sring);
+		info->conn_ring.sring = NULL;
+		goto fail;
+	}
+	info->conn_ring_ref = err;
+
+	err = bind_listening_port_to_irqhandler(
+		dev->otherend_id, xenhcd_int, SA_SAMPLE_RANDOM, "usbif", info);
+	if (err <= 0) {
+		/*
+		 * A result of 0 is treated as a failure too; make sure the
+		 * caller never sees "success" after the rings were torn down.
+		 */
+		if (err == 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(dev, err,
+				 "bind_listening_port_to_irqhandler");
+		goto fail;
+	}
+	info->irq = err;
+
+	return 0;
+fail:
+	destroy_rings(info);
+	return err;
+}
|
|
+
|
|
+static int talk_to_backend(struct xenbus_device *dev,
|
|
+ struct usbfront_info *info)
|
|
+{
|
|
+ const char *message;
|
|
+ struct xenbus_transaction xbt;
|
|
+ int err;
|
|
+
|
|
+ err = setup_rings(dev, info);
|
|
+ if (err)
|
|
+ goto out;
|
|
+
|
|
+again:
|
|
+ err = xenbus_transaction_start(&xbt);
|
|
+ if (err) {
|
|
+ xenbus_dev_fatal(dev, err, "starting transaction");
|
|
+ goto destroy_ring;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "urb-ring-ref", "%u",
|
|
+ info->urb_ring_ref);
|
|
+ if (err) {
|
|
+ message = "writing urb-ring-ref";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "conn-ring-ref", "%u",
|
|
+ info->conn_ring_ref);
|
|
+ if (err) {
|
|
+ message = "writing conn-ring-ref";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_printf(xbt, dev->nodename, "event-channel", "%u",
|
|
+ irq_to_evtchn_port(info->irq));
|
|
+ if (err) {
|
|
+ message = "writing event-channel";
|
|
+ goto abort_transaction;
|
|
+ }
|
|
+
|
|
+ err = xenbus_transaction_end(xbt, 0);
|
|
+ if (err) {
|
|
+ if (err == -EAGAIN)
|
|
+ goto again;
|
|
+ xenbus_dev_fatal(dev, err, "completing transaction");
|
|
+ goto destroy_ring;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+abort_transaction:
|
|
+ xenbus_transaction_end(xbt, 1);
|
|
+ xenbus_dev_fatal(dev, err, "%s", message);
|
|
+
|
|
+destroy_ring:
|
|
+ destroy_rings(info);
|
|
+
|
|
+out:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int connect(struct xenbus_device *dev)
|
|
+{
|
|
+ struct usbfront_info *info = dev->dev.driver_data;
|
|
+
|
|
+ usbif_conn_request_t *req;
|
|
+ int i, idx, err;
|
|
+ int notify;
|
|
+ char name[TASK_COMM_LEN];
|
|
+ struct usb_hcd *hcd;
|
|
+
|
|
+ hcd = info_to_hcd(info);
|
|
+ snprintf(name, TASK_COMM_LEN, "xenhcd.%d", hcd->self.busnum);
|
|
+
|
|
+ err = talk_to_backend(dev, info);
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ info->kthread = kthread_run(xenhcd_schedule, info, name);
|
|
+ if (IS_ERR(info->kthread)) {
|
|
+ err = PTR_ERR(info->kthread);
|
|
+ info->kthread = NULL;
|
|
+ xenbus_dev_fatal(dev, err, "Error creating thread");
|
|
+ return err;
|
|
+ }
|
|
+ /* prepare ring for hotplug notification */
|
|
+ for (idx = 0, i = 0; i < USB_CONN_RING_SIZE; i++) {
|
|
+ req = RING_GET_REQUEST(&info->conn_ring, idx);
|
|
+ req->id = idx;
|
|
+ idx++;
|
|
+ }
|
|
+ info->conn_ring.req_prod_pvt = idx;
|
|
+
|
|
+ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->conn_ring, notify);
|
|
+ if (notify)
|
|
+ notify_remote_via_irq(info->irq);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
+ * Read "num-ports" and "usb-ver" from the backend's xenstore directory and
+ * allocate a usb_hcd for the matching hc_driver (USB 1.1 or USB 2.0).
+ * The usbfront_info obtained via hcd_to_info() gets its xenbus device,
+ * port count and shadow array initialised.
+ *
+ * Returns the new hcd, or an ERR_PTR(): -EINVAL when a xenstore value is
+ * missing or out of range, -ENOMEM when usb_create_hcd() fails.  Never
+ * returns NULL.
+ */
+static struct usb_hcd *create_hcd(struct xenbus_device *dev)
+{
+	int i;
+	int err = 0;
+	int num_ports;
+	int usb_ver;
+	struct usb_hcd *hcd = NULL;
+	struct usbfront_info *info = NULL;
+
+	err = xenbus_scanf(XBT_NIL, dev->otherend,
+			   "num-ports", "%d", &num_ports);
+	if (err != 1) {
+		xenbus_dev_fatal(dev, err, "reading num-ports");
+		return ERR_PTR(-EINVAL);
+	}
+	if (num_ports < 1 || num_ports > USB_MAXCHILDREN) {
+		/* err still holds the scanf match count here, not an errno. */
+		xenbus_dev_fatal(dev, -EINVAL, "invalid num-ports");
+		return ERR_PTR(-EINVAL);
+	}
+
+	err = xenbus_scanf(XBT_NIL, dev->otherend,
+			   "usb-ver", "%d", &usb_ver);
+	if (err != 1) {
+		xenbus_dev_fatal(dev, err, "reading usb-ver");
+		return ERR_PTR(-EINVAL);
+	}
+	switch (usb_ver) {
+	case USB_VER_USB11:
+		hcd = usb_create_hcd(&xen_usb11_hc_driver, &dev->dev, dev->dev.bus_id);
+		break;
+	case USB_VER_USB20:
+		hcd = usb_create_hcd(&xen_usb20_hc_driver, &dev->dev, dev->dev.bus_id);
+		break;
+	default:
+		xenbus_dev_fatal(dev, -EINVAL, "invalid usb-ver");
+		return ERR_PTR(-EINVAL);
+	}
+	if (!hcd) {
+		xenbus_dev_fatal(dev, -ENOMEM,
+				 "fail to allocate USB host controller");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	info = hcd_to_info(hcd);
+	info->xbdev = dev;
+	info->rh_numports = num_ports;
+
+	/*
+	 * Each shadow entry's req.id holds the index of the next entry; the
+	 * last one gets 0x0fff (presumably the end-of-list marker used by the
+	 * shadow allocator -- confirm against the code using shadow_free).
+	 */
+	for (i = 0; i < USB_URB_RING_SIZE; i++) {
+		info->shadow[i].req.id = i + 1;
+		info->shadow[i].urb = NULL;
+	}
+	info->shadow[USB_URB_RING_SIZE-1].req.id = 0x0fff;
+
+	return hcd;
+}
|
|
+
|
|
+/*
+ * xenbus probe callback: create the virtual host controller for @dev and
+ * register it with the USB core.  The usbfront_info is stored in
+ * dev->dev.driver_data for the other callbacks.
+ *
+ * Returns 0 on success, -ENODEV if USB support is disabled, or the error
+ * from create_hcd()/usb_add_hcd().
+ */
+static int usbfront_probe(struct xenbus_device *dev,
+			  const struct xenbus_device_id *id)
+{
+	int err;
+	struct usb_hcd *hcd;
+	struct usbfront_info *info;
+
+	if (usb_disabled())
+		return -ENODEV;
+
+	hcd = create_hcd(dev);
+	if (IS_ERR(hcd)) {
+		err = PTR_ERR(hcd);
+		xenbus_dev_fatal(dev, err,
+				 "fail to create usb host controller");
+		/*
+		 * hcd is an ERR_PTR, not an object: there is nothing to put,
+		 * and driver_data has not been set yet.
+		 */
+		return err;
+	}
+
+	info = hcd_to_info(hcd);
+	dev->dev.driver_data = info;
+
+	err = usb_add_hcd(hcd, 0, 0);
+	if (err != 0) {
+		xenbus_dev_fatal(dev, err,
+				 "fail to adding USB host controller");
+		goto fail;
+	}
+
+	init_waitqueue_head(&info->wq);
+
+	return 0;
+
+fail:
+	usb_put_hcd(hcd);
+	dev->dev.driver_data = NULL;
+	return err;
+}
|
|
+
|
|
+static void usbfront_disconnect(struct xenbus_device *dev)
|
|
+{
|
|
+ struct usbfront_info *info = dev->dev.driver_data;
|
|
+ struct usb_hcd *hcd = info_to_hcd(info);
|
|
+
|
|
+ usb_remove_hcd(hcd);
|
|
+ if (info->kthread) {
|
|
+ kthread_stop(info->kthread);
|
|
+ info->kthread = NULL;
|
|
+ }
|
|
+ xenbus_frontend_closed(dev);
|
|
+}
|
|
+
|
|
+static void backend_changed(struct xenbus_device *dev,
|
|
+ enum xenbus_state backend_state)
|
|
+{
|
|
+ switch (backend_state) {
|
|
+ case XenbusStateInitialising:
|
|
+ case XenbusStateInitialised:
|
|
+ case XenbusStateConnected:
|
|
+ case XenbusStateReconfiguring:
|
|
+ case XenbusStateReconfigured:
|
|
+ case XenbusStateUnknown:
|
|
+ case XenbusStateClosed:
|
|
+ break;
|
|
+
|
|
+ case XenbusStateInitWait:
|
|
+ if (dev->state != XenbusStateInitialising)
|
|
+ break;
|
|
+ if (!connect(dev))
|
|
+ xenbus_switch_state(dev, XenbusStateConnected);
|
|
+ break;
|
|
+
|
|
+ case XenbusStateClosing:
|
|
+ usbfront_disconnect(dev);
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
|
|
+ backend_state);
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int usbfront_remove(struct xenbus_device *dev)
|
|
+{
|
|
+ struct usbfront_info *info = dev->dev.driver_data;
|
|
+ struct usb_hcd *hcd = info_to_hcd(info);
|
|
+
|
|
+ destroy_rings(info);
|
|
+ usb_put_hcd(hcd);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static const struct xenbus_device_id usbfront_ids[] = {
|
|
+ { "vusb" },
|
|
+ { "" },
|
|
+};
|
|
+MODULE_ALIAS("xen:vusb");
|
|
+
|
|
+static struct xenbus_driver usbfront_driver = {
|
|
+ .name = "vusb",
|
|
+ .owner = THIS_MODULE,
|
|
+ .ids = usbfront_ids,
|
|
+ .probe = usbfront_probe,
|
|
+ .otherend_changed = backend_changed,
|
|
+ .remove = usbfront_remove,
|
|
+};
|
|
+
|
|
+/*
+ * Module init: create the slab cache for struct urb_priv and register the
+ * "vusb" xenbus frontend driver.
+ *
+ * Returns 0 on success, -ENODEV when not running on Xen, -ENOMEM if the
+ * cache cannot be created, or the xenbus_register_frontend() error.
+ */
+static int __init usbfront_init(void)
+{
+	int err;
+
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	xenhcd_urbp_cachep = kmem_cache_create("xenhcd_urb_priv",
+			sizeof(struct urb_priv), 0, 0, NULL, NULL);
+	if (!xenhcd_urbp_cachep) {
+		printk(KERN_ERR "usbfront failed to create kmem cache\n");
+		return -ENOMEM;
+	}
+
+	err = xenbus_register_frontend(&usbfront_driver);
+	if (err) {
+		/* The exit handler will not run: do not leak the cache. */
+		kmem_cache_destroy(xenhcd_urbp_cachep);
+		xenhcd_urbp_cachep = NULL;
+	}
+
+	return err;
+}
|
|
+
|
|
+/*
+ * Module exit: undo usbfront_init() in reverse order.  The driver is
+ * unregistered first so that no bound device is left (presumably still
+ * allocating urb_priv objects) when the cache is destroyed.
+ */
+static void __exit usbfront_exit(void)
+{
+	xenbus_unregister_driver(&usbfront_driver);
+	kmem_cache_destroy(xenhcd_urbp_cachep);
+}
|
|
+
|
|
+module_init(usbfront_init);
|
|
+module_exit(usbfront_exit);
|
|
+
|
|
+MODULE_AUTHOR("");
|
|
+MODULE_DESCRIPTION("Xen USB Virtual Host Controller driver (usbfront)");
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/util.c 2007-07-10 09:42:30.000000000 +0200
|
|
@@ -0,0 +1,65 @@
|
|
+#include <linux/mm.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <xen/driver_util.h>
|
|
+
|
|
+struct class *get_xen_class(void)
|
|
+{
|
|
+ static struct class *xen_class;
|
|
+
|
|
+ if (xen_class)
|
|
+ return xen_class;
|
|
+
|
|
+ xen_class = class_create(THIS_MODULE, "xen");
|
|
+ if (IS_ERR(xen_class)) {
|
|
+ printk("Failed to create xen sysfs class.\n");
|
|
+ xen_class = NULL;
|
|
+ }
|
|
+
|
|
+ return xen_class;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(get_xen_class);
|
|
+
|
|
+#ifdef CONFIG_X86
|
|
+static int f(pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
|
|
+{
|
|
+ /* apply_to_page_range() does all the hard work. */
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+struct vm_struct *alloc_vm_area(unsigned long size)
|
|
+{
|
|
+ struct vm_struct *area;
|
|
+
|
|
+ area = get_vm_area(size, VM_IOREMAP);
|
|
+ if (area == NULL)
|
|
+ return NULL;
|
|
+
|
|
+ /*
|
|
+ * This ensures that page tables are constructed for this region
|
|
+ * of kernel virtual address space and mapped into init_mm.
|
|
+ */
|
|
+ if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
|
|
+ area->size, f, NULL)) {
|
|
+ free_vm_area(area);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ /* Map page directories into every address space. */
|
|
+ vmalloc_sync_all();
|
|
+
|
|
+ return area;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(alloc_vm_area);
|
|
+
|
|
+void free_vm_area(struct vm_struct *area)
|
|
+{
|
|
+ struct vm_struct *ret;
|
|
+ ret = remove_vm_area(area->addr);
|
|
+ BUG_ON(ret != area);
|
|
+ kfree(area);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(free_vm_area);
|
|
+#endif /* CONFIG_X86 */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/xenbus/xenbus_backend_client.c 2010-09-23 15:39:04.000000000 +0200
|
|
@@ -0,0 +1,151 @@
|
|
+/******************************************************************************
|
|
+ * Backend-client-facing interface for the Xenbus driver. In other words, the
|
|
+ * interface between the Xenbus and the device-specific code in the backend
|
|
+ * driver.
|
|
+ *
|
|
+ * Copyright (C) 2005-2006 XenSource Ltd
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/err.h>
|
|
+#include <linux/delay.h>
|
|
+#include <xen/gnttab.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/driver_util.h>
|
|
+
|
|
+/* Based on Rusty Russell's skeleton driver's map_page */
|
|
+struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref)
|
|
+{
|
|
+ struct gnttab_map_grant_ref op;
|
|
+ struct vm_struct *area;
|
|
+
|
|
+ area = alloc_vm_area(PAGE_SIZE);
|
|
+ if (!area)
|
|
+ return ERR_PTR(-ENOMEM);
|
|
+
|
|
+ gnttab_set_map_op(&op, (unsigned long)area->addr, GNTMAP_host_map,
|
|
+ gnt_ref, dev->otherend_id);
|
|
+
|
|
+ gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &op);
|
|
+
|
|
+ if (op.status != GNTST_okay) {
|
|
+ free_vm_area(area);
|
|
+ xenbus_dev_fatal(dev, op.status,
|
|
+ "mapping in shared page %d from domain %d",
|
|
+ gnt_ref, dev->otherend_id);
|
|
+ BUG_ON(!IS_ERR(ERR_PTR(op.status)));
|
|
+ return ERR_PTR(-EINVAL);
|
|
+ }
|
|
+
|
|
+ /* Stuff the handle in an unused field */
|
|
+ area->phys_addr = (unsigned long)op.handle;
|
|
+
|
|
+ return area;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
|
|
+
|
|
+
|
|
+int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
|
|
+ grant_handle_t *handle, void *vaddr)
|
|
+{
|
|
+ struct gnttab_map_grant_ref op;
|
|
+ int ret;
|
|
+
|
|
+ gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
|
|
+ gnt_ref, dev->otherend_id);
|
|
+
|
|
+ gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &op);
|
|
+
|
|
+ if (op.status != GNTST_okay) {
|
|
+ xenbus_dev_fatal(dev, op.status,
|
|
+ "mapping in shared page %d from domain %d",
|
|
+ gnt_ref, dev->otherend_id);
|
|
+ ret = -EINVAL;
|
|
+ } else {
|
|
+ *handle = op.handle;
|
|
+ ret = 0;
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(xenbus_map_ring);
|
|
+
|
|
+
|
|
+/* Based on Rusty Russell's skeleton driver's unmap_page */
|
|
+int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *area)
|
|
+{
|
|
+ struct gnttab_unmap_grant_ref op;
|
|
+
|
|
+ gnttab_set_unmap_op(&op, (unsigned long)area->addr, GNTMAP_host_map,
|
|
+ (grant_handle_t)area->phys_addr);
|
|
+
|
|
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
|
|
+ BUG();
|
|
+
|
|
+ if (op.status == GNTST_okay)
|
|
+ free_vm_area(area);
|
|
+ else
|
|
+ xenbus_dev_error(dev, op.status,
|
|
+ "unmapping page at handle %d error %d",
|
|
+ (int16_t)area->phys_addr, op.status);
|
|
+
|
|
+ return op.status == GNTST_okay ? 0 : -EINVAL;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
|
|
+
|
|
+
|
|
+int xenbus_unmap_ring(struct xenbus_device *dev,
|
|
+ grant_handle_t handle, void *vaddr)
|
|
+{
|
|
+ struct gnttab_unmap_grant_ref op;
|
|
+
|
|
+ gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
|
|
+ handle);
|
|
+ if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
|
|
+ BUG();
|
|
+
|
|
+ if (op.status != GNTST_okay)
|
|
+ xenbus_dev_error(dev, op.status,
|
|
+ "unmapping page at handle %d error %d",
|
|
+ handle, op.status);
|
|
+
|
|
+ return op.status == GNTST_okay ? 0 : -EINVAL;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
|
|
+
|
|
+int xenbus_dev_is_online(struct xenbus_device *dev)
|
|
+{
|
|
+ int rc, val;
|
|
+
|
|
+ rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val);
|
|
+ if (rc != 1)
|
|
+ val = 0; /* no online node present */
|
|
+
|
|
+ return val;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(xenbus_dev_is_online);
|
|
+
|
|
+MODULE_LICENSE("Dual BSD/GPL");
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/xenbus/xenbus_dev.c 2011-01-03 12:43:21.000000000 +0100
|
|
@@ -0,0 +1,468 @@
|
|
+/*
|
|
+ * xenbus_dev.c
|
|
+ *
|
|
+ * Driver giving user-space access to the kernel's xenbus connection
|
|
+ * to xenstore.
|
|
+ *
|
|
+ * Copyright (c) 2005, Christian Limpach
|
|
+ * Copyright (c) 2005, Rusty Russell, IBM Corporation
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or
|
|
+ * modify it under the terms of the GNU General Public License version 2
|
|
+ * as published by the Free Software Foundation; or, when distributed
|
|
+ * separately from the Linux kernel or incorporated into other
|
|
+ * software packages, subject to the following license:
|
|
+ *
|
|
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
+ * of this source file (the "Software"), to deal in the Software without
|
|
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
+ * the following conditions:
|
|
+ *
|
|
+ * The above copyright notice and this permission notice shall be included in
|
|
+ * all copies or substantial portions of the Software.
|
|
+ *
|
|
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
+ * IN THE SOFTWARE.
|
|
+ */
|
|
+
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/uio.h>
|
|
+#include <linux/notifier.h>
|
|
+#include <linux/wait.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/poll.h>
|
|
+#include <linux/mutex.h>
|
|
+
|
|
+#include "xenbus_comms.h"
|
|
+
|
|
+#include <asm/uaccess.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/xenbus.h>
|
|
+#include <xen/xen_proc.h>
|
|
+#include <asm/hypervisor.h>
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include <xen/platform-compat.h>
|
|
+#endif
|
|
+
|
|
+#include <xen/public/xenbus.h>
|
|
+
|
|
+struct xenbus_dev_transaction {
|
|
+ struct list_head list;
|
|
+ struct xenbus_transaction handle;
|
|
+};
|
|
+
|
|
+struct read_buffer {
|
|
+ struct list_head list;
|
|
+ unsigned int cons;
|
|
+ unsigned int len;
|
|
+ char msg[];
|
|
+};
|
|
+
|
|
+struct xenbus_dev_data {
|
|
+ /* In-progress transaction. */
|
|
+ struct list_head transactions;
|
|
+
|
|
+ /* Active watches. */
|
|
+ struct list_head watches;
|
|
+
|
|
+ /* Partial request. */
|
|
+ unsigned int len;
|
|
+ union {
|
|
+ struct xsd_sockmsg msg;
|
|
+ char buffer[PAGE_SIZE];
|
|
+ } u;
|
|
+
|
|
+ /* Response queue. */
|
|
+ struct list_head read_buffers;
|
|
+ wait_queue_head_t read_waitq;
|
|
+
|
|
+ struct mutex reply_mutex;
|
|
+};
|
|
+
|
|
+static struct proc_dir_entry *xenbus_dev_intf;
|
|
+
|
|
+/*
+ * read() handler: hand queued reply bytes to user space.
+ *
+ * Waits (unless O_NONBLOCK is set) until at least one read_buffer is
+ * queued, then copies bytes from the queued buffers in order until @len
+ * bytes were copied or the queue is empty.  Fully consumed buffers are
+ * unlinked and freed.
+ *
+ * Returns the number of bytes copied; -ENODEV if xenstored is not ready;
+ * -EAGAIN for a non-blocking read with nothing queued; the
+ * wait_event_interruptible() result if the wait is interrupted; -EFAULT if
+ * not even the first byte could be stored to @ubuf.
+ */
+static ssize_t xenbus_dev_read(struct file *filp,
+			       char __user *ubuf,
+			       size_t len, loff_t *ppos)
+{
+	struct xenbus_dev_data *u = filp->private_data;
+	struct read_buffer *rb;
+	int i, ret;
+
+	if (!is_xenstored_ready())
+		return -ENODEV;
+
+	mutex_lock(&u->reply_mutex);
+	while (list_empty(&u->read_buffers)) {
+		/* Drop the mutex while sleeping so replies can be queued. */
+		mutex_unlock(&u->reply_mutex);
+		if (filp->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		ret = wait_event_interruptible(u->read_waitq,
+					       !list_empty(&u->read_buffers));
+		if (ret)
+			return ret;
+		mutex_lock(&u->reply_mutex);
+	}
+
+	ret = 0;
+	rb = list_entry(u->read_buffers.next, struct read_buffer, list);
+	for (i = 0; i < len;) {
+		/*
+		 * Stop on a fault without consuming the byte, so queued data
+		 * is not silently dropped.
+		 */
+		if (put_user(rb->msg[rb->cons], ubuf + i)) {
+			ret = -EFAULT;
+			break;
+		}
+		i++;
+		rb->cons++;
+		if (rb->cons == rb->len) {
+			list_del(&rb->list);
+			kfree(rb);
+			if (list_empty(&u->read_buffers))
+				break;
+			rb = list_entry(u->read_buffers.next,
+					struct read_buffer, list);
+		}
+	}
+	mutex_unlock(&u->reply_mutex);
+
+	/* A partial copy is still reported as a short read. */
+	return i ? i : ret;
+}
|
|
+
|
|
+static void queue_reply(struct xenbus_dev_data *u,
|
|
+ char *data, unsigned int len)
|
|
+{
|
|
+ struct read_buffer *rb;
|
|
+
|
|
+ if (len == 0)
|
|
+ return;
|
|
+
|
|
+ rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL);
|
|
+ BUG_ON(rb == NULL);
|
|
+
|
|
+ rb->cons = 0;
|
|
+ rb->len = len;
|
|
+
|
|
+ memcpy(rb->msg, data, len);
|
|
+
|
|
+ list_add_tail(&rb->list, &u->read_buffers);
|
|
+
|
|
+ wake_up(&u->read_waitq);
|
|
+}
|
|
+
|
|
+struct watch_adapter
|
|
+{
|
|
+ struct list_head list;
|
|
+ struct xenbus_watch watch;
|
|
+ struct xenbus_dev_data *dev_data;
|
|
+ char *token;
|
|
+};
|
|
+
|
|
+static void free_watch_adapter (struct watch_adapter *watch)
|
|
+{
|
|
+ kfree(watch->watch.node);
|
|
+ kfree(watch->token);
|
|
+ kfree(watch);
|
|
+}
|
|
+
|
|
+static void watch_fired(struct xenbus_watch *watch,
|
|
+ const char **vec,
|
|
+ unsigned int len)
|
|
+{
|
|
+ struct watch_adapter *adap =
|
|
+ container_of(watch, struct watch_adapter, watch);
|
|
+ struct xsd_sockmsg hdr;
|
|
+ const char *path, *token;
|
|
+ int path_len, tok_len, body_len, data_len = 0;
|
|
+
|
|
+ path = vec[XS_WATCH_PATH];
|
|
+ token = adap->token;
|
|
+
|
|
+ path_len = strlen(path) + 1;
|
|
+ tok_len = strlen(token) + 1;
|
|
+ if (len > 2)
|
|
+ data_len = vec[len] - vec[2] + 1;
|
|
+ body_len = path_len + tok_len + data_len;
|
|
+
|
|
+ hdr.type = XS_WATCH_EVENT;
|
|
+ hdr.len = body_len;
|
|
+
|
|
+ mutex_lock(&adap->dev_data->reply_mutex);
|
|
+ queue_reply(adap->dev_data, (char *)&hdr, sizeof(hdr));
|
|
+ queue_reply(adap->dev_data, (char *)path, path_len);
|
|
+ queue_reply(adap->dev_data, (char *)token, tok_len);
|
|
+ if (len > 2)
|
|
+ queue_reply(adap->dev_data, (char *)vec[2], data_len);
|
|
+ mutex_unlock(&adap->dev_data->reply_mutex);
|
|
+}
|
|
+
|
|
+static LIST_HEAD(watch_list);
|
|
+
|
|
+/*
+ * write() handler: accumulate a xenstore request from user space in
+ * u->u.buffer and process it once the header and the whole body arrived.
+ *
+ * XS_WATCH / XS_UNWATCH are handled locally by registering or removing a
+ * watch_adapter and queueing an "OK" reply.  Every other message type is
+ * passed to xenbus_dev_request_and_reply() and the reply is queued for
+ * xenbus_dev_read().  Transactions started or ended through this file are
+ * tracked on u->transactions.
+ *
+ * Returns @len on success (also while more data is still expected) or a
+ * negative errno.  Any error, and any completed request, resets the
+ * partial-request buffer.
+ */
+static ssize_t xenbus_dev_write(struct file *filp,
+				const char __user *ubuf,
+				size_t len, loff_t *ppos)
+{
+	struct xenbus_dev_data *u = filp->private_data;
+	struct xenbus_dev_transaction *trans = NULL;
+	uint32_t msg_type;
+	void *reply;
+	char *path, *token;
+	struct watch_adapter *watch, *tmp_watch;
+	int err, rc = len;
+
+	if (!is_xenstored_ready())
+		return -ENODEV;
+
+	/* Compare against the remaining space so a huge len cannot wrap. */
+	if (len > sizeof(u->u.buffer) - u->len) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	u->len += len;
+
+	/* The header must be complete before u->u.msg.len can be used. */
+	if (u->len < sizeof(u->u.msg))
+		return rc;
+
+	/* A body that does not fit the buffer could never be completed. */
+	if (u->u.msg.len > sizeof(u->u.buffer) - sizeof(u->u.msg)) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
+		return rc;
+
+	msg_type = u->u.msg.type;
+
+	switch (msg_type) {
+	case XS_WATCH:
+	case XS_UNWATCH: {
+		static const char *XS_RESP = "OK";
+		struct xsd_sockmsg hdr;
+
+		/* Body layout: "<path>\0<token>\0". */
+		path = u->u.buffer + sizeof(u->u.msg);
+		token = memchr(path, 0, u->u.msg.len);
+		if (token == NULL) {
+			rc = -EILSEQ;
+			goto out;
+		}
+		token++;
+		/* The token has to be terminated inside the message as well. */
+		if (memchr(token, 0, u->u.msg.len - (token - path)) == NULL) {
+			rc = -EILSEQ;
+			goto out;
+		}
+
+		if (msg_type == XS_WATCH) {
+			watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+			if (watch == NULL) {
+				rc = -ENOMEM;
+				goto out;
+			}
+			watch->watch.node = kmalloc(strlen(path)+1,
+						    GFP_KERNEL);
+			watch->token = kmalloc(strlen(token)+1, GFP_KERNEL);
+			if (watch->watch.node == NULL ||
+			    watch->token == NULL) {
+				/* kfree(NULL) is a no-op, so this is safe. */
+				free_watch_adapter(watch);
+				rc = -ENOMEM;
+				goto out;
+			}
+			strcpy((char *)watch->watch.node, path);
+			watch->watch.callback = watch_fired;
+			strcpy(watch->token, token);
+			watch->dev_data = u;
+
+			err = register_xenbus_watch(&watch->watch);
+			if (err) {
+				free_watch_adapter(watch);
+				rc = err;
+				goto out;
+			}
+
+			list_add(&watch->list, &u->watches);
+		} else {
+			list_for_each_entry_safe(watch, tmp_watch,
+						 &u->watches, list) {
+				if (!strcmp(watch->token, token) &&
+				    !strcmp(watch->watch.node, path))
+				{
+					unregister_xenbus_watch(&watch->watch);
+					list_del(&watch->list);
+					free_watch_adapter(watch);
+					break;
+				}
+			}
+		}
+
+		hdr.type = msg_type;
+		hdr.len = strlen(XS_RESP) + 1;
+		mutex_lock(&u->reply_mutex);
+		queue_reply(u, (char *)&hdr, sizeof(hdr));
+		queue_reply(u, (char *)XS_RESP, hdr.len);
+		mutex_unlock(&u->reply_mutex);
+		break;
+	}
+
+	default:
+		/* Allocate up front so a started transaction is never lost. */
+		if (msg_type == XS_TRANSACTION_START) {
+			trans = kmalloc(sizeof(*trans), GFP_KERNEL);
+			if (!trans) {
+				rc = -ENOMEM;
+				goto out;
+			}
+		}
+
+		reply = xenbus_dev_request_and_reply(&u->u.msg);
+		if (IS_ERR(reply)) {
+			kfree(trans);
+			rc = PTR_ERR(reply);
+			goto out;
+		}
+
+		if (msg_type == XS_TRANSACTION_START) {
+			trans->handle.id = simple_strtoul(reply, NULL, 0);
+			list_add(&trans->list, &u->transactions);
+		} else if (msg_type == XS_TRANSACTION_END) {
+			list_for_each_entry(trans, &u->transactions, list)
+				if (trans->handle.id == u->u.msg.tx_id)
+					break;
+			/*
+			 * The id is supplied by user space; an unknown one is
+			 * skipped instead of bringing the kernel down.
+			 */
+			if (&trans->list != &u->transactions) {
+				list_del(&trans->list);
+				kfree(trans);
+			}
+		}
+		mutex_lock(&u->reply_mutex);
+		queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+		queue_reply(u, (char *)reply, u->u.msg.len);
+		mutex_unlock(&u->reply_mutex);
+		kfree(reply);
+		break;
+	}
+
+ out:
+	u->len = 0;
+	return rc;
+}
|
|
+
|
|
+static int xenbus_dev_open(struct inode *inode, struct file *filp)
|
|
+{
|
|
+ struct xenbus_dev_data *u;
|
|
+
|
|
+ if (xen_store_evtchn == 0)
|
|
+ return -ENOENT;
|
|
+
|
|
+ nonseekable_open(inode, filp);
|
|
+
|
|
+ u = kzalloc(sizeof(*u), GFP_KERNEL);
|
|
+ if (u == NULL)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ INIT_LIST_HEAD(&u->transactions);
|
|
+ INIT_LIST_HEAD(&u->watches);
|
|
+ INIT_LIST_HEAD(&u->read_buffers);
|
|
+ init_waitqueue_head(&u->read_waitq);
|
|
+
|
|
+ mutex_init(&u->reply_mutex);
|
|
+
|
|
+ filp->private_data = u;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int xenbus_dev_release(struct inode *inode, struct file *filp)
|
|
+{
|
|
+ struct xenbus_dev_data *u = filp->private_data;
|
|
+ struct xenbus_dev_transaction *trans, *tmp;
|
|
+ struct watch_adapter *watch, *tmp_watch;
|
|
+ struct read_buffer *rb, *tmp_rb;
|
|
+
|
|
+ list_for_each_entry_safe(trans, tmp, &u->transactions, list) {
|
|
+ xenbus_transaction_end(trans->handle, 1);
|
|
+ list_del(&trans->list);
|
|
+ kfree(trans);
|
|
+ }
|
|
+
|
|
+ list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) {
|
|
+ unregister_xenbus_watch(&watch->watch);
|
|
+ list_del(&watch->list);
|
|
+ free_watch_adapter(watch);
|
|
+ }
|
|
+
|
|
+ list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) {
|
|
+ list_del(&rb->list);
|
|
+ kfree(rb);
|
|
+ }
|
|
+ kfree(u);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
+ * poll() handler: report the file readable while reply data is queued.
+ *
+ * The return value is an event bitmask, not an errno, so "xenstored not
+ * ready" is signalled as POLLERR | POLLHUP rather than -ENODEV (which,
+ * read as a mask, would set nearly every event bit).
+ */
+static unsigned int xenbus_dev_poll(struct file *file, poll_table *wait)
+{
+	struct xenbus_dev_data *u = file->private_data;
+
+	if (!is_xenstored_ready())
+		return POLLERR | POLLHUP;
+
+	poll_wait(file, &u->read_waitq, wait);
+	if (!list_empty(&u->read_buffers))
+		return POLLIN | POLLRDNORM;
+	return 0;
+}
|
|
+
|
|
+#ifdef HAVE_UNLOCKED_IOCTL
|
|
+static long xenbus_dev_ioctl(struct file *file,
|
|
+ unsigned int cmd, unsigned long data)
|
|
+{
|
|
+ extern int xenbus_conn(domid_t remote_dom, int *grant_ref,
|
|
+ evtchn_port_t *local_port);
|
|
+ void __user *udata = (void __user *) data;
|
|
+ int ret = -ENOTTY;
|
|
+
|
|
+ if (!is_initial_xendomain())
|
|
+ return -ENODEV;
|
|
+
|
|
+
|
|
+ switch (cmd) {
|
|
+ case IOCTL_XENBUS_ALLOC: {
|
|
+ xenbus_alloc_t xa;
|
|
+ int old;
|
|
+
|
|
+ old = atomic_cmpxchg(&xenbus_xsd_state,
|
|
+ XENBUS_XSD_UNCOMMITTED,
|
|
+ XENBUS_XSD_FOREIGN_INIT);
|
|
+ if (old != XENBUS_XSD_UNCOMMITTED)
|
|
+ return -EBUSY;
|
|
+
|
|
+ if (copy_from_user(&xa, udata, sizeof(xa))) {
|
|
+ ret = -EFAULT;
|
|
+ atomic_set(&xenbus_xsd_state, XENBUS_XSD_UNCOMMITTED);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ ret = xenbus_conn(xa.dom, &xa.grant_ref, &xa.port);
|
|
+ if (ret != 0) {
|
|
+ atomic_set(&xenbus_xsd_state, XENBUS_XSD_UNCOMMITTED);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (copy_to_user(udata, &xa, sizeof(xa))) {
|
|
+ ret = -EFAULT;
|
|
+ atomic_set(&xenbus_xsd_state, XENBUS_XSD_UNCOMMITTED);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ break;
|
|
+
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+#endif
|
|
+
|
|
+static const struct file_operations xenbus_dev_file_ops = {
|
|
+ .read = xenbus_dev_read,
|
|
+ .write = xenbus_dev_write,
|
|
+ .open = xenbus_dev_open,
|
|
+ .release = xenbus_dev_release,
|
|
+ .poll = xenbus_dev_poll,
|
|
+#ifdef HAVE_UNLOCKED_IOCTL
|
|
+ .unlocked_ioctl = xenbus_dev_ioctl
|
|
+#endif
|
|
+};
|
|
+
|
|
+int xenbus_dev_init(void)
|
|
+{
|
|
+ xenbus_dev_intf = create_xen_proc_entry("xenbus", 0400);
|
|
+ if (xenbus_dev_intf)
|
|
+ xenbus_dev_intf->proc_fops = &xenbus_dev_file_ops;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ b/drivers/xen/xenoprof/xenoprofile.c 2010-01-07 09:38:29.000000000 +0100
|
|
@@ -0,0 +1,587 @@
|
|
+/**
|
|
+ * @file xenoprofile.c
|
|
+ *
|
|
+ * @remark Copyright 2002 OProfile authors
|
|
+ * @remark Read the file COPYING
|
|
+ *
|
|
+ * @author John Levon <levon@movementarian.org>
|
|
+ *
|
|
+ * Modified by Aravind Menon and Jose Renato Santos for Xen
|
|
+ * These modifications are:
|
|
+ * Copyright (C) 2005 Hewlett-Packard Co.
|
|
+ *
|
|
+ * Separated out arch-generic part
|
|
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
|
|
+ * VA Linux Systems Japan K.K.
|
|
+ */
|
|
+
|
|
+#include <linux/init.h>
|
|
+#include <linux/notifier.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/oprofile.h>
|
|
+#include <linux/sysdev.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <xen/evtchn.h>
|
|
+#include <xen/xenoprof.h>
|
|
+#include <xen/driver_util.h>
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/xenoprof.h>
|
|
+#include "../../../drivers/oprofile/cpu_buffer.h"
|
|
+#include "../../../drivers/oprofile/event_buffer.h"
|
|
+
|
|
+#define MAX_XENOPROF_SAMPLES 16
|
|
+
|
|
+/* sample buffers shared with Xen */
|
|
+static xenoprof_buf_t **__read_mostly xenoprof_buf;
|
|
+/* Shared buffer area */
|
|
+static struct xenoprof_shared_buffer shared_buffer;
|
|
+
|
|
+/* Passive sample buffers shared with Xen */
|
|
+static xenoprof_buf_t **__read_mostly p_xenoprof_buf[MAX_OPROF_DOMAINS];
|
|
+/* Passive shared buffer area */
|
|
+static struct xenoprof_shared_buffer p_shared_buffer[MAX_OPROF_DOMAINS];
|
|
+
|
|
+static int xenoprof_start(void);
|
|
+static void xenoprof_stop(void);
|
|
+
|
|
+static int xenoprof_enabled = 0;
|
|
+static int xenoprof_is_primary = 0;
|
|
+static int active_defined;
|
|
+
|
|
+extern unsigned long backtrace_depth;
|
|
+
|
|
+/* Number of buffers in shared area (one per VCPU) */
|
|
+static int nbuf;
|
|
+/* Mappings of VIRQ_XENOPROF to irq number (per cpu) */
|
|
+static int ovf_irq[NR_CPUS];
|
|
+/* cpu model type string - copied from Xen on XENOPROF_init command */
|
|
+static char cpu_type[XENOPROF_CPU_TYPE_SIZE];
|
|
+
|
|
+#ifdef CONFIG_PM
|
|
+
|
|
+/* sysdev suspend hook: stop profiling if it is currently enabled. */
+static int xenoprof_suspend(struct sys_device * dev, pm_message_t state)
+{
+	if (xenoprof_enabled != 1)
+		return 0;
+
+	xenoprof_stop();
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * sysdev resume hook: restart profiling if it was enabled.  The result
+ * of xenoprof_start() is not propagated; 0 is always returned.
+ */
+static int xenoprof_resume(struct sys_device * dev)
+{
+	if (xenoprof_enabled != 1)
+		return 0;
+
+	xenoprof_start();
+	return 0;
+}
|
|
+
|
|
+
|
|
+/* sysdev class named "oprofile" carrying the suspend/resume hooks. */
+static struct sysdev_class oprofile_sysclass = {
+	set_kset_name("oprofile"),
+	.suspend	= xenoprof_suspend,
+	.resume		= xenoprof_resume,
+};
|
|
+
|
|
+
|
|
+/* The single sysdev instance (id 0) registered in oprofile_sysclass. */
+static struct sys_device device_oprofile = {
+	.cls	= &oprofile_sysclass,
+	.id	= 0,
+};
|
|
+
|
|
+
|
|
+/*
+ * Register the oprofile sysdev class and then its device.
+ * Returns 0 on success or the error of the first registration that fails.
+ */
+static int __init init_driverfs(void)
+{
+	int error = sysdev_class_register(&oprofile_sysclass);
+
+	if (error)
+		return error;
+
+	return sysdev_register(&device_oprofile);
+}
|
|
+
|
|
+
|
|
+/* Undo init_driverfs(): drop the device first, then its class. */
+static void exit_driverfs(void)
+{
+	sysdev_unregister(&device_oprofile);
+	sysdev_class_unregister(&oprofile_sysclass);
+}
|
|
+
|
|
+#else
|
|
+#define init_driverfs() do { } while (0)
|
|
+#define exit_driverfs() do { } while (0)
|
|
+#endif /* CONFIG_PM */
|
|
+
|
|
+static unsigned long long oprofile_samples;
|
|
+static unsigned long long p_oprofile_samples;
|
|
+
|
|
+static unsigned int pdomains;
|
|
+static struct xenoprof_passive passive_domains[MAX_OPROF_DOMAINS];
|
|
+
|
|
+/*
+ * Return non-zero when slot @tail of @buf's event log holds
+ * XENOPROF_ESCAPE_CODE in its eip field.
+ */
+static int xenoprof_is_escape(xenoprof_buf_t * buf, int tail)
+{
+	return buf->event_log[tail].eip == XENOPROF_ESCAPE_CODE;
+}
|
|
+
|
|
+/* Return the event field stored in slot @tail of @buf's event log. */
+static uint8_t xenoprof_get_event(xenoprof_buf_t * buf, int tail)
+{
+	return buf->event_log[tail].event;
+}
|
|
+
|
|
+/*
+ * Drain one sample buffer into oprofile.
+ *
+ * Walks the ring from event_tail up to the event_head value read on
+ * entry, wrapping at event_size, hands every entry to oprofile_add_pc()
+ * and finally stores the new tail back into the buffer.
+ *
+ * @is_passive selects which sample counter is bumped.  A TRACE_BEGIN
+ * escape entry is counted itself; once one has been seen, the remaining
+ * entries handled by this call are forwarded but no longer counted.
+ */
+static void xenoprof_add_pc(xenoprof_buf_t *buf, int is_passive)
+{
+	int head = buf->event_head;
+	int pos = buf->event_tail;
+	int size = buf->event_size;
+	int tracing = 0;
+	unsigned long long *counter =
+		is_passive ? &p_oprofile_samples : &oprofile_samples;
+
+	for (; pos != head; pos = (pos + 1 == size) ? 0 : pos + 1) {
+		if (xenoprof_is_escape(buf, pos) &&
+		    xenoprof_get_event(buf, pos) == XENOPROF_TRACE_BEGIN) {
+			tracing = 1;
+			oprofile_add_pc(ESCAPE_CODE,
+					buf->event_log[pos].mode,
+					CPU_TRACE_BEGIN);
+			(*counter)++;
+			continue;
+		}
+
+		oprofile_add_pc(buf->event_log[pos].eip,
+				buf->event_log[pos].mode,
+				buf->event_log[pos].event);
+		if (!tracing)
+			(*counter)++;
+	}
+
+	buf->event_tail = pos;
+}
|
|
+
|
|
+/*
+ * Drain the sample buffers of all passive domains.
+ *
+ * For each passive domain that has at least one non-empty buffer, a
+ * domain-switch record is emitted once before its samples are added.
+ * If emitting that record fails (oprofile_add_domain_switch() returns 0)
+ * processing stops.  When any samples were added, a final switch back
+ * to COORDINATOR_DOMAIN is recorded.
+ */
+static void xenoprof_handle_passive(void)
+{
+	int dom, vcpu;
+	int switched = 0;
+
+	for (dom = 0; dom < pdomains; dom++) {
+		int announced = 0;
+
+		for (vcpu = 0; vcpu < passive_domains[dom].nbuf; vcpu++) {
+			xenoprof_buf_t *buf = p_xenoprof_buf[dom][vcpu];
+
+			/* Nothing pending in this buffer. */
+			if (buf->event_head == buf->event_tail)
+				continue;
+
+			if (!announced) {
+				if (!oprofile_add_domain_switch(
+					    passive_domains[dom].domain_id))
+					goto done;
+				announced = 1;
+			}
+
+			xenoprof_add_pc(buf, 1);
+			switched = 1;
+		}
+	}
+
+done:
+	if (switched)
+		oprofile_add_domain_switch(COORDINATOR_DOMAIN);
+}
|
|
+
|
|
+/*
+ * VIRQ_XENOPROF handler.
+ *
+ * Drains the buffer indexed by the current processor id.  On the primary
+ * profiler it additionally drains the passive-domain buffers, using bit 0
+ * of a function-static word as a try-lock so that a caller finding the
+ * bit already set skips the passive pass.
+ */
+static irqreturn_t
+xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
+{
+	static unsigned long passive_busy;
+
+	xenoprof_add_pc(xenoprof_buf[smp_processor_id()], 0);
+
+	if (!xenoprof_is_primary || test_and_set_bit(0, &passive_busy))
+		return IRQ_HANDLED;
+
+	xenoprof_handle_passive();
+	smp_mb__before_clear_bit();
+	clear_bit(0, &passive_busy);
+
+	return IRQ_HANDLED;
+}
|
|
+
|
|
+
|
|
+/*
+ * Release the overflow irq of every online CPU that has one bound and
+ * mark its ovf_irq[] slot as unused (-1).
+ */
+static void unbind_virq(void)
+{
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (ovf_irq[cpu] < 0)
+			continue;
+
+		unbind_from_irqhandler(ovf_irq[cpu], NULL);
+		ovf_irq[cpu] = -1;
+	}
+}
|
|
+
|
|
+
|
|
+/*
+ * Bind VIRQ_XENOPROF to xenoprof_ovf_interrupt() on every online CPU and
+ * remember the resulting irq in ovf_irq[].  On the first failure all
+ * bindings made so far are released and the negative error is returned;
+ * otherwise returns 0.
+ */
+static int bind_virq(void)
+{
+	unsigned int cpu;
+	int irq;
+
+	for_each_online_cpu(cpu) {
+		irq = bind_virq_to_irqhandler(VIRQ_XENOPROF,
+					      cpu,
+					      xenoprof_ovf_interrupt,
+					      SA_INTERRUPT,
+					      "xenoprof",
+					      NULL);
+		if (irq >= 0) {
+			ovf_irq[cpu] = irq;
+			continue;
+		}
+
+		unbind_virq();
+		return irq;
+	}
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Size in bytes of an array holding @nbuf buffer pointers.  The arrays
+ * store xenoprof_buf_t pointers, not the buffers themselves, so the
+ * element size is that of a pointer.  Allocation and release must both
+ * use this helper so that they agree on kmalloc vs. vmalloc.
+ */
+static size_t buffer_array_bytes(unsigned int nbuf)
+{
+	return nbuf * sizeof(xenoprof_buf_t *);
+}
+
+/*
+ * Allocate an (uninitialised) array of @nbuf buffer pointers.  Arrays of
+ * up to one page come from kmalloc(), larger ones from vmalloc().
+ * Returns NULL on allocation failure.
+ */
+static xenoprof_buf_t **get_buffer_array(unsigned int nbuf)
+{
+	size_t size = buffer_array_bytes(nbuf);
+
+	if (size <= PAGE_SIZE)
+		return kmalloc(size, GFP_KERNEL);
+	return vmalloc(size);
+}
+
+/*
+ * Free an array obtained from get_buffer_array() with the same @nbuf,
+ * picking the release function that matches the allocator used.
+ */
+static void release_buffer_array(xenoprof_buf_t **buf, unsigned int nbuf)
+{
+	if (buffer_array_bytes(nbuf) <= PAGE_SIZE)
+		kfree(buf);
+	else
+		vfree(buf);
+}
|
|
+
|
|
+
|
|
+/*
+ * Unmap the shared buffer and free the pointer array of every currently
+ * registered passive domain, then reset the passive domain count to 0.
+ */
+static void unmap_passive_list(void)
+{
+	int dom;
+
+	for (dom = 0; dom < pdomains; dom++) {
+		xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[dom]);
+		release_buffer_array(p_xenoprof_buf[dom],
+				     passive_domains[dom].nbuf);
+	}
+
+	pdomains = 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * Map this domain's shared sample area and build xenoprof_buf[], the
+ * per-VCPU lookup table into it.
+ *
+ * Does nothing and returns 0 when shared_buffer.buffer is already set.
+ * Otherwise returns 0 on success, the arch mapping error, or -ENOMEM when
+ * the lookup table cannot be allocated (the mapping is undone in that
+ * case).  Each buffer's vcpu_id must be below the reported buffer count.
+ */
+static int map_xenoprof_buffer(int max_samples)
+{
+	struct xenoprof_get_buffer get_buffer;
+	int ret, idx;
+
+	if (shared_buffer.buffer)
+		return 0;
+
+	get_buffer.max_samples = max_samples;
+	ret = xenoprof_arch_map_shared_buffer(&get_buffer, &shared_buffer);
+	if (ret)
+		return ret;
+
+	nbuf = get_buffer.nbuf;
+	xenoprof_buf = get_buffer_array(nbuf);
+	if (!xenoprof_buf) {
+		xenoprof_arch_unmap_shared_buffer(&shared_buffer);
+		return -ENOMEM;
+	}
+
+	/* Buffers sit back to back, bufsize bytes apart. */
+	for (idx = 0; idx < nbuf; idx++) {
+		struct xenoprof_buf *slot = (struct xenoprof_buf *)
+			&shared_buffer.buffer[idx * get_buffer.bufsize];
+
+		BUG_ON(slot->vcpu_id >= nbuf);
+		xenoprof_buf[slot->vcpu_id] = slot;
+	}
+
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * oprofile ->setup hook.
+ *
+ * Maps the shared sample buffers, binds the overflow VIRQ on all online
+ * CPUs and, on the primary profiler, makes sure dom0 is an active domain,
+ * passes the backtrace depth to Xen, reserves the counters and programs
+ * the events.  Finally the VIRQ is enabled and xenoprof_enabled is set.
+ *
+ * Returns 0 on success or the first error encountered.  On error the irq
+ * bindings, the shared buffer mapping and the pointer table are all torn
+ * down.  The mapping has to go as well: map_xenoprof_buffer() returns
+ * early while shared_buffer.buffer is set, so leaving it mapped would make
+ * a later setup attempt run with the already freed xenoprof_buf table.
+ */
+static int xenoprof_setup(void)
+{
+	int ret;
+
+	ret = map_xenoprof_buffer(MAX_XENOPROF_SAMPLES);
+	if (ret)
+		return ret;
+
+	ret = bind_virq();
+	if (ret)
+		goto err_unmap;
+
+	if (xenoprof_is_primary) {
+		/* Define dom0 as an active domain if not done yet */
+		if (!active_defined) {
+			domid_t domid;
+
+			ret = HYPERVISOR_xenoprof_op(
+				XENOPROF_reset_active_list, NULL);
+			if (ret)
+				goto err_unbind;
+			domid = 0;
+			ret = HYPERVISOR_xenoprof_op(
+				XENOPROF_set_active, &domid);
+			if (ret)
+				goto err_unbind;
+			active_defined = 1;
+		}
+
+		if (backtrace_depth > 0) {
+			ret = HYPERVISOR_xenoprof_op(XENOPROF_set_backtrace,
+						     &backtrace_depth);
+			/* Not fatal: carry on without backtraces. */
+			if (ret)
+				backtrace_depth = 0;
+		}
+
+		ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
+		if (ret)
+			goto err_unbind;
+
+		xenoprof_arch_counter();
+		ret = HYPERVISOR_xenoprof_op(XENOPROF_setup_events, NULL);
+		if (ret)
+			goto err_unbind;
+	}
+
+	ret = HYPERVISOR_xenoprof_op(XENOPROF_enable_virq, NULL);
+	if (ret)
+		goto err_unbind;
+
+	xenoprof_enabled = 1;
+	return 0;
+
+err_unbind:
+	unbind_virq();
+err_unmap:
+	xenoprof_arch_unmap_shared_buffer(&shared_buffer);
+	release_buffer_array(xenoprof_buf, nbuf);
+	xenoprof_buf = NULL;
+	return ret;
+}
|
|
+
|
|
+
|
|
+/*
+ * oprofile ->shutdown hook: the reverse of xenoprof_setup().
+ *
+ * Clears xenoprof_enabled, disables the VIRQ in Xen, releases the
+ * counters (primary only), unbinds the irq handlers and then unmaps the
+ * shared buffers and frees the pointer tables.  Hypercall failures only
+ * trigger a warning.
+ */
+static void xenoprof_shutdown(void)
+{
+	xenoprof_enabled = 0;
+
+	WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_disable_virq, NULL));
+
+	if (xenoprof_is_primary) {
+		WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_release_counters,
+					       NULL));
+		active_defined = 0;
+	}
+
+	/* No handler may run once the buffers start going away. */
+	unbind_virq();
+
+	xenoprof_arch_unmap_shared_buffer(&shared_buffer);
+	if (xenoprof_is_primary)
+		unmap_passive_list();
+	release_buffer_array(xenoprof_buf, nbuf);
+}
|
|
+
|
|
+
|
|
+/*
+ * oprofile ->start hook.  The primary profiler first issues
+ * XENOPROF_start and returns its error on failure; in every other case
+ * xenoprof_arch_start() is called and 0 is returned.
+ */
+static int xenoprof_start(void)
+{
+	if (xenoprof_is_primary) {
+		int ret = HYPERVISOR_xenoprof_op(XENOPROF_start, NULL);
+
+		if (ret)
+			return ret;
+	}
+
+	xenoprof_arch_start();
+	return 0;
+}
|
|
+
|
|
+
|
|
+/*
+ * oprofile ->stop hook.  The primary profiler issues XENOPROF_stop
+ * (warning on failure); xenoprof_arch_stop() is called in all cases.
+ */
+static void xenoprof_stop(void)
+{
+	if (xenoprof_is_primary)
+		WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_stop, NULL));
+
+	xenoprof_arch_stop();
+}
|
|
+
|
|
+
|
|
+/*
+ * oprofile ->set_active hook: replace Xen's list of active domains.
+ *
+ * A no-op returning 0 on a non-primary profiler.  Returns -E2BIG when
+ * more than MAX_OPROF_DOMAINS ids are given and -EINVAL when an id does
+ * not survive conversion to domid_t.  Domain 0 is appended when it is
+ * not part of the list.  If anything fails after the initial reset, the
+ * active list is reset again.  active_defined ends up set only on
+ * success.
+ */
+static int xenoprof_set_active(int * active_domains,
+			       unsigned int adomains)
+{
+	int ret;
+	int idx;
+	int dom0_listed = 0;
+	domid_t domid;
+
+	if (!xenoprof_is_primary)
+		return 0;
+
+	if (adomains > MAX_OPROF_DOMAINS)
+		return -E2BIG;
+
+	ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list, NULL);
+	if (ret)
+		return ret;
+
+	for (idx = 0; idx < adomains; idx++) {
+		domid = active_domains[idx];
+		/* Reject ids that were truncated by the narrower type. */
+		if (domid != active_domains[idx]) {
+			ret = -EINVAL;
+			break;
+		}
+
+		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
+		if (ret)
+			break;
+
+		if (active_domains[idx] == 0)
+			dom0_listed = 1;
+	}
+
+	/* dom0 must always be active but may not be in the list */
+	if (!ret && !dom0_listed) {
+		domid = 0;
+		ret = HYPERVISOR_xenoprof_op(XENOPROF_set_active, &domid);
+	}
+
+	if (ret)
+		WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_reset_active_list,
+					       NULL));
+	active_defined = !ret;
+	return ret;
+}
|
|
+
|
|
+/*
+ * oprofile ->set_passive hook: replace the set of passive domains.
+ *
+ * A no-op returning 0 on a non-primary profiler; -E2BIG when more than
+ * MAX_OPROF_DOMAINS ids are given.  The previous passive list is reset
+ * in Xen and unmapped locally, then each domain in @p_domains is
+ * registered with a 2048 sample buffer request, its shared area mapped
+ * and a per-VCPU pointer table built for it.
+ *
+ * Returns 0 on success.  On failure every domain set up so far by this
+ * call is unmapped and freed again, pdomains stays 0 and the error is
+ * returned.
+ */
+static int xenoprof_set_passive(int * p_domains,
+				unsigned int pdoms)
+{
+	int ret;
+	unsigned int i, j;
+	struct xenoprof_buf *buf;
+
+	if (!xenoprof_is_primary)
+		return 0;
+
+	if (pdoms > MAX_OPROF_DOMAINS)
+		return -E2BIG;
+
+	ret = HYPERVISOR_xenoprof_op(XENOPROF_reset_passive_list, NULL);
+	if (ret)
+		return ret;
+	unmap_passive_list();
+
+	for (i = 0; i < pdoms; i++) {
+		passive_domains[i].domain_id = p_domains[i];
+		passive_domains[i].max_samples = 2048;
+		ret = xenoprof_arch_set_passive(&passive_domains[i],
+						&p_shared_buffer[i]);
+		if (ret)
+			goto out;
+
+		p_xenoprof_buf[i] = get_buffer_array(passive_domains[i].nbuf);
+		if (!p_xenoprof_buf[i]) {
+			/*
+			 * Slot i is already mapped: include it in the
+			 * cleanup.  Its NULL table is harmless to free.
+			 */
+			++i;
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		for (j = 0; j < passive_domains[i].nbuf; j++) {
+			buf = (struct xenoprof_buf *)
+				&p_shared_buffer[i].buffer[
+				j * passive_domains[i].bufsize];
+			BUG_ON(buf->vcpu_id >= passive_domains[i].nbuf);
+			p_xenoprof_buf[i][buf->vcpu_id] = buf;
+		}
+	}
+
+	pdomains = pdoms;
+	return 0;
+
+out:
+	/* Undo slots 0 .. i-1; index with j, the loop variable. */
+	for (j = 0; j < i; j++) {
+		xenoprof_arch_unmap_shared_buffer(&p_shared_buffer[j]);
+		release_buffer_array(p_xenoprof_buf[j],
+				     passive_domains[j].nbuf);
+	}
+
+	return ret;
+}
|
|
+
|
|
+
|
|
+/*
+ * Placeholder for the oprofile ->backtrace hook.  Backtraces are taken
+ * inside Xen, so reaching this function is treated as a bug.
+ */
+static void xenoprof_dummy_backtrace(struct pt_regs * const regs,
+				     unsigned int depth)
+{
+	/* this should never be called */
+	BUG();
+}
|
|
+
|
|
+
|
|
+/*
+ * Operations table copied to the caller's oprofile_operations by
+ * xenoprofile_init(); cpu_type is filled in there as well.
+ */
+static struct oprofile_operations xenoprof_ops = {
+#ifdef HAVE_XENOPROF_CREATE_FILES
+	.create_files	= xenoprof_create_files,
+#endif
+	.set_active	= xenoprof_set_active,
+	.set_passive	= xenoprof_set_passive,
+	.setup		= xenoprof_setup,
+	.shutdown	= xenoprof_shutdown,
+	.start		= xenoprof_start,
+	.stop		= xenoprof_stop,
+	.backtrace	= xenoprof_dummy_backtrace,
+};
|
|
+
|
|
+
|
|
+/* in order to get driverfs right */
|
|
+static int using_xenoprof;
|
|
+
|
|
+/*
+ * Probe Xen for profiling support and, if available, hand the xenoprof
+ * operations to oprofile.
+ *
+ * @ops: operations structure to fill in; only written on success.
+ *
+ * Returns the result of the XENOPROF_init hypercall (0 on success).
+ * On success the counters are initialised, the primary flag and CPU type
+ * string are taken from Xen, the sysdev entries are registered and all
+ * ovf_irq[] slots are marked unused.
+ */
+int __init xenoprofile_init(struct oprofile_operations * ops)
+{
+	/*
+	 * Zero the whole structure: the summary printk below reads
+	 * num_events even when the hypercall failed and left it untouched.
+	 */
+	struct xenoprof_init init = { .num_events = 0 };
+	unsigned int i;
+	int ret;
+
+	ret = HYPERVISOR_xenoprof_op(XENOPROF_init, &init);
+	if (!ret) {
+		xenoprof_arch_init_counter(&init);
+		xenoprof_is_primary = init.is_primary;
+
+		/* cpu_type is detected by Xen */
+		cpu_type[XENOPROF_CPU_TYPE_SIZE-1] = 0;
+		strncpy(cpu_type, init.cpu_type, XENOPROF_CPU_TYPE_SIZE - 1);
+		xenoprof_ops.cpu_type = cpu_type;
+
+		init_driverfs();
+		using_xenoprof = 1;
+		*ops = xenoprof_ops;
+
+		for (i=0; i<NR_CPUS; i++)
+			ovf_irq[i] = -1;
+
+		active_defined = 0;
+	}
+
+	printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n",
+	       __func__, ret, init.num_events, xenoprof_is_primary);
+	return ret;
+}
|
|
+
|
|
+
|
|
+/*
+ * Module teardown: remove the sysdev entries if xenoprofile_init()
+ * registered them, unmap the shared buffer and, on the primary profiler,
+ * drop the passive domain mappings and tell Xen to shut profiling down.
+ */
+void xenoprofile_exit(void)
+{
+	if (using_xenoprof)
+		exit_driverfs();
+
+	xenoprof_arch_unmap_shared_buffer(&shared_buffer);
+
+	if (!xenoprof_is_primary)
+		return;
+
+	unmap_passive_list();
+	WARN_ON(HYPERVISOR_xenoprof_op(XENOPROF_shutdown, NULL));
+}
|