about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/aoe/mkdevs.sh 1
-rw-r--r-- Documentation/aoe/mkshelf.sh 1
-rw-r--r-- Documentation/aoe/todo.txt 14
-rw-r--r-- Documentation/aoe/udev-install.sh 6
-rw-r--r-- drivers/block/aoe/aoe.h 23
-rw-r--r-- drivers/block/aoe/aoeblk.c 5
-rw-r--r-- drivers/block/aoe/aoecmd.c 108
-rw-r--r-- drivers/block/aoe/aoedev.c 8
-rw-r--r-- drivers/block/aoe/aoenet.c 8
9 files changed, 110 insertions, 64 deletions
diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh
index 6ce7070..ec5a6de 100644
--- a/Documentation/aoe/mkdevs.sh
+++ b/Documentation/aoe/mkdevs.sh
@@ -5,6 +5,7 @@ n_partitions=${n_partitions:-16}
if test "$#" != "1"; then
echo "Usage: sh `basename $0` {dir}" 1>&2
+ echo " n_partitions=16 sh `basename $0` {dir}" 1>&2
exit 1
fi
dir=$1
diff --git a/Documentation/aoe/mkshelf.sh b/Documentation/aoe/mkshelf.sh
index 4093283..8bacf9f 100644
--- a/Documentation/aoe/mkshelf.sh
+++ b/Documentation/aoe/mkshelf.sh
@@ -2,6 +2,7 @@
if test "$#" != "2"; then
echo "Usage: sh `basename $0` {dir} {shelfaddress}" 1>&2
+ echo " n_partitions=16 sh `basename $0` {dir} {shelfaddress}" 1>&2
exit 1
fi
n_partitions=${n_partitions:-16}
diff --git a/Documentation/aoe/todo.txt b/Documentation/aoe/todo.txt
new file mode 100644
index 0000000..7fee1e1
--- /dev/null
+++ b/Documentation/aoe/todo.txt
@@ -0,0 +1,14 @@
+There is a potential for deadlock when allocating a struct sk_buff for
+data that needs to be written out to aoe storage. If the data is
+being written from a dirty page in order to free that page, and if
+there are no other pages available, then deadlock may occur when a
+free page is needed for the sk_buff allocation. This situation has
+not been observed, but it would be nice to eliminate any potential for
+deadlock under memory pressure.
+
+Because ATA over Ethernet is not fragmented by the kernel's IP code,
+the destructor member of the struct sk_buff is available to the aoe
+driver. By using a mempool for allocating all but the first few
+sk_buffs, and by registering a destructor, we should be able to
+efficiently allocate sk_buffs without introducing any potential for
+deadlock.
diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh
index 861a27f..6449911 100644
--- a/Documentation/aoe/udev-install.sh
+++ b/Documentation/aoe/udev-install.sh
@@ -23,4 +23,8 @@ fi
# /etc/udev/rules.d
#
rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
-test "$rules_d" && sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
+if test -z "$rules_d" || test ! -d "$rules_d"; then
+ echo "$me Error: cannot find udev rules directory" 1>&2
+ exit 1
+fi
+sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index db78f82..aa8b547 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -1,10 +1,15 @@
/* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */
-#define VERSION "5"
+#define VERSION "6"
#define AOE_MAJOR 152
#define DEVICE_NAME "aoe"
+
+/* set AOE_PARTITIONS to 1 to use whole-disks only
+ * default is 16, which is 15 partitions plus the whole disk
+ */
#ifndef AOE_PARTITIONS
#define AOE_PARTITIONS 16
#endif
+
#define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * 10 + (aoeminor))
#define AOEMAJOR(sysminor) ((sysminor) / 10)
#define AOEMINOR(sysminor) ((sysminor) % 10)
@@ -34,13 +39,13 @@ enum {
struct aoe_hdr {
unsigned char dst[6];
unsigned char src[6];
- unsigned char type[2];
+ __be16 type;
unsigned char verfl;
unsigned char err;
- unsigned char major[2];
+ __be16 major;
unsigned char minor;
unsigned char cmd;
- unsigned char tag[4];
+ __be32 tag;
};
struct aoe_atahdr {
@@ -58,8 +63,8 @@ struct aoe_atahdr {
};
struct aoe_cfghdr {
- unsigned char bufcnt[2];
- unsigned char fwver[2];
+ __be16 bufcnt;
+ __be16 fwver;
unsigned char res;
unsigned char aoeccmd;
unsigned char cslen[2];
@@ -85,6 +90,7 @@ enum {
struct buf {
struct list_head bufs;
+ ulong start_time; /* for disk stats */
ulong flags;
ulong nframesout;
char *bufaddr;
@@ -125,7 +131,8 @@ struct aoedev {
struct timer_list timer;
spinlock_t lock;
struct net_device *ifp; /* interface ed is attached to */
- struct sk_buff *skblist;/* packets needing to be sent */
+ struct sk_buff *sendq_hd; /* packets needing to be sent, list head */
+ struct sk_buff *sendq_tl;
mempool_t *bufpool; /* for deadlock-free Buf allocation */
struct list_head bufq; /* queue of bios to work on */
struct buf *inprocess; /* the one we're currently working on */
@@ -151,7 +158,7 @@ void aoecmd_cfg_rsp(struct sk_buff *);
int aoedev_init(void);
void aoedev_exit(void);
-struct aoedev *aoedev_bymac(unsigned char *);
+struct aoedev *aoedev_by_aoeaddr(int maj, int min);
void aoedev_downdev(struct aoedev *d);
struct aoedev *aoedev_set(ulong, unsigned char *, struct net_device *, ulong);
int aoedev_busy(void);
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 63561b2..4780f79 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -125,6 +125,7 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
}
memset(buf, 0, sizeof(*buf));
INIT_LIST_HEAD(&buf->bufs);
+ buf->start_time = jiffies;
buf->bio = bio;
buf->resid = bio->bi_size;
buf->sector = bio->bi_sector;
@@ -146,8 +147,8 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
list_add_tail(&buf->bufs, &d->bufq);
aoecmd_work(d);
- sl = d->skblist;
- d->skblist = NULL;
+ sl = d->sendq_hd;
+ d->sendq_hd = d->sendq_tl = NULL;
spin_unlock_irqrestore(&d->lock, flags);
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index fb6d942..b5be4b7 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -90,19 +90,16 @@ newtag(struct aoedev *d)
static int
aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
{
- u16 type = __constant_cpu_to_be16(ETH_P_AOE);
- u16 aoemajor = __cpu_to_be16(d->aoemajor);
u32 host_tag = newtag(d);
- u32 tag = __cpu_to_be32(host_tag);
memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
memcpy(h->dst, d->addr, sizeof h->dst);
- memcpy(h->type, &type, sizeof type);
+ h->type = __constant_cpu_to_be16(ETH_P_AOE);
h->verfl = AOE_HVER;
- memcpy(h->major, &aoemajor, sizeof aoemajor);
+ h->major = cpu_to_be16(d->aoemajor);
h->minor = d->aoeminor;
h->cmd = AOECMD_ATA;
- memcpy(h->tag, &tag, sizeof tag);
+ h->tag = cpu_to_be32(host_tag);
return host_tag;
}
@@ -181,8 +178,12 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
skb = skb_prepare(d, f);
if (skb) {
- skb->next = d->skblist;
- d->skblist = skb;
+ skb->next = NULL;
+ if (d->sendq_hd)
+ d->sendq_tl->next = skb;
+ else
+ d->sendq_hd = skb;
+ d->sendq_tl = skb;
}
}
@@ -215,7 +216,6 @@ rexmit(struct aoedev *d, struct frame *f)
struct aoe_hdr *h;
char buf[128];
u32 n;
- u32 net_tag;
n = newtag(d);
@@ -227,13 +227,16 @@ rexmit(struct aoedev *d, struct frame *f)
h = (struct aoe_hdr *) f->data;
f->tag = n;
- net_tag = __cpu_to_be32(n);
- memcpy(h->tag, &net_tag, sizeof net_tag);
+ h->tag = cpu_to_be32(n);
skb = skb_prepare(d, f);
if (skb) {
- skb->next = d->skblist;
- d->skblist = skb;
+ skb->next = NULL;
+ if (d->sendq_hd)
+ d->sendq_tl->next = skb;
+ else
+ d->sendq_hd = skb;
+ d->sendq_tl = skb;
}
}
@@ -285,8 +288,8 @@ tdie: spin_unlock_irqrestore(&d->lock, flags);
}
}
- sl = d->skblist;
- d->skblist = NULL;
+ sl = d->sendq_hd;
+ d->sendq_hd = d->sendq_tl = NULL;
if (sl) {
n = d->rttavg <<= 1;
if (n > MAXTIMER)
@@ -308,16 +311,16 @@ ataid_complete(struct aoedev *d, unsigned char *id)
u16 n;
/* word 83: command set supported */
- n = __le16_to_cpu(*((u16 *) &id[83<<1]));
+ n = le16_to_cpup((__le16 *) &id[83<<1]);
/* word 86: command set/feature enabled */
- n |= __le16_to_cpu(*((u16 *) &id[86<<1]));
+ n |= le16_to_cpup((__le16 *) &id[86<<1]);
if (n & (1<<10)) { /* bit 10: LBA 48 */
d->flags |= DEVFL_EXT;
/* word 100: number lba48 sectors */
- ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));
+ ssize = le64_to_cpup((__le64 *) &id[100<<1]);
/* set as in ide-disk.c:init_idedisk_capacity */
d->geo.cylinders = ssize;
@@ -328,12 +331,12 @@ ataid_complete(struct aoedev *d, unsigned char *id)
d->flags &= ~DEVFL_EXT;
/* number lba28 sectors */
- ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));
+ ssize = le32_to_cpup((__le32 *) &id[60<<1]);
/* NOTE: obsolete in ATA 6 */
- d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
- d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
- d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
+ d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]);
+ d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]);
+ d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]);
}
d->ssize = ssize;
d->geo.start = 0;
@@ -380,29 +383,30 @@ aoecmd_ata_rsp(struct sk_buff *skb)
register long n;
ulong flags;
char ebuf[128];
-
+ u16 aoemajor;
+
hin = (struct aoe_hdr *) skb->mac.raw;
- d = aoedev_bymac(hin->src);
+ aoemajor = be16_to_cpu(hin->major);
+ d = aoedev_by_aoeaddr(aoemajor, hin->minor);
if (d == NULL) {
snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
"for unknown device %d.%d\n",
- __be16_to_cpu(*((u16 *) hin->major)),
- hin->minor);
+ aoemajor, hin->minor);
aoechr_error(ebuf);
return;
}
spin_lock_irqsave(&d->lock, flags);
- f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
+ f = getframe(d, be32_to_cpu(hin->tag));
if (f == NULL) {
spin_unlock_irqrestore(&d->lock, flags);
snprintf(ebuf, sizeof ebuf,
"%15s e%d.%d tag=%08x@%08lx\n",
"unexpected rsp",
- __be16_to_cpu(*((u16 *) hin->major)),
+ be16_to_cpu(hin->major),
hin->minor,
- __be32_to_cpu(*((u32 *) hin->tag)),
+ be32_to_cpu(hin->tag),
jiffies);
aoechr_error(ebuf);
return;
@@ -452,7 +456,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
"outbound ata command %2.2Xh for %d.%d\n",
ahout->cmdstat,
- __be16_to_cpu(*((u16 *) hin->major)),
+ be16_to_cpu(hin->major),
hin->minor);
}
}
@@ -460,6 +464,20 @@ aoecmd_ata_rsp(struct sk_buff *skb)
if (buf) {
buf->nframesout -= 1;
if (buf->nframesout == 0 && buf->resid == 0) {
+ unsigned long duration = jiffies - buf->start_time;
+ unsigned long n_sect = buf->bio->bi_size >> 9;
+ struct gendisk *disk = d->gd;
+
+ if (bio_data_dir(buf->bio) == WRITE) {
+ disk_stat_inc(disk, writes);
+ disk_stat_add(disk, write_ticks, duration);
+ disk_stat_add(disk, write_sectors, n_sect);
+ } else {
+ disk_stat_inc(disk, reads);
+ disk_stat_add(disk, read_ticks, duration);
+ disk_stat_add(disk, read_sectors, n_sect);
+ }
+ disk_stat_add(disk, io_ticks, duration);
n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
bio_endio(buf->bio, buf->bio->bi_size, n);
mempool_free(buf, d->bufpool);
@@ -471,8 +489,8 @@ aoecmd_ata_rsp(struct sk_buff *skb)
aoecmd_work(d);
- sl = d->skblist;
- d->skblist = NULL;
+ sl = d->sendq_hd;
+ d->sendq_hd = d->sendq_tl = NULL;
spin_unlock_irqrestore(&d->lock, flags);
@@ -486,8 +504,6 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
struct aoe_cfghdr *ch;
struct sk_buff *skb, *sl;
struct net_device *ifp;
- u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
- u16 net_aoemajor = __cpu_to_be16(aoemajor);
sl = NULL;
@@ -507,9 +523,9 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
memset(h->dst, 0xff, sizeof h->dst);
memcpy(h->src, ifp->dev_addr, sizeof h->src);
- memcpy(h->type, &aoe_type, sizeof aoe_type);
+ h->type = __constant_cpu_to_be16(ETH_P_AOE);
h->verfl = AOE_HVER;
- memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
+ h->major = cpu_to_be16(aoemajor);
h->minor = aoeminor;
h->cmd = AOECMD_CFG;
@@ -523,7 +539,7 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
/*
* Since we only call this in one place (and it only prepares one frame)
- * we just return the skb. Usually we'd chain it up to the d->skblist.
+ * we just return the skb. Usually we'd chain it up to the aoedev sendq.
*/
static struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
@@ -575,9 +591,10 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
struct aoedev *d;
struct aoe_hdr *h;
struct aoe_cfghdr *ch;
- ulong flags, bufcnt, sysminor, aoemajor;
+ ulong flags, sysminor, aoemajor;
+ u16 bufcnt;
struct sk_buff *sl;
- enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };
+ enum { MAXFRAMES = 8 };
h = (struct aoe_hdr *) skb->mac.raw;
ch = (struct aoe_cfghdr *) (h+1);
@@ -586,7 +603,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
* Enough people have their dip switches set backwards to
* warrant a loud message for this special case.
*/
- aoemajor = __be16_to_cpu(*((u16 *) h->major));
+ aoemajor = be16_to_cpu(h->major);
if (aoemajor == 0xfff) {
printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
"address is all ones. Check shelf dip switches\n");
@@ -594,13 +611,14 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
}
sysminor = SYSMINOR(aoemajor, h->minor);
- if (sysminor > MAXSYSMINOR) {
- printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
- "large\n", sysminor);
+ if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
+ printk(KERN_INFO
+ "aoe: e%ld.%d: minor number too large\n",
+ aoemajor, (int) h->minor);
return;
}
- bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
+ bufcnt = be16_to_cpu(ch->bufcnt);
if (bufcnt > MAXFRAMES) /* keep it reasonable */
bufcnt = MAXFRAMES;
@@ -617,7 +635,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
return;
}
- d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));
+ d->fw_ver = be16_to_cpu(ch->fwver);
/* we get here only if the device is new */
sl = aoecmd_ata_id(d);
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 240abae..ec16c64 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -13,7 +13,7 @@ static struct aoedev *devlist;
static spinlock_t devlist_lock;
struct aoedev *
-aoedev_bymac(unsigned char *macaddr)
+aoedev_by_aoeaddr(int maj, int min)
{
struct aoedev *d;
ulong flags;
@@ -21,7 +21,7 @@ aoedev_bymac(unsigned char *macaddr)
spin_lock_irqsave(&devlist_lock, flags);
for (d=devlist; d; d=d->next)
- if (!memcmp(d->addr, macaddr, 6))
+ if (d->aoemajor == maj && d->aoeminor == min)
break;
spin_unlock_irqrestore(&devlist_lock, flags);
@@ -125,7 +125,6 @@ aoedev_set(ulong sysminor, unsigned char *addr, struct net_device *ifp, ulong bu
d->ifp = ifp;
if (d->sysminor != sysminor
- || memcmp(d->addr, addr, sizeof d->addr)
|| (d->flags & DEVFL_UP) == 0) {
aoedev_downdev(d); /* flushes outstanding frames */
memcpy(d->addr, addr, sizeof d->addr);
@@ -147,7 +146,8 @@ aoedev_freedev(struct aoedev *d)
put_disk(d->gd);
}
kfree(d->frames);
- mempool_destroy(d->bufpool);
+ if (d->bufpool)
+ mempool_destroy(d->bufpool);
kfree(d);
}
diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c
index cc1945b..bc92aac 100644
--- a/drivers/block/aoe/aoenet.c
+++ b/drivers/block/aoe/aoenet.c
@@ -69,7 +69,7 @@ set_aoe_iflist(const char __user *user_str, size_t size)
u64
mac_addr(char addr[6])
{
- u64 n = 0;
+ __be64 n = 0;
char *p = (char *) &n;
memcpy(p + 2, addr, 6); /* (sizeof addr != 6) */
@@ -108,7 +108,7 @@ static int
aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt)
{
struct aoe_hdr *h;
- ulong n;
+ u32 n;
skb = skb_check(skb);
if (!skb)
@@ -121,7 +121,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt)
skb_push(skb, ETH_HLEN); /* (1) */
h = (struct aoe_hdr *) skb->mac.raw;
- n = __be32_to_cpu(*((u32 *) h->tag));
+ n = be32_to_cpu(h->tag);
if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31))
goto exit;
@@ -132,7 +132,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt)
if (net_ratelimit())
printk(KERN_ERR "aoe: aoenet_rcv: error packet from %d.%d; "
"ecode=%d '%s'\n",
- __be16_to_cpu(*((u16 *) h->major)), h->minor,
+ be16_to_cpu(h->major), h->minor,
h->err, aoe_errlist[n]);
goto exit;
}