diff options
-rw-r--r-- | Documentation/aoe/mkdevs.sh | 1 | ||||
-rw-r--r-- | Documentation/aoe/mkshelf.sh | 1 | ||||
-rw-r--r-- | Documentation/aoe/todo.txt | 14 | ||||
-rw-r--r-- | Documentation/aoe/udev-install.sh | 6 | ||||
-rw-r--r-- | drivers/block/aoe/aoe.h | 23 | ||||
-rw-r--r-- | drivers/block/aoe/aoeblk.c | 5 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 108 | ||||
-rw-r--r-- | drivers/block/aoe/aoedev.c | 8 | ||||
-rw-r--r-- | drivers/block/aoe/aoenet.c | 8 |
9 files changed, 110 insertions, 64 deletions
diff --git a/Documentation/aoe/mkdevs.sh b/Documentation/aoe/mkdevs.sh index 6ce7070..ec5a6de 100644 --- a/Documentation/aoe/mkdevs.sh +++ b/Documentation/aoe/mkdevs.sh @@ -5,6 +5,7 @@ n_partitions=${n_partitions:-16} if test "$#" != "1"; then echo "Usage: sh `basename $0` {dir}" 1>&2 + echo " n_partitions=16 sh `basename $0` {dir}" 1>&2 exit 1 fi dir=$1 diff --git a/Documentation/aoe/mkshelf.sh b/Documentation/aoe/mkshelf.sh index 4093283..8bacf9f 100644 --- a/Documentation/aoe/mkshelf.sh +++ b/Documentation/aoe/mkshelf.sh @@ -2,6 +2,7 @@ if test "$#" != "2"; then echo "Usage: sh `basename $0` {dir} {shelfaddress}" 1>&2 + echo " n_partitions=16 sh `basename $0` {dir} {shelfaddress}" 1>&2 exit 1 fi n_partitions=${n_partitions:-16} diff --git a/Documentation/aoe/todo.txt b/Documentation/aoe/todo.txt new file mode 100644 index 0000000..7fee1e1 --- /dev/null +++ b/Documentation/aoe/todo.txt @@ -0,0 +1,14 @@ +There is a potential for deadlock when allocating a struct sk_buff for +data that needs to be written out to aoe storage. If the data is +being written from a dirty page in order to free that page, and if +there are no other pages available, then deadlock may occur when a +free page is needed for the sk_buff allocation. This situation has +not been observed, but it would be nice to eliminate any potential for +deadlock under memory pressure. + +Because ATA over Ethernet is not fragmented by the kernel's IP code, +the destructore member of the struct sk_buff is available to the aoe +driver. By using a mempool for allocating all but the first few +sk_buffs, and by registering a destructor, we should be able to +efficiently allocate sk_buffs without introducing any potential for +deadlock. diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh index 861a27f..6449911 100644 --- a/Documentation/aoe/udev-install.sh +++ b/Documentation/aoe/udev-install.sh @@ -23,4 +23,8 @@ fi # /etc/udev/rules.d # rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`" -test "$rules_d" && sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules" +if test -z "$rules_d" || test ! -d "$rules_d"; then + echo "$me Error: cannot find udev rules directory" 1>&2 + exit 1 +fi +sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules" diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h index db78f82..aa8b547 100644 --- a/drivers/block/aoe/aoe.h +++ b/drivers/block/aoe/aoe.h @@ -1,10 +1,15 @@ /* Copyright (c) 2004 Coraid, Inc. See COPYING for GPL terms. */ -#define VERSION "5" +#define VERSION "6" #define AOE_MAJOR 152 #define DEVICE_NAME "aoe" + +/* set AOE_PARTITIONS to 1 to use whole-disks only + * default is 16, which is 15 partitions plus the whole disk + */ #ifndef AOE_PARTITIONS #define AOE_PARTITIONS 16 #endif + #define SYSMINOR(aoemajor, aoeminor) ((aoemajor) * 10 + (aoeminor)) #define AOEMAJOR(sysminor) ((sysminor) / 10) #define AOEMINOR(sysminor) ((sysminor) % 10) @@ -34,13 +39,13 @@ enum { struct aoe_hdr { unsigned char dst[6]; unsigned char src[6]; - unsigned char type[2]; + __be16 type; unsigned char verfl; unsigned char err; - unsigned char major[2]; + __be16 major; unsigned char minor; unsigned char cmd; - unsigned char tag[4]; + __be32 tag; }; struct aoe_atahdr { @@ -58,8 +63,8 @@ struct aoe_atahdr { }; struct aoe_cfghdr { - unsigned char bufcnt[2]; - unsigned char fwver[2]; + __be16 bufcnt; + __be16 fwver; unsigned char res; unsigned char aoeccmd; unsigned char cslen[2]; @@ -85,6 +90,7 @@ enum { struct buf { struct list_head bufs; + ulong start_time; /* for disk stats */ ulong flags; ulong nframesout; char *bufaddr; @@ -125,7 +131,8 @@ struct aoedev { struct timer_list timer; spinlock_t lock; struct net_device *ifp; /* interface ed is attached to */ - struct sk_buff *skblist;/* packets needing to be sent */ + struct sk_buff *sendq_hd; /* packets needing to be sent, list head */ + struct sk_buff *sendq_tl; mempool_t *bufpool; /* for deadlock-free Buf allocation */ struct list_head bufq; /* queue of bios to work on */ struct buf *inprocess; /* the one we're currently working on */ @@ -151,7 +158,7 @@ void aoecmd_cfg_rsp(struct sk_buff *); int aoedev_init(void); void aoedev_exit(void); -struct aoedev *aoedev_bymac(unsigned char *); +struct aoedev *aoedev_by_aoeaddr(int maj, int min); void aoedev_downdev(struct aoedev *d); struct aoedev *aoedev_set(ulong, unsigned char *, struct net_device *, ulong); int aoedev_busy(void); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 63561b2..4780f79 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -125,6 +125,7 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio) } memset(buf, 0, sizeof(*buf)); INIT_LIST_HEAD(&buf->bufs); + buf->start_time = jiffies; buf->bio = bio; buf->resid = bio->bi_size; buf->sector = bio->bi_sector; @@ -146,8 +147,8 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio) list_add_tail(&buf->bufs, &d->bufq); aoecmd_work(d); - sl = d->skblist; - d->skblist = NULL; + sl = d->sendq_hd; + d->sendq_hd = d->sendq_tl = NULL; spin_unlock_irqrestore(&d->lock, flags); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index fb6d942..b5be4b7 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -90,19 +90,16 @@ newtag(struct aoedev *d) static int aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h) { - u16 type = __constant_cpu_to_be16(ETH_P_AOE); - u16 aoemajor = __cpu_to_be16(d->aoemajor); u32 host_tag = newtag(d); - u32 tag = __cpu_to_be32(host_tag); memcpy(h->src, d->ifp->dev_addr, sizeof h->src); memcpy(h->dst, d->addr, sizeof h->dst); - memcpy(h->type, &type, sizeof type); + h->type = __constant_cpu_to_be16(ETH_P_AOE); h->verfl = AOE_HVER; - memcpy(h->major, &aoemajor, sizeof aoemajor); + h->major = cpu_to_be16(d->aoemajor); h->minor = d->aoeminor; h->cmd = AOECMD_ATA; - memcpy(h->tag, &tag, sizeof tag); + h->tag = cpu_to_be32(host_tag); return host_tag; } @@ -181,8 +178,12 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f) skb = skb_prepare(d, f); if (skb) { - skb->next = d->skblist; - d->skblist = skb; + skb->next = NULL; + if (d->sendq_hd) + d->sendq_tl->next = skb; + else + d->sendq_hd = skb; + d->sendq_tl = skb; } } @@ -215,7 +216,6 @@ rexmit(struct aoedev *d, struct frame *f) struct aoe_hdr *h; char buf[128]; u32 n; - u32 net_tag; n = newtag(d); @@ -227,13 +227,16 @@ rexmit(struct aoedev *d, struct frame *f) h = (struct aoe_hdr *) f->data; f->tag = n; - net_tag = __cpu_to_be32(n); - memcpy(h->tag, &net_tag, sizeof net_tag); + h->tag = cpu_to_be32(n); skb = skb_prepare(d, f); if (skb) { - skb->next = d->skblist; - d->skblist = skb; + skb->next = NULL; + if (d->sendq_hd) + d->sendq_tl->next = skb; + else + d->sendq_hd = skb; + d->sendq_tl = skb; } } @@ -285,8 +288,8 @@ tdie: spin_unlock_irqrestore(&d->lock, flags); } } - sl = d->skblist; - d->skblist = NULL; + sl = d->sendq_hd; + d->sendq_hd = d->sendq_tl = NULL; if (sl) { n = d->rttavg <<= 1; if (n > MAXTIMER) @@ -308,16 +311,16 @@ ataid_complete(struct aoedev *d, unsigned char *id) u16 n; /* word 83: command set supported */ - n = __le16_to_cpu(*((u16 *) &id[83<<1])); + n = le16_to_cpup((__le16 *) &id[83<<1]); /* word 86: command set/feature enabled */ - n |= __le16_to_cpu(*((u16 *) &id[86<<1])); + n |= le16_to_cpup((__le16 *) &id[86<<1]); if (n & (1<<10)) { /* bit 10: LBA 48 */ d->flags |= DEVFL_EXT; /* word 100: number lba48 sectors */ - ssize = __le64_to_cpu(*((u64 *) &id[100<<1])); + ssize = le64_to_cpup((__le64 *) &id[100<<1]); /* set as in ide-disk.c:init_idedisk_capacity */ d->geo.cylinders = ssize; @@ -328,12 +331,12 @@ ataid_complete(struct aoedev *d, unsigned char *id) d->flags &= ~DEVFL_EXT; /* number lba28 sectors */ - ssize = __le32_to_cpu(*((u32 *) &id[60<<1])); + ssize = le32_to_cpup((__le32 *) &id[60<<1]); /* NOTE: obsolete in ATA 6 */ - d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1])); - d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1])); - d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1])); + d->geo.cylinders = le16_to_cpup((__le16 *) &id[54<<1]); + d->geo.heads = le16_to_cpup((__le16 *) &id[55<<1]); + d->geo.sectors = le16_to_cpup((__le16 *) &id[56<<1]); } d->ssize = ssize; d->geo.start = 0; @@ -380,29 +383,30 @@ aoecmd_ata_rsp(struct sk_buff *skb) register long n; ulong flags; char ebuf[128]; - + u16 aoemajor; + hin = (struct aoe_hdr *) skb->mac.raw; - d = aoedev_bymac(hin->src); + aoemajor = be16_to_cpu(hin->major); + d = aoedev_by_aoeaddr(aoemajor, hin->minor); if (d == NULL) { snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response " "for unknown device %d.%d\n", - __be16_to_cpu(*((u16 *) hin->major)), - hin->minor); + aoemajor, hin->minor); aoechr_error(ebuf); return; } spin_lock_irqsave(&d->lock, flags); - f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag))); + f = getframe(d, be32_to_cpu(hin->tag)); if (f == NULL) { spin_unlock_irqrestore(&d->lock, flags); snprintf(ebuf, sizeof ebuf, "%15s e%d.%d tag=%08x@%08lx\n", "unexpected rsp", - __be16_to_cpu(*((u16 *) hin->major)), + be16_to_cpu(hin->major), hin->minor, - __be32_to_cpu(*((u32 *) hin->tag)), + be32_to_cpu(hin->tag), jiffies); aoechr_error(ebuf); return; @@ -452,7 +456,7 @@ aoecmd_ata_rsp(struct sk_buff *skb) printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized " "outbound ata command %2.2Xh for %d.%d\n", ahout->cmdstat, - __be16_to_cpu(*((u16 *) hin->major)), + be16_to_cpu(hin->major), hin->minor); } } @@ -460,6 +464,20 @@ aoecmd_ata_rsp(struct sk_buff *skb) if (buf) { buf->nframesout -= 1; if (buf->nframesout == 0 && buf->resid == 0) { + unsigned long duration = jiffies - buf->start_time; + unsigned long n_sect = buf->bio->bi_size >> 9; + struct gendisk *disk = d->gd; + + if (bio_data_dir(buf->bio) == WRITE) { + disk_stat_inc(disk, writes); + disk_stat_add(disk, write_ticks, duration); + disk_stat_add(disk, write_sectors, n_sect); + } else { + disk_stat_inc(disk, reads); + disk_stat_add(disk, read_ticks, duration); + disk_stat_add(disk, read_sectors, n_sect); + } + disk_stat_add(disk, io_ticks, duration); n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; bio_endio(buf->bio, buf->bio->bi_size, n); mempool_free(buf, d->bufpool); @@ -471,8 +489,8 @@ aoecmd_ata_rsp(struct sk_buff *skb) aoecmd_work(d); - sl = d->skblist; - d->skblist = NULL; + sl = d->sendq_hd; + d->sendq_hd = d->sendq_tl = NULL; spin_unlock_irqrestore(&d->lock, flags); @@ -486,8 +504,6 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) struct aoe_cfghdr *ch; struct sk_buff *skb, *sl; struct net_device *ifp; - u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE); - u16 net_aoemajor = __cpu_to_be16(aoemajor); sl = NULL; @@ -507,9 +523,9 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) memset(h->dst, 0xff, sizeof h->dst); memcpy(h->src, ifp->dev_addr, sizeof h->src); - memcpy(h->type, &aoe_type, sizeof aoe_type); + h->type = __constant_cpu_to_be16(ETH_P_AOE); h->verfl = AOE_HVER; - memcpy(h->major, &net_aoemajor, sizeof net_aoemajor); + h->major = cpu_to_be16(aoemajor); h->minor = aoeminor; h->cmd = AOECMD_CFG; @@ -523,7 +539,7 @@ aoecmd_cfg(ushort aoemajor, unsigned char aoeminor) /* * Since we only call this in one place (and it only prepares one frame) - * we just return the skb. Usually we'd chain it up to the d->skblist. + * we just return the skb. Usually we'd chain it up to the aoedev sendq. */ static struct sk_buff * aoecmd_ata_id(struct aoedev *d) @@ -575,9 +591,10 @@ aoecmd_cfg_rsp(struct sk_buff *skb) struct aoedev *d; struct aoe_hdr *h; struct aoe_cfghdr *ch; - ulong flags, bufcnt, sysminor, aoemajor; + ulong flags, sysminor, aoemajor; + u16 bufcnt; struct sk_buff *sl; - enum { MAXFRAMES = 8, MAXSYSMINOR = 255 }; + enum { MAXFRAMES = 8 }; h = (struct aoe_hdr *) skb->mac.raw; ch = (struct aoe_cfghdr *) (h+1); @@ -586,7 +603,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb) * Enough people have their dip switches set backwards to * warrant a loud message for this special case. */ - aoemajor = __be16_to_cpu(*((u16 *) h->major)); + aoemajor = be16_to_cpu(h->major); if (aoemajor == 0xfff) { printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf " "address is all ones. Check shelf dip switches\n"); @@ -594,13 +611,14 @@ aoecmd_cfg_rsp(struct sk_buff *skb) } sysminor = SYSMINOR(aoemajor, h->minor); - if (sysminor > MAXSYSMINOR) { - printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too " - "large\n", sysminor); + if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) { + printk(KERN_INFO + "aoe: e%ld.%d: minor number too large\n", + aoemajor, (int) h->minor); return; } - bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt)); + bufcnt = be16_to_cpu(ch->bufcnt); if (bufcnt > MAXFRAMES) /* keep it reasonable */ bufcnt = MAXFRAMES; @@ -617,7 +635,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb) return; } - d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver)); + d->fw_ver = be16_to_cpu(ch->fwver); /* we get here only if the device is new */ sl = aoecmd_ata_id(d); diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index 240abae..ec16c64 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -13,7 +13,7 @@ static struct aoedev *devlist; static spinlock_t devlist_lock; struct aoedev * -aoedev_bymac(unsigned char *macaddr) +aoedev_by_aoeaddr(int maj, int min) { struct aoedev *d; ulong flags; @@ -21,7 +21,7 @@ aoedev_bymac(unsigned char *macaddr) spin_lock_irqsave(&devlist_lock, flags); for (d=devlist; d; d=d->next) - if (!memcmp(d->addr, macaddr, 6)) + if (d->aoemajor == maj && d->aoeminor == min) break; spin_unlock_irqrestore(&devlist_lock, flags); @@ -125,7 +125,6 @@ aoedev_set(ulong sysminor, unsigned char *addr, struct net_device *ifp, ulong bu d->ifp = ifp; if (d->sysminor != sysminor - || memcmp(d->addr, addr, sizeof d->addr) || (d->flags & DEVFL_UP) == 0) { aoedev_downdev(d); /* flushes outstanding frames */ memcpy(d->addr, addr, sizeof d->addr); @@ -147,7 +146,8 @@ aoedev_freedev(struct aoedev *d) put_disk(d->gd); } kfree(d->frames); - mempool_destroy(d->bufpool); + if (d->bufpool) + mempool_destroy(d->bufpool); kfree(d); } diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index cc1945b..bc92aac 100644 --- a/drivers/block/aoe/aoenet.c +++ b/drivers/block/aoe/aoenet.c @@ -69,7 +69,7 @@ set_aoe_iflist(const char __user *user_str, size_t size) u64 mac_addr(char addr[6]) { - u64 n = 0; + __be64 n = 0; char *p = (char *) &n; memcpy(p + 2, addr, 6); /* (sizeof addr != 6) */ @@ -108,7 +108,7 @@ static int aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) { struct aoe_hdr *h; - ulong n; + u32 n; skb = skb_check(skb); if (!skb) @@ -121,7 +121,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) skb_push(skb, ETH_HLEN); /* (1) */ h = (struct aoe_hdr *) skb->mac.raw; - n = __be32_to_cpu(*((u32 *) h->tag)); + n = be32_to_cpu(h->tag); if ((h->verfl & AOEFL_RSP) == 0 || (n & 1<<31)) goto exit; @@ -132,7 +132,7 @@ aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) if (net_ratelimit()) printk(KERN_ERR "aoe: aoenet_rcv: error packet from %d.%d; " "ecode=%d '%s'\n", - __be16_to_cpu(*((u16 *) h->major)), h->minor, + be16_to_cpu(h->major), h->minor, h->err, aoe_errlist[n]); goto exit; } |