aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
author: David 'Digit' Turner <digit@android.com>, 2010-12-23 02:54:08 +0100
committer: David 'Digit' Turner <digit@android.com>, 2011-01-02 22:31:27 +0100
commit: cb42a1b1461e02efb034582ac5d8f71534723b92 (patch)
tree: 5dbd619671d4591d33834488f1c15690dabf50ba /block
parent: ca6a2e034bce665a08d9d748ac11d6a7cfcd7c48 (diff)
download: external_qemu-cb42a1b1461e02efb034582ac5d8f71534723b92.zip
external_qemu-cb42a1b1461e02efb034582ac5d8f71534723b92.tar.gz
external_qemu-cb42a1b1461e02efb034582ac5d8f71534723b92.tar.bz2
upstream: integrate block changes
This large patch upgrades the block support code to the upstream version
available in ba5e7f82169f32ab8163c707d97c799ca09f8924, dated 2010-08-08.

Change-Id: I8b24df0c287e72f6620650a4d6a62e1bb315453e
Diffstat (limited to 'block')
-rw-r--r--  block/bochs.c           77
-rw-r--r--  block/cloop.c           46
-rw-r--r--  block/cow.c            147
-rw-r--r--  block/dmg.c            121
-rw-r--r--  block/nbd.c              5
-rw-r--r--  block/parallels.c       52
-rw-r--r--  block/qcow.c           112
-rw-r--r--  block/qcow2-cluster.c  343
-rw-r--r--  block/qcow2-refcount.c 750
-rw-r--r--  block/qcow2-snapshot.c  55
-rw-r--r--  block/qcow2.c          670
-rw-r--r--  block/qcow2.h           36
-rw-r--r--  block/raw-posix.c      646
-rw-r--r--  block/raw-win32.c       36
-rw-r--r--  block/raw.c            280
-rw-r--r--  block/vpc.c             97
-rw-r--r--  block/vvfat.c           11
17 files changed, 2198 insertions, 1286 deletions
diff --git a/block/bochs.c b/block/bochs.c
index bac81c4..5fe2fa3 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -80,8 +80,6 @@ struct bochs_header {
};
typedef struct BDRVBochsState {
- int fd;
-
uint32_t *catalog_bitmap;
int catalog_size;
@@ -109,25 +107,16 @@ static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int bochs_open(BlockDriverState *bs, const char *filename, int flags)
+static int bochs_open(BlockDriverState *bs, int flags)
{
BDRVBochsState *s = bs->opaque;
- int fd, i;
+ int i;
struct bochs_header bochs;
struct bochs_header_v1 header_v1;
- fd = open(filename, O_RDWR | O_BINARY);
- if (fd < 0) {
- fd = open(filename, O_RDONLY | O_BINARY);
- if (fd < 0)
- return -1;
- }
-
bs->read_only = 1; // no write support yet
- s->fd = fd;
-
- if (read(fd, &bochs, sizeof(bochs)) != sizeof(bochs)) {
+ if (bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)) != sizeof(bochs)) {
goto fail;
}
@@ -146,12 +135,10 @@ static int bochs_open(BlockDriverState *bs, const char *filename, int flags)
bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
}
- lseek(s->fd, le32_to_cpu(bochs.header), SEEK_SET);
-
s->catalog_size = le32_to_cpu(bochs.extra.redolog.catalog);
s->catalog_bitmap = qemu_malloc(s->catalog_size * 4);
- if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) !=
- s->catalog_size * 4)
+ if (bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
+ s->catalog_size * 4) != s->catalog_size * 4)
goto fail;
for (i = 0; i < s->catalog_size; i++)
le32_to_cpus(&s->catalog_bitmap[i]);
@@ -165,68 +152,53 @@ static int bochs_open(BlockDriverState *bs, const char *filename, int flags)
return 0;
fail:
- close(fd);
return -1;
}
-static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
int64_t offset = sector_num * 512;
- int64_t extent_index, extent_offset, bitmap_offset, block_offset;
+ int64_t extent_index, extent_offset, bitmap_offset;
char bitmap_entry;
// seek to sector
extent_index = offset / s->extent_size;
extent_offset = (offset % s->extent_size) / 512;
- if (s->catalog_bitmap[extent_index] == 0xffffffff)
- {
-// fprintf(stderr, "page not allocated [%x - %x:%x]\n",
-// sector_num, extent_index, extent_offset);
- return -1; // not allocated
+ if (s->catalog_bitmap[extent_index] == 0xffffffff) {
+ return -1; /* not allocated */
}
bitmap_offset = s->data_offset + (512 * s->catalog_bitmap[extent_index] *
(s->extent_blocks + s->bitmap_blocks));
- block_offset = bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
-
-// fprintf(stderr, "sect: %x [ext i: %x o: %x] -> %x bitmap: %x block: %x\n",
-// sector_num, extent_index, extent_offset,
-// le32_to_cpu(s->catalog_bitmap[extent_index]),
-// bitmap_offset, block_offset);
- // read in bitmap for current extent
- lseek(s->fd, bitmap_offset + (extent_offset / 8), SEEK_SET);
-
- read(s->fd, &bitmap_entry, 1);
-
- if (!((bitmap_entry >> (extent_offset % 8)) & 1))
- {
-// fprintf(stderr, "sector (%x) in bitmap not allocated\n",
-// sector_num);
- return -1; // not allocated
+ /* read in bitmap for current extent */
+ if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
+ &bitmap_entry, 1) != 1) {
+ return -1;
}
- lseek(s->fd, block_offset, SEEK_SET);
+ if (!((bitmap_entry >> (extent_offset % 8)) & 1)) {
+ return -1; /* not allocated */
+ }
- return 0;
+ return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
}
static int bochs_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
- BDRVBochsState *s = bs->opaque;
int ret;
while (nb_sectors > 0) {
- if (!seek_to_sector(bs, sector_num))
- {
- ret = read(s->fd, buf, 512);
- if (ret != 512)
- return -1;
- }
- else
+ int64_t block_offset = seek_to_sector(bs, sector_num);
+ if (block_offset >= 0) {
+ ret = bdrv_pread(bs->file, block_offset, buf, 512);
+ if (ret != 512) {
+ return -1;
+ }
+ } else
memset(buf, 0, 512);
nb_sectors--;
sector_num++;
@@ -239,7 +211,6 @@ static void bochs_close(BlockDriverState *bs)
{
BDRVBochsState *s = bs->opaque;
qemu_free(s->catalog_bitmap);
- close(s->fd);
}
static BlockDriver bdrv_bochs = {
diff --git a/block/cloop.c b/block/cloop.c
index 06c687e..fe015c4 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -27,7 +27,6 @@
#include <zlib.h>
typedef struct BDRVCloopState {
- int fd;
uint32_t block_size;
uint32_t n_blocks;
uint64_t* offsets;
@@ -51,34 +50,31 @@ static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int cloop_open(BlockDriverState *bs, const char *filename, int flags)
+static int cloop_open(BlockDriverState *bs, int flags)
{
BDRVCloopState *s = bs->opaque;
uint32_t offsets_size,max_compressed_block_size=1,i;
- s->fd = open(filename, O_RDONLY | O_BINARY);
- if (s->fd < 0)
- return -errno;
bs->read_only = 1;
/* read header */
- if(lseek(s->fd,128,SEEK_SET)<0) {
-cloop_close:
- close(s->fd);
- return -1;
+ if (bdrv_pread(bs->file, 128, &s->block_size, 4) < 4) {
+ goto cloop_close;
}
- if(read(s->fd,&s->block_size,4)<4)
- goto cloop_close;
- s->block_size=be32_to_cpu(s->block_size);
- if(read(s->fd,&s->n_blocks,4)<4)
- goto cloop_close;
- s->n_blocks=be32_to_cpu(s->n_blocks);
+ s->block_size = be32_to_cpu(s->block_size);
+
+ if (bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4) < 4) {
+ goto cloop_close;
+ }
+ s->n_blocks = be32_to_cpu(s->n_blocks);
/* read offsets */
- offsets_size=s->n_blocks*sizeof(uint64_t);
- s->offsets=(uint64_t*)qemu_malloc(offsets_size);
- if(read(s->fd,s->offsets,offsets_size)<offsets_size)
+ offsets_size = s->n_blocks * sizeof(uint64_t);
+ s->offsets = qemu_malloc(offsets_size);
+ if (bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size) <
+ offsets_size) {
goto cloop_close;
+ }
for(i=0;i<s->n_blocks;i++) {
s->offsets[i]=be64_to_cpu(s->offsets[i]);
if(i>0) {
@@ -98,16 +94,21 @@ cloop_close:
s->sectors_per_block = s->block_size/512;
bs->total_sectors = s->n_blocks*s->sectors_per_block;
return 0;
+
+cloop_close:
+ return -1;
}
-static inline int cloop_read_block(BDRVCloopState *s,int block_num)
+static inline int cloop_read_block(BlockDriverState *bs, int block_num)
{
+ BDRVCloopState *s = bs->opaque;
+
if(s->current_block != block_num) {
int ret;
uint32_t bytes = s->offsets[block_num+1]-s->offsets[block_num];
- lseek(s->fd, s->offsets[block_num], SEEK_SET);
- ret = read(s->fd, s->compressed_block, bytes);
+ ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block,
+ bytes);
if (ret != bytes)
return -1;
@@ -136,7 +137,7 @@ static int cloop_read(BlockDriverState *bs, int64_t sector_num,
for(i=0;i<nb_sectors;i++) {
uint32_t sector_offset_in_block=((sector_num+i)%s->sectors_per_block),
block_num=(sector_num+i)/s->sectors_per_block;
- if(cloop_read_block(s, block_num) != 0)
+ if(cloop_read_block(bs, block_num) != 0)
return -1;
memcpy(buf+i*512,s->uncompressed_block+sector_offset_in_block*512,512);
}
@@ -146,7 +147,6 @@ static int cloop_read(BlockDriverState *bs, int64_t sector_num,
static void cloop_close(BlockDriverState *bs)
{
BDRVCloopState *s = bs->opaque;
- close(s->fd);
if(s->n_blocks>0)
free(s->offsets);
free(s->compressed_block);
diff --git a/block/cow.c b/block/cow.c
index 84818f1..eedcc48 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -21,11 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#ifndef _WIN32
#include "qemu-common.h"
#include "block_int.h"
#include "module.h"
-#include <sys/mman.h>
/**************************************************************/
/* COW block driver using file system holes */
@@ -44,10 +42,6 @@ struct cow_header_v2 {
};
typedef struct BDRVCowState {
- int fd;
- uint8_t *cow_bitmap; /* if non NULL, COW mappings are used first */
- uint8_t *cow_bitmap_addr; /* mmap address of cow_bitmap */
- int cow_bitmap_size;
int64_t cow_sectors_offset;
} BDRVCowState;
@@ -63,22 +57,16 @@ static int cow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int cow_open(BlockDriverState *bs, const char *filename, int flags)
+static int cow_open(BlockDriverState *bs, int flags)
{
BDRVCowState *s = bs->opaque;
- int fd;
struct cow_header_v2 cow_header;
+ int bitmap_size;
int64_t size;
- fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
- if (fd < 0) {
- fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
- if (fd < 0)
- return -1;
- }
- s->fd = fd;
/* see if it is a cow image */
- if (read(fd, &cow_header, sizeof(cow_header)) != sizeof(cow_header)) {
+ if (bdrv_pread(bs->file, 0, &cow_header, sizeof(cow_header)) !=
+ sizeof(cow_header)) {
goto fail;
}
@@ -94,61 +82,91 @@ static int cow_open(BlockDriverState *bs, const char *filename, int flags)
pstrcpy(bs->backing_file, sizeof(bs->backing_file),
cow_header.backing_file);
- /* mmap the bitmap */
- s->cow_bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
- s->cow_bitmap_addr = (void *)mmap(get_mmap_addr(s->cow_bitmap_size),
- s->cow_bitmap_size,
- PROT_READ | PROT_WRITE,
- MAP_SHARED, s->fd, 0);
- if (s->cow_bitmap_addr == MAP_FAILED)
- goto fail;
- s->cow_bitmap = s->cow_bitmap_addr + sizeof(cow_header);
- s->cow_sectors_offset = (s->cow_bitmap_size + 511) & ~511;
+ bitmap_size = ((bs->total_sectors + 7) >> 3) + sizeof(cow_header);
+ s->cow_sectors_offset = (bitmap_size + 511) & ~511;
return 0;
fail:
- close(fd);
return -1;
}
-static inline void cow_set_bit(uint8_t *bitmap, int64_t bitnum)
+/*
+ * XXX(hch): right now these functions are extremely inefficient.
+ * We should just read the whole bitmap we'll need in one go instead.
+ */
+static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
{
- bitmap[bitnum / 8] |= (1 << (bitnum%8));
+ uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
+ uint8_t bitmap;
+ int ret;
+
+ ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+ if (ret < 0) {
+ return ret;
+ }
+
+ bitmap |= (1 << (bitnum % 8));
+
+ ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
+ if (ret < 0) {
+ return ret;
+ }
+ return 0;
}
-static inline int is_bit_set(const uint8_t *bitmap, int64_t bitnum)
+static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
{
- return !!(bitmap[bitnum / 8] & (1 << (bitnum%8)));
-}
+ uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
+ uint8_t bitmap;
+ int ret;
+
+ ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
+ if (ret < 0) {
+ return ret;
+ }
+ return !!(bitmap & (1 << (bitnum % 8)));
+}
/* Return true if first block has been changed (ie. current version is
* in COW file). Set the number of continuous blocks for which that
* is true. */
-static inline int is_changed(uint8_t *bitmap,
- int64_t sector_num, int nb_sectors,
- int *num_same)
+static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *num_same)
{
int changed;
- if (!bitmap || nb_sectors == 0) {
+ if (nb_sectors == 0) {
*num_same = nb_sectors;
return 0;
}
- changed = is_bit_set(bitmap, sector_num);
+ changed = is_bit_set(bs, sector_num);
+ if (changed < 0) {
+ return 0; /* XXX: how to return I/O errors? */
+ }
+
for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
- if (is_bit_set(bitmap, sector_num + *num_same) != changed)
+ if (is_bit_set(bs, sector_num + *num_same) != changed)
break;
}
return changed;
}
-static int cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
+static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
{
- BDRVCowState *s = bs->opaque;
- return is_changed(s->cow_bitmap, sector_num, nb_sectors, pnum);
+ int error = 0;
+ int i;
+
+ for (i = 0; i < nb_sectors; i++) {
+ error = cow_set_bit(bs, sector_num + i);
+ if (error) {
+ break;
+ }
+ }
+
+ return error;
}
static int cow_read(BlockDriverState *bs, int64_t sector_num,
@@ -158,9 +176,10 @@ static int cow_read(BlockDriverState *bs, int64_t sector_num,
int ret, n;
while (nb_sectors > 0) {
- if (is_changed(s->cow_bitmap, sector_num, nb_sectors, &n)) {
- lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
- ret = read(s->fd, buf, n * 512);
+ if (cow_is_allocated(bs, sector_num, nb_sectors, &n)) {
+ ret = bdrv_pread(bs->file,
+ s->cow_sectors_offset + sector_num * 512,
+ buf, n * 512);
if (ret != n * 512)
return -1;
} else {
@@ -184,22 +203,18 @@ static int cow_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
BDRVCowState *s = bs->opaque;
- int ret, i;
+ int ret;
- lseek(s->fd, s->cow_sectors_offset + sector_num * 512, SEEK_SET);
- ret = write(s->fd, buf, nb_sectors * 512);
+ ret = bdrv_pwrite(bs->file, s->cow_sectors_offset + sector_num * 512,
+ buf, nb_sectors * 512);
if (ret != nb_sectors * 512)
return -1;
- for (i = 0; i < nb_sectors; i++)
- cow_set_bit(s->cow_bitmap, sector_num + i);
- return 0;
+
+ return cow_update_bitmap(bs, sector_num, nb_sectors);
}
static void cow_close(BlockDriverState *bs)
{
- BDRVCowState *s = bs->opaque;
- munmap((void *)s->cow_bitmap_addr, s->cow_bitmap_size);
- close(s->fd);
}
static int cow_create(const char *filename, QEMUOptionParameter *options)
@@ -209,6 +224,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
struct stat st;
int64_t image_sectors = 0;
const char *image_filename = NULL;
+ int ret;
/* Read out options */
while (options && options->name) {
@@ -223,7 +239,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
cow_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
if (cow_fd < 0)
- return -1;
+ return -errno;
memset(&cow_header, 0, sizeof(cow_header));
cow_header.magic = cpu_to_be32(COW_MAGIC);
cow_header.version = cpu_to_be32(COW_VERSION);
@@ -248,17 +264,27 @@ static int cow_create(const char *filename, QEMUOptionParameter *options)
}
cow_header.sectorsize = cpu_to_be32(512);
cow_header.size = cpu_to_be64(image_sectors * 512);
- write(cow_fd, &cow_header, sizeof(cow_header));
+ ret = qemu_write_full(cow_fd, &cow_header, sizeof(cow_header));
+ if (ret != sizeof(cow_header)) {
+ ret = -errno;
+ goto exit;
+ }
+
/* resize to include at least all the bitmap */
- ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
+ ret = ftruncate(cow_fd, sizeof(cow_header) + ((image_sectors + 7) >> 3));
+ if (ret) {
+ ret = -errno;
+ goto exit;
+ }
+
+exit:
close(cow_fd);
- return 0;
+ return ret;
}
static void cow_flush(BlockDriverState *bs)
{
- BDRVCowState *s = bs->opaque;
- fsync(s->fd);
+ bdrv_flush(bs->file);
}
static QEMUOptionParameter cow_create_options[] = {
@@ -296,4 +322,3 @@ static void bdrv_cow_init(void)
}
block_init(bdrv_cow_init);
-#endif
diff --git a/block/dmg.c b/block/dmg.c
index 262560f..a3c815b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -28,8 +28,6 @@
#include <zlib.h>
typedef struct BDRVDMGState {
- int fd;
-
/* each chunk contains a certain number of sectors,
* offsets[i] is the offset in the .dmg file,
* lengths[i] is the length of the compressed chunk,
@@ -58,72 +56,75 @@ static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static off_t read_off(int fd)
+static off_t read_off(BlockDriverState *bs, int64_t offset)
{
uint64_t buffer;
- if(read(fd,&buffer,8)<8)
+ if (bdrv_pread(bs->file, offset, &buffer, 8) < 8)
return 0;
return be64_to_cpu(buffer);
}
-static off_t read_uint32(int fd)
+static off_t read_uint32(BlockDriverState *bs, int64_t offset)
{
uint32_t buffer;
- if(read(fd,&buffer,4)<4)
+ if (bdrv_pread(bs->file, offset, &buffer, 4) < 4)
return 0;
return be32_to_cpu(buffer);
}
-static int dmg_open(BlockDriverState *bs, const char *filename, int flags)
+static int dmg_open(BlockDriverState *bs, int flags)
{
BDRVDMGState *s = bs->opaque;
off_t info_begin,info_end,last_in_offset,last_out_offset;
uint32_t count;
uint32_t max_compressed_size=1,max_sectors_per_chunk=1,i;
+ int64_t offset;
- s->fd = open(filename, O_RDONLY | O_BINARY);
- if (s->fd < 0)
- return -errno;
bs->read_only = 1;
s->n_chunks = 0;
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
/* read offset of info blocks */
- if(lseek(s->fd,-0x1d8,SEEK_END)<0) {
-dmg_close:
- close(s->fd);
- /* open raw instead */
- bs->drv=bdrv_find_format("raw");
- return bs->drv->bdrv_open(bs, filename, flags);
+ offset = bdrv_getlength(bs->file);
+ if (offset < 0) {
+ goto fail;
+ }
+ offset -= 0x1d8;
+
+ info_begin = read_off(bs, offset);
+ if (info_begin == 0) {
+ goto fail;
+ }
+
+ if (read_uint32(bs, info_begin) != 0x100) {
+ goto fail;
}
- info_begin=read_off(s->fd);
- if(info_begin==0)
- goto dmg_close;
- if(lseek(s->fd,info_begin,SEEK_SET)<0)
- goto dmg_close;
- if(read_uint32(s->fd)!=0x100)
- goto dmg_close;
- if((count = read_uint32(s->fd))==0)
- goto dmg_close;
- info_end = info_begin+count;
- if(lseek(s->fd,0xf8,SEEK_CUR)<0)
- goto dmg_close;
+
+ count = read_uint32(bs, info_begin + 4);
+ if (count == 0) {
+ goto fail;
+ }
+ info_end = info_begin + count;
+
+ offset = info_begin + 0x100;
/* read offsets */
last_in_offset = last_out_offset = 0;
- while(lseek(s->fd,0,SEEK_CUR)<info_end) {
+ while (offset < info_end) {
uint32_t type;
- count = read_uint32(s->fd);
+ count = read_uint32(bs, offset);
if(count==0)
- goto dmg_close;
- type = read_uint32(s->fd);
- if(type!=0x6d697368 || count<244)
- lseek(s->fd,count-4,SEEK_CUR);
- else {
+ goto fail;
+ offset += 4;
+
+ type = read_uint32(bs, offset);
+ if (type == 0x6d697368 && count >= 244) {
int new_size, chunk_count;
- if(lseek(s->fd,200,SEEK_CUR)<0)
- goto dmg_close;
+
+ offset += 4;
+ offset += 200;
+
chunk_count = (count-204)/40;
new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
s->types = qemu_realloc(s->types, new_size/2);
@@ -133,7 +134,8 @@ dmg_close:
s->sectorcounts = qemu_realloc(s->sectorcounts, new_size);
for(i=s->n_chunks;i<s->n_chunks+chunk_count;i++) {
- s->types[i] = read_uint32(s->fd);
+ s->types[i] = read_uint32(bs, offset);
+ offset += 4;
if(s->types[i]!=0x80000005 && s->types[i]!=1 && s->types[i]!=2) {
if(s->types[i]==0xffffffff) {
last_in_offset = s->offsets[i-1]+s->lengths[i-1];
@@ -141,15 +143,23 @@ dmg_close:
}
chunk_count--;
i--;
- if(lseek(s->fd,36,SEEK_CUR)<0)
- goto dmg_close;
+ offset += 36;
continue;
}
- read_uint32(s->fd);
- s->sectors[i] = last_out_offset+read_off(s->fd);
- s->sectorcounts[i] = read_off(s->fd);
- s->offsets[i] = last_in_offset+read_off(s->fd);
- s->lengths[i] = read_off(s->fd);
+ offset += 4;
+
+ s->sectors[i] = last_out_offset+read_off(bs, offset);
+ offset += 8;
+
+ s->sectorcounts[i] = read_off(bs, offset);
+ offset += 8;
+
+ s->offsets[i] = last_in_offset+read_off(bs, offset);
+ offset += 8;
+
+ s->lengths[i] = read_off(bs, offset);
+ offset += 8;
+
if(s->lengths[i]>max_compressed_size)
max_compressed_size = s->lengths[i];
if(s->sectorcounts[i]>max_sectors_per_chunk)
@@ -163,11 +173,13 @@ dmg_close:
s->compressed_chunk = qemu_malloc(max_compressed_size+1);
s->uncompressed_chunk = qemu_malloc(512*max_sectors_per_chunk);
if(inflateInit(&s->zstream) != Z_OK)
- goto dmg_close;
+ goto fail;
s->current_chunk = s->n_chunks;
return 0;
+fail:
+ return -1;
}
static inline int is_sector_in_chunk(BDRVDMGState* s,
@@ -196,8 +208,10 @@ static inline uint32_t search_chunk(BDRVDMGState* s,int sector_num)
return s->n_chunks; /* error */
}
-static inline int dmg_read_chunk(BDRVDMGState *s,int sector_num)
+static inline int dmg_read_chunk(BlockDriverState *bs, int sector_num)
{
+ BDRVDMGState *s = bs->opaque;
+
if(!is_sector_in_chunk(s,s->current_chunk,sector_num)) {
int ret;
uint32_t chunk = search_chunk(s,sector_num);
@@ -210,15 +224,12 @@ static inline int dmg_read_chunk(BDRVDMGState *s,int sector_num)
case 0x80000005: { /* zlib compressed */
int i;
- ret = lseek(s->fd, s->offsets[chunk], SEEK_SET);
- if(ret<0)
- return -1;
-
/* we need to buffer, because only the chunk as whole can be
* inflated. */
i=0;
do {
- ret = read(s->fd, s->compressed_chunk+i, s->lengths[chunk]-i);
+ ret = bdrv_pread(bs->file, s->offsets[chunk] + i,
+ s->compressed_chunk+i, s->lengths[chunk]-i);
if(ret<0 && errno==EINTR)
ret=0;
i+=ret;
@@ -239,7 +250,8 @@ static inline int dmg_read_chunk(BDRVDMGState *s,int sector_num)
return -1;
break; }
case 1: /* copy */
- ret = read(s->fd, s->uncompressed_chunk, s->lengths[chunk]);
+ ret = bdrv_pread(bs->file, s->offsets[chunk],
+ s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk])
return -1;
break;
@@ -260,7 +272,7 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
for(i=0;i<nb_sectors;i++) {
uint32_t sector_offset_in_chunk;
- if(dmg_read_chunk(s, sector_num+i) != 0)
+ if(dmg_read_chunk(bs, sector_num+i) != 0)
return -1;
sector_offset_in_chunk = sector_num+i-s->sectors[s->current_chunk];
memcpy(buf+i*512,s->uncompressed_chunk+sector_offset_in_chunk*512,512);
@@ -271,7 +283,6 @@ static int dmg_read(BlockDriverState *bs, int64_t sector_num,
static void dmg_close(BlockDriverState *bs)
{
BDRVDMGState *s = bs->opaque;
- close(s->fd);
if(s->n_chunks>0) {
free(s->types);
free(s->offsets);
diff --git a/block/nbd.c b/block/nbd.c
index 47d4778..a1ec123 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -49,9 +49,6 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
size_t blocksize;
int ret;
- if ((flags & BDRV_O_CREAT))
- return -EINVAL;
-
if (!strstart(filename, "nbd:", &host))
return -EINVAL;
@@ -180,7 +177,7 @@ static int64_t nbd_getlength(BlockDriverState *bs)
static BlockDriver bdrv_nbd = {
.format_name = "nbd",
.instance_size = sizeof(BDRVNBDState),
- .bdrv_open = nbd_open,
+ .bdrv_file_open = nbd_open,
.bdrv_read = nbd_read,
.bdrv_write = nbd_write,
.bdrv_close = nbd_close,
diff --git a/block/parallels.c b/block/parallels.c
index 0b64a5c..35a14aa 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -46,7 +46,6 @@ struct parallels_header {
} __attribute__((packed));
typedef struct BDRVParallelsState {
- int fd;
uint32_t *catalog_bitmap;
int catalog_size;
@@ -68,24 +67,15 @@ static int parallels_probe(const uint8_t *buf, int buf_size, const char *filenam
return 0;
}
-static int parallels_open(BlockDriverState *bs, const char *filename, int flags)
+static int parallels_open(BlockDriverState *bs, int flags)
{
BDRVParallelsState *s = bs->opaque;
- int fd, i;
+ int i;
struct parallels_header ph;
- fd = open(filename, O_RDWR | O_BINARY | O_LARGEFILE);
- if (fd < 0) {
- fd = open(filename, O_RDONLY | O_BINARY | O_LARGEFILE);
- if (fd < 0)
- return -1;
- }
-
bs->read_only = 1; // no write support yet
- s->fd = fd;
-
- if (read(fd, &ph, sizeof(ph)) != sizeof(ph))
+ if (bdrv_pread(bs->file, 0, &ph, sizeof(ph)) != sizeof(ph))
goto fail;
if (memcmp(ph.magic, HEADER_MAGIC, 16) ||
@@ -95,14 +85,11 @@ static int parallels_open(BlockDriverState *bs, const char *filename, int flags)
bs->total_sectors = le32_to_cpu(ph.nb_sectors);
- if (lseek(s->fd, 64, SEEK_SET) != 64)
- goto fail;
-
s->tracks = le32_to_cpu(ph.tracks);
s->catalog_size = le32_to_cpu(ph.catalog_entries);
s->catalog_bitmap = qemu_malloc(s->catalog_size * 4);
- if (read(s->fd, s->catalog_bitmap, s->catalog_size * 4) !=
+ if (bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4) !=
s->catalog_size * 4)
goto fail;
for (i = 0; i < s->catalog_size; i++)
@@ -112,44 +99,34 @@ static int parallels_open(BlockDriverState *bs, const char *filename, int flags)
fail:
if (s->catalog_bitmap)
qemu_free(s->catalog_bitmap);
- close(fd);
return -1;
}
-static inline int seek_to_sector(BlockDriverState *bs, int64_t sector_num)
+static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVParallelsState *s = bs->opaque;
- uint32_t index, offset, position;
+ uint32_t index, offset;
index = sector_num / s->tracks;
offset = sector_num % s->tracks;
- // not allocated
+ /* not allocated */
if ((index > s->catalog_size) || (s->catalog_bitmap[index] == 0))
return -1;
-
- position = (s->catalog_bitmap[index] + offset) * 512;
-
-// fprintf(stderr, "sector: %llx index=%x offset=%x pointer=%x position=%x\n",
-// sector_num, index, offset, s->catalog_bitmap[index], position);
-
- if (lseek(s->fd, position, SEEK_SET) != position)
- return -1;
-
- return 0;
+ return (uint64_t)(s->catalog_bitmap[index] + offset) * 512;
}
static int parallels_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
- BDRVParallelsState *s = bs->opaque;
-
while (nb_sectors > 0) {
- if (!seek_to_sector(bs, sector_num)) {
- if (read(s->fd, buf, 512) != 512)
- return -1;
- } else
+ int64_t position = seek_to_sector(bs, sector_num);
+ if (position >= 0) {
+ if (bdrv_pread(bs->file, position, buf, 512) != 512)
+ return -1;
+ } else {
memset(buf, 0, 512);
+ }
nb_sectors--;
sector_num++;
buf += 512;
@@ -161,7 +138,6 @@ static void parallels_close(BlockDriverState *bs)
{
BDRVParallelsState *s = bs->opaque;
qemu_free(s->catalog_bitmap);
- close(s->fd);
}
static BlockDriver bdrv_parallels = {
diff --git a/block/qcow.c b/block/qcow.c
index 55a68a6..816103d 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -76,7 +76,7 @@ typedef struct BDRVQcowState {
AES_KEY aes_decrypt_key;
} BDRVQcowState;
-static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
{
@@ -90,16 +90,13 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
+static int qcow_open(BlockDriverState *bs, int flags)
{
BDRVQcowState *s = bs->opaque;
- int len, i, shift, ret;
+ int len, i, shift;
QCowHeader header;
- ret = bdrv_file_open(&s->hd, filename, flags);
- if (ret < 0)
- return ret;
- if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
+ if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header))
goto fail;
be32_to_cpus(&header.magic);
be32_to_cpus(&header.version);
@@ -135,7 +132,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
if (!s->l1_table)
goto fail;
- if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
+ if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
s->l1_size * sizeof(uint64_t))
goto fail;
for(i = 0;i < s->l1_size; i++) {
@@ -158,7 +155,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
len = header.backing_file_size;
if (len > 1023)
len = 1023;
- if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
+ if (bdrv_pread(bs->file, header.backing_file_offset, bs->backing_file, len) != len)
goto fail;
bs->backing_file[len] = '\0';
}
@@ -169,7 +166,6 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
qemu_free(s->l2_cache);
qemu_free(s->cluster_cache);
qemu_free(s->cluster_data);
- bdrv_delete(s->hd);
return -1;
}
@@ -271,14 +267,15 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
if (!allocate)
return 0;
/* allocate a new l2 entry */
- l2_offset = bdrv_getlength(s->hd);
+ l2_offset = bdrv_getlength(bs->file);
/* round to cluster size */
l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
/* update the L1 entry */
s->l1_table[l1_index] = l2_offset;
tmp = cpu_to_be64(l2_offset);
- if (bdrv_pwrite(s->hd, s->l1_table_offset + l1_index * sizeof(tmp),
- &tmp, sizeof(tmp)) != sizeof(tmp))
+ if (bdrv_pwrite_sync(bs->file,
+ s->l1_table_offset + l1_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
return 0;
new_l2_table = 1;
}
@@ -306,11 +303,11 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
l2_table = s->l2_cache + (min_index << s->l2_bits);
if (new_l2_table) {
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- if (bdrv_pwrite(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
- s->l2_size * sizeof(uint64_t))
+ if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t)) < 0)
return 0;
} else {
- if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
}
@@ -329,22 +326,22 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* if the cluster is already compressed, we must
decompress it in the case it is not completely
overwritten */
- if (decompress_cluster(s, cluster_offset) < 0)
+ if (decompress_cluster(bs, cluster_offset) < 0)
return 0;
- cluster_offset = bdrv_getlength(s->hd);
+ cluster_offset = bdrv_getlength(bs->file);
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
/* write the cluster content */
- if (bdrv_pwrite(s->hd, cluster_offset, s->cluster_cache, s->cluster_size) !=
+ if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
s->cluster_size)
return -1;
} else {
- cluster_offset = bdrv_getlength(s->hd);
+ cluster_offset = bdrv_getlength(bs->file);
if (allocate == 1) {
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
- bdrv_truncate(s->hd, cluster_offset + s->cluster_size);
+ bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (s->crypt_method &&
@@ -358,7 +355,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
s->cluster_data,
s->cluster_data + 512, 1, 1,
&s->aes_encrypt_key);
- if (bdrv_pwrite(s->hd, cluster_offset + i * 512,
+ if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
s->cluster_data, 512) != 512)
return -1;
}
@@ -372,8 +369,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* update L2 table */
tmp = cpu_to_be64(cluster_offset);
l2_table[l2_index] = tmp;
- if (bdrv_pwrite(s->hd,
- l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) != sizeof(tmp))
+ if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+ &tmp, sizeof(tmp)) < 0)
return 0;
}
return cluster_offset;
@@ -422,8 +419,9 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
return 0;
}
-static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
+static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
+ BDRVQcowState *s = bs->opaque;
int ret, csize;
uint64_t coffset;
@@ -431,7 +429,7 @@ static int decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
if (s->cluster_cache_offset != coffset) {
csize = cluster_offset >> (63 - s->cluster_bits);
csize &= (s->cluster_size - 1);
- ret = bdrv_pread(s->hd, coffset, s->cluster_data, csize);
+ ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
if (ret != csize)
return -1;
if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -468,11 +466,11 @@ static int qcow_read(BlockDriverState *bs, int64_t sector_num,
memset(buf, 0, 512 * n);
}
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- if (decompress_cluster(s, cluster_offset) < 0)
+ if (decompress_cluster(bs, cluster_offset) < 0)
return -1;
memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
} else {
- ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
+ ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
if (ret != n * 512)
return -1;
if (s->crypt_method) {
@@ -505,7 +503,7 @@ typedef struct QCowAIOCB {
static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
{
- QCowAIOCB *acb = (QCowAIOCB *)blockacb;
+ QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
if (acb->hd_aiocb)
bdrv_aio_cancel(acb->hd_aiocb);
qemu_aio_release(acb);
@@ -601,7 +599,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
}
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
- if (decompress_cluster(s, acb->cluster_offset) < 0)
+ if (decompress_cluster(bs, acb->cluster_offset) < 0)
goto done;
memcpy(acb->buf,
s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
@@ -614,7 +612,7 @@ static void qcow_aio_read_cb(void *opaque, int ret)
acb->hd_iov.iov_base = (void *)acb->buf;
acb->hd_iov.iov_len = acb->n * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_readv(s->hd,
+ acb->hd_aiocb = bdrv_aio_readv(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
&acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
@@ -699,7 +697,7 @@ static void qcow_aio_write_cb(void *opaque, int ret)
acb->hd_iov.iov_base = (void *)src_buf;
acb->hd_iov.iov_len = acb->n * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_writev(s->hd,
+ acb->hd_aiocb = bdrv_aio_writev(bs->file,
(cluster_offset >> 9) + index_in_cluster,
&acb->hd_qiov, acb->n,
qcow_aio_write_cb, acb);
@@ -723,7 +721,7 @@ static BlockDriverAIOCB *qcow_aio_writev(BlockDriverState *bs,
s->cluster_cache_offset = -1; /* disable compressed cache */
- acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+ acb = qcow_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
if (!acb)
return NULL;
@@ -739,7 +737,6 @@ static void qcow_close(BlockDriverState *bs)
qemu_free(s->l2_cache);
qemu_free(s->cluster_cache);
qemu_free(s->cluster_data);
- bdrv_delete(s->hd);
}
static int qcow_create(const char *filename, QEMUOptionParameter *options)
@@ -750,6 +747,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
int64_t total_size = 0;
const char *backing_file = NULL;
int flags = 0;
+ int ret;
/* Read out options */
while (options && options->name) {
@@ -765,7 +763,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
if (fd < 0)
- return -1;
+ return -errno;
memset(&header, 0, sizeof(header));
header.magic = cpu_to_be32(QCOW_MAGIC);
header.version = cpu_to_be32(QCOW_VERSION);
@@ -801,17 +799,34 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
}
/* write all the data */
- write(fd, &header, sizeof(header));
+ ret = qemu_write_full(fd, &header, sizeof(header));
+ if (ret != sizeof(header)) {
+ ret = -errno;
+ goto exit;
+ }
+
if (backing_file) {
- write(fd, backing_file, backing_filename_len);
+ ret = qemu_write_full(fd, backing_file, backing_filename_len);
+ if (ret != backing_filename_len) {
+ ret = -errno;
+ goto exit;
+ }
+
}
lseek(fd, header_size, SEEK_SET);
tmp = 0;
for(i = 0;i < l1_size; i++) {
- write(fd, &tmp, sizeof(tmp));
+ ret = qemu_write_full(fd, &tmp, sizeof(tmp));
+ if (ret != sizeof(tmp)) {
+ ret = -errno;
+ goto exit;
+ }
}
+
+ ret = 0;
+exit:
close(fd);
- return 0;
+ return ret;
}
static int qcow_make_empty(BlockDriverState *bs)
@@ -821,9 +836,10 @@ static int qcow_make_empty(BlockDriverState *bs)
int ret;
memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
- return -1;
- ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
+ if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+ l1_length) < 0)
+ return -1;
+ ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;
@@ -884,7 +900,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
out_len, 0, 0);
cluster_offset &= s->cluster_offset_mask;
- if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
+ if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) {
qemu_free(out_buf);
return -1;
}
@@ -896,8 +912,13 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
static void qcow_flush(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
- bdrv_flush(s->hd);
+ bdrv_flush(bs->file);
+}
+
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_flush(bs->file, cb, opaque);
}
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -940,6 +961,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_make_empty = qcow_make_empty,
.bdrv_aio_readv = qcow_aio_readv,
.bdrv_aio_writev = qcow_aio_writev,
+ .bdrv_aio_flush = qcow_aio_flush,
.bdrv_write_compressed = qcow_write_compressed,
.bdrv_get_info = qcow_get_info,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index fdedf17..166922f 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -33,12 +33,15 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
BDRVQcowState *s = bs->opaque;
int new_l1_size, new_l1_size2, ret, i;
uint64_t *new_l1_table;
- uint64_t new_l1_table_offset;
+ int64_t new_l1_table_offset;
uint8_t data[12];
new_l1_size = s->l1_size;
if (min_size <= new_l1_size)
return 0;
+ if (new_l1_size == 0) {
+ new_l1_size = 1;
+ }
while (min_size > new_l1_size) {
new_l1_size = (new_l1_size * 3 + 1) / 2;
}
@@ -47,26 +50,34 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = qemu_mallocz(new_l1_size2);
+ new_l1_table = qemu_mallocz(align_offset(new_l1_size2, 512));
memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
/* write new table (align to cluster) */
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
+ if (new_l1_table_offset < 0) {
+ qemu_free(new_l1_table);
+ return new_l1_table_offset;
+ }
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2);
- if (ret != new_l1_size2)
+ ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+ if (ret < 0)
goto fail;
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
/* set new table */
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
cpu_to_be32w((uint32_t*)data, new_l1_size);
cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
- if (bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data,
- sizeof(data)) != sizeof(data))
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+ if (ret < 0) {
goto fail;
+ }
qemu_free(s->l1_table);
qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
s->l1_table_offset = new_l1_table_offset;
@@ -74,8 +85,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
s->l1_size = new_l1_size;
return 0;
fail:
- qemu_free(s->l1_table);
- return -EIO;
+ qemu_free(new_l1_table);
+ qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
+ return ret;
}
void qcow2_l2_cache_reset(BlockDriverState *bs)
@@ -145,29 +157,36 @@ static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
* the image file failed.
*/
-static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset)
+static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
+ uint64_t **l2_table)
{
BDRVQcowState *s = bs->opaque;
int min_index;
- uint64_t *l2_table;
+ int ret;
/* seek if the table for the given offset is in the cache */
- l2_table = seek_l2_table(s, l2_offset);
- if (l2_table != NULL)
- return l2_table;
+ *l2_table = seek_l2_table(s, l2_offset);
+ if (*l2_table != NULL) {
+ return 0;
+ }
/* not found: load a new entry in the least used one */
min_index = l2_cache_new_entry(bs);
- l2_table = s->l2_cache + (min_index << s->l2_bits);
- if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
- s->l2_size * sizeof(uint64_t))
- return NULL;
+ *l2_table = s->l2_cache + (min_index << s->l2_bits);
+
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
+ ret = bdrv_pread(bs->file, l2_offset, *l2_table,
+ s->l2_size * sizeof(uint64_t));
+ if (ret < 0) {
+ return ret;
+ }
+
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
- return l2_table;
+ return 0;
}
/*
@@ -175,21 +194,23 @@ static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset)
* and we really don't want bdrv_pread to perform a read-modify-write)
*/
#define L1_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l1_entry(BDRVQcowState *s, int l1_index)
+static int write_l1_entry(BlockDriverState *bs, int l1_index)
{
+ BDRVQcowState *s = bs->opaque;
uint64_t buf[L1_ENTRIES_PER_SECTOR];
int l1_start_index;
- int i;
+ int i, ret;
l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
}
- if (bdrv_pwrite(s->hd, s->l1_table_offset + 8 * l1_start_index,
- buf, sizeof(buf)) != sizeof(buf))
- {
- return -1;
+ BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
+ buf, sizeof(buf));
+ if (ret < 0) {
+ return ret;
}
return 0;
@@ -205,24 +226,22 @@ static int write_l1_entry(BDRVQcowState *s, int l1_index)
*
*/
-static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index)
+static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
BDRVQcowState *s = bs->opaque;
int min_index;
uint64_t old_l2_offset;
- uint64_t *l2_table, l2_offset;
+ uint64_t *l2_table;
+ int64_t l2_offset;
+ int ret;
old_l2_offset = s->l1_table[l1_index];
/* allocate a new l2 entry */
l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
-
- /* update the L1 entry */
-
- s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
- if (write_l1_entry(s, l1_index) < 0) {
- return NULL;
+ if (l2_offset < 0) {
+ return l2_offset;
}
/* allocate a new entry in the l2 cache */
@@ -235,23 +254,40 @@ static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index)
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
} else {
/* if there was an old l2 table, read it from the disk */
- if (bdrv_pread(s->hd, old_l2_offset,
- l2_table, s->l2_size * sizeof(uint64_t)) !=
- s->l2_size * sizeof(uint64_t))
- return NULL;
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
+ ret = bdrv_pread(bs->file, old_l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
}
/* write the l2 table to the file */
- if (bdrv_pwrite(s->hd, l2_offset,
- l2_table, s->l2_size * sizeof(uint64_t)) !=
- s->l2_size * sizeof(uint64_t))
- return NULL;
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
+ ret = bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* update the L1 entry */
+ s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
+ ret = write_l1_entry(bs, l1_index);
+ if (ret < 0) {
+ goto fail;
+ }
/* update the l2 cache entry */
s->l2_cache_offsets[min_index] = l2_offset;
s->l2_cache_counts[min_index] = 1;
- return l2_table;
+ *table = l2_table;
+ return 0;
+
+fail:
+ s->l1_table[l1_index] = old_l2_offset;
+ qcow2_l2_cache_reset(bs);
+ return ret;
}
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
@@ -264,7 +300,7 @@ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
return 0;
for (i = start; i < start + nb_clusters; i++)
- if (offset + i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask))
+ if (offset + (uint64_t) i * cluster_size != (be64_to_cpu(l2_table[i]) & ~mask))
break;
return (i - start);
@@ -306,8 +342,8 @@ void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
}
-int qcow2_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+static int qcow_read(BlockDriverState *bs, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
{
BDRVQcowState *s = bs->opaque;
int ret, index_in_cluster, n, n1;
@@ -315,13 +351,20 @@ int qcow2_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
while (nb_sectors > 0) {
n = nb_sectors;
- cluster_offset = qcow2_get_cluster_offset(bs, sector_num << 9, &n);
+
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9, &n,
+ &cluster_offset);
+ if (ret < 0) {
+ return ret;
+ }
+
index_in_cluster = sector_num & (s->cluster_sectors - 1);
if (!cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n);
if (n1 > 0) {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING);
ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
if (ret < 0)
return -1;
@@ -330,11 +373,12 @@ int qcow2_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
memset(buf, 0, 512 * n);
}
} else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
- if (qcow2_decompress_cluster(s, cluster_offset) < 0)
+ if (qcow2_decompress_cluster(bs, cluster_offset) < 0)
return -1;
memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
} else {
- ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
+ BLKDBG_EVENT(bs->file, BLKDBG_READ);
+ ret = bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512);
if (ret != n * 512)
return -1;
if (s->crypt_method) {
@@ -358,7 +402,8 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
n = n_end - n_start;
if (n <= 0)
return 0;
- ret = qcow2_read(bs, start_sect + n_start, s->cluster_data, n);
+ BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
+ ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n);
if (ret < 0)
return ret;
if (s->crypt_method) {
@@ -367,8 +412,9 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
s->cluster_data, n, 1,
&s->aes_encrypt_key);
}
- ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start,
- s->cluster_data, n);
+ BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
+ ret = bdrv_write_sync(bs->file, (cluster_offset >> 9) + n_start,
+ s->cluster_data, n);
if (ret < 0)
return ret;
return 0;
@@ -378,27 +424,29 @@ static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
/*
* get_cluster_offset
*
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
+ * For a given offset of the disk image, find the cluster offset in
+ * qcow2 file. The offset is stored in *cluster_offset.
*
* on entry, *num is the number of contiguous clusters we'd like to
* access following offset.
*
* on exit, *num is the number of contiguous clusters we can read.
*
- * Return 1, if the offset is found
- * Return 0, otherwise.
+ * Return 0, if the offset is found
+ * Return -errno, otherwise.
*
*/
-uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num)
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int *num, uint64_t *cluster_offset)
{
BDRVQcowState *s = bs->opaque;
- int l1_index, l2_index;
- uint64_t l2_offset, *l2_table, cluster_offset;
+ unsigned int l1_index, l2_index;
+ uint64_t l2_offset, *l2_table;
int l1_bits, c;
- int index_in_cluster, nb_available, nb_needed, nb_clusters;
+ unsigned int index_in_cluster, nb_clusters;
+ uint64_t nb_available, nb_needed;
+ int ret;
index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
nb_needed = *num + index_in_cluster;
@@ -409,7 +457,7 @@ uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
* the end of the l1 entry
*/
- nb_available = (1 << l1_bits) - (offset & ((1 << l1_bits) - 1));
+ nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
/* compute the number of available sectors */
@@ -419,7 +467,7 @@ uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
nb_needed = nb_available;
}
- cluster_offset = 0;
+ *cluster_offset = 0;
/* seek the the l2 offset in the l1 table */
@@ -437,17 +485,18 @@ uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
/* load the l2 table in memory */
l2_offset &= ~QCOW_OFLAG_COPIED;
- l2_table = l2_load(bs, l2_offset);
- if (l2_table == NULL)
- return 0;
+ ret = l2_load(bs, l2_offset, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
/* find the cluster offset for the given disk offset */
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
- cluster_offset = be64_to_cpu(l2_table[l2_index]);
+ *cluster_offset = be64_to_cpu(l2_table[l2_index]);
nb_clusters = size_to_clusters(s, nb_needed << 9);
- if (!cluster_offset) {
+ if (!*cluster_offset) {
/* how many empty clusters ? */
c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
} else {
@@ -463,7 +512,8 @@ out:
*num = nb_available - index_in_cluster;
- return cluster_offset & ~QCOW_OFLAG_COPIED;
+ *cluster_offset &=~QCOW_OFLAG_COPIED;
+ return 0;
}
/*
@@ -475,24 +525,27 @@ out:
* the l2 table offset in the qcow2 file and the cluster index
* in the l2 table are given to the caller.
*
+ * Returns 0 on success, -errno on failure.
*/
-
static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
uint64_t **new_l2_table,
uint64_t *new_l2_offset,
int *new_l2_index)
{
BDRVQcowState *s = bs->opaque;
- int l1_index, l2_index, ret;
- uint64_t l2_offset, *l2_table;
+ unsigned int l1_index, l2_index;
+ uint64_t l2_offset;
+ uint64_t *l2_table = NULL;
+ int ret;
/* seek the the l2 offset in the l1 table */
l1_index = offset >> (s->l2_bits + s->cluster_bits);
if (l1_index >= s->l1_size) {
ret = qcow2_grow_l1_table(bs, l1_index + 1);
- if (ret < 0)
- return 0;
+ if (ret < 0) {
+ return ret;
+ }
}
l2_offset = s->l1_table[l1_index];
@@ -501,15 +554,17 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
if (l2_offset & QCOW_OFLAG_COPIED) {
/* load the l2 table in memory */
l2_offset &= ~QCOW_OFLAG_COPIED;
- l2_table = l2_load(bs, l2_offset);
- if (l2_table == NULL)
- return 0;
+ ret = l2_load(bs, l2_offset, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
} else {
if (l2_offset)
qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
- l2_table = l2_allocate(bs, l1_index);
- if (l2_table == NULL)
- return 0;
+ ret = l2_allocate(bs, l1_index, &l2_table);
+ if (ret < 0) {
+ return ret;
+ }
l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED;
}
@@ -521,7 +576,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
*new_l2_offset = l2_offset;
*new_l2_index = l2_index;
- return 1;
+ return 0;
}
/*
@@ -543,12 +598,14 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret;
- uint64_t l2_offset, *l2_table, cluster_offset;
+ uint64_t l2_offset, *l2_table;
+ int64_t cluster_offset;
int nb_csectors;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
- if (ret == 0)
+ if (ret < 0) {
return 0;
+ }
cluster_offset = be64_to_cpu(l2_table[l2_index]);
if (cluster_offset & QCOW_OFLAG_COPIED)
@@ -558,6 +615,10 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
qcow2_free_any_clusters(bs, cluster_offset, 1);
cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
+ if (cluster_offset < 0) {
+ return 0;
+ }
+
nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
(cluster_offset >> 9);
@@ -568,11 +629,12 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
/* compressed clusters never have the copied flag */
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
l2_table[l2_index] = cpu_to_be64(cluster_offset);
- if (bdrv_pwrite(s->hd,
+ if (bdrv_pwrite_sync(bs->file,
l2_offset + l2_index * sizeof(uint64_t),
l2_table + l2_index,
- sizeof(uint64_t)) != sizeof(uint64_t))
+ sizeof(uint64_t)) < 0)
return 0;
return cluster_offset;
@@ -583,29 +645,31 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
* read-modify-write in bdrv_pwrite
*/
#define L2_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l2_entries(BDRVQcowState *s, uint64_t *l2_table,
+static int write_l2_entries(BlockDriverState *bs, uint64_t *l2_table,
uint64_t l2_offset, int l2_index, int num)
{
int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1);
int start_offset = (8 * l2_index) & ~511;
int end_offset = (8 * (l2_index + num) + 511) & ~511;
size_t len = end_offset - start_offset;
+ int ret;
- if (bdrv_pwrite(s->hd, l2_offset + start_offset, &l2_table[l2_start_index],
- len) != len)
- {
- return -1;
+ BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
+ ret = bdrv_pwrite_sync(bs->file, l2_offset + start_offset,
+ &l2_table[l2_start_index], len);
+ if (ret < 0) {
+ return ret;
}
return 0;
}
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
- QCowL2Meta *m)
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
BDRVQcowState *s = bs->opaque;
int i, j = 0, l2_index, ret;
uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
+ uint64_t cluster_offset = m->cluster_offset;
if (m->nb_clusters == 0)
return 0;
@@ -628,10 +692,11 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
goto err;
}
- ret = -EIO;
/* update L2 table */
- if (!get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index))
+ ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
+ if (ret < 0) {
goto err;
+ }
for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster
@@ -647,8 +712,9 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
}
- if (write_l2_entries(s, l2_table, l2_offset, l2_index, m->nb_clusters) < 0) {
- ret = -1;
+ ret = write_l2_entries(bs, l2_table, l2_offset, l2_index, m->nb_clusters);
+ if (ret < 0) {
+ qcow2_l2_cache_reset(bs);
goto err;
}
@@ -665,29 +731,36 @@ err:
/*
* alloc_cluster_offset
*
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
+ * For a given offset of the disk image, return cluster offset in qcow2 file.
* If the offset is not found, allocate a new cluster.
*
- * Return the cluster offset if successful,
- * Return 0, otherwise.
+ * If the cluster was already allocated, m->nb_clusters is set to 0,
+ * m->depends_on is set to NULL and the other fields in m are meaningless.
+ *
+ * If the cluster is newly allocated, m->nb_clusters is set to the number of
+ * contiguous clusters that have been allocated. This may be 0 if the request
+ * conflicts with another write request in flight; in this case, m->depends_on
+ * is set and the remaining fields of m are meaningless.
+ *
+ * If m->nb_clusters is non-zero, the other fields of m are valid and contain
+ * information about the first allocated cluster.
*
+ * Return 0 on success and -errno in error cases
*/
-
-uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int n_start, int n_end,
- int *num, QCowL2Meta *m)
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, QCowL2Meta *m)
{
BDRVQcowState *s = bs->opaque;
int l2_index, ret;
- uint64_t l2_offset, *l2_table, cluster_offset;
- int nb_clusters, i = 0;
+ uint64_t l2_offset, *l2_table;
+ int64_t cluster_offset;
+ unsigned int nb_clusters, i = 0;
+ QCowL2Meta *old_alloc;
ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
- if (ret == 0)
- return 0;
+ if (ret < 0) {
+ return ret;
+ }
nb_clusters = size_to_clusters(s, n_end << 9);
@@ -703,6 +776,7 @@ uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
cluster_offset &= ~QCOW_OFLAG_COPIED;
m->nb_clusters = 0;
+ m->depends_on = NULL;
goto out;
}
@@ -717,12 +791,15 @@ uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
while (i < nb_clusters) {
i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
&l2_table[l2_index], i, 0);
-
- if(be64_to_cpu(l2_table[l2_index + i]))
+ if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) {
break;
+ }
i += count_contiguous_free_clusters(nb_clusters - i,
&l2_table[l2_index + i]);
+ if (i >= nb_clusters) {
+ break;
+ }
cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
@@ -730,8 +807,41 @@ uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
(cluster_offset & QCOW_OFLAG_COMPRESSED))
break;
}
+ assert(i <= nb_clusters);
nb_clusters = i;
+ /*
+ * Check if there already is an AIO write request in flight which allocates
+ * the same cluster. In this case we need to wait until the previous
+ * request has completed and updated the L2 table accordingly.
+ */
+ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
+
+ uint64_t end_offset = offset + nb_clusters * s->cluster_size;
+ uint64_t old_offset = old_alloc->offset;
+ uint64_t old_end_offset = old_alloc->offset +
+ old_alloc->nb_clusters * s->cluster_size;
+
+ if (end_offset < old_offset || offset > old_end_offset) {
+ /* No intersection */
+ } else {
+ if (offset < old_offset) {
+ /* Stop at the start of a running allocation */
+ nb_clusters = (old_offset - offset) >> s->cluster_bits;
+ } else {
+ nb_clusters = 0;
+ }
+
+ if (nb_clusters == 0) {
+ /* Set dependency and wait for a callback */
+ m->depends_on = old_alloc;
+ m->nb_clusters = 0;
+ *num = 0;
+ return 0;
+ }
+ }
+ }
+
if (!nb_clusters) {
abort();
}
@@ -741,6 +851,10 @@ uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
/* allocate a new cluster */
cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
+ if (cluster_offset < 0) {
+ QLIST_REMOVE(m, next_in_flight);
+ return cluster_offset;
+ }
/* save info needed for meta data update */
m->offset = offset;
@@ -749,10 +863,11 @@ uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
out:
m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
+ m->cluster_offset = cluster_offset;
*num = m->nb_available - n_start;
- return cluster_offset;
+ return 0;
}
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
@@ -782,8 +897,9 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
return 0;
}
-int qcow2_decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
+ BDRVQcowState *s = bs->opaque;
int ret, csize, nb_csectors, sector_offset;
uint64_t coffset;
@@ -792,7 +908,8 @@ int qcow2_decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
- ret = bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors);
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
+ ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
if (ret < 0) {
return -1;
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index dd6e293..4c19e7e 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -27,10 +27,32 @@
#include "block/qcow2.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size);
-static int update_refcount(BlockDriverState *bs,
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
int64_t offset, int64_t length,
int addend);
+
+static int cache_refcount_updates = 0;
+
+static int write_refcount_block(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ size_t size = s->cluster_size;
+
+ if (s->refcount_block_cache_offset == 0) {
+ return 0;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE);
+ if (bdrv_pwrite_sync(bs->file, s->refcount_block_cache_offset,
+ s->refcount_block_cache, size) < 0)
+ {
+ return -EIO;
+ }
+
+ return 0;
+}
+
/*********************************************************/
/* refcount handling */
@@ -43,7 +65,8 @@ int qcow2_refcount_init(BlockDriverState *bs)
refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
s->refcount_table = qemu_malloc(refcount_table_size2);
if (s->refcount_table_size > 0) {
- ret = bdrv_pread(s->hd, s->refcount_table_offset,
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
+ ret = bdrv_pread(bs->file, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
if (ret != refcount_table_size2)
goto fail;
@@ -68,19 +91,36 @@ static int load_refcount_block(BlockDriverState *bs,
{
BDRVQcowState *s = bs->opaque;
int ret;
- ret = bdrv_pread(s->hd, refcount_block_offset, s->refcount_block_cache,
+
+ if (cache_refcount_updates) {
+ ret = write_refcount_block(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
+ ret = bdrv_pread(bs->file, refcount_block_offset, s->refcount_block_cache,
s->cluster_size);
- if (ret != s->cluster_size)
- return -EIO;
+ if (ret < 0) {
+ return ret;
+ }
+
s->refcount_block_cache_offset = refcount_block_offset;
return 0;
}
+/*
+ * Returns the refcount of the cluster given by its index. Any non-negative
+ * return value is the refcount of the cluster, negative values are -errno
+ * and indicate an error.
+ */
static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
{
BDRVQcowState *s = bs->opaque;
int refcount_table_index, block_index;
int64_t refcount_block_offset;
+ int ret;
refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
if (refcount_table_index >= s->refcount_table_size)
@@ -90,163 +130,351 @@ static int get_refcount(BlockDriverState *bs, int64_t cluster_index)
return 0;
if (refcount_block_offset != s->refcount_block_cache_offset) {
/* better than nothing: return allocated if read error */
- if (load_refcount_block(bs, refcount_block_offset) < 0)
- return 1;
+ ret = load_refcount_block(bs, refcount_block_offset);
+ if (ret < 0) {
+ return ret;
+ }
}
block_index = cluster_index &
((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
return be16_to_cpu(s->refcount_block_cache[block_index]);
}
-static int grow_refcount_table(BlockDriverState *bs, int min_size)
+/*
+ * Rounds the refcount table size up to avoid growing the table for each single
+ * refcount block that is allocated.
+ */
+static unsigned int next_refcount_table_size(BDRVQcowState *s,
+ unsigned int min_size)
{
- BDRVQcowState *s = bs->opaque;
- int new_table_size, new_table_size2, refcount_table_clusters, i, ret;
- uint64_t *new_table;
- int64_t table_offset;
- uint8_t data[12];
- int old_table_size;
- int64_t old_table_offset;
+ unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
+ unsigned int refcount_table_clusters =
+ MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
- if (min_size <= s->refcount_table_size)
- return 0;
- /* compute new table size */
- refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
- for(;;) {
- if (refcount_table_clusters == 0) {
- refcount_table_clusters = 1;
- } else {
- refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
- }
- new_table_size = refcount_table_clusters << (s->cluster_bits - 3);
- if (min_size <= new_table_size)
- break;
+ while (min_clusters > refcount_table_clusters) {
+ refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
}
-#ifdef DEBUG_ALLOC2
- printf("grow_refcount_table from %d to %d\n",
- s->refcount_table_size,
- new_table_size);
-#endif
- new_table_size2 = new_table_size * sizeof(uint64_t);
- new_table = qemu_mallocz(new_table_size2);
- memcpy(new_table, s->refcount_table,
- s->refcount_table_size * sizeof(uint64_t));
- for(i = 0; i < s->refcount_table_size; i++)
- cpu_to_be64s(&new_table[i]);
- /* Note: we cannot update the refcount now to avoid recursion */
- table_offset = alloc_clusters_noref(bs, new_table_size2);
- ret = bdrv_pwrite(s->hd, table_offset, new_table, new_table_size2);
- if (ret != new_table_size2)
- goto fail;
- for(i = 0; i < s->refcount_table_size; i++)
- be64_to_cpus(&new_table[i]);
-
- cpu_to_be64w((uint64_t*)data, table_offset);
- cpu_to_be32w((uint32_t*)(data + 8), refcount_table_clusters);
- if (bdrv_pwrite(s->hd, offsetof(QCowHeader, refcount_table_offset),
- data, sizeof(data)) != sizeof(data))
- goto fail;
- qemu_free(s->refcount_table);
- old_table_offset = s->refcount_table_offset;
- old_table_size = s->refcount_table_size;
- s->refcount_table = new_table;
- s->refcount_table_size = new_table_size;
- s->refcount_table_offset = table_offset;
- update_refcount(bs, table_offset, new_table_size2, 1);
- qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
- return 0;
- fail:
- qcow2_free_clusters(bs, table_offset, new_table_size2);
- qemu_free(new_table);
- return -EIO;
+ return refcount_table_clusters << (s->cluster_bits - 3);
}
+/* Checks if two offsets are described by the same refcount block */
+static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
+ uint64_t offset_b)
+{
+ uint64_t block_a = offset_a >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
+ uint64_t block_b = offset_b >> (2 * s->cluster_bits - REFCOUNT_SHIFT);
+
+ return (block_a == block_b);
+}
+
+/*
+ * Loads a refcount block. If it doesn't exist yet, it is allocated first
+ * (including growing the refcount table if needed).
+ *
+ * Returns the offset of the refcount block on success or -errno on error
+ */
static int64_t alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index)
{
BDRVQcowState *s = bs->opaque;
- int64_t offset, refcount_block_offset;
- int ret, refcount_table_index;
- uint64_t data64;
+ unsigned int refcount_table_index;
+ int ret;
- /* Find L1 index and grow refcount table if needed */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
+
+ /* Find the refcount block for the given cluster */
refcount_table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
- if (refcount_table_index >= s->refcount_table_size) {
- ret = grow_refcount_table(bs, refcount_table_index + 1);
- if (ret < 0)
+
+ if (refcount_table_index < s->refcount_table_size) {
+
+ uint64_t refcount_block_offset =
+ s->refcount_table[refcount_table_index];
+
+ /* If it's already there, we're done */
+ if (refcount_block_offset) {
+ if (refcount_block_offset != s->refcount_block_cache_offset) {
+ ret = load_refcount_block(bs, refcount_block_offset);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ return refcount_block_offset;
+ }
+ }
+
+ /*
+ * If we came here, we need to allocate something. Something is at least
+ * a cluster for the new refcount block. It may also include a new refcount
+ * table if the old refcount table is too small.
+ *
+ * Note that allocating clusters here needs some special care:
+ *
+ * - We can't use the normal qcow2_alloc_clusters(), it would try to
+ * increase the refcount and very likely we would end up with an endless
+ * recursion. Instead we must place the refcount blocks in a way that
+ * they can describe them themselves.
+ *
+ * - We need to consider that at this point we are inside update_refcount
+ * and doing the initial refcount increase. This means that some clusters
+ * have already been allocated by the caller, but their refcount isn't
+ * accurate yet. free_cluster_index tells us where this allocation ends
+ * as long as we don't overwrite it by freeing clusters.
+ *
+ * - alloc_clusters_noref and qcow2_free_clusters may load a different
+ * refcount block into the cache
+ */
+
+ if (cache_refcount_updates) {
+ ret = write_refcount_block(bs);
+ if (ret < 0) {
return ret;
+ }
}
- /* Load or allocate the refcount block */
- refcount_block_offset = s->refcount_table[refcount_table_index];
- if (!refcount_block_offset) {
- /* create a new refcount block */
- /* Note: we cannot update the refcount now to avoid recursion */
- offset = alloc_clusters_noref(bs, s->cluster_size);
+ /* Allocate the refcount block itself and mark it as used */
+ int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
+ if (new_block < 0) {
+ return new_block;
+ }
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
+ " at %" PRIx64 "\n",
+ refcount_table_index, cluster_index << s->cluster_bits, new_block);
+#endif
+
+ if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
+ /* Zero the new refcount block before updating it */
memset(s->refcount_block_cache, 0, s->cluster_size);
- ret = bdrv_pwrite(s->hd, offset, s->refcount_block_cache, s->cluster_size);
- if (ret != s->cluster_size)
- return -EINVAL;
- s->refcount_table[refcount_table_index] = offset;
- data64 = cpu_to_be64(offset);
- ret = bdrv_pwrite(s->hd, s->refcount_table_offset +
- refcount_table_index * sizeof(uint64_t),
- &data64, sizeof(data64));
- if (ret != sizeof(data64))
- return -EINVAL;
-
- refcount_block_offset = offset;
- s->refcount_block_cache_offset = offset;
- update_refcount(bs, offset, s->cluster_size, 1);
+ s->refcount_block_cache_offset = new_block;
+
+ /* The block describes itself, need to update the cache */
+ int block_index = (new_block >> s->cluster_bits) &
+ ((1 << (s->cluster_bits - REFCOUNT_SHIFT)) - 1);
+ s->refcount_block_cache[block_index] = cpu_to_be16(1);
} else {
- if (refcount_block_offset != s->refcount_block_cache_offset) {
- if (load_refcount_block(bs, refcount_block_offset) < 0)
- return -EIO;
+ /* Described somewhere else. This can recurse at most twice before we
+ * arrive at a block that describes itself. */
+ ret = update_refcount(bs, new_block, s->cluster_size, 1);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* Initialize the new refcount block only after updating its refcount,
+ * update_refcount uses the refcount cache itself */
+ memset(s->refcount_block_cache, 0, s->cluster_size);
+ s->refcount_block_cache_offset = new_block;
+ }
+
+ /* Now the new refcount block needs to be written to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
+ ret = bdrv_pwrite_sync(bs->file, new_block, s->refcount_block_cache,
+ s->cluster_size);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ /* If the refcount table is big enough, just hook the block up there */
+ if (refcount_table_index < s->refcount_table_size) {
+ uint64_t data64 = cpu_to_be64(new_block);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
+ ret = bdrv_pwrite_sync(bs->file,
+ s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
+ &data64, sizeof(data64));
+ if (ret < 0) {
+ goto fail_block;
}
+
+ s->refcount_table[refcount_table_index] = new_block;
+ return new_block;
+ }
+
+ /*
+ * If we come here, we need to grow the refcount table. Again, a new
+ * refcount table needs some space and we can't simply allocate to avoid
+ * endless recursion.
+ *
+ * Therefore let's grab new refcount blocks at the end of the image, which
+ * will describe themselves and the new refcount table. This way we can
+ * reference them only in the new table and do the switch to the new
+ * refcount table at once without producing an inconsistent state in
+ * between.
+ */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
+
+ /* Calculate the number of refcount blocks needed so far */
+ uint64_t refcount_block_clusters = 1 << (s->cluster_bits - REFCOUNT_SHIFT);
+ uint64_t blocks_used = (s->free_cluster_index +
+ refcount_block_clusters - 1) / refcount_block_clusters;
+
+ /* And now we need at least one block more for the new metadata */
+ uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
+ uint64_t last_table_size;
+ uint64_t blocks_clusters;
+ do {
+ uint64_t table_clusters = size_to_clusters(s, table_size);
+ blocks_clusters = 1 +
+ ((table_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters);
+ uint64_t meta_clusters = table_clusters + blocks_clusters;
+
+ last_table_size = table_size;
+ table_size = next_refcount_table_size(s, blocks_used +
+ ((meta_clusters + refcount_block_clusters - 1)
+ / refcount_block_clusters));
+
+ } while (last_table_size != table_size);
+
+#ifdef DEBUG_ALLOC2
+ fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
+ s->refcount_table_size, table_size);
+#endif
+
+ /* Create the new refcount table and blocks */
+ uint64_t meta_offset = (blocks_used * refcount_block_clusters) *
+ s->cluster_size;
+ uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
+ uint16_t *new_blocks = qemu_mallocz(blocks_clusters * s->cluster_size);
+ uint64_t *new_table = qemu_mallocz(table_size * sizeof(uint64_t));
+
+ assert(meta_offset >= (s->free_cluster_index * s->cluster_size));
+
+ /* Fill the new refcount table */
+ memcpy(new_table, s->refcount_table,
+ s->refcount_table_size * sizeof(uint64_t));
+ new_table[refcount_table_index] = new_block;
+
+ int i;
+ for (i = 0; i < blocks_clusters; i++) {
+ new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
+ }
+
+ /* Fill the refcount blocks */
+ uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
+ int block = 0;
+ for (i = 0; i < table_clusters + blocks_clusters; i++) {
+ new_blocks[block++] = cpu_to_be16(1);
+ }
+
+ /* Write refcount blocks to disk */
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
+ ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+ blocks_clusters * s->cluster_size);
+ qemu_free(new_blocks);
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ /* Write refcount table to disk */
+ for(i = 0; i < table_size; i++) {
+ cpu_to_be64s(&new_table[i]);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+ table_size * sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail_table;
+ }
+
+ for(i = 0; i < table_size; i++) {
+ cpu_to_be64s(&new_table[i]);
+ }
+
+ /* Hook up the new refcount table in the qcow2 header */
+ uint8_t data[12];
+ cpu_to_be64w((uint64_t*)data, table_offset);
+ cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
+ data, sizeof(data));
+ if (ret < 0) {
+ goto fail_table;
}
- return refcount_block_offset;
+ /* And switch it in memory */
+ uint64_t old_table_offset = s->refcount_table_offset;
+ uint64_t old_table_size = s->refcount_table_size;
+
+ qemu_free(s->refcount_table);
+ s->refcount_table = new_table;
+ s->refcount_table_size = table_size;
+ s->refcount_table_offset = table_offset;
+
+ /* Free old table. Remember, we must not change free_cluster_index */
+ uint64_t old_free_cluster_index = s->free_cluster_index;
+ qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t));
+ s->free_cluster_index = old_free_cluster_index;
+
+ ret = load_refcount_block(bs, new_block);
+ if (ret < 0) {
+ goto fail_block;
+ }
+
+ return new_block;
+
+fail_table:
+ qemu_free(new_table);
+fail_block:
+ s->refcount_block_cache_offset = 0;
+ return ret;
}
#define REFCOUNTS_PER_SECTOR (512 >> REFCOUNT_SHIFT)
-static int write_refcount_block_entries(BDRVQcowState *s,
+static int write_refcount_block_entries(BlockDriverState *bs,
int64_t refcount_block_offset, int first_index, int last_index)
{
+ BDRVQcowState *s = bs->opaque;
size_t size;
+ int ret;
+
+ if (cache_refcount_updates) {
+ return 0;
+ }
+
+ if (first_index < 0) {
+ return 0;
+ }
first_index &= ~(REFCOUNTS_PER_SECTOR - 1);
last_index = (last_index + REFCOUNTS_PER_SECTOR)
& ~(REFCOUNTS_PER_SECTOR - 1);
size = (last_index - first_index) << REFCOUNT_SHIFT;
- if (bdrv_pwrite(s->hd,
+
+ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
+ ret = bdrv_pwrite_sync(bs->file,
refcount_block_offset + (first_index << REFCOUNT_SHIFT),
- &s->refcount_block_cache[first_index], size) != size)
- {
- return -EIO;
+ &s->refcount_block_cache[first_index], size);
+ if (ret < 0) {
+ return ret;
}
return 0;
}
/* XXX: cache several refcount block clusters ? */
-static int update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length,
- int addend)
+static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length, int addend)
{
BDRVQcowState *s = bs->opaque;
int64_t start, last, cluster_offset;
int64_t refcount_block_offset = 0;
int64_t table_index = -1, old_table_index;
int first_index = -1, last_index = -1;
+ int ret;
#ifdef DEBUG_ALLOC2
- printf("update_refcount: offset=%lld size=%lld addend=%d\n",
+ printf("update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
offset, length, addend);
#endif
- if (length <= 0)
+ if (length < 0) {
return -EINVAL;
+ } else if (length == 0) {
+ return 0;
+ }
+
start = offset & ~(s->cluster_size - 1);
last = (offset + length - 1) & ~(s->cluster_size - 1);
for(cluster_offset = start; cluster_offset <= last;
@@ -254,16 +482,17 @@ static int update_refcount(BlockDriverState *bs,
{
int block_index, refcount;
int64_t cluster_index = cluster_offset >> s->cluster_bits;
+ int64_t new_block;
/* Only write refcount block to disk when we are done with it */
old_table_index = table_index;
table_index = cluster_index >> (s->cluster_bits - REFCOUNT_SHIFT);
if ((old_table_index >= 0) && (table_index != old_table_index)) {
- if (write_refcount_block_entries(s, refcount_block_offset,
- first_index, last_index) < 0)
- {
- return -EIO;
+ ret = write_refcount_block_entries(bs, refcount_block_offset,
+ first_index, last_index);
+ if (ret < 0) {
+ return ret;
}
first_index = -1;
@@ -271,10 +500,12 @@ static int update_refcount(BlockDriverState *bs,
}
/* Load the refcount block and allocate it if needed */
- refcount_block_offset = alloc_refcount_block(bs, cluster_index);
- if (refcount_block_offset < 0) {
- return refcount_block_offset;
+ new_block = alloc_refcount_block(bs, cluster_index);
+ if (new_block < 0) {
+ ret = new_block;
+ goto fail;
}
+ refcount_block_offset = new_block;
/* we can update the count and save it */
block_index = cluster_index &
@@ -288,27 +519,48 @@ static int update_refcount(BlockDriverState *bs,
refcount = be16_to_cpu(s->refcount_block_cache[block_index]);
refcount += addend;
- if (refcount < 0 || refcount > 0xffff)
- return -EINVAL;
+ if (refcount < 0 || refcount > 0xffff) {
+ ret = -EINVAL;
+ goto fail;
+ }
if (refcount == 0 && cluster_index < s->free_cluster_index) {
s->free_cluster_index = cluster_index;
}
s->refcount_block_cache[block_index] = cpu_to_be16(refcount);
}
+ ret = 0;
+fail:
+
/* Write last changed block to disk */
if (refcount_block_offset != 0) {
- if (write_refcount_block_entries(s, refcount_block_offset,
- first_index, last_index) < 0)
- {
- return -EIO;
+ int wret;
+ wret = write_refcount_block_entries(bs, refcount_block_offset,
+ first_index, last_index);
+ if (wret < 0) {
+ return ret < 0 ? ret : wret;
}
}
- return 0;
+ /*
+ * Try to undo any updates if an error is returned (This may succeed in
+ * some cases like ENOSPC for allocating a new refcount block)
+ */
+ if (ret < 0) {
+ int dummy;
+ dummy = update_refcount(bs, offset, cluster_offset - offset, -addend);
+ }
+
+ return ret;
}
-/* addend must be 1 or -1 */
+/*
+ * Increases or decreases the refcount of a given cluster by one.
+ * addend must be 1 or -1.
+ *
+ * If the return value is non-negative, it is the new refcount of the cluster.
+ * If it is negative, it is -errno and indicates an error.
+ */
static int update_cluster_refcount(BlockDriverState *bs,
int64_t cluster_index,
int addend)
@@ -335,17 +587,22 @@ static int update_cluster_refcount(BlockDriverState *bs,
static int64_t alloc_clusters_noref(BlockDriverState *bs, int64_t size)
{
BDRVQcowState *s = bs->opaque;
- int i, nb_clusters;
+ int i, nb_clusters, refcount;
nb_clusters = size_to_clusters(s, size);
retry:
for(i = 0; i < nb_clusters; i++) {
- int64_t i = s->free_cluster_index++;
- if (get_refcount(bs, i) != 0)
+ int64_t next_cluster_index = s->free_cluster_index++;
+ refcount = get_refcount(bs, next_cluster_index);
+
+ if (refcount < 0) {
+ return refcount;
+ } else if (refcount != 0) {
goto retry;
+ }
}
#ifdef DEBUG_ALLOC2
- printf("alloc_clusters: size=%lld -> %lld\n",
+ printf("alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
size,
(s->free_cluster_index - nb_clusters) << s->cluster_bits);
#endif
@@ -355,9 +612,18 @@ retry:
int64_t qcow2_alloc_clusters(BlockDriverState *bs, int64_t size)
{
int64_t offset;
+ int ret;
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
offset = alloc_clusters_noref(bs, size);
- update_refcount(bs, offset, size, 1);
+ if (offset < 0) {
+ return offset;
+ }
+
+ ret = update_refcount(bs, offset, size, 1);
+ if (ret < 0) {
+ return ret;
+ }
return offset;
}
@@ -369,9 +635,13 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
int64_t offset, cluster_offset;
int free_in_cluster;
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
assert(size > 0 && size <= s->cluster_size);
if (s->free_byte_offset == 0) {
s->free_byte_offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (s->free_byte_offset < 0) {
+ return s->free_byte_offset;
+ }
}
redo:
free_in_cluster = s->cluster_size -
@@ -387,6 +657,9 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
update_cluster_refcount(bs, offset >> s->cluster_bits, 1);
} else {
offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ return offset;
+ }
cluster_offset = s->free_byte_offset & ~(s->cluster_size - 1);
if ((cluster_offset + s->cluster_size) == offset) {
/* we are lucky: contiguous data */
@@ -404,7 +677,14 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size)
{
- update_refcount(bs, offset, size, -1);
+ int ret;
+
+ BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
+ ret = update_refcount(bs, offset, size, -1);
+ if (ret < 0) {
+ fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
+ /* TODO Remember the clusters to free them later and avoid leaking */
+ }
}
/*
@@ -471,15 +751,19 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int l2_size, i, j, l1_modified, l2_modified, nb_csectors, refcount;
qcow2_l2_cache_reset(bs);
+ cache_refcount_updates = 1;
l2_table = NULL;
l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t);
- l1_allocated = 0;
if (l1_table_offset != s->l1_table_offset) {
- l1_table = qemu_malloc(l1_size2);
+ if (l1_size2 != 0) {
+ l1_table = qemu_mallocz(align_offset(l1_size2, 512));
+ } else {
+ l1_table = NULL;
+ }
l1_allocated = 1;
- if (bdrv_pread(s->hd, l1_table_offset,
+ if (bdrv_pread(bs->file, l1_table_offset,
l1_table, l1_size2) != l1_size2)
goto fail;
for(i = 0;i < l1_size; i++)
@@ -499,7 +783,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
old_l2_offset = l2_offset;
l2_offset &= ~QCOW_OFLAG_COPIED;
l2_modified = 0;
- if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
+ if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
goto fail;
for(j = 0; j < s->l2_size; j++) {
offset = be64_to_cpu(l2_table[j]);
@@ -509,9 +793,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (offset & QCOW_OFLAG_COMPRESSED) {
nb_csectors = ((offset >> s->csize_shift) &
s->csize_mask) + 1;
- if (addend != 0)
- update_refcount(bs, (offset & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, addend);
+ if (addend != 0) {
+ int ret;
+ ret = update_refcount(bs,
+ (offset & s->cluster_offset_mask) & ~511,
+ nb_csectors * 512, addend);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
/* compressed clusters are never modified */
refcount = 2;
} else {
@@ -520,6 +810,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} else {
refcount = get_refcount(bs, offset >> s->cluster_bits);
}
+
+ if (refcount < 0) {
+ goto fail;
+ }
}
if (refcount == 1) {
@@ -532,8 +826,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
}
if (l2_modified) {
- if (bdrv_pwrite(s->hd,
- l2_offset, l2_table, l2_size) != l2_size)
+ if (bdrv_pwrite_sync(bs->file,
+ l2_offset, l2_table, l2_size) < 0)
goto fail;
}
@@ -542,7 +836,9 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
} else {
refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
}
- if (refcount == 1) {
+ if (refcount < 0) {
+ goto fail;
+ } else if (refcount == 1) {
l2_offset |= QCOW_OFLAG_COPIED;
}
if (l2_offset != old_l2_offset) {
@@ -554,8 +850,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (l1_modified) {
for(i = 0; i < l1_size; i++)
cpu_to_be64s(&l1_table[i]);
- if (bdrv_pwrite(s->hd, l1_table_offset, l1_table,
- l1_size2) != l1_size2)
+ if (bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table,
+ l1_size2) < 0)
goto fail;
for(i = 0; i < l1_size; i++)
be64_to_cpus(&l1_table[i]);
@@ -563,11 +859,15 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
if (l1_allocated)
qemu_free(l1_table);
qemu_free(l2_table);
+ cache_refcount_updates = 0;
+ write_refcount_block(bs);
return 0;
fail:
if (l1_allocated)
qemu_free(l1_table);
qemu_free(l2_table);
+ cache_refcount_updates = 0;
+ write_refcount_block(bs);
return -EIO;
}
@@ -584,9 +884,10 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
* This is used to construct a temporary refcount table out of L1 and L2 tables
* which can be compared the the refcount table saved in the image.
*
- * Returns the number of errors in the image that were found
+ * Modifies the number of errors in res.
*/
-static int inc_refcounts(BlockDriverState *bs,
+static void inc_refcounts(BlockDriverState *bs,
+ BdrvCheckResult *res,
uint16_t *refcount_table,
int refcount_table_size,
int64_t offset, int64_t size)
@@ -594,30 +895,32 @@ static int inc_refcounts(BlockDriverState *bs,
BDRVQcowState *s = bs->opaque;
int64_t start, last, cluster_offset;
int k;
- int errors = 0;
if (size <= 0)
- return 0;
+ return;
start = offset & ~(s->cluster_size - 1);
last = (offset + size - 1) & ~(s->cluster_size - 1);
for(cluster_offset = start; cluster_offset <= last;
cluster_offset += s->cluster_size) {
k = cluster_offset >> s->cluster_bits;
- if (k < 0 || k >= refcount_table_size) {
+ if (k < 0) {
fprintf(stderr, "ERROR: invalid cluster offset=0x%" PRIx64 "\n",
cluster_offset);
- errors++;
+ res->corruptions++;
+ } else if (k >= refcount_table_size) {
+ fprintf(stderr, "Warning: cluster offset=0x%" PRIx64 " is after "
+ "the end of the image file, can't properly check refcounts.\n",
+ cluster_offset);
+ res->check_errors++;
} else {
if (++refcount_table[k] == 0) {
fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
"\n", cluster_offset);
- errors++;
+ res->corruptions++;
}
}
}
-
- return errors;
}
/*
@@ -628,20 +931,19 @@ static int inc_refcounts(BlockDriverState *bs,
* Returns the number of errors found by the checks or -errno if an internal
* error occurred.
*/
-static int check_refcounts_l2(BlockDriverState *bs,
+static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
uint16_t *refcount_table, int refcount_table_size, int64_t l2_offset,
int check_copied)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l2_table, offset;
int i, l2_size, nb_csectors, refcount;
- int errors = 0;
/* Read L2 table from disk */
l2_size = s->l2_size * sizeof(uint64_t);
l2_table = qemu_malloc(l2_size);
- if (bdrv_pread(s->hd, l2_offset, l2_table, l2_size) != l2_size)
+ if (bdrv_pread(bs->file, l2_offset, l2_table, l2_size) != l2_size)
goto fail;
/* Do the actual checks */
@@ -655,50 +957,53 @@ static int check_refcounts_l2(BlockDriverState *bs,
"copied flag must never be set for compressed "
"clusters\n", offset >> s->cluster_bits);
offset &= ~QCOW_OFLAG_COPIED;
- errors++;
+ res->corruptions++;
}
/* Mark cluster as used */
nb_csectors = ((offset >> s->csize_shift) &
s->csize_mask) + 1;
offset &= s->cluster_offset_mask;
- errors += inc_refcounts(bs, refcount_table,
- refcount_table_size,
- offset & ~511, nb_csectors * 512);
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ offset & ~511, nb_csectors * 512);
} else {
/* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
if (check_copied) {
uint64_t entry = offset;
offset &= ~QCOW_OFLAG_COPIED;
refcount = get_refcount(bs, offset >> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for offset %"
+ PRIx64 ": %s\n", entry, strerror(-refcount));
+ goto fail;
+ }
if ((refcount == 1) != ((entry & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
PRIx64 " refcount=%d\n", entry, refcount);
- errors++;
+ res->corruptions++;
}
}
/* Mark cluster as used */
offset &= ~QCOW_OFLAG_COPIED;
- errors += inc_refcounts(bs, refcount_table,
- refcount_table_size,
- offset, s->cluster_size);
+ inc_refcounts(bs, res, refcount_table,refcount_table_size,
+ offset, s->cluster_size);
/* Correct offsets are cluster aligned */
if (offset & (s->cluster_size - 1)) {
fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
"properly aligned; L2 entry corrupted.\n", offset);
- errors++;
+ res->corruptions++;
}
}
}
}
qemu_free(l2_table);
- return errors;
+ return 0;
fail:
- fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+ fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
qemu_free(l2_table);
return -EIO;
}
@@ -712,6 +1017,7 @@ fail:
* error occurred.
*/
static int check_refcounts_l1(BlockDriverState *bs,
+ BdrvCheckResult *res,
uint16_t *refcount_table,
int refcount_table_size,
int64_t l1_table_offset, int l1_size,
@@ -720,21 +1026,24 @@ static int check_refcounts_l1(BlockDriverState *bs,
BDRVQcowState *s = bs->opaque;
uint64_t *l1_table, l2_offset, l1_size2;
int i, refcount, ret;
- int errors = 0;
l1_size2 = l1_size * sizeof(uint64_t);
/* Mark L1 table as used */
- errors += inc_refcounts(bs, refcount_table, refcount_table_size,
- l1_table_offset, l1_size2);
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l1_table_offset, l1_size2);
/* Read L1 table entries from disk */
- l1_table = qemu_malloc(l1_size2);
- if (bdrv_pread(s->hd, l1_table_offset,
- l1_table, l1_size2) != l1_size2)
- goto fail;
- for(i = 0;i < l1_size; i++)
- be64_to_cpus(&l1_table[i]);
+ if (l1_size2 == 0) {
+ l1_table = NULL;
+ } else {
+ l1_table = qemu_malloc(l1_size2);
+ if (bdrv_pread(bs->file, l1_table_offset,
+ l1_table, l1_size2) != l1_size2)
+ goto fail;
+ for(i = 0;i < l1_size; i++)
+ be64_to_cpus(&l1_table[i]);
+ }
/* Do the actual checks */
for(i = 0; i < l1_size; i++) {
@@ -744,41 +1053,44 @@ static int check_refcounts_l1(BlockDriverState *bs,
if (check_copied) {
refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
>> s->cluster_bits);
+ if (refcount < 0) {
+ fprintf(stderr, "Can't get refcount for l2_offset %"
+ PRIx64 ": %s\n", l2_offset, strerror(-refcount));
+ goto fail;
+ }
if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
" refcount=%d\n", l2_offset, refcount);
- errors++;
+ res->corruptions++;
}
}
/* Mark L2 table as used */
l2_offset &= ~QCOW_OFLAG_COPIED;
- errors += inc_refcounts(bs, refcount_table,
- refcount_table_size,
- l2_offset,
- s->cluster_size);
+ inc_refcounts(bs, res, refcount_table, refcount_table_size,
+ l2_offset, s->cluster_size);
/* L2 tables are cluster aligned */
if (l2_offset & (s->cluster_size - 1)) {
fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
"cluster aligned; L1 entry corrupted\n", l2_offset);
- errors++;
+ res->corruptions++;
}
/* Process and check L2 entries */
- ret = check_refcounts_l2(bs, refcount_table, refcount_table_size,
- l2_offset, check_copied);
+ ret = check_refcounts_l2(bs, res, refcount_table,
+ refcount_table_size, l2_offset, check_copied);
if (ret < 0) {
goto fail;
}
- errors += ret;
}
}
qemu_free(l1_table);
- return errors;
+ return 0;
fail:
fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
+ res->check_errors++;
qemu_free(l1_table);
return -EIO;
}
@@ -789,66 +1101,102 @@ fail:
* Returns 0 if no errors are found, the number of errors in case the image is
* detected as corrupted, and -errno when an internal error occured.
*/
-int qcow2_check_refcounts(BlockDriverState *bs)
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res)
{
BDRVQcowState *s = bs->opaque;
int64_t size;
int nb_clusters, refcount1, refcount2, i;
QCowSnapshot *sn;
uint16_t *refcount_table;
- int ret, errors = 0;
+ int ret;
- size = bdrv_getlength(s->hd);
+ size = bdrv_getlength(bs->file);
nb_clusters = size_to_clusters(s, size);
refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
/* header */
- errors += inc_refcounts(bs, refcount_table, nb_clusters,
- 0, s->cluster_size);
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ 0, s->cluster_size);
/* current L1 table */
- ret = check_refcounts_l1(bs, refcount_table, nb_clusters,
+ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
s->l1_table_offset, s->l1_size, 1);
if (ret < 0) {
return ret;
}
- errors += ret;
/* snapshots */
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
- check_refcounts_l1(bs, refcount_table, nb_clusters,
- sn->l1_table_offset, sn->l1_size, 0);
+ ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
+ sn->l1_table_offset, sn->l1_size, 0);
+ if (ret < 0) {
+ return ret;
+ }
}
- errors += inc_refcounts(bs, refcount_table, nb_clusters,
- s->snapshots_offset, s->snapshots_size);
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ s->snapshots_offset, s->snapshots_size);
/* refcount data */
- errors += inc_refcounts(bs, refcount_table, nb_clusters,
- s->refcount_table_offset,
- s->refcount_table_size * sizeof(uint64_t));
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ s->refcount_table_offset,
+ s->refcount_table_size * sizeof(uint64_t));
+
for(i = 0; i < s->refcount_table_size; i++) {
- int64_t offset;
+ uint64_t offset, cluster;
offset = s->refcount_table[i];
+ cluster = offset >> s->cluster_bits;
+
+ /* Refcount blocks are cluster aligned */
+ if (offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR refcount block %d is not "
+ "cluster aligned; refcount table entry corrupted\n", i);
+ res->corruptions++;
+ continue;
+ }
+
+ if (cluster >= nb_clusters) {
+ fprintf(stderr, "ERROR refcount block %d is outside image\n", i);
+ res->corruptions++;
+ continue;
+ }
+
if (offset != 0) {
- errors += inc_refcounts(bs, refcount_table, nb_clusters,
- offset, s->cluster_size);
+ inc_refcounts(bs, res, refcount_table, nb_clusters,
+ offset, s->cluster_size);
+ if (refcount_table[cluster] != 1) {
+ fprintf(stderr, "ERROR refcount block %d refcount=%d\n",
+ i, refcount_table[cluster]);
+ res->corruptions++;
+ }
}
}
/* compare ref counts */
for(i = 0; i < nb_clusters; i++) {
refcount1 = get_refcount(bs, i);
+ if (refcount1 < 0) {
+ fprintf(stderr, "Can't get refcount for cluster %d: %s\n",
+ i, strerror(-refcount1));
+ res->check_errors++;
+ continue;
+ }
+
refcount2 = refcount_table[i];
if (refcount1 != refcount2) {
- fprintf(stderr, "ERROR cluster %d refcount=%d reference=%d\n",
+ fprintf(stderr, "%s cluster %d refcount=%d reference=%d\n",
+ refcount1 < refcount2 ? "ERROR" : "Leaked",
i, refcount1, refcount2);
- errors++;
+ if (refcount1 < refcount2) {
+ res->corruptions++;
+ } else {
+ res->leaks++;
+ }
}
}
qemu_free(refcount_table);
- return errors;
+ return 0;
}
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index e1e4d89..6228612 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -79,7 +79,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
s->snapshots = qemu_mallocz(s->nb_snapshots * sizeof(QCowSnapshot));
for(i = 0; i < s->nb_snapshots; i++) {
offset = align_offset(offset, 8);
- if (bdrv_pread(s->hd, offset, &h, sizeof(h)) != sizeof(h))
+ if (bdrv_pread(bs->file, offset, &h, sizeof(h)) != sizeof(h))
goto fail;
offset += sizeof(h);
sn = s->snapshots + i;
@@ -97,13 +97,13 @@ int qcow2_read_snapshots(BlockDriverState *bs)
offset += extra_data_size;
sn->id_str = qemu_malloc(id_str_size + 1);
- if (bdrv_pread(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
+ if (bdrv_pread(bs->file, offset, sn->id_str, id_str_size) != id_str_size)
goto fail;
offset += id_str_size;
sn->id_str[id_str_size] = '\0';
sn->name = qemu_malloc(name_size + 1);
- if (bdrv_pread(s->hd, offset, sn->name, name_size) != name_size)
+ if (bdrv_pread(bs->file, offset, sn->name, name_size) != name_size)
goto fail;
offset += name_size;
sn->name[name_size] = '\0';
@@ -139,6 +139,9 @@ static int qcow_write_snapshots(BlockDriverState *bs)
snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
offset = snapshots_offset;
+ if (offset < 0) {
+ return offset;
+ }
for(i = 0; i < s->nb_snapshots; i++) {
sn = s->snapshots + i;
@@ -155,25 +158,25 @@ static int qcow_write_snapshots(BlockDriverState *bs)
h.id_str_size = cpu_to_be16(id_str_size);
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
- if (bdrv_pwrite(s->hd, offset, &h, sizeof(h)) != sizeof(h))
+ if (bdrv_pwrite_sync(bs->file, offset, &h, sizeof(h)) < 0)
goto fail;
offset += sizeof(h);
- if (bdrv_pwrite(s->hd, offset, sn->id_str, id_str_size) != id_str_size)
+ if (bdrv_pwrite_sync(bs->file, offset, sn->id_str, id_str_size) < 0)
goto fail;
offset += id_str_size;
- if (bdrv_pwrite(s->hd, offset, sn->name, name_size) != name_size)
+ if (bdrv_pwrite_sync(bs->file, offset, sn->name, name_size) < 0)
goto fail;
offset += name_size;
}
/* update the various header fields */
data64 = cpu_to_be64(snapshots_offset);
- if (bdrv_pwrite(s->hd, offsetof(QCowHeader, snapshots_offset),
- &data64, sizeof(data64)) != sizeof(data64))
+ if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, snapshots_offset),
+ &data64, sizeof(data64)) < 0)
goto fail;
data32 = cpu_to_be32(s->nb_snapshots);
- if (bdrv_pwrite(s->hd, offsetof(QCowHeader, nb_snapshots),
- &data32, sizeof(data32)) != sizeof(data32))
+ if (bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+ &data32, sizeof(data32)) < 0)
goto fail;
/* free the old snapshot table */
@@ -235,6 +238,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
QCowSnapshot *snapshots1, sn1, *sn = &sn1;
int i, ret;
uint64_t *l1_table = NULL;
+ int64_t l1_table_offset;
memset(sn, 0, sizeof(*sn));
@@ -263,16 +267,25 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto fail;
/* create the L1 table of the snapshot */
- sn->l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+ l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
+ if (l1_table_offset < 0) {
+ goto fail;
+ }
+
+ sn->l1_table_offset = l1_table_offset;
sn->l1_size = s->l1_size;
- l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+ if (s->l1_size != 0) {
+ l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
+ } else {
+ l1_table = NULL;
+ }
+
for(i = 0; i < s->l1_size; i++) {
l1_table[i] = cpu_to_be64(s->l1_table[i]);
}
- if (bdrv_pwrite(s->hd, sn->l1_table_offset,
- l1_table, s->l1_size * sizeof(uint64_t)) !=
- (s->l1_size * sizeof(uint64_t)))
+ if (bdrv_pwrite_sync(bs->file, sn->l1_table_offset,
+ l1_table, s->l1_size * sizeof(uint64_t)) < 0)
goto fail;
qemu_free(l1_table);
l1_table = NULL;
@@ -288,7 +301,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
if (qcow_write_snapshots(bs) < 0)
goto fail;
#ifdef DEBUG_ALLOC
- check_refcounts(bs);
+ qcow2_check_refcounts(bs);
#endif
return 0;
fail:
@@ -318,11 +331,11 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
s->l1_size = sn->l1_size;
l1_size2 = s->l1_size * sizeof(uint64_t);
/* copy the snapshot l1 table to the current l1 table */
- if (bdrv_pread(s->hd, sn->l1_table_offset,
+ if (bdrv_pread(bs->file, sn->l1_table_offset,
s->l1_table, l1_size2) != l1_size2)
goto fail;
- if (bdrv_pwrite(s->hd, s->l1_table_offset,
- s->l1_table, l1_size2) != l1_size2)
+ if (bdrv_pwrite_sync(bs->file, s->l1_table_offset,
+ s->l1_table, l1_size2) < 0)
goto fail;
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
@@ -332,7 +345,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
#ifdef DEBUG_ALLOC
- check_refcounts(bs);
+ qcow2_check_refcounts(bs);
#endif
return 0;
fail:
@@ -369,7 +382,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
return ret;
}
#ifdef DEBUG_ALLOC
- check_refcounts(bs);
+ qcow2_check_refcounts(bs);
#endif
return 0;
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 5ca20b2..a53014d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -44,10 +44,6 @@
- L2 tables have always a size of one cluster.
*/
-//#define DEBUG_ALLOC
-//#define DEBUG_ALLOC2
-//#define DEBUG_EXT
-
typedef struct {
uint32_t magic;
@@ -56,8 +52,6 @@ typedef struct {
#define QCOW_EXT_MAGIC_END 0
#define QCOW_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
-
-
static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
{
const QCowHeader *cow_header = (const void *)buf;
@@ -71,7 +65,7 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
}
-/*
+/*
* read qcow2 extension and fill bs
* start reading from start_offset
* finish reading upon magic of value 0 or when end_offset reached
@@ -81,7 +75,6 @@ static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uint64_t end_offset)
{
- BDRVQcowState *s = bs->opaque;
QCowExtension ext;
uint64_t offset;
@@ -99,9 +92,10 @@ static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
printf("attemting to read extended header in offset %lu\n", offset);
#endif
- if (bdrv_pread(s->hd, offset, &ext, sizeof(ext)) != sizeof(ext)) {
- fprintf(stderr, "qcow_handle_extension: ERROR: pread fail from offset %llu\n",
- (unsigned long long)offset);
+ if (bdrv_pread(bs->file, offset, &ext, sizeof(ext)) != sizeof(ext)) {
+ fprintf(stderr, "qcow_handle_extension: ERROR: "
+ "pread fail from offset %" PRIu64 "\n",
+ offset);
return 1;
}
be32_to_cpus(&ext.magic);
@@ -121,19 +115,19 @@ static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
ext.len, sizeof(bs->backing_format));
return 2;
}
- if (bdrv_pread(s->hd, offset , bs->backing_format,
+ if (bdrv_pread(bs->file, offset , bs->backing_format,
ext.len) != ext.len)
return 3;
bs->backing_format[ext.len] = '\0';
#ifdef DEBUG_EXT
printf("Qcow2: Got format extension %s\n", bs->backing_format);
#endif
- offset += ((ext.len + 7) & ~7);
+ offset = ((offset + ext.len + 7) & ~7);
break;
default:
/* unknown magic -- just skip it */
- offset += ((ext.len + 7) & ~7);
+ offset = ((offset + ext.len + 7) & ~7);
break;
}
}
@@ -142,25 +136,14 @@ static int qcow_read_extensions(BlockDriverState *bs, uint64_t start_offset,
}
-static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
+static int qcow_open(BlockDriverState *bs, int flags)
{
BDRVQcowState *s = bs->opaque;
- int len, i, shift, ret;
+ int len, i;
QCowHeader header;
uint64_t ext_end;
- /* Performance is terrible right now with cache=writethrough due mainly
- * to reference count updates. If the user does not explicitly specify
- * a caching type, force to writeback caching.
- */
- if ((flags & BDRV_O_CACHE_DEF)) {
- flags |= BDRV_O_CACHE_WB;
- flags &= ~BDRV_O_CACHE_DEF;
- }
- ret = bdrv_file_open(&s->hd, filename, flags);
- if (ret < 0)
- return ret;
- if (bdrv_pread(s->hd, 0, &header, sizeof(header)) != sizeof(header))
+ if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header))
goto fail;
be32_to_cpus(&header.magic);
be32_to_cpus(&header.version);
@@ -178,8 +161,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
if (header.magic != QCOW_MAGIC || header.version != QCOW_VERSION)
goto fail;
- if (header.size <= 1 ||
- header.cluster_bits < MIN_CLUSTER_BITS ||
+ if (header.cluster_bits < MIN_CLUSTER_BITS ||
header.cluster_bits > MAX_CLUSTER_BITS)
goto fail;
if (header.crypt_method > QCOW_CRYPT_AES)
@@ -205,19 +187,21 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
/* read the level 1 table */
s->l1_size = header.l1_size;
- shift = s->cluster_bits + s->l2_bits;
- s->l1_vm_state_index = (header.size + (1LL << shift) - 1) >> shift;
+ s->l1_vm_state_index = size_to_l1(s, header.size);
/* the L1 table must contain at least enough entries to put
header.size bytes */
if (s->l1_size < s->l1_vm_state_index)
goto fail;
s->l1_table_offset = header.l1_table_offset;
- s->l1_table = qemu_malloc(s->l1_size * sizeof(uint64_t));
- if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
- s->l1_size * sizeof(uint64_t))
- goto fail;
- for(i = 0;i < s->l1_size; i++) {
- be64_to_cpus(&s->l1_table[i]);
+ if (s->l1_size > 0) {
+ s->l1_table = qemu_mallocz(
+ align_offset(s->l1_size * sizeof(uint64_t), 512));
+ if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)) !=
+ s->l1_size * sizeof(uint64_t))
+ goto fail;
+ for(i = 0;i < s->l1_size; i++) {
+ be64_to_cpus(&s->l1_table[i]);
+ }
}
/* alloc L2 cache */
s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
@@ -245,7 +229,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
len = header.backing_file_size;
if (len > 1023)
len = 1023;
- if (bdrv_pread(s->hd, header.backing_file_offset, bs->backing_file, len) != len)
+ if (bdrv_pread(bs->file, header.backing_file_offset, bs->backing_file, len) != len)
goto fail;
bs->backing_file[len] = '\0';
}
@@ -253,7 +237,7 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
goto fail;
#ifdef DEBUG_ALLOC
- check_refcounts(bs);
+ qcow2_check_refcounts(bs);
#endif
return 0;
@@ -264,7 +248,6 @@ static int qcow_open(BlockDriverState *bs, const char *filename, int flags)
qemu_free(s->l2_cache);
qemu_free(s->cluster_cache);
qemu_free(s->cluster_data);
- bdrv_delete(s->hd);
return -1;
}
@@ -314,9 +297,15 @@ static int qcow_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
uint64_t cluster_offset;
+ int ret;
*pnum = nb_sectors;
- cluster_offset = qcow2_get_cluster_offset(bs, sector_num << 9, pnum);
+ /* FIXME We can get errors here, but the bdrv_is_allocated interface can't
+ * pass them on today */
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+ if (ret < 0) {
+ *pnum = 0;
+ }
return (cluster_offset != 0);
}
@@ -342,8 +331,8 @@ typedef struct QCowAIOCB {
QEMUIOVector *qiov;
uint8_t *buf;
void *orig_buf;
- int nb_sectors;
- int n;
+ int remaining_sectors;
+ int cur_nr_sectors; /* number of sectors in current iteration */
uint64_t cluster_offset;
uint8_t *cluster_data;
BlockDriverAIOCB *hd_aiocb;
@@ -351,11 +340,12 @@ typedef struct QCowAIOCB {
QEMUIOVector hd_qiov;
QEMUBH *bh;
QCowL2Meta l2meta;
+ QLIST_ENTRY(QCowAIOCB) next_depend;
} QCowAIOCB;
static void qcow_aio_cancel(BlockDriverAIOCB *blockacb)
{
- QCowAIOCB *acb = (QCowAIOCB *)blockacb;
+ QCowAIOCB *acb = container_of(blockacb, QCowAIOCB, common);
if (acb->hd_aiocb)
bdrv_aio_cancel(acb->hd_aiocb);
qemu_aio_release(acb);
@@ -408,38 +398,43 @@ static void qcow_aio_read_cb(void *opaque, int ret)
} else {
if (s->crypt_method) {
qcow2_encrypt_sectors(s, acb->sector_num, acb->buf, acb->buf,
- acb->n, 0,
+ acb->cur_nr_sectors, 0,
&s->aes_decrypt_key);
}
}
- acb->nb_sectors -= acb->n;
- acb->sector_num += acb->n;
- acb->buf += acb->n * 512;
+ acb->remaining_sectors -= acb->cur_nr_sectors;
+ acb->sector_num += acb->cur_nr_sectors;
+ acb->buf += acb->cur_nr_sectors * 512;
- if (acb->nb_sectors == 0) {
+ if (acb->remaining_sectors == 0) {
/* request completed */
ret = 0;
goto done;
}
/* prepare next AIO request */
- acb->n = acb->nb_sectors;
- acb->cluster_offset =
- qcow2_get_cluster_offset(bs, acb->sector_num << 9, &acb->n);
+ acb->cur_nr_sectors = acb->remaining_sectors;
+ ret = qcow2_get_cluster_offset(bs, acb->sector_num << 9,
+ &acb->cur_nr_sectors, &acb->cluster_offset);
+ if (ret < 0) {
+ goto done;
+ }
+
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
if (!acb->cluster_offset) {
if (bs->backing_hd) {
/* read from the base image */
n1 = qcow2_backing_read1(bs->backing_hd, acb->sector_num,
- acb->buf, acb->n);
+ acb->buf, acb->cur_nr_sectors);
if (n1 > 0) {
acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
acb->hd_aiocb = bdrv_aio_readv(bs->backing_hd, acb->sector_num,
- &acb->hd_qiov, acb->n,
+ &acb->hd_qiov, acb->cur_nr_sectors,
qcow_aio_read_cb, acb);
if (acb->hd_aiocb == NULL)
goto done;
@@ -450,17 +445,17 @@ static void qcow_aio_read_cb(void *opaque, int ret)
}
} else {
/* Note: in this case, no need to wait */
- memset(acb->buf, 0, 512 * acb->n);
+ memset(acb->buf, 0, 512 * acb->cur_nr_sectors);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
}
} else if (acb->cluster_offset & QCOW_OFLAG_COMPRESSED) {
/* add AIO support for compressed blocks ? */
- if (qcow2_decompress_cluster(s, acb->cluster_offset) < 0)
+ if (qcow2_decompress_cluster(bs, acb->cluster_offset) < 0)
goto done;
- memcpy(acb->buf,
- s->cluster_cache + index_in_cluster * 512, 512 * acb->n);
+ memcpy(acb->buf, s->cluster_cache + index_in_cluster * 512,
+ 512 * acb->cur_nr_sectors);
ret = qcow_schedule_bh(qcow_aio_read_bh, acb);
if (ret < 0)
goto done;
@@ -471,13 +466,17 @@ static void qcow_aio_read_cb(void *opaque, int ret)
}
acb->hd_iov.iov_base = (void *)acb->buf;
- acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_readv(s->hd,
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ acb->hd_aiocb = bdrv_aio_readv(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
- &acb->hd_qiov, acb->n, qcow_aio_read_cb, acb);
- if (acb->hd_aiocb == NULL)
+ &acb->hd_qiov, acb->cur_nr_sectors,
+ qcow_aio_read_cb, acb);
+ if (acb->hd_aiocb == NULL) {
+ ret = -EIO;
goto done;
+ }
}
return;
@@ -509,10 +508,11 @@ static QCowAIOCB *qcow_aio_setup(BlockDriverState *bs,
} else {
acb->buf = (uint8_t *)qiov->iov->iov_base;
}
- acb->nb_sectors = nb_sectors;
- acb->n = 0;
+ acb->remaining_sectors = nb_sectors;
+ acb->cur_nr_sectors = 0;
acb->cluster_offset = 0;
acb->l2meta.nb_clusters = 0;
+ QLIST_INIT(&acb->l2meta.dependent_requests);
return acb;
}
@@ -530,6 +530,27 @@ static BlockDriverAIOCB *qcow_aio_readv(BlockDriverState *bs,
return &acb->common;
}
+static void qcow_aio_write_cb(void *opaque, int ret);
+
+static void run_dependent_requests(QCowL2Meta *m)
+{
+ QCowAIOCB *req;
+ QCowAIOCB *next;
+
+ /* Take the request off the list of running requests */
+ if (m->nb_clusters != 0) {
+ QLIST_REMOVE(m, next_in_flight);
+ }
+
+ /* Restart all dependent requests */
+ QLIST_FOREACH_SAFE(req, &m->dependent_requests, next_depend, next) {
+ qcow_aio_write_cb(req, 0);
+ }
+
+ /* Empty the list for the next part of the request */
+ QLIST_INIT(&m->dependent_requests);
+}
+
static void qcow_aio_write_cb(void *opaque, int ret)
{
QCowAIOCB *acb = opaque;
@@ -541,60 +562,78 @@ static void qcow_aio_write_cb(void *opaque, int ret)
acb->hd_aiocb = NULL;
- if (ret < 0)
- goto done;
+ if (ret >= 0) {
+ ret = qcow2_alloc_cluster_link_l2(bs, &acb->l2meta);
+ }
- if (qcow2_alloc_cluster_link_l2(bs, acb->cluster_offset, &acb->l2meta) < 0) {
- qcow2_free_any_clusters(bs, acb->cluster_offset, acb->l2meta.nb_clusters);
+ run_dependent_requests(&acb->l2meta);
+
+ if (ret < 0)
goto done;
- }
- acb->nb_sectors -= acb->n;
- acb->sector_num += acb->n;
- acb->buf += acb->n * 512;
+ acb->remaining_sectors -= acb->cur_nr_sectors;
+ acb->sector_num += acb->cur_nr_sectors;
+ acb->buf += acb->cur_nr_sectors * 512;
- if (acb->nb_sectors == 0) {
+ if (acb->remaining_sectors == 0) {
/* request completed */
ret = 0;
goto done;
}
index_in_cluster = acb->sector_num & (s->cluster_sectors - 1);
- n_end = index_in_cluster + acb->nb_sectors;
+ n_end = index_in_cluster + acb->remaining_sectors;
if (s->crypt_method &&
n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
- acb->cluster_offset = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
- index_in_cluster,
- n_end, &acb->n, &acb->l2meta);
- if (!acb->cluster_offset || (acb->cluster_offset & 511) != 0) {
- ret = -EIO;
+ ret = qcow2_alloc_cluster_offset(bs, acb->sector_num << 9,
+ index_in_cluster, n_end, &acb->cur_nr_sectors, &acb->l2meta);
+ if (ret < 0) {
goto done;
}
+
+ acb->cluster_offset = acb->l2meta.cluster_offset;
+
+ /* Need to wait for another request? If so, we are done for now. */
+ if (acb->l2meta.nb_clusters == 0 && acb->l2meta.depends_on != NULL) {
+ QLIST_INSERT_HEAD(&acb->l2meta.depends_on->dependent_requests,
+ acb, next_depend);
+ return;
+ }
+
+ assert((acb->cluster_offset & 511) == 0);
+
if (s->crypt_method) {
if (!acb->cluster_data) {
acb->cluster_data = qemu_mallocz(QCOW_MAX_CRYPT_CLUSTERS *
s->cluster_size);
}
qcow2_encrypt_sectors(s, acb->sector_num, acb->cluster_data, acb->buf,
- acb->n, 1, &s->aes_encrypt_key);
+ acb->cur_nr_sectors, 1, &s->aes_encrypt_key);
src_buf = acb->cluster_data;
} else {
src_buf = acb->buf;
}
acb->hd_iov.iov_base = (void *)src_buf;
- acb->hd_iov.iov_len = acb->n * 512;
+ acb->hd_iov.iov_len = acb->cur_nr_sectors * 512;
qemu_iovec_init_external(&acb->hd_qiov, &acb->hd_iov, 1);
- acb->hd_aiocb = bdrv_aio_writev(s->hd,
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ acb->hd_aiocb = bdrv_aio_writev(bs->file,
(acb->cluster_offset >> 9) + index_in_cluster,
- &acb->hd_qiov, acb->n,
+ &acb->hd_qiov, acb->cur_nr_sectors,
qcow_aio_write_cb, acb);
- if (acb->hd_aiocb == NULL)
- goto done;
+ if (acb->hd_aiocb == NULL) {
+ ret = -EIO;
+ goto fail;
+ }
return;
+fail:
+ if (acb->l2meta.nb_clusters != 0) {
+ QLIST_REMOVE(&acb->l2meta, next_in_flight);
+ }
done:
if (acb->qiov->niov > 1)
qemu_vfree(acb->orig_buf);
@@ -627,7 +666,105 @@ static void qcow_close(BlockDriverState *bs)
qemu_free(s->cluster_cache);
qemu_free(s->cluster_data);
qcow2_refcount_close(bs);
- bdrv_delete(s->hd);
+}
+
+/*
+ * Updates the variable length parts of the qcow2 header, i.e. the backing file
+ * name and all extensions. qcow2 was not designed to allow such changes, so if
+ * we run out of space (we can only use the first cluster) this function may
+ * fail.
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+static int qcow2_update_ext_header(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt)
+{
+ size_t backing_file_len = 0;
+ size_t backing_fmt_len = 0;
+ BDRVQcowState *s = bs->opaque;
+ QCowExtension ext_backing_fmt = {0, 0};
+ int ret;
+
+ /* Backing file format doesn't make sense without a backing file */
+ if (backing_fmt && !backing_file) {
+ return -EINVAL;
+ }
+
+ /* Prepare the backing file format extension if needed */
+ if (backing_fmt) {
+ ext_backing_fmt.len = cpu_to_be32(strlen(backing_fmt));
+ ext_backing_fmt.magic = cpu_to_be32(QCOW_EXT_MAGIC_BACKING_FORMAT);
+ backing_fmt_len = ((sizeof(ext_backing_fmt)
+ + strlen(backing_fmt) + 7) & ~7);
+ }
+
+ /* Check if we can fit the new header into the first cluster */
+ if (backing_file) {
+ backing_file_len = strlen(backing_file);
+ }
+
+ size_t header_size = sizeof(QCowHeader) + backing_file_len
+ + backing_fmt_len;
+
+ if (header_size > s->cluster_size) {
+ return -ENOSPC;
+ }
+
+ /* Rewrite backing file name and qcow2 extensions */
+ size_t ext_size = header_size - sizeof(QCowHeader);
+ uint8_t buf[ext_size];
+ size_t offset = 0;
+ size_t backing_file_offset = 0;
+
+ if (backing_file) {
+ if (backing_fmt) {
+ int padding = backing_fmt_len -
+ (sizeof(ext_backing_fmt) + strlen(backing_fmt));
+
+ memcpy(buf + offset, &ext_backing_fmt, sizeof(ext_backing_fmt));
+ offset += sizeof(ext_backing_fmt);
+
+ memcpy(buf + offset, backing_fmt, strlen(backing_fmt));
+ offset += strlen(backing_fmt);
+
+ memset(buf + offset, 0, padding);
+ offset += padding;
+ }
+
+ memcpy(buf + offset, backing_file, backing_file_len);
+ backing_file_offset = sizeof(QCowHeader) + offset;
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, sizeof(QCowHeader), buf, ext_size);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Update header fields */
+ uint64_t be_backing_file_offset = cpu_to_be64(backing_file_offset);
+ uint32_t be_backing_file_size = cpu_to_be32(backing_file_len);
+
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_offset),
+ &be_backing_file_offset, sizeof(uint64_t));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_size),
+ &be_backing_file_size, sizeof(uint32_t));
+ if (ret < 0) {
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ return ret;
+}
+
+static int qcow2_change_backing_file(BlockDriverState *bs,
+ const char *backing_file, const char *backing_fmt)
+{
+ return qcow2_update_ext_header(bs, backing_file, backing_fmt);
}
static int get_bits_from_size(size_t size)
@@ -651,52 +788,80 @@ static int get_bits_from_size(size_t size)
return res;
}
-static int write_all(int fd, const void *buff, size_t bufsize)
+
+static int preallocate(BlockDriverState *bs)
{
- int ret = 0;
- const char *ptr = buff;
- while (bufsize > 0) {
- ret = write(fd, ptr, bufsize);
+ uint64_t nb_sectors;
+ uint64_t offset;
+ int num;
+ int ret;
+ QCowL2Meta meta;
+
+ nb_sectors = bdrv_getlength(bs) >> 9;
+ offset = 0;
+ QLIST_INIT(&meta.dependent_requests);
+ meta.cluster_offset = 0;
+
+ while (nb_sectors) {
+ num = MIN(nb_sectors, INT_MAX >> 9);
+ ret = qcow2_alloc_cluster_offset(bs, offset, 0, num, &num, &meta);
if (ret < 0) {
- if (errno != EINTR)
- return -1;
- } else {
- bufsize -= ret;
+ return ret;
}
- }
- return 0;
-}
-static int lseek_to(int fd, off_t offset)
-{
- off_t ret;
- do {
- ret = lseek(fd, offset, SEEK_SET);
- } while (ret == (off_t)-1 && errno == EINTR);
+ ret = qcow2_alloc_cluster_link_l2(bs, &meta);
+ if (ret < 0) {
+ qcow2_free_any_clusters(bs, meta.cluster_offset, meta.nb_clusters);
+ return ret;
+ }
- if (ret == (off_t)-1)
- return -1;
+ /* There are no dependent requests, but we need to remove our request
+ * from the list of in-flight requests */
+ run_dependent_requests(&meta);
+
+ /* TODO Preallocate data if requested */
+
+ nb_sectors -= num;
+ offset += num << 9;
+ }
+
+ /*
+ * It is expected that the image file is large enough to actually contain
+ * all of the allocated clusters (otherwise we get failing reads after
+ * EOF). Extend the image to the last allocated sector.
+ */
+ if (meta.cluster_offset != 0) {
+ uint8_t buf[512];
+ memset(buf, 0, 512);
+ ret = bdrv_write(bs->file, (meta.cluster_offset >> 9) + num - 1, buf, 1);
+ if (ret < 0) {
+ return ret;
+ }
+ }
return 0;
}
static int qcow_create2(const char *filename, int64_t total_size,
const char *backing_file, const char *backing_format,
- int flags, size_t cluster_size)
+ int flags, size_t cluster_size, int prealloc)
{
int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
- int ref_clusters, backing_format_len = 0;
+ int ref_clusters, reftable_clusters, backing_format_len = 0;
+ int rounded_ext_bf_len = 0;
QCowHeader header;
uint64_t tmp, offset;
+ uint64_t old_ref_clusters;
QCowCreateState s1, *s = &s1;
QCowExtension ext_bf = {0, 0};
+ int ret;
memset(s, 0, sizeof(*s));
fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
if (fd < 0)
- return -1;
+ return -errno;
memset(&header, 0, sizeof(header));
header.magic = cpu_to_be32(QCOW_MAGIC);
header.version = cpu_to_be32(QCOW_VERSION);
@@ -707,8 +872,9 @@ static int qcow_create2(const char *filename, int64_t total_size,
if (backing_format) {
ext_bf.magic = QCOW_EXT_MAGIC_BACKING_FORMAT;
backing_format_len = strlen(backing_format);
- ext_bf.len = (backing_format_len + 7) & ~7;
- header_size += ((sizeof(ext_bf) + ext_bf.len + 7) & ~7);
+ ext_bf.len = backing_format_len;
+ rounded_ext_bf_len = (sizeof(ext_bf) + ext_bf.len + 7) & ~7;
+ header_size += rounded_ext_bf_len;
}
header.backing_file_offset = cpu_to_be64(header_size);
backing_filename_len = strlen(backing_file);
@@ -745,17 +911,37 @@ static int qcow_create2(const char *filename, int64_t total_size,
header.l1_size = cpu_to_be32(l1_size);
offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
- s->refcount_table = qemu_mallocz(s->cluster_size);
+ /* count how many refcount blocks needed */
+
+#define NUM_CLUSTERS(bytes) \
+ (((bytes) + (s->cluster_size) - 1) / (s->cluster_size))
+
+ ref_clusters = NUM_CLUSTERS(NUM_CLUSTERS(offset) * sizeof(uint16_t));
+
+ do {
+ uint64_t image_clusters;
+ old_ref_clusters = ref_clusters;
+
+ /* Number of clusters used for the refcount table */
+ reftable_clusters = NUM_CLUSTERS(ref_clusters * sizeof(uint64_t));
+
+ /* Number of clusters that the whole image will have */
+ image_clusters = NUM_CLUSTERS(offset) + ref_clusters
+ + reftable_clusters;
+
+ /* Number of refcount blocks needed for the image */
+ ref_clusters = NUM_CLUSTERS(image_clusters * sizeof(uint16_t));
+
+ } while (ref_clusters != old_ref_clusters);
+
+ s->refcount_table = qemu_mallocz(reftable_clusters * s->cluster_size);
s->refcount_table_offset = offset;
header.refcount_table_offset = cpu_to_be64(offset);
- header.refcount_table_clusters = cpu_to_be32(1);
- offset += s->cluster_size;
+ header.refcount_table_clusters = cpu_to_be32(reftable_clusters);
+ offset += (reftable_clusters * s->cluster_size);
s->refcount_block_offset = offset;
- /* count how many refcount blocks needed */
- tmp = offset >> s->cluster_bits;
- ref_clusters = (tmp >> (s->cluster_bits - REFCOUNT_SHIFT)) + 1;
for (i=0; i < ref_clusters; i++) {
s->refcount_table[i] = cpu_to_be64(offset);
offset += s->cluster_size;
@@ -767,57 +953,91 @@ static int qcow_create2(const char *filename, int64_t total_size,
qcow2_create_refcount_update(s, 0, header_size);
qcow2_create_refcount_update(s, s->l1_table_offset,
l1_size * sizeof(uint64_t));
- qcow2_create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
+ qcow2_create_refcount_update(s, s->refcount_table_offset,
+ reftable_clusters * s->cluster_size);
qcow2_create_refcount_update(s, s->refcount_block_offset,
ref_clusters * s->cluster_size);
/* write all the data */
- if (write_all(fd, &header, sizeof(header)) < 0)
- goto FAIL;
+ ret = qemu_write_full(fd, &header, sizeof(header));
+ if (ret != sizeof(header)) {
+ ret = -errno;
+ goto exit;
+ }
if (backing_file) {
if (backing_format_len) {
char zero[16];
- int d = ext_bf.len - backing_format_len;
+ int padding = rounded_ext_bf_len - (ext_bf.len + sizeof(ext_bf));
memset(zero, 0, sizeof(zero));
cpu_to_be32s(&ext_bf.magic);
cpu_to_be32s(&ext_bf.len);
- if (write_all(fd, &ext_bf, sizeof(ext_bf)) < 0 ||
- write_all(fd, backing_format, backing_format_len) < 0)
- goto FAIL;
- if (d>0) {
- if (write_all(fd, zero, d) < 0)
- goto FAIL;
+ ret = qemu_write_full(fd, &ext_bf, sizeof(ext_bf));
+ if (ret != sizeof(ext_bf)) {
+ ret = -errno;
+ goto exit;
}
+ ret = qemu_write_full(fd, backing_format, backing_format_len);
+ if (ret != backing_format_len) {
+ ret = -errno;
+ goto exit;
+ }
+ if (padding > 0) {
+ ret = qemu_write_full(fd, zero, padding);
+ if (ret != padding) {
+ ret = -errno;
+ goto exit;
+ }
+ }
+ }
+ ret = qemu_write_full(fd, backing_file, backing_filename_len);
+ if (ret != backing_filename_len) {
+ ret = -errno;
+ goto exit;
}
- if (write_all(fd, backing_file, backing_filename_len) < 0)
- goto FAIL;
}
- if (lseek_to(fd, s->l1_table_offset) < 0)
- goto FAIL;
-
+ lseek(fd, s->l1_table_offset, SEEK_SET);
tmp = 0;
for(i = 0;i < l1_size; i++) {
- if (write_all(fd, &tmp, sizeof(tmp)) < 0)
- goto FAIL;
+ ret = qemu_write_full(fd, &tmp, sizeof(tmp));
+ if (ret != sizeof(tmp)) {
+ ret = -errno;
+ goto exit;
+ }
+ }
+ lseek(fd, s->refcount_table_offset, SEEK_SET);
+ ret = qemu_write_full(fd, s->refcount_table,
+ reftable_clusters * s->cluster_size);
+ if (ret != reftable_clusters * s->cluster_size) {
+ ret = -errno;
+ goto exit;
}
- if (lseek_to(fd, s->refcount_table_offset) < 0 ||
- write_all(fd, s->refcount_table, s->cluster_size) < 0)
- goto FAIL;
- if (lseek_to(fd, s->refcount_block_offset) < 0 ||
- write_all(fd, s->refcount_block, ref_clusters * s->cluster_size) < 0)
- goto FAIL;
+ lseek(fd, s->refcount_block_offset, SEEK_SET);
+ ret = qemu_write_full(fd, s->refcount_block,
+ ref_clusters * s->cluster_size);
+ if (ret != ref_clusters * s->cluster_size) {
+ ret = -errno;
+ goto exit;
+ }
+ ret = 0;
+exit:
qemu_free(s->refcount_table);
qemu_free(s->refcount_block);
close(fd);
- return 0;
-FAIL:
- qemu_free(s->refcount_table);
- qemu_free(s->refcount_block);
- close(fd);
- return -errno;
+
+ /* Preallocate metadata */
+ if (ret == 0 && prealloc) {
+ BlockDriverState *bs;
+ BlockDriver *drv = bdrv_find_format("qcow2");
+ bs = bdrv_new("");
+ bdrv_open(bs, filename, BDRV_O_CACHE_WB | BDRV_O_RDWR, drv);
+ ret = preallocate(bs);
+ bdrv_close(bs);
+ }
+
+ return ret;
}
static int qcow_create(const char *filename, QEMUOptionParameter *options)
@@ -827,6 +1047,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
uint64_t sectors = 0;
int flags = 0;
size_t cluster_size = 65536;
+ int prealloc = 0;
/* Read out options */
while (options && options->name) {
@@ -842,12 +1063,28 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options)
if (options->value.n) {
cluster_size = options->value.n;
}
+ } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
+ if (!options->value.s || !strcmp(options->value.s, "off")) {
+ prealloc = 0;
+ } else if (!strcmp(options->value.s, "metadata")) {
+ prealloc = 1;
+ } else {
+ fprintf(stderr, "Invalid preallocation mode: '%s'\n",
+ options->value.s);
+ return -EINVAL;
+ }
}
options++;
}
+ if (backing_file && prealloc) {
+ fprintf(stderr, "Backing file and preallocation cannot be used at "
+ "the same time\n");
+ return -EINVAL;
+ }
+
return qcow_create2(filename, sectors, backing_file, backing_fmt, flags,
- cluster_size);
+ cluster_size, prealloc);
}
static int qcow_make_empty(BlockDriverState *bs)
@@ -859,9 +1096,9 @@ static int qcow_make_empty(BlockDriverState *bs)
int ret;
memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite(s->hd, s->l1_table_offset, s->l1_table, l1_length) < 0)
+ if (bdrv_pwrite(bs->file, s->l1_table_offset, s->l1_table, l1_length) < 0)
return -1;
- ret = bdrv_truncate(s->hd, s->l1_table_offset + l1_length);
+ ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;
@@ -870,51 +1107,40 @@ static int qcow_make_empty(BlockDriverState *bs)
return 0;
}
-/**
- * Write data synchronously
- */
-static int qcow2_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
{
BDRVQcowState *s = bs->opaque;
- int ret, index_in_cluster, n;
- uint64_t cluster_offset;
- int n_end;
- QCowL2Meta l2meta;
+ int ret, new_l1_size;
- while (nb_sectors > 0) {
- memset(&l2meta, 0, sizeof(l2meta));
-
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- n_end = index_in_cluster + nb_sectors;
- if (s->crypt_method &&
- n_end > QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors)
- n_end = QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors;
- cluster_offset = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- index_in_cluster,
- n_end, &n, &l2meta);
- if (!cluster_offset)
- return -1;
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, sector_num, s->cluster_data, buf, n, 1,
- &s->aes_encrypt_key);
- ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512,
- s->cluster_data, n * 512);
- } else {
- ret = bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
- }
- if (ret != n * 512 || qcow2_alloc_cluster_link_l2(bs, cluster_offset, &l2meta) < 0) {
- qcow2_free_any_clusters(bs, cluster_offset, l2meta.nb_clusters);
- return -1;
- }
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
- if (l2meta.nb_clusters != 0) {
- QLIST_REMOVE(&l2meta, next_in_flight);
- }
+ if (offset & 511) {
+ return -EINVAL;
}
- s->cluster_cache_offset = -1; /* disable compressed cache */
+
+ /* cannot proceed if image has snapshots */
+ if (s->nb_snapshots) {
+ return -ENOTSUP;
+ }
+
+ /* shrinking is currently not supported */
+ if (offset < bs->total_sectors * 512) {
+ return -ENOTSUP;
+ }
+
+ new_l1_size = size_to_l1(s, offset);
+ ret = qcow2_grow_l1_table(bs, new_l1_size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* write updated header.size */
+ offset = cpu_to_be64(offset);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+ &offset, sizeof(uint64_t));
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->l1_vm_state_index = new_l1_size;
return 0;
}
@@ -932,9 +1158,9 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (nb_sectors == 0) {
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
- cluster_offset = bdrv_getlength(s->hd);
+ cluster_offset = bdrv_getlength(bs->file);
cluster_offset = (cluster_offset + 511) & ~511;
- bdrv_truncate(s->hd, cluster_offset);
+ bdrv_truncate(bs->file, cluster_offset);
return 0;
}
@@ -977,7 +1203,8 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (!cluster_offset)
return -1;
cluster_offset &= s->cluster_offset_mask;
- if (bdrv_pwrite(s->hd, cluster_offset, out_buf, out_len) != out_len) {
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+ if (bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len) != out_len) {
qemu_free(out_buf);
return -1;
}
@@ -989,13 +1216,18 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
static void qcow_flush(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
- bdrv_flush(s->hd);
+ bdrv_flush(bs->file);
+}
+
+static BlockDriverAIOCB *qcow_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return bdrv_aio_flush(bs->file, cb, opaque);
}
static int64_t qcow_vm_state_offset(BDRVQcowState *s)
{
- return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
+ return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
}
static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -1007,9 +1239,9 @@ static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
}
-static int qcow_check(BlockDriverState *bs)
+static int qcow_check(BlockDriverState *bs, BdrvCheckResult *result)
{
- return qcow2_check_refcounts(bs);
+ return qcow2_check_refcounts(bs, result);
}
#if 0
@@ -1019,7 +1251,7 @@ static void dump_refcounts(BlockDriverState *bs)
int64_t nb_clusters, k, k1, size;
int refcount;
- size = bdrv_getlength(s->hd);
+ size = bdrv_getlength(bs->file);
nb_clusters = size_to_clusters(s, size);
for(k = 0; k < nb_clusters;) {
k1 = k;
@@ -1027,31 +1259,35 @@ static void dump_refcounts(BlockDriverState *bs)
k++;
while (k < nb_clusters && get_refcount(bs, k) == refcount)
k++;
- printf("%lld: refcount=%d nb=%lld\n", k, refcount, k - k1);
+ printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
+ k - k1);
}
}
#endif
-static int qcow_put_buffer(BlockDriverState *bs, const uint8_t *buf,
+static int qcow_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size)
{
BDRVQcowState *s = bs->opaque;
int growable = bs->growable;
+ int ret;
+ BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
bs->growable = 1;
- bdrv_pwrite(bs, qcow_vm_state_offset(s) + pos, buf, size);
+ ret = bdrv_pwrite(bs, qcow_vm_state_offset(s) + pos, buf, size);
bs->growable = growable;
- return size;
+ return ret;
}
-static int qcow_get_buffer(BlockDriverState *bs, uint8_t *buf,
+static int qcow_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size)
{
BDRVQcowState *s = bs->opaque;
int growable = bs->growable;
int ret;
+ BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
bs->growable = 1;
ret = bdrv_pread(bs, qcow_vm_state_offset(s) + pos, buf, size);
bs->growable = growable;
@@ -1085,6 +1321,11 @@ static QEMUOptionParameter qcow_create_options[] = {
.type = OPT_SIZE,
.help = "qcow2 cluster size"
},
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, metadata)"
+ },
{ NULL }
};
@@ -1100,11 +1341,12 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_set_key = qcow_set_key,
.bdrv_make_empty = qcow_make_empty,
- .bdrv_read = qcow2_read,
- .bdrv_write = qcow2_write,
- .bdrv_aio_readv = qcow_aio_readv,
- .bdrv_aio_writev = qcow_aio_writev,
- .bdrv_write_compressed = qcow_write_compressed,
+ .bdrv_aio_readv = qcow_aio_readv,
+ .bdrv_aio_writev = qcow_aio_writev,
+ .bdrv_aio_flush = qcow_aio_flush,
+
+ .bdrv_truncate = qcow2_truncate,
+ .bdrv_write_compressed = qcow_write_compressed,
.bdrv_snapshot_create = qcow2_snapshot_create,
.bdrv_snapshot_goto = qcow2_snapshot_goto,
@@ -1112,8 +1354,10 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_snapshot_list = qcow2_snapshot_list,
.bdrv_get_info = qcow_get_info,
- .bdrv_put_buffer = qcow_put_buffer,
- .bdrv_get_buffer = qcow_get_buffer,
+ .bdrv_save_vmstate = qcow_save_vmstate,
+ .bdrv_load_vmstate = qcow_load_vmstate,
+
+ .bdrv_change_backing_file = qcow2_change_backing_file,
.create_options = qcow_create_options,
.bdrv_check = qcow_check,
diff --git a/block/qcow2.h b/block/qcow2.h
index 542292d..3ff162e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -27,6 +27,10 @@
#include "aes.h"
+//#define DEBUG_ALLOC
+//#define DEBUG_ALLOC2
+//#define DEBUG_EXT
+
#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
#define QCOW_VERSION 2
@@ -43,7 +47,7 @@
#define REFCOUNT_SHIFT 1 /* refcount size is 2 bytes */
#define MIN_CLUSTER_BITS 9
-#define MAX_CLUSTER_BITS 16
+#define MAX_CLUSTER_BITS 21
#define L2_CACHE_SIZE 16
@@ -125,14 +129,18 @@ typedef struct QCowCreateState {
int64_t refcount_block_offset;
} QCowCreateState;
+struct QCowAIOCB;
+
/* XXX This could be private for qcow2-cluster.c */
typedef struct QCowL2Meta
{
uint64_t offset;
+ uint64_t cluster_offset;
int n_start;
int nb_available;
int nb_clusters;
struct QCowL2Meta *depends_on;
+ QLIST_HEAD(QCowAioDependencies, QCowAIOCB) dependent_requests;
QLIST_ENTRY(QCowL2Meta) next_in_flight;
} QCowL2Meta;
@@ -142,6 +150,12 @@ static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
return (size + (s->cluster_size - 1)) >> s->cluster_bits;
}
+static inline int size_to_l1(BDRVQcowState *s, int64_t size)
+{
+ int shift = s->cluster_bits + s->l2_bits;
+ return (size + (1ULL << shift) - 1) >> shift;
+}
+
static inline int64_t align_offset(int64_t offset, int n)
{
offset = (offset + n - 1) & ~(n - 1);
@@ -171,32 +185,26 @@ void qcow2_create_refcount_update(QCowCreateState *s, int64_t offset,
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend);
-int qcow2_check_refcounts(BlockDriverState *bs);
+int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res);
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, int min_size);
void qcow2_l2_cache_reset(BlockDriverState *bs);
-int qcow2_decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset);
+int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, int enc,
const AES_KEY *key);
-uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num);
-uint64_t qcow2_alloc_cluster_offset(BlockDriverState *bs,
- uint64_t offset,
- int n_start, int n_end,
- int *num, QCowL2Meta *m);
+int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int *num, uint64_t *cluster_offset);
+int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
+ int n_start, int n_end, int *num, QCowL2Meta *m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size);
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, uint64_t cluster_offset,
- QCowL2Meta *m);
-
-int qcow2_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors);
+int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 0e9e343..72fb8ce 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -24,11 +24,10 @@
#include "qemu-common.h"
#include "qemu-timer.h"
#include "qemu-char.h"
+#include "qemu-log.h"
#include "block_int.h"
#include "module.h"
-#ifdef CONFIG_AIO
-#include "posix-aio-compat.h"
-#endif
+#include "block/raw-posix-aio.h"
#ifdef CONFIG_COCOA
#include <paths.h>
@@ -52,7 +51,7 @@
#include <linux/cdrom.h>
#include <linux/fd.h>
#endif
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <signal.h>
#include <sys/disk.h>
#include <sys/cdio.h>
@@ -81,7 +80,11 @@
/* OS X does not have O_DSYNC */
#ifndef O_DSYNC
+#ifdef O_SYNC
#define O_DSYNC O_SYNC
+#elif defined(O_FSYNC)
+#define O_DSYNC O_FSYNC
+#endif
#endif
/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
@@ -102,7 +105,6 @@
typedef struct BDRVRawState {
int fd;
int type;
- unsigned int lseek_err_cnt;
int open_flags;
#if defined(__linux__)
/* linux floppy specific */
@@ -111,14 +113,17 @@ typedef struct BDRVRawState {
int fd_got_error;
int fd_media_changed;
#endif
+#ifdef CONFIG_LINUX_AIO
+ int use_aio;
+ void *aio_ctx;
+#endif
uint8_t* aligned_buf;
} BDRVRawState;
-static int posix_aio_init(void);
-
static int fd_open(BlockDriverState *bs);
+static int64_t raw_getlength(BlockDriverState *bs);
-#if defined(__FreeBSD__)
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int cdrom_reopen(BlockDriverState *bs);
#endif
@@ -128,17 +133,12 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
BDRVRawState *s = bs->opaque;
int fd, ret;
- posix_aio_init();
-
- s->lseek_err_cnt = 0;
-
s->open_flags = open_flags | O_BINARY;
s->open_flags &= ~O_ACCMODE;
- if ((bdrv_flags & BDRV_O_ACCESS) == BDRV_O_RDWR) {
+ if (bdrv_flags & BDRV_O_RDWR) {
s->open_flags |= O_RDWR;
} else {
s->open_flags |= O_RDONLY;
- bs->read_only = 1;
}
/* Use O_DSYNC for write-through caching, no flags for write-back caching,
@@ -149,7 +149,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
s->open_flags |= O_DSYNC;
s->fd = -1;
- fd = open(filename, s->open_flags, 0644);
+ fd = qemu_open(filename, s->open_flags, 0644);
if (fd < 0) {
ret = -errno;
if (ret == -EROFS)
@@ -158,27 +158,52 @@ static int raw_open_common(BlockDriverState *bs, const char *filename,
}
s->fd = fd;
s->aligned_buf = NULL;
+
if ((bdrv_flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
- ret = -errno;
- close(fd);
- return ret;
+ goto out_close;
}
}
+
+#ifdef CONFIG_LINUX_AIO
+ if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+ (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
+
+ /* We're falling back to POSIX AIO in some cases */
+ paio_init();
+
+ s->aio_ctx = laio_init();
+ if (!s->aio_ctx) {
+ goto out_free_buf;
+ }
+ s->use_aio = 1;
+ } else
+#endif
+ {
+ if (paio_init() < 0) {
+ goto out_free_buf;
+ }
+#ifdef CONFIG_LINUX_AIO
+ s->use_aio = 0;
+#endif
+ }
+
return 0;
+
+out_free_buf:
+ qemu_vfree(s->aligned_buf);
+out_close:
+ close(fd);
+ return -errno;
}
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
- int open_flags = 0;
s->type = FTYPE_FILE;
- if (flags & BDRV_O_CREAT)
- open_flags = O_CREAT | O_TRUNC;
-
- return raw_open_common(bs, filename, flags, open_flags);
+ return raw_open_common(bs, filename, flags, 0);
}
/* XXX: use host sector size if necessary with:
@@ -191,7 +216,7 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
}
#endif
#ifdef CONFIG_COCOA
- u_int32_t blockSize = 512;
+ uint32_t blockSize = 512;
if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
bufsize = blockSize;
}
@@ -215,21 +240,18 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
if (ret < 0)
return ret;
- if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
- ++(s->lseek_err_cnt);
- if(s->lseek_err_cnt <= 10) {
- DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
- "] lseek failed : %d = %s\n",
- s->fd, bs->filename, offset, buf, count,
- bs->total_sectors, errno, strerror(errno));
+ ret = pread(s->fd, buf, count, offset);
+ if (ret == count)
+ return ret;
+
+ /* Allow reads beyond the end (needed for pwrite) */
+ if ((ret == 0) && bs->growable) {
+ int64_t size = raw_getlength(bs);
+ if (offset >= size) {
+ memset(buf, 0, count);
+ return count;
}
- return -1;
}
- s->lseek_err_cnt=0;
-
- ret = read(s->fd, buf, count);
- if (ret == count)
- goto label__raw_read__success;
DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
"] read failed %d : %d = %s\n",
@@ -237,15 +259,13 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
bs->total_sectors, ret, errno, strerror(errno));
/* Try harder for CDrom. */
- if (bs->type == BDRV_TYPE_CDROM) {
- lseek(s->fd, offset, SEEK_SET);
- ret = read(s->fd, buf, count);
+ if (s->type != FTYPE_FILE) {
+ ret = pread(s->fd, buf, count, offset);
if (ret == count)
- goto label__raw_read__success;
- lseek(s->fd, offset, SEEK_SET);
- ret = read(s->fd, buf, count);
+ return ret;
+ ret = pread(s->fd, buf, count, offset);
if (ret == count)
- goto label__raw_read__success;
+ return ret;
DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
"] retry read failed %d : %d = %s\n",
@@ -253,8 +273,6 @@ static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
bs->total_sectors, ret, errno, strerror(errno));
}
-label__raw_read__success:
-
return (ret < 0) ? -errno : ret;
}
@@ -275,29 +293,15 @@ static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
if (ret < 0)
return -errno;
- if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
- ++(s->lseek_err_cnt);
- if(s->lseek_err_cnt) {
- DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
- PRId64 "] lseek failed : %d = %s\n",
- s->fd, bs->filename, offset, buf, count,
- bs->total_sectors, errno, strerror(errno));
- }
- return -EIO;
- }
- s->lseek_err_cnt = 0;
-
- ret = write(s->fd, buf, count);
+ ret = pwrite(s->fd, buf, count, offset);
if (ret == count)
- goto label__raw_write__success;
+ return ret;
DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
"] write failed %d : %d = %s\n",
s->fd, bs->filename, offset, buf, count,
bs->total_sectors, ret, errno, strerror(errno));
-label__raw_write__success:
-
return (ret < 0) ? -errno : ret;
}
@@ -352,8 +356,12 @@ static int raw_pread(BlockDriverState *bs, int64_t offset,
size = ALIGNED_BUFFER_SIZE;
ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
- if (ret < 0)
+ if (ret < 0) {
return ret;
+ } else if (ret == 0) {
+ fprintf(stderr, "raw_pread: read beyond end of file\n");
+ abort();
+ }
size = ret;
if (size > count)
@@ -379,8 +387,9 @@ static int raw_read(BlockDriverState *bs, int64_t sector_num,
{
int ret;
- ret = raw_pread(bs, sector_num * 512, buf, nb_sectors * 512);
- if (ret == (nb_sectors * 512))
+ ret = raw_pread(bs, sector_num * BDRV_SECTOR_SIZE, buf,
+ nb_sectors * BDRV_SECTOR_SIZE);
+ if (ret == (nb_sectors * BDRV_SECTOR_SIZE))
ret = 0;
return ret;
}
@@ -467,255 +476,84 @@ static int raw_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
int ret;
- ret = raw_pwrite(bs, sector_num * 512, buf, nb_sectors * 512);
- if (ret == (nb_sectors * 512))
+ ret = raw_pwrite(bs, sector_num * BDRV_SECTOR_SIZE, buf,
+ nb_sectors * BDRV_SECTOR_SIZE);
+ if (ret == (nb_sectors * BDRV_SECTOR_SIZE))
ret = 0;
return ret;
}
-#ifdef CONFIG_AIO
-/***********************************************************/
-/* Unix AIO using POSIX AIO */
-
-typedef struct RawAIOCB {
- BlockDriverAIOCB common;
- struct qemu_paiocb aiocb;
- struct RawAIOCB *next;
- int ret;
-} RawAIOCB;
-
-typedef struct PosixAioState
-{
- int rfd, wfd;
- RawAIOCB *first_aio;
-} PosixAioState;
-
-static void posix_aio_read(void *opaque)
-{
- PosixAioState *s = opaque;
- RawAIOCB *acb, **pacb;
- int ret;
- ssize_t len;
-
- /* read all bytes from signal pipe */
- for (;;) {
- char bytes[16];
-
- len = read(s->rfd, bytes, sizeof(bytes));
- if (len == -1 && errno == EINTR)
- continue; /* try again */
- if (len == sizeof(bytes))
- continue; /* more to read */
- break;
- }
-
- for(;;) {
- pacb = &s->first_aio;
- for(;;) {
- acb = *pacb;
- if (!acb)
- goto the_end;
- ret = qemu_paio_error(&acb->aiocb);
- if (ret == ECANCELED) {
- /* remove the request */
- *pacb = acb->next;
- qemu_aio_release(acb);
- } else if (ret != EINPROGRESS) {
- /* end of aio */
- if (ret == 0) {
- ret = qemu_paio_return(&acb->aiocb);
- if (ret == acb->aiocb.aio_nbytes)
- ret = 0;
- else
- ret = -EINVAL;
- } else {
- ret = -ret;
- }
- /* remove the request */
- *pacb = acb->next;
- /* call the callback */
- acb->common.cb(acb->common.opaque, ret);
- qemu_aio_release(acb);
- break;
- } else {
- pacb = &acb->next;
- }
- }
- }
- the_end: ;
-}
-
-static int posix_aio_flush(void *opaque)
-{
- PosixAioState *s = opaque;
- return !!s->first_aio;
-}
-
-static PosixAioState *posix_aio_state;
-
-static void aio_signal_handler(int signum)
-{
- if (posix_aio_state) {
- char byte = 0;
-
- write(posix_aio_state->wfd, &byte, sizeof(byte));
- }
-
- qemu_service_io();
-}
-
-static int posix_aio_init(void)
+/*
+ * Check if all memory in this vector is sector aligned.
+ */
+static int qiov_is_aligned(QEMUIOVector *qiov)
{
- struct sigaction act;
- PosixAioState *s;
- int fds[2];
- struct qemu_paioinit ai;
+ int i;
- if (posix_aio_state)
- return 0;
-
- s = qemu_malloc(sizeof(PosixAioState));
-
- sigfillset(&act.sa_mask);
- act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
- act.sa_handler = aio_signal_handler;
- sigaction(SIGUSR2, &act, NULL);
-
- s->first_aio = NULL;
- if (pipe(fds) == -1) {
- fprintf(stderr, "failed to create pipe\n");
- return -errno;
- }
-
- s->rfd = fds[0];
- s->wfd = fds[1];
-
- fcntl(s->rfd, F_SETFL, O_NONBLOCK);
- fcntl(s->wfd, F_SETFL, O_NONBLOCK);
-
- qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
-
- memset(&ai, 0, sizeof(ai));
- ai.aio_threads = 64;
- ai.aio_num = 64;
- qemu_paio_init(&ai);
-
- posix_aio_state = s;
-
- return 0;
-}
-
-static void raw_aio_remove(RawAIOCB *acb)
-{
- RawAIOCB **pacb;
-
- /* remove the callback from the queue */
- pacb = &posix_aio_state->first_aio;
- for(;;) {
- if (*pacb == NULL) {
- fprintf(stderr, "raw_aio_remove: aio request not found!\n");
- break;
- } else if (*pacb == acb) {
- *pacb = acb->next;
- qemu_aio_release(acb);
- break;
+ for (i = 0; i < qiov->niov; i++) {
+ if ((uintptr_t) qiov->iov[i].iov_base % BDRV_SECTOR_SIZE) {
+ return 0;
}
- pacb = &(*pacb)->next;
}
-}
-
-static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
-{
- int ret;
- RawAIOCB *acb = (RawAIOCB *)blockacb;
- ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
- if (ret == QEMU_PAIO_NOTCANCELED) {
- /* fail safe: if the aio could not be canceled, we wait for
- it */
- while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
- }
-
- raw_aio_remove(acb);
+ return 1;
}
-static AIOPool raw_aio_pool = {
- .aiocb_size = sizeof(RawAIOCB),
- .cancel = raw_aio_cancel,
-};
-
-static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockDriverCompletionFunc *cb, void *opaque)
+static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int type)
{
BDRVRawState *s = bs->opaque;
- RawAIOCB *acb;
if (fd_open(bs) < 0)
return NULL;
- acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
- if (!acb)
- return NULL;
- acb->aiocb.aio_fildes = s->fd;
- acb->aiocb.ev_signo = SIGUSR2;
- acb->aiocb.aio_iov = qiov->iov;
- acb->aiocb.aio_niov = qiov->niov;
- acb->aiocb.aio_nbytes = nb_sectors * 512;
- acb->aiocb.aio_offset = sector_num * 512;
- acb->aiocb.aio_flags = 0;
-
/*
* If O_DIRECT is used the buffer needs to be aligned on a sector
- * boundary. Tell the low level code to ensure that in case it's
- * not done yet.
+ * boundary. Check if this is the case or tell the low-level
+ * driver that it needs to copy the buffer.
*/
- if (s->aligned_buf)
- acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
+ if (s->aligned_buf) {
+ if (!qiov_is_aligned(qiov)) {
+ type |= QEMU_AIO_MISALIGNED;
+#ifdef CONFIG_LINUX_AIO
+ } else if (s->use_aio) {
+ return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
+ nb_sectors, cb, opaque, type);
+#endif
+ }
+ }
- acb->next = posix_aio_state->first_aio;
- posix_aio_state->first_aio = acb;
- return acb;
+ return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
+ cb, opaque, type);
}
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- RawAIOCB *acb;
-
- acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
- if (!acb)
- return NULL;
- if (qemu_paio_read(&acb->aiocb) < 0) {
- raw_aio_remove(acb);
- return NULL;
- }
- return &acb->common;
+ return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, QEMU_AIO_READ);
}
static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
{
- RawAIOCB *acb;
-
- acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
- if (!acb)
- return NULL;
- if (qemu_paio_write(&acb->aiocb) < 0) {
- raw_aio_remove(acb);
- return NULL;
- }
- return &acb->common;
+ return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
+ cb, opaque, QEMU_AIO_WRITE);
}
-#else /* CONFIG_AIO */
-static int posix_aio_init(void)
+
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
{
- return 0;
-}
-#endif /* CONFIG_AIO */
+ BDRVRawState *s = bs->opaque;
+
+ if (fd_open(bs) < 0)
+ return NULL;
+ return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
+}
static void raw_close(BlockDriverState *bs)
{
@@ -724,7 +562,7 @@ static void raw_close(BlockDriverState *bs)
close(s->fd);
s->fd = -1;
if (s->aligned_buf != NULL)
- qemu_free(s->aligned_buf);
+ qemu_vfree(s->aligned_buf);
}
}
@@ -757,30 +595,49 @@ static int64_t raw_getlength(BlockDriverState *bs)
} else
return st.st_size;
}
-#else /* !__OpenBSD__ */
-static int64_t raw_getlength(BlockDriverState *bs)
+#elif defined(__sun__)
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ struct dk_minfo minfo;
+ int ret;
+
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * Use the DKIOCGMEDIAINFO ioctl to read the size.
+ */
+ ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
+ if (ret != -1) {
+ return minfo.dki_lbsize * minfo.dki_capacity;
+ }
+
+ /*
+ * There are reports that lseek on some devices fails, but
+ * irc discussion said that contingency on contingency was overkill.
+ */
+ return lseek(s->fd, 0, SEEK_END);
+}
+#elif defined(CONFIG_BSD)
+static int64_t raw_getlength(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int fd = s->fd;
int64_t size;
-#ifdef CONFIG_BSD
struct stat sb;
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
int reopened = 0;
#endif
-#endif
-#ifdef __sun__
- struct dk_minfo minfo;
- int rv;
-#endif
int ret;
ret = fd_open(bs);
if (ret < 0)
return ret;
-#ifdef CONFIG_BSD
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
again:
#endif
if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
@@ -801,7 +658,7 @@ again:
#else
size = lseek(fd, 0LL, SEEK_END);
#endif
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
switch(s->type) {
case FTYPE_CD:
/* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
@@ -814,56 +671,59 @@ again:
}
}
#endif
- } else
-#endif
-#ifdef __sun__
- /*
- * use the DKIOCGMEDIAINFO ioctl to read the size.
- */
- rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
- if ( rv != -1 ) {
- size = minfo.dki_lbsize * minfo.dki_capacity;
- } else /* there are reports that lseek on some devices
- fails, but irc discussion said that contingency
- on contingency was overkill */
-#endif
- {
+ } else {
size = lseek(fd, 0, SEEK_END);
}
return size;
}
+#else
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ int ret;
+
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return lseek(s->fd, 0, SEEK_END);
+}
#endif
static int raw_create(const char *filename, QEMUOptionParameter *options)
{
- int fd, ret;
+ int fd;
+ int result = 0;
int64_t total_size = 0;
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / 512;
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
}
options++;
}
fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
- if (fd < 0)
- return -EIO;
- do {
- ret = ftruncate(fd, total_size * 512);
- } while (ret < 0 && errno == EINTR);
- close(fd);
- if (ret != 0)
- return -errno;
- return 0;
+ if (fd < 0) {
+ result = -errno;
+ } else {
+ if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+ result = -errno;
+ }
+ if (close(fd) != 0) {
+ result = -errno;
+ }
+ }
+ return result;
}
static void raw_flush(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
- fsync(s->fd);
+ qemu_fdatasync(s->fd);
}
@@ -876,21 +736,21 @@ static QEMUOptionParameter raw_create_options[] = {
{ NULL }
};
-static BlockDriver bdrv_raw = {
- .format_name = "raw",
+static BlockDriver bdrv_file = {
+ .format_name = "file",
+ .protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe = NULL, /* no probe for protocols */
- .bdrv_open = raw_open,
+ .bdrv_file_open = raw_open,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1007,7 +867,7 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
#endif
s->type = FTYPE_FILE;
-#if defined(__linux__) && defined(CONFIG_AIO)
+#if defined(__linux__)
if (strstart(filename, "/dev/sg", NULL)) {
bs->sg = 1;
}
@@ -1073,40 +933,18 @@ static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
return ioctl(s->fd, req, buf);
}
-#ifdef CONFIG_AIO
static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockDriverCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
- RawAIOCB *acb;
if (fd_open(bs) < 0)
return NULL;
-
- acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
- if (!acb)
- return NULL;
- acb->aiocb.aio_fildes = s->fd;
- acb->aiocb.ev_signo = SIGUSR2;
- acb->aiocb.aio_offset = 0;
- acb->aiocb.aio_flags = 0;
-
- acb->next = posix_aio_state->first_aio;
- posix_aio_state->first_aio = acb;
-
- acb->aiocb.aio_ioctl_buf = buf;
- acb->aiocb.aio_ioctl_cmd = req;
- if (qemu_paio_ioctl(&acb->aiocb) < 0) {
- raw_aio_remove(acb);
- return NULL;
- }
-
- return &acb->common;
+ return paio_ioctl(bs, s->fd, req, buf, cb, opaque);
}
-#endif
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int fd_open(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -1135,39 +973,46 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options)
/* Read out options */
while (options && options->name) {
if (!strcmp(options->name, "size")) {
- total_size = options->value.n / 512;
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
}
options++;
}
fd = open(filename, O_WRONLY | O_BINARY);
if (fd < 0)
- return -EIO;
+ return -errno;
if (fstat(fd, &stat_buf) < 0)
- ret = -EIO;
+ ret = -errno;
else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
- ret = -EIO;
- else if (lseek(fd, 0, SEEK_END) < total_size * 512)
+ ret = -ENODEV;
+ else if (lseek(fd, 0, SEEK_END) < total_size * BDRV_SECTOR_SIZE)
ret = -ENOSPC;
close(fd);
return ret;
}
+static int hdev_has_zero_init(BlockDriverState *bs)
+{
+ return 0;
+}
+
static BlockDriver bdrv_host_device = {
- .format_name = "host_device",
- .instance_size = sizeof(BDRVRawState),
- .bdrv_probe_device = hdev_probe_device,
- .bdrv_open = hdev_open,
- .bdrv_close = raw_close,
+ .format_name = "host_device",
+ .protocol_name = "host_device",
+ .instance_size = sizeof(BDRVRawState),
+ .bdrv_probe_device = hdev_probe_device,
+ .bdrv_file_open = hdev_open,
+ .bdrv_close = raw_close,
.bdrv_create = hdev_create,
- .bdrv_flush = raw_flush,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
+ .bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
@@ -1176,10 +1021,8 @@ static BlockDriver bdrv_host_device = {
/* generic scsi device */
#ifdef __linux__
.bdrv_ioctl = hdev_ioctl,
-#ifdef CONFIG_AIO
.bdrv_aio_ioctl = hdev_aio_ioctl,
#endif
-#endif
};
#ifdef __linux__
@@ -1188,8 +1031,6 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
BDRVRawState *s = bs->opaque;
int ret;
- posix_aio_init();
-
s->type = FTYPE_FD;
/* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
@@ -1207,9 +1048,26 @@ static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
static int floppy_probe_device(const char *filename)
{
+ int fd, ret;
+ int prio = 0;
+ struct floppy_struct fdparam;
+
if (strstart(filename, "/dev/fd", NULL))
- return 100;
- return 0;
+ prio = 50;
+
+ fd = open(filename, O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ goto out;
+ }
+
+ /* Attempt to detect via a floppy specific ioctl */
+ ret = ioctl(fd, FDGETPRM, &fdparam);
+ if (ret >= 0)
+ prio = 100;
+
+ close(fd);
+out:
+ return prio;
}
@@ -1257,17 +1115,19 @@ static int floppy_eject(BlockDriverState *bs, int eject_flag)
static BlockDriver bdrv_host_floppy = {
.format_name = "host_floppy",
+ .protocol_name = "host_floppy",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe_device = floppy_probe_device,
- .bdrv_open = floppy_open,
+ .bdrv_file_open = floppy_open,
.bdrv_close = raw_close,
.bdrv_create = hdev_create,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
@@ -1291,9 +1151,25 @@ static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
static int cdrom_probe_device(const char *filename)
{
+ int fd, ret;
+ int prio = 0;
+
if (strstart(filename, "/dev/cd", NULL))
- return 100;
- return 0;
+ prio = 50;
+
+ fd = open(filename, O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ goto out;
+ }
+
+ /* Attempt to detect via a CDROM specific ioctl */
+ ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
+ if (ret >= 0)
+ prio = 100;
+
+ close(fd);
+out:
+ return prio;
}
static int cdrom_is_inserted(BlockDriverState *bs)
@@ -1339,17 +1215,19 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked)
static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
+ .protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe_device = cdrom_probe_device,
- .bdrv_open = cdrom_open,
+ .bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
.bdrv_create = hdev_create,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
@@ -1362,13 +1240,11 @@ static BlockDriver bdrv_host_cdrom = {
/* generic scsi device */
.bdrv_ioctl = hdev_ioctl,
-#ifdef CONFIG_AIO
.bdrv_aio_ioctl = hdev_aio_ioctl,
-#endif
};
#endif /* __linux__ */
-#ifdef __FreeBSD__
+#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
@@ -1462,17 +1338,19 @@ static int cdrom_set_locked(BlockDriverState *bs, int locked)
static BlockDriver bdrv_host_cdrom = {
.format_name = "host_cdrom",
+ .protocol_name = "host_cdrom",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe_device = cdrom_probe_device,
- .bdrv_open = cdrom_open,
+ .bdrv_file_open = cdrom_open,
.bdrv_close = raw_close,
.bdrv_create = hdev_create,
+ .create_options = raw_create_options,
+ .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_flush = raw_flush,
-#ifdef CONFIG_AIO
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
-#endif
+ .bdrv_aio_flush = raw_aio_flush,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
@@ -1485,21 +1363,21 @@ static BlockDriver bdrv_host_cdrom = {
};
#endif /* __FreeBSD__ */
-static void bdrv_raw_init(void)
+static void bdrv_file_init(void)
{
/*
* Register all the drivers. Note that order is important, the driver
* registered last will get probed first.
*/
- bdrv_register(&bdrv_raw);
+ bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
#ifdef __linux__
bdrv_register(&bdrv_host_floppy);
bdrv_register(&bdrv_host_cdrom);
#endif
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
bdrv_register(&bdrv_host_cdrom);
#endif
}
-block_init(bdrv_raw_init);
+block_init(bdrv_file_init);
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 72acad5..503ed39 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -76,21 +76,17 @@ static int set_sparse(int fd)
static int raw_open(BlockDriverState *bs, const char *filename, int flags)
{
BDRVRawState *s = bs->opaque;
- int access_flags, create_flags;
+ int access_flags;
DWORD overlapped;
s->type = FTYPE_FILE;
- if ((flags & BDRV_O_ACCESS) == O_RDWR) {
+ if (flags & BDRV_O_RDWR) {
access_flags = GENERIC_READ | GENERIC_WRITE;
} else {
access_flags = GENERIC_READ;
}
- if (flags & BDRV_O_CREAT) {
- create_flags = CREATE_ALWAYS;
- } else {
- create_flags = OPEN_EXISTING;
- }
+
overlapped = FILE_ATTRIBUTE_NORMAL;
if ((flags & BDRV_O_NOCACHE))
overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
@@ -98,7 +94,7 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
- create_flags, overlapped, NULL);
+ OPEN_EXISTING, overlapped, NULL);
if (s->hfile == INVALID_HANDLE_VALUE) {
int err = GetLastError();
@@ -242,10 +238,11 @@ static QEMUOptionParameter raw_create_options[] = {
{ NULL }
};
-static BlockDriver bdrv_raw = {
- .format_name = "raw",
+static BlockDriver bdrv_file = {
+ .format_name = "file",
+ .protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
- .bdrv_open = raw_open,
+ .bdrv_file_open = raw_open,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_flush = raw_flush,
@@ -337,7 +334,7 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
}
s->type = find_device_type(bs, filename);
- if ((flags & BDRV_O_ACCESS) == O_RDWR) {
+ if (flags & BDRV_O_RDWR) {
access_flags = GENERIC_READ | GENERIC_WRITE;
} else {
access_flags = GENERIC_READ;
@@ -397,23 +394,30 @@ static int raw_set_locked(BlockDriverState *bs, int locked)
}
#endif
+/* Zero-init query for host devices: bs is ignored and 0 is always returned. */
+static int hdev_has_zero_init(BlockDriverState *bs)
+{
+    const int zero_init = 0;
+
+    return zero_init;
+}
+
static BlockDriver bdrv_host_device = {
.format_name = "host_device",
+ .protocol_name = "host_device",
.instance_size = sizeof(BDRVRawState),
.bdrv_probe_device = hdev_probe_device,
- .bdrv_open = hdev_open,
+ .bdrv_file_open = hdev_open,
.bdrv_close = raw_close,
.bdrv_flush = raw_flush,
+ .bdrv_has_zero_init = hdev_has_zero_init,
.bdrv_read = raw_read,
.bdrv_write = raw_write,
.bdrv_getlength = raw_getlength,
};
-static void bdrv_raw_init(void)
+static void bdrv_file_init(void)
{
- bdrv_register(&bdrv_raw);
+ bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
}
-block_init(bdrv_raw_init);
+block_init(bdrv_file_init);
diff --git a/block/raw.c b/block/raw.c
new file mode 100644
index 0000000..61e6748
--- /dev/null
+++ b/block/raw.c
@@ -0,0 +1,280 @@
+
+#include "qemu-common.h"
+#include "block_int.h"
+#include "module.h"
+
+/*
+ * Open hook of the "raw" format driver.
+ *
+ * Copies the sg field of the underlying bs->file into bs.  The flags
+ * argument is not used.  Always returns 0.
+ */
+static int raw_open(BlockDriverState *bs, int flags)
+{
+    BlockDriverState *lower = bs->file;
+
+    bs->sg = lower->sg;
+    return 0;
+}
+
+/*
+ * Report whether the first four bytes of buf match the magic number of a
+ * known image format (qcow/qcow2, VMDK3, VMDK4 or UML COW), i.e. whether
+ * the data looks like a block format header.
+ *
+ * bs is not used.  buf must point to at least four readable bytes.
+ * Returns 1 on a match and 0 otherwise.
+ */
+static int check_for_block_signature(BlockDriverState *bs, const uint8_t *buf)
+{
+    static const uint8_t magics[][4] = {
+        { 'Q', 'F', 'I', 0xfb }, /* qcow/qcow2 */
+        { 'C', 'O', 'W', 'D' }, /* VMDK3 */
+        { 'V', 'M', 'D', 'K' }, /* VMDK4 */
+        { 'O', 'O', 'O', 'M' }, /* UML COW */
+    };
+    const size_t count = sizeof(magics) / sizeof(magics[0]);
+    size_t idx;
+
+    for (idx = 0; idx < count; idx++) {
+        if (!memcmp(buf, magics[idx], sizeof(magics[idx]))) {
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Decide whether a write needs its leading signature scrubbed.
+ *
+ * Returns non-zero only when bs->probed is set, the write starts at
+ * sector 0, covers at least one sector, and buf begins with a known image
+ * format signature.  Returns 0 in every other case.
+ */
+static int check_write_unsafe(BlockDriverState *bs, int64_t sector_num,
+                              const uint8_t *buf, int nb_sectors)
+{
+    /* A user who specified the format explicitly is assumed to keep doing
+       so and gets no safety net. */
+    if (!bs->probed) {
+        return 0;
+    }
+
+    /* Only a non-empty write starting at sector 0 is inspected. */
+    if (sector_num != 0 || nb_sectors <= 0) {
+        return 0;
+    }
+
+    return check_for_block_signature(bs, buf);
+}
+
+/* Synchronous read: forwards the request unchanged to bs->file. */
+static int raw_read(BlockDriverState *bs, int64_t sector_num,
+                    uint8_t *buf, int nb_sectors)
+{
+    BlockDriverState *lower = bs->file;
+
+    return bdrv_read(lower, sector_num, buf, nb_sectors);
+}
+
+/*
+ * Write the first 512 bytes of buf to sector 0 of bs->file, with the four
+ * leading signature bytes replaced by zeros.  buf itself is not modified.
+ * Returns whatever bdrv_write() returns.
+ */
+static int raw_write_scrubbed_bootsect(BlockDriverState *bs,
+                                       const uint8_t *buf)
+{
+    /* Starts out all zero, so bytes 0..3 stay scrubbed. */
+    uint8_t sector0[512] = { 0 };
+
+    /* Copy everything after the dangerous signature. */
+    memcpy(sector0 + 4, buf + 4, sizeof(sector0) - 4);
+
+    return bdrv_write(bs->file, 0, sector0, 1);
+}
+
+/*
+ * Synchronous write.
+ *
+ * A request that check_write_unsafe() flags is split: sector 0 is written
+ * with its signature scrubbed, then the remaining nb_sectors - 1 sectors
+ * are written starting at sector 1.  Any other request goes straight to
+ * bs->file.
+ *
+ * Returns a negative value on failure.  On the scrubbed path a successful
+ * result is the second bdrv_write() return value plus 512.
+ * NOTE(review): confirm callers accept a positive success value here.
+ */
+static int raw_write(BlockDriverState *bs, int64_t sector_num,
+                     const uint8_t *buf, int nb_sectors)
+{
+    int rc;
+
+    /* Common case: nothing to scrub, pass the request straight through. */
+    if (!check_write_unsafe(bs, sector_num, buf, nb_sectors)) {
+        return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+    }
+
+    rc = raw_write_scrubbed_bootsect(bs, buf);
+    if (rc >= 0) {
+        /* Sector 0 is on disk; now write everything that follows it. */
+        rc = bdrv_write(bs->file, 1, buf + 512, nb_sectors - 1);
+    }
+
+    return rc < 0 ? rc : rc + 512;
+}
+
+/* Asynchronous vectored read: forwards the request unchanged to bs->file. */
+static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverState *lower = bs->file;
+
+    return bdrv_aio_readv(lower, sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+/* State handed from raw_aio_writev() to raw_aio_writev_scrubbed(). */
+typedef struct RawScrubberBounce {
+    BlockDriverCompletionFunc *cb; /* caller's completion callback */
+    void *opaque;                  /* argument passed back to cb */
+    QEMUIOVector qiov;             /* vector describing the shortened request */
+} RawScrubberBounce;
+
+/*
+ * Completion callback for the asynchronous part of a scrubbed write.
+ *
+ * opaque is the RawScrubberBounce allocated by raw_aio_writev().  A
+ * negative ret is passed to the caller's callback unchanged; any other
+ * value is passed on with 512 added.  The bounce state is released
+ * afterwards.
+ * NOTE(review): confirm callers accept a positive success value here.
+ */
+static void raw_aio_writev_scrubbed(void *opaque, int ret)
+{
+    RawScrubberBounce *bounce = opaque;
+    int reported = ret;
+
+    if (reported >= 0) {
+        reported += 512;
+    }
+    bounce->cb(bounce->opaque, reported);
+
+    qemu_iovec_destroy(&bounce->qiov);
+    qemu_free(bounce);
+}
+
+/*
+ * Asynchronous vectored write.
+ *
+ * A request that check_write_unsafe() flags is split in two: sector 0 is
+ * written synchronously with its signature bytes zeroed, and the remaining
+ * nb_sectors - 1 sectors are submitted asynchronously through a second
+ * QEMUIOVector that skips the first 512 bytes.  Every other request is
+ * forwarded to bs->file untouched.
+ *
+ * Returns the AIOCB of the submitted request, or NULL if the synchronous
+ * scrub write or the asynchronous submission failed.
+ */
+static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
+    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    const uint8_t *first_buf;
+    int first_buf_index = 0, i;
+
+    /* This is probably being paranoid, but handle cases of zero size
+       vectors. */
+    for (i = 0; i < qiov->niov; i++) {
+        if (qiov->iov[i].iov_len) {
+            assert(qiov->iov[i].iov_len >= 512);
+            first_buf_index = i;
+            break;
+        }
+    }
+
+    first_buf = qiov->iov[first_buf_index].iov_base;
+
+    if (check_write_unsafe(bs, sector_num, first_buf, nb_sectors)) {
+        RawScrubberBounce *b;
+        BlockDriverAIOCB *acb;
+        int ret;
+
+        /* write the first sector using sync I/O */
+        ret = raw_write_scrubbed_bootsect(bs, first_buf);
+        if (ret < 0) {
+            return NULL;
+        }
+
+        /* adjust request to be everything but first sector */
+
+        b = qemu_malloc(sizeof(*b));
+        b->cb = cb;
+        b->opaque = opaque;
+
+        qemu_iovec_init(&b->qiov, qiov->nalloc);
+        qemu_iovec_concat(&b->qiov, qiov, qiov->size);
+
+        /* Skip the sector already written.  The cast avoids arithmetic on
+           a void pointer, which is a GNU extension. */
+        b->qiov.size -= 512;
+        b->qiov.iov[first_buf_index].iov_base =
+            (uint8_t *)b->qiov.iov[first_buf_index].iov_base + 512;
+        b->qiov.iov[first_buf_index].iov_len -= 512;
+
+        acb = bdrv_aio_writev(bs->file, sector_num + 1, &b->qiov,
+                              nb_sectors - 1, raw_aio_writev_scrubbed, b);
+        if (acb == NULL) {
+            /* Nothing was submitted, so raw_aio_writev_scrubbed() will
+               never run to release the bounce state; release it here. */
+            qemu_iovec_destroy(&b->qiov);
+            qemu_free(b);
+        }
+
+        return acb;
+    }
+
+    return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+/* Close hook: does nothing. */
+static void raw_close(BlockDriverState *bs)
+{
+    /* intentionally empty */
+}
+
+/* Synchronous flush: flushes bs->file; no status is reported. */
+static void raw_flush(BlockDriverState *bs)
+{
+    BlockDriverState *lower = bs->file;
+
+    bdrv_flush(lower);
+}
+
+/* Asynchronous flush: forwards the request, callback included, to bs->file. */
+static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
+    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverState *lower = bs->file;
+
+    return bdrv_aio_flush(lower, cb, opaque);
+}
+
+/* Length query: returns bdrv_getlength() of bs->file. */
+static int64_t raw_getlength(BlockDriverState *bs)
+{
+    int64_t length = bdrv_getlength(bs->file);
+
+    return length;
+}
+
+/* Truncate hook: applies bdrv_truncate() with the same offset to bs->file. */
+static int raw_truncate(BlockDriverState *bs, int64_t offset)
+{
+    int rc = bdrv_truncate(bs->file, offset);
+
+    return rc;
+}
+
+/*
+ * Probe hook.  All three arguments are ignored and 1 is always returned.
+ */
+static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    /* everything can be opened as raw image */
+    const int match = 1;
+
+    return match;
+}
+
+/* Media-presence query: returns bdrv_is_inserted() of bs->file. */
+static int raw_is_inserted(BlockDriverState *bs)
+{
+    int inserted = bdrv_is_inserted(bs->file);
+
+    return inserted;
+}
+
+/* Eject hook: forwards eject_flag to bs->file and returns the result. */
+static int raw_eject(BlockDriverState *bs, int eject_flag)
+{
+    BlockDriverState *lower = bs->file;
+
+    return bdrv_eject(lower, eject_flag);
+}
+
+/*
+ * Lock hook: forwards the lock request to bs->file.  Nothing from
+ * bdrv_set_locked() is propagated; this hook always returns 0.
+ */
+static int raw_set_locked(BlockDriverState *bs, int locked)
+{
+    BlockDriverState *lower = bs->file;
+
+    bdrv_set_locked(lower, locked);
+
+    return 0;
+}
+
+/* Synchronous ioctl: passes req and buf through to bs->file. */
+static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    BlockDriverState *lower = bs->file;
+
+    return bdrv_ioctl(lower, req, buf);
+}
+
+/* Asynchronous ioctl: passes every argument through to bs->file. */
+static BlockDriverAIOCB *raw_aio_ioctl(BlockDriverState *bs,
+    unsigned long int req, void *buf,
+    BlockDriverCompletionFunc *cb, void *opaque)
+{
+    BlockDriverState *lower = bs->file;
+
+    return bdrv_aio_ioctl(lower, req, buf, cb, opaque);
+}
+
+/* Create hook: delegates to bdrv_create_file() with the same arguments. */
+static int raw_create(const char *filename, QEMUOptionParameter *options)
+{
+    int rc = bdrv_create_file(filename, options);
+
+    return rc;
+}
+
+/* Creation options of the raw format: only the size; NULL entry ends the list. */
+static QEMUOptionParameter raw_create_options[] = {
+    { .name = BLOCK_OPT_SIZE, .type = OPT_SIZE, .help = "Virtual disk size" },
+    { NULL }
+};
+
+/* Zero-init query: returns bdrv_has_zero_init() of bs->file. */
+static int raw_has_zero_init(BlockDriverState *bs)
+{
+    int zero_init = bdrv_has_zero_init(bs->file);
+
+    return zero_init;
+}
+
+/* Driver table of the "raw" format; every hook below is defined in this file. */
+static BlockDriver bdrv_raw = {
+    .format_name        = "raw",
+
+    /* It's really 0, but we need to make qemu_malloc() happy */
+    .instance_size      = 1,
+
+    /* probing, lifecycle and creation */
+    .bdrv_probe         = raw_probe,
+    .bdrv_open          = raw_open,
+    .bdrv_close         = raw_close,
+    .bdrv_create        = raw_create,
+    .create_options     = raw_create_options,
+    .bdrv_has_zero_init = raw_has_zero_init,
+
+    /* synchronous I/O */
+    .bdrv_read          = raw_read,
+    .bdrv_write         = raw_write,
+    .bdrv_flush         = raw_flush,
+    .bdrv_getlength     = raw_getlength,
+    .bdrv_truncate      = raw_truncate,
+
+    /* asynchronous I/O */
+    .bdrv_aio_readv     = raw_aio_readv,
+    .bdrv_aio_writev    = raw_aio_writev,
+    .bdrv_aio_flush     = raw_aio_flush,
+
+    /* removable media and device pass-through */
+    .bdrv_is_inserted   = raw_is_inserted,
+    .bdrv_eject         = raw_eject,
+    .bdrv_set_locked    = raw_set_locked,
+    .bdrv_ioctl         = raw_ioctl,
+    .bdrv_aio_ioctl     = raw_aio_ioctl,
+};
+
+/* Registers the "raw" format driver; handed to block_init() below. */
+static void bdrv_raw_init(void)
+{
+    BlockDriver *const drv = &bdrv_raw;
+
+    bdrv_register(drv);
+}
+
+block_init(bdrv_raw_init);
diff --git a/block/vpc.c b/block/vpc.c
index ba482e9..e50509e 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -1,5 +1,5 @@
/*
- * Block driver for Conectix/Microsoft Virtual PC images
+ * Block driver for Connectix / Microsoft Virtual PC images
*
* Copyright (c) 2005 Alex Beregszaszi
* Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
@@ -150,20 +150,16 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
-static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
+static int vpc_open(BlockDriverState *bs, int flags)
{
BDRVVPCState *s = bs->opaque;
- int ret, i;
+ int i;
struct vhd_footer* footer;
struct vhd_dyndisk_header* dyndisk_header;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
- ret = bdrv_file_open(&s->hd, filename, flags);
- if (ret < 0)
- return ret;
-
- if (bdrv_pread(s->hd, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
+ if (bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
goto fail;
footer = (struct vhd_footer*) s->footer_buf;
@@ -174,7 +170,7 @@ static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
footer->checksum = 0;
if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
fprintf(stderr, "block-vpc: The header checksum of '%s' is "
- "incorrect.\n", filename);
+ "incorrect.\n", bs->filename);
// The visible size of a image in Virtual PC depends on the geometry
// rather than on the size stored in the footer (the size in the footer
@@ -182,7 +178,7 @@ static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
- if (bdrv_pread(s->hd, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE)
+ if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE)
!= HEADER_SIZE)
goto fail;
@@ -199,7 +195,7 @@ static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
s->pagetable = qemu_malloc(s->max_table_entries * 4);
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
- if (bdrv_pread(s->hd, s->bat_offset, s->pagetable,
+ if (bdrv_pread(bs->file, s->bat_offset, s->pagetable,
s->max_table_entries * 4) != s->max_table_entries * 4)
goto fail;
@@ -228,7 +224,6 @@ static int vpc_open(BlockDriverState *bs, const char *filename, int flags)
return 0;
fail:
- bdrv_delete(s->hd);
return -1;
}
@@ -266,7 +261,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
s->last_bitmap_offset = bitmap_offset;
memset(bitmap, 0xff, s->bitmap_size);
- bdrv_pwrite(s->hd, bitmap_offset, bitmap, s->bitmap_size);
+ bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
}
// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
@@ -316,7 +311,7 @@ static int rewrite_footer(BlockDriverState* bs)
BDRVVPCState *s = bs->opaque;
int64_t offset = s->free_data_block_offset;
- ret = bdrv_pwrite(s->hd, offset, s->footer_buf, HEADER_SIZE);
+ ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
if (ret < 0)
return ret;
@@ -351,7 +346,8 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
// Initialize the block's bitmap
memset(bitmap, 0xff, s->bitmap_size);
- bdrv_pwrite(s->hd, s->free_data_block_offset, bitmap, s->bitmap_size);
+ bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
+ s->bitmap_size);
// Write new footer (the old one will be overwritten)
s->free_data_block_offset += s->block_size + s->bitmap_size;
@@ -362,7 +358,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
// Write BAT entry to disk
bat_offset = s->bat_offset + (4 * index);
bat_value = be32_to_cpu(s->pagetable[index]);
- ret = bdrv_pwrite(s->hd, bat_offset, &bat_value, 4);
+ ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
if (ret < 0)
goto fail;
@@ -379,21 +375,30 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
BDRVVPCState *s = bs->opaque;
int ret;
int64_t offset;
+ int64_t sectors, sectors_per_block;
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 0);
+ sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+ sectors = sectors_per_block - (sector_num % sectors_per_block);
+ if (sectors > nb_sectors) {
+ sectors = nb_sectors;
+ }
+
if (offset == -1) {
- memset(buf, 0, 512);
+ memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
} else {
- ret = bdrv_pread(s->hd, offset, buf, 512);
- if (ret != 512)
+ ret = bdrv_pread(bs->file, offset, buf,
+ sectors * BDRV_SECTOR_SIZE);
+ if (ret != sectors * BDRV_SECTOR_SIZE) {
return -1;
+ }
}
- nb_sectors--;
- sector_num++;
- buf += 512;
+ nb_sectors -= sectors;
+ sector_num += sectors;
+ buf += sectors * BDRV_SECTOR_SIZE;
}
return 0;
}
@@ -403,24 +408,32 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
{
BDRVVPCState *s = bs->opaque;
int64_t offset;
+ int64_t sectors, sectors_per_block;
int ret;
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 1);
+ sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
+ sectors = sectors_per_block - (sector_num % sectors_per_block);
+ if (sectors > nb_sectors) {
+ sectors = nb_sectors;
+ }
+
if (offset == -1) {
offset = alloc_block(bs, sector_num);
if (offset < 0)
return -1;
}
- ret = bdrv_pwrite(s->hd, offset, buf, 512);
- if (ret != 512)
+ ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
+ if (ret != sectors * BDRV_SECTOR_SIZE) {
return -1;
+ }
- nb_sectors--;
- sector_num++;
- buf += 512;
+ nb_sectors -= sectors;
+ sector_num += sectors;
+ buf += sectors * BDRV_SECTOR_SIZE;
}
return 0;
@@ -470,9 +483,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
}
}
- // Note: Rounding up deviates from the Virtual PC behaviour
- // However, we need this to avoid truncating images in qemu-img convert
- *cyls = (cyls_times_heads + *heads - 1) / *heads;
+ *cyls = cyls_times_heads / *heads;
return 0;
}
@@ -484,9 +495,9 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
struct vhd_dyndisk_header* dyndisk_header =
(struct vhd_dyndisk_header*) buf;
int fd, i;
- uint16_t cyls;
- uint8_t heads;
- uint8_t secs_per_cyl;
+ uint16_t cyls = 0;
+ uint8_t heads = 0;
+ uint8_t secs_per_cyl = 0;
size_t block_size, num_bat_entries;
int64_t total_sectors = 0;
@@ -503,18 +514,23 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
if (fd < 0)
return -EIO;
- // Calculate matching total_size and geometry
- if (calculate_geometry(total_sectors, &cyls, &heads, &secs_per_cyl))
- return -EFBIG;
+ /* Calculate matching total_size and geometry. Increase the number of
+ sectors requested until we get enough (or fail). */
+ for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+ if (calculate_geometry(total_sectors + i,
+ &cyls, &heads, &secs_per_cyl)) {
+ return -EFBIG;
+ }
+ }
total_sectors = (int64_t) cyls * heads * secs_per_cyl;
// Prepare the Hard Disk Footer
memset(buf, 0, 1024);
- strncpy(footer->creator, "conectix", 8);
+ memcpy(footer->creator, "conectix", 8);
// TODO Check if "qemu" creator_app is ok for VPC
- strncpy(footer->creator_app, "qemu", 4);
- strncpy(footer->creator_os, "Wi2k", 4);
+ memcpy(footer->creator_app, "qemu", 4);
+ memcpy(footer->creator_os, "Wi2k", 4);
footer->features = be32_to_cpu(0x02);
footer->version = be32_to_cpu(0x00010000);
@@ -563,7 +579,7 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
// Prepare the Dynamic Disk Header
memset(buf, 0, 1024);
- strncpy(dyndisk_header->magic, "cxsparse", 8);
+ memcpy(dyndisk_header->magic, "cxsparse", 8);
dyndisk_header->data_offset = be64_to_cpu(0xFFFFFFFF);
dyndisk_header->table_offset = be64_to_cpu(3 * 512);
@@ -590,7 +606,6 @@ static void vpc_close(BlockDriverState *bs)
#ifdef CACHE
qemu_free(s->pageentry_u8);
#endif
- bdrv_delete(s->hd);
}
static QEMUOptionParameter vpc_create_options[] = {
diff --git a/block/vvfat.c b/block/vvfat.c
index de50dc7..6d61c2e 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -512,7 +512,7 @@ static inline uint8_t fat_chksum(const direntry_t* entry)
for(i=0;i<11;i++) {
unsigned char c;
- c = (i < 8) ? entry->name[i] : entry->extension[i-8];
+ c = (i <= 8) ? entry->name[i] : entry->extension[i-8];
chksum=(((chksum&0xfe)>>1)|((chksum&0x01)?0x80:0)) + c;
}
@@ -2799,8 +2799,11 @@ static int enable_write_target(BDRVVVFATState *s)
if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0)
return -1;
s->qcow = bdrv_new("");
- if (s->qcow == NULL || bdrv_open(s->qcow, s->qcow_filename, 0) < 0)
+ if (s->qcow == NULL ||
+ bdrv_open(s->qcow, s->qcow_filename, BDRV_O_RDWR, bdrv_qcow) < 0)
+ {
return -1;
+ }
#ifndef _WIN32
unlink(s->qcow_filename);
@@ -2828,7 +2831,7 @@ static void vvfat_close(BlockDriverState *bs)
static BlockDriver bdrv_vvfat = {
.format_name = "vvfat",
.instance_size = sizeof(BDRVVVFATState),
- .bdrv_open = vvfat_open,
+ .bdrv_file_open = vvfat_open,
.bdrv_read = vvfat_read,
.bdrv_write = vvfat_write,
.bdrv_close = vvfat_close,
@@ -2866,7 +2869,7 @@ static void checkpoint(void) {
return;
/* avoid compiler warnings: */
hexdump(NULL, 100);
- remove_mapping(vvv, NULL);
+ remove_mapping(vvv, 0);
print_mapping(NULL);
print_direntry(NULL);
}