On Mon, Jun 08, 2015 at 06:21:20PM +0300, Vladimir Sementsov-Ogievskiy wrote:

I noticed a corner case, it's probably not a problem in practice:

Since the dirty bitmap is stored with the help of a BlockDriverState
(and its bs->file), it's possible that writing the bitmap will cause
bits in the bitmap to be dirtied!

> diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
> new file mode 100644
> index 0000000..bc0167c
> --- /dev/null
> +++ b/block/qcow2-dirty-bitmap.c
> @@ -0,0 +1,503 @@
> +/*
> + * Dirty bitmpas for the QCOW version 2 format

s/bitmpas/bitmaps/

> +int qcow2_read_dirty_bitmaps(BlockDriverState *bs)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    QCowDirtyBitmapHeader h;
> +    QCowDirtyBitmap *bm;
> +    int i, name_size;
> +    int64_t offset;
> +    int ret;
> +
> +    if (!s->nb_dirty_bitmaps) {
> +        s->dirty_bitmaps = NULL;
> +        s->dirty_bitmaps_size = 0;
> +        return 0;
> +    }
> +
> +    offset = s->dirty_bitmaps_offset;
> +    s->dirty_bitmaps = g_new0(QCowDirtyBitmap, s->nb_dirty_bitmaps);

Please use g_try_new0() and handle the NULL return value.
g_new/g_malloc abort the process if there is not enough memory.  When
opening untrusted image files it is possible that large values will be
encountered and allocations fail.  In that case .bdrv_open() should fail
instead of killing QEMU.

Using g_try_*() in QEMU is not an exact science but large data buffers
or allocations where external inputs influence the size are good
candidates.

Other allocations in these patches should do that too.

> +    /* Allocate space for the new dirty bitmap table */
> +    dirty_bitmaps_offset = qcow2_alloc_clusters(bs, dirty_bitmaps_size);
> +    offset = dirty_bitmaps_offset;
> +    if (offset < 0) {
> +        ret = offset;
> +        goto fail;
> +    }
> +    ret = bdrv_flush(bs);

Not sure there is a need for this.  The clusters are inaccessible since
no metadata points to them yet.  Therefore we don't need to flush yet
because there is no risk of seeing an inconsistent state.

> +    /* Write all dirty bitmaps to the new table */
> +    for (i = 0; i < s->nb_dirty_bitmaps; i++) {
> +        bm = s->dirty_bitmaps + i;
> +        memset(&h, 0, sizeof(h));
> +        h.l1_table_offset = cpu_to_be64(bm->l1_table_offset);
> +        h.l1_size = cpu_to_be32(bm->l1_size);
> +        h.bitmap_granularity = cpu_to_be32(bm->bitmap_granularity);
> +        h.bitmap_size = cpu_to_be64(bm->bitmap_size);
> +
> +        name_size = strlen(bm->name);
> +        assert(name_size <= UINT16_MAX);
> +        h.name_size = cpu_to_be16(name_size);
> +        offset = align_offset(offset, 8);
> +
> +        ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += sizeof(h);
> +
> +        ret = bdrv_pwrite(bs->file, offset, bm->name, name_size);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        offset += name_size;
> +    }

If files have many thousands of bitmaps then this loop will be slow.  It
would be much faster to write out 1 cluster at a time.  This probably
doesn't matter in practice since this function doesn't get called much
and normally files will have few bitmaps.

> +
> +    /*
> +     * Update the header extension to point to the new dirty bitmap table. This
> +     * requires the new table and its refcounts to be stable on disk.
> +     */
> +    ret = bdrv_flush(bs);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    s->dirty_bitmaps_offset = dirty_bitmaps_offset;
> +    s->dirty_bitmaps_size = dirty_bitmaps_size;
> +    ret = qcow2_update_header(bs);
> +    if (ret < 0) {
> +        fprintf(stderr, "Could not update qcow2 header\n");
> +        goto fail;
> +    }

qcow2_update_header() does not flush.  We need to flush before freeing
the old clusters in order to guarantee that the file now points to the
new clusters.

> +uint8_t *qcow2_dirty_bitmap_load(BlockDriverState *bs,
> +                            const char *name, uint64_t size,
> +                            int granularity)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int i, dirty_bitmap_index, ret;
> +    uint64_t offset;
> +    QCowDirtyBitmap *bm;
> +    uint64_t *l1_table;
> +    uint8_t *buf;
> +
> +    dirty_bitmap_index = find_dirty_bitmap_by_name(bs, name);
> +    if (dirty_bitmap_index < 0) {
> +        return NULL;
> +    }
> +    bm = &s->dirty_bitmaps[dirty_bitmap_index];
> +
> +    if (size != bm->bitmap_size || granularity != bm->bitmap_granularity) {
> +        return NULL;
> +    }
> +
> +    l1_table = g_malloc(bm->l1_size * sizeof(uint64_t));

Please use g_try_malloc() with NULL handling.

> +    ret = bdrv_pread(bs->file, bm->l1_table_offset, l1_table,
> +                     bm->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    buf = g_malloc0(bm->l1_size * s->cluster_size);

What is the maximum l1_size value?  cluster_size and l1_size are 32-bit
so with 64 KB cluster_size this overflows if l1_size > 65535.  Do you
want to cast to size_t?

> +    for (i = 0; i < bm->l1_size; ++i) {
> +        offset = be64_to_cpu(l1_table[i]);
> +        if (!(offset & 1)) {

This doesn't honor the 0 offset means unallocated cluster behavior for
the Standard Cluster Descriptor from the qcow2 specification.

> +            ret = bdrv_pread(bs->file, offset, buf + i * s->cluster_size,
> +                             s->cluster_size);
> +            if (ret < 0) {
> +                goto fail;

Missing g_free(buf)

> +    l1_table = g_try_new(uint64_t, bm->l1_size);
> +    if (l1_table == NULL) {
> +        ret = -ENOMEM;
> +        goto fail;
> +    }
> +
> +    /* initialize with zero clusters */
> +    for (i = 0; i < s->l1_size; i++) {
> +        l1_table[i] = cpu_to_be64(1);
> +    }
> +
> +    ret = qcow2_pre_write_overlap_check(bs, 0, bm->l1_table_offset,
> +                                        s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    ret = bdrv_pwrite(bs->file, bm->l1_table_offset, l1_table,
> +                      s->l1_size * sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto fail;
> +    }

Flush is needed here to ensure the bitmap has reached disk before the
dirty_bitmaps array is written out.

> +
> +    g_free(l1_table);
> +    l1_table = NULL;
> +
> +    /* Append the new dirty bitmap to the dirty bitmap list */
> +    new_dirty_bitmap_list = g_new(QCowDirtyBitmap, s->nb_dirty_bitmaps + 1);
> +    if (s->dirty_bitmaps) {
> +        memcpy(new_dirty_bitmap_list, s->dirty_bitmaps,
> +               s->nb_dirty_bitmaps * sizeof(QCowDirtyBitmap));
> +        old_dirty_bitmap_list = s->dirty_bitmaps;
> +    }
> +    s->dirty_bitmaps = new_dirty_bitmap_list;
> +    s->dirty_bitmaps[s->nb_dirty_bitmaps++] = *bm;
> +
> +    ret = qcow2_write_dirty_bitmaps(bs);
> +    if (ret < 0) {
> +        g_free(s->dirty_bitmaps);
> +        s->dirty_bitmaps = old_dirty_bitmap_list;
> +        s->nb_dirty_bitmaps--;
> +        goto fail;
> +    }
> +
> +    g_free(old_dirty_bitmap_list);
> +
> +    return 0;
> +
> +fail:
> +    g_free(bm->name);
> +    g_free(l1_table);
> +

The l1_table clusters should be freed on failure.