kvm tools: Add QCOW write support
The patch adds QCOW write support for both the versions of QCOW.
The code is based on the QCOW image format specifications which are available on:
http://people.gnome.org/~markmc/qcow-image-format-version-1.html
http://people.gnome.org/~markmc/qcow-image-format.html
Signed-off-by: Prasad Joshi <prasadjoshi124@gmail.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
diff --git a/qcow.c b/qcow.c
index 1a77b44..103a5a1 100644
--- a/qcow.c
+++ b/qcow.c
@@ -14,6 +14,7 @@
#include <linux/byteorder.h>
#include <linux/types.h>
+#include <linux/kernel.h>
static inline u64 get_l1_index(struct qcow *q, u64 offset)
{
@@ -130,8 +131,185 @@
return -1;
}
+static inline u64 file_size(int fd)
+{
+ struct stat st;
+ if (fstat(fd, &st) < 0)
+ return 0;
+ return st.st_size;
+}
+
+static inline int pwrite_sync(int fd, void *buf, size_t count, off_t offset)
+{
+ if (pwrite_in_full(fd, buf, count, offset) < 0)
+ return -1;
+ if (sync_file_range(fd, offset, count,
+ SYNC_FILE_RANGE_WAIT_BEFORE |
+ SYNC_FILE_RANGE_WRITE) < 0)
+ return -1;
+ return 0;
+}
+
+/* Writes a level 2 table at the end of the file. */
+static u64 qcow1_write_l2_table(struct qcow *q, u64 *table)
+{
+ struct qcow_header *header = q->header;
+ u64 sz;
+ u64 clust_sz;
+ u64 off;
+ u64 f_sz;
+
+ f_sz = file_size(q->fd);
+ if (!f_sz)
+ return 0;
+
+ sz = 1 << header->l2_bits;
+ clust_sz = 1 << header->cluster_bits;
+ off = ALIGN(f_sz, clust_sz);
+
+ if (pwrite_sync(q->fd, table, sz * sizeof(u64), off) < 0)
+ return 0;
+ return off;
+}
+
+/*
+ * QCOW file might grow during a write operation. Not only data but metadata is
+ * also written at the end of the file. Therefore it is necessary to ensure
+ * every write is committed to disk. Hence we use uses pwrite_sync() to
+ * synchronize the in-core state of QCOW image to disk.
+ *
+ * We also try to restore the image to a consistent state if the metdata
+ * operation fails. The two metadat operations are: level 1 and level 2 table
+ * update. If either of them fails the image is truncated to a consistent state.
+ */
+static ssize_t qcow1_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
+{
+ struct qcow_header *header = q->header;
+ struct qcow_table *table = &q->table;
+
+ u64 l2t_sz;
+ u64 clust_sz;
+ u64 l1t_idx;
+ u64 l2t_idx;
+ u64 clust_off;
+ u64 len;
+ u64 *l2t;
+ u64 f_sz;
+ u64 l2t_off;
+ u64 t;
+ u64 clust_start;
+ bool update_meta = false;
+
+ l2t_sz = 1 << header->l2_bits;
+ clust_sz = 1 << header->cluster_bits;
+
+ l1t_idx = get_l1_index(q, offset);
+ if (l1t_idx >= table->table_size)
+ goto error;
+
+ l2t_idx = get_l2_index(q, offset);
+ if (l2t_idx >= l2t_sz)
+ goto error;
+
+ clust_off = get_cluster_offset(q, offset);
+ if (clust_off >= clust_sz)
+ goto error;
+
+ len = clust_sz - clust_off;
+ if (len > src_len)
+ len = src_len;
+
+ l2t = calloc(l2t_sz, sizeof(u64));
+ if (!l2t)
+ goto error;
+
+ l2t_off = table->l1_table[l1t_idx] & ~header->oflag_mask;
+ if (l2t_off) {
+ if (pread_in_full(q->fd, l2t, l2t_sz * sizeof(u64), l2t_off) < 0)
+ goto free_l2;
+ } else {
+ /* capture the state of the consistent QCOW image */
+ f_sz = file_size(q->fd);
+ if (!f_sz)
+ goto free_l2;
+
+ /* Write the l2 table of 0's at the end of the file */
+ l2t_off = qcow1_write_l2_table(q, l2t);
+ if (!l2t_off)
+ goto free_l2;
+
+ /* Metadata update: update on disk level 1 table */
+ t = cpu_to_be64(l2t_off);
+ if (pwrite_sync(q->fd, &t, sizeof(t), header->l1_table_offset +
+ l1t_idx * sizeof(u64)) < 0) {
+ /* restore file to consistent state */
+ if (ftruncate(q->fd, f_sz) < 0)
+ goto free_l2;
+ goto free_l2;
+ }
+
+ /* update the in-core entry */
+ table->l1_table[l1t_idx] = l2t_off;
+ }
+
+ /* capture the state of the consistent QCOW image */
+ f_sz = file_size(q->fd);
+ if (!f_sz)
+ goto free_l2;
+
+ clust_start = be64_to_cpu(l2t[l2t_idx]) & ~header->oflag_mask;
+ free(l2t);
+ if (!clust_start) {
+ clust_start = ALIGN(f_sz, clust_sz);
+ update_meta = true;
+ }
+
+ /* write actual data */
+ if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
+ goto error;
+
+ if (update_meta) {
+ t = cpu_to_be64(clust_start);
+ if (pwrite_sync(q->fd, &t, sizeof(t), l2t_off +
+ l2t_idx * sizeof(u64)) < 0) {
+ /* restore the file to consistent state */
+ if (ftruncate(q->fd, f_sz) < 0)
+ goto error;
+ goto error;
+ }
+ }
+ return len;
+free_l2:
+ free(l2t);
+error:
+ return -1;
+}
+
static int qcow1_write_sector(struct disk_image *disk, u64 sector, void *src, u32 src_len)
{
+ struct qcow *q = disk->priv;
+ struct qcow_header *header = q->header;
+ char *buf = src;
+ ssize_t nr_write;
+ u64 offset;
+ ssize_t nr;
+
+ nr_write = 0;
+ offset = sector << SECTOR_SHIFT;
+ while (nr_write < src_len) {
+ if (offset >= header->size)
+ goto error;
+
+ nr = qcow1_write_cluster(q, offset, buf, src_len - nr_write);
+ if (nr < 0)
+ goto error;
+
+ nr_write += nr;
+ buf += nr;
+ offset += nr;
+ }
+ return 0;
+error:
return -1;
}