Linux-EROFS Archive mirror
* [PATCH] erofs-utils: mkfs: write lcluster index after compression
@ 2023-11-06 12:58 Yifan Zhao
  2023-11-07  9:23 ` [PATCH v2] erofs-utils: mkfs: generate on-disk indexes " Yifan Zhao
  0 siblings, 1 reply; 7+ messages in thread
From: Yifan Zhao @ 2023-11-06 12:58 UTC
  To: linux-erofs; +Cc: Yifan Zhao

Currently, mkfs writes the lcluster indexes (in their on-disk format) in
an extent-by-extent manner during the compression of a file, which is
inflexible if we want to modify the indexes later in the multi-threaded
compression scenario.

In order to support multi-threaded compression in mkfs, this patch moves
the writing of the lcluster indexes to after the file's compression has
completed.
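
For illustration, a minimal sketch of the deferred-index idea (editor's
sketch, not patch code: the sketch_* names are hypothetical and only the
list helpers are taken from erofs-utils' <erofs/list.h>):

#include <stdlib.h>
#include <errno.h>
#include <erofs/list.h>

struct sketch_extent {
	unsigned int length;
	struct list_head list;
};

/* queue an extent instead of emitting its index immediately */
static int sketch_queue_extent(struct list_head *elist, unsigned int length)
{
	struct sketch_extent *e = malloc(sizeof(*e));

	if (!e)
		return -ENOMEM;
	e->length = length;
	list_add_tail(&e->list, elist);
	return 0;
}

/* one pass over the finished file: generate all indexes, then free */
static void sketch_write_indexes(struct list_head *elist)
{
	struct sketch_extent *e, *n;

	list_for_each_entry_safe(e, n, elist, list) {
		/* ... emit the on-disk lcluster index(es) for e ... */
		list_del(&e->list);
		free(e);
	}
}

Deferring the writes this way is what later allows per-thread sub-lists
of extents to be merged before any on-disk index is generated.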

Signed-off-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn>
---
 include/erofs/dedupe.h |   4 +-
 lib/compress.c         | 366 ++++++++++++++++++++++++-----------------
 lib/dedupe.c           |   1 +
 3 files changed, 223 insertions(+), 148 deletions(-)

diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
index 153bd4c..a3e365b 100644
--- a/include/erofs/dedupe.h
+++ b/include/erofs/dedupe.h
@@ -16,7 +16,9 @@ struct z_erofs_inmem_extent {
 	erofs_blk_t blkaddr;
 	unsigned int compressedblks;
 	unsigned int length;
-	bool raw, partial;
+	bool raw, partial, reset_clusterofs;
+
+	struct list_head list;
 };
 
 struct z_erofs_dedupe_ctx {
diff --git a/lib/compress.c b/lib/compress.c
index 4eac363..07ba186 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -30,7 +30,7 @@ struct erofs_compress_cfg {
 
 struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
-	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
+	struct list_head elist;		/* (lookahead) extent list */
 
 	struct erofs_inode *inode;
 	struct erofs_compress_cfg *ccfg;
@@ -49,121 +49,151 @@ struct z_erofs_vle_compress_ctx {
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	Z_EROFS_FULL_INDEX_ALIGN(0)
 
-static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
+static void z_erofs_update_clusterofs(struct z_erofs_vle_compress_ctx *ctx)
 {
-	const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN;
-	struct z_erofs_lcluster_index di;
+	struct z_erofs_inmem_extent *e;
+	unsigned int blksz = erofs_blksiz(ctx->inode->sbi);
+	unsigned int offset;
+
+	if (list_empty(&ctx->elist))
+		return;
 
-	if (!ctx->clusterofs)
+	e = list_last_entry(&ctx->elist, struct z_erofs_inmem_extent, list);
+	if (e->length == 0)
 		return;
 
-	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
-	di.di_u.blkaddr = 0;
-	di.di_advise = cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT);
+	offset = e->length + ctx->clusterofs;
 
-	memcpy(ctx->metacur, &di, sizeof(di));
-	ctx->metacur += sizeof(di);
+	if (offset < blksz)
+		ctx->clusterofs = 0;
+	else
+		ctx->clusterofs = offset % blksz;
 }
 
 static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 {
 	struct erofs_inode *inode = ctx->inode;
 	struct erofs_sb_info *sbi = inode->sbi;
-	unsigned int clusterofs = ctx->clusterofs;
-	unsigned int count = ctx->e.length;
-	unsigned int d0 = 0, d1 = (clusterofs + count) / erofs_blksiz(sbi);
+	struct z_erofs_inmem_extent *e, *n;
+	unsigned int clusterofs = 0;
+	unsigned int count;
+	unsigned int d0, d1;
 	struct z_erofs_lcluster_index di;
 	unsigned int type, advise;
 
-	if (!count)
+	if (list_empty(&ctx->elist))
 		return;
+
+	list_for_each_entry_safe(e, n, &ctx->elist, list) {
+		count = e->length;
+		if (!count)
+			goto free_entry;
 
-	ctx->e.length = 0;	/* mark as written first */
-	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
+		if (e->reset_clusterofs)
+			clusterofs = 0;
 
-	/* whether the tail-end (un)compressed block or not */
-	if (!d1) {
-		/*
-		 * A lcluster cannot have three parts with the middle one which
-		 * is well-compressed for !ztailpacking cases.
-		 */
-		DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
-		DBG_BUGON(ctx->e.partial);
-		type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
-			Z_EROFS_LCLUSTER_TYPE_HEAD1;
-		advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
-		di.di_advise = cpu_to_le16(advise);
-
-		if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-		    !ctx->e.compressedblks)
-			di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
-		else
-			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
-		memcpy(ctx->metacur, &di, sizeof(di));
-		ctx->metacur += sizeof(di);
+		d0 = 0;
+		d1 = (clusterofs + count) / erofs_blksiz(sbi);
 
-		/* don't add the final index if the tail-end block exists */
-		ctx->clusterofs = 0;
-		return;
-	}
-
-	do {
-		advise = 0;
-		/* XXX: big pcluster feature should be per-inode */
-		if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) {
-			type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
-			di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
-						       Z_EROFS_LI_D0_CBLKCNT);
-			di.di_u.delta[1] = cpu_to_le16(d1);
-		} else if (d0) {
-			type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
+		di.di_clusterofs = cpu_to_le16(clusterofs);
 
+		/* whether the tail-end (un)compressed block or not */
+		if (!d1) {
 			/*
-			 * If the |Z_EROFS_VLE_DI_D0_CBLKCNT| bit is set, parser
-			 * will interpret |delta[0]| as size of pcluster, rather
-			 * than distance to last head cluster. Normally this
-			 * isn't a problem, because uncompressed extent size are
-			 * below Z_EROFS_VLE_DI_D0_CBLKCNT * BLOCK_SIZE = 8MB.
-			 * But with large pcluster it's possible to go over this
-			 * number, resulting in corrupted compressed indices.
-			 * To solve this, we replace d0 with
-			 * Z_EROFS_VLE_DI_D0_CBLKCNT-1.
-			 */
-			if (d0 >= Z_EROFS_LI_D0_CBLKCNT)
-				di.di_u.delta[0] = cpu_to_le16(
-						Z_EROFS_LI_D0_CBLKCNT - 1);
-			else
-				di.di_u.delta[0] = cpu_to_le16(d0);
-			di.di_u.delta[1] = cpu_to_le16(d1);
-		} else {
-			type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+			 * A lcluster cannot have three parts with the middle one which
+			 * is well-compressed for !ztailpacking cases.
+			 */
+			DBG_BUGON(!e->raw && !cfg.c_ztailpacking && !cfg.c_fragments);
+			DBG_BUGON(e->partial);
+			type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
 				Z_EROFS_LCLUSTER_TYPE_HEAD1;
+			advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
+			di.di_advise = cpu_to_le16(advise);
 
 			if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-			    !ctx->e.compressedblks)
+				!e->compressedblks)
 				di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
 			else
-				di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+				di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
+			memcpy(ctx->metacur, &di, sizeof(di));
+			ctx->metacur += sizeof(di);
 
-			if (ctx->e.partial) {
-				DBG_BUGON(ctx->e.raw);
-				advise |= Z_EROFS_LI_PARTIAL_REF;
-			}
+			/* don't add the final index if the tail-end block exists */
+			clusterofs = 0;
+			goto free_entry;
 		}
-		advise |= type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
-		di.di_advise = cpu_to_le16(advise);
 
-		memcpy(ctx->metacur, &di, sizeof(di));
-		ctx->metacur += sizeof(di);
+		do {
+			advise = 0;
+			/* XXX: big pcluster feature should be per-inode */
+			if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) {
+				type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
+				di.di_u.delta[0] = cpu_to_le16(e->compressedblks |
+								Z_EROFS_LI_D0_CBLKCNT);
+				di.di_u.delta[1] = cpu_to_le16(d1);
+			} else if (d0) {
+				type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
+
+				/*
+				 * If the |Z_EROFS_VLE_DI_D0_CBLKCNT| bit is set, parser
+				 * will interpret |delta[0]| as size of pcluster, rather
+				 * than distance to last head cluster. Normally this
+				 * isn't a problem, because uncompressed extent size are
+				 * below Z_EROFS_VLE_DI_D0_CBLKCNT * BLOCK_SIZE = 8MB.
+				 * But with large pcluster it's possible to go over this
+				 * number, resulting in corrupted compressed indices.
+				 * To solve this, we replace d0 with
+				 * Z_EROFS_VLE_DI_D0_CBLKCNT-1.
+				 */
+				if (d0 >= Z_EROFS_LI_D0_CBLKCNT)
+					di.di_u.delta[0] = cpu_to_le16(
+							Z_EROFS_LI_D0_CBLKCNT - 1);
+				else
+					di.di_u.delta[0] = cpu_to_le16(d0);
+				di.di_u.delta[1] = cpu_to_le16(d1);
+			} else {
+				type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+					Z_EROFS_LCLUSTER_TYPE_HEAD1;
 
-		count -= erofs_blksiz(sbi) - clusterofs;
-		clusterofs = 0;
+				if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+					!e->compressedblks)
+					di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
+				else
+					di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
 
-		++d0;
-		--d1;
-	} while (clusterofs + count >= erofs_blksiz(sbi));
+				if (e->partial) {
+					DBG_BUGON(e->raw);
+					advise |= Z_EROFS_LI_PARTIAL_REF;
+				}
+			}
+			advise |= type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
+			di.di_advise = cpu_to_le16(advise);
 
-	ctx->clusterofs = clusterofs + count;
+			memcpy(ctx->metacur, &di, sizeof(di));
+			ctx->metacur += sizeof(di);
+
+			count -= erofs_blksiz(sbi) - clusterofs;
+			clusterofs = 0;
+
+			++d0;
+			--d1;
+		} while (clusterofs + count >= erofs_blksiz(sbi));
+
+		clusterofs = count;
+
+free_entry:
+		list_del(&e->list);
+		free(e);
+	}
+
+	if (clusterofs) {
+		di.di_clusterofs = cpu_to_le16(clusterofs);
+		di.di_u.blkaddr = 0;
+		di.di_advise = cpu_to_le16(Z_EROFS_LCLUSTER_TYPE_PLAIN <<
+					   Z_EROFS_LI_LCLUSTER_TYPE_BIT);
+		memcpy(ctx->metacur, &di, sizeof(di));
+		ctx->metacur += sizeof(di);
+	}
 }
 
 static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
@@ -172,8 +202,19 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 	struct erofs_inode *inode = ctx->inode;
 	const unsigned int lclustermask = (1 << inode->z_logical_clusterbits) - 1;
 	struct erofs_sb_info *sbi = inode->sbi;
+	struct z_erofs_inmem_extent *e, *newe;
+	int elen = 0;
 	int ret = 0;
 
+	if (list_empty(&ctx->elist)) {
+		e = NULL;
+		elen = 0;
+	} else {
+		e = list_last_entry(&ctx->elist, struct z_erofs_inmem_extent,
+				    list);
+		elen = e->length;
+	}
+
 	/*
 	 * No need dedupe for packed inode since it is composed of
 	 * fragments which have already been deduplicated.
@@ -184,12 +225,12 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 	do {
 		struct z_erofs_dedupe_ctx dctx = {
 			.start = ctx->queue + ctx->head - ({ int rc;
-				if (ctx->e.length <= erofs_blksiz(sbi))
+				if (elen <= erofs_blksiz(sbi))
 					rc = 0;
-				else if (ctx->e.length - erofs_blksiz(sbi) >= ctx->head)
+				else if (elen - erofs_blksiz(sbi) >= ctx->head)
 					rc = ctx->head;
 				else
-					rc = ctx->e.length - erofs_blksiz(sbi);
+					rc = elen - erofs_blksiz(sbi);
 				rc; }),
 			.end = ctx->queue + ctx->head + *len,
 			.cur = ctx->queue + ctx->head,
@@ -206,25 +247,25 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 		 * decompresssion could be done as another try in practice.
 		 */
 		if (dctx.e.compressedblks > 1 &&
-		    ((ctx->clusterofs + ctx->e.length - delta) & lclustermask) +
+		    ((ctx->clusterofs + elen - delta) & lclustermask) +
 			dctx.e.length < 2 * (lclustermask + 1))
 			break;
 
 		if (delta) {
 			DBG_BUGON(delta < 0);
-			DBG_BUGON(!ctx->e.length);
+			DBG_BUGON(!e);
 
 			/*
 			 * For big pcluster dedupe, if we decide to shorten the
 			 * previous big pcluster, make sure that the previous
 			 * CBLKCNT is still kept.
 			 */
-			if (ctx->e.compressedblks > 1 &&
-			    (ctx->clusterofs & lclustermask) + ctx->e.length
+			if (e->compressedblks > 1 &&
+			    (ctx->clusterofs & lclustermask) + e->length
 				- delta < 2 * (lclustermask + 1))
 				break;
-			ctx->e.partial = true;
-			ctx->e.length -= delta;
+			e->partial = true;
+			e->length -= delta;
 		}
 
 		/* fall back to noncompact indexes for deduplication */
@@ -237,8 +278,16 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 		erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks",
 			  dctx.e.length, dctx.e.raw ? "un" : "",
 			  delta, dctx.e.blkaddr, dctx.e.compressedblks);
-		z_erofs_write_indexes(ctx);
-		ctx->e = dctx.e;
+		z_erofs_update_clusterofs(ctx);
+
+		newe = malloc(sizeof(*newe));
+		if (!newe) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		memcpy(newe, &dctx.e, sizeof(*newe));
+		list_add_tail(&newe->list, &ctx->elist);
+
 		ctx->head += dctx.e.length - delta;
 		DBG_BUGON(*len < dctx.e.length - delta);
 		*len -= dctx.e.length - delta;
@@ -258,12 +307,12 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 	} while (*len);
 
 out:
-	z_erofs_write_indexes(ctx);
+	z_erofs_update_clusterofs(ctx);
 	return ret;
 }
 
 static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
-				     unsigned int *len, char *dst)
+				     unsigned int *len, char *dst, struct z_erofs_inmem_extent *e)
 {
 	int ret;
 	struct erofs_sb_info *sbi = ctx->inode->sbi;
@@ -275,6 +324,7 @@ static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
 		ctx->head -= ctx->clusterofs;
 		*len += ctx->clusterofs;
 		ctx->clusterofs = 0;
+		e->reset_clusterofs = true;
 	}
 
 	count = min(erofs_blksiz(sbi), *len);
@@ -400,10 +450,11 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 	unsigned int blksz = erofs_blksiz(sbi);
 	char *const dst = dstbuf + blksz;
 	struct erofs_compress *const h = &ctx->ccfg->handle;
+	struct z_erofs_inmem_extent *e = NULL;
 	unsigned int len = ctx->tail - ctx->head;
 	bool is_packed_inode = erofs_is_packed_inode(inode);
 	bool final = !ctx->remaining;
-	int ret;
+	int ret = 0;
 
 	while (len) {
 		bool may_packing = (cfg.c_fragments && final &&
@@ -416,39 +467,44 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 		if (z_erofs_compress_dedupe(ctx, &len) && !final)
 			break;
 
+		e = malloc(sizeof(*e));
+		if (!e)
+			return -ENOMEM;
+		e->reset_clusterofs = false;
+
 		if (len <= ctx->pclustersize) {
 			if (!final || !len)
-				break;
+				goto free_extent;
 			if (may_packing) {
 				if (inode->fragment_size && !fix_dedupedfrag) {
 					ctx->pclustersize = roundup(len, blksz);
 					goto fix_dedupedfrag;
 				}
-				ctx->e.length = len;
+				e->length = len;
 				goto frag_packing;
 			}
 			if (!may_inline && len <= blksz)
 				goto nocompression;
 		}
 
-		ctx->e.length = min(len,
+		e->length = min(len,
 				cfg.c_max_decompressed_extent_bytes);
 
 		ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
-				&ctx->e.length, dst, ctx->pclustersize);
+				&e->length, dst, ctx->pclustersize);
 		if (ret <= 0) {
 			erofs_err("failed to compress %s: %s", inode->i_srcpath,
 				  erofs_strerror(ret));
-			return ret;
+			goto free_extent;
 		}
 
 		compressedsize = ret;
 		/* even compressed size is smaller, there is no real gain */
-		if (!(may_inline && ctx->e.length == len && ret < blksz))
+		if (!(may_inline && e->length == len && ret < blksz))
 			ret = roundup(ret, blksz);
 
 		/* check if there is enough gain to keep the compressed data */
-		if (ret * h->compress_threshold / 100 >= ctx->e.length) {
+		if (ret * h->compress_threshold / 100 >= e->length) {
 			if (may_inline && len < blksz) {
 				ret = z_erofs_fill_inline_data(inode,
 						ctx->queue + ctx->head,
@@ -457,21 +513,21 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 				may_inline = false;
 				may_packing = false;
 nocompression:
-				ret = write_uncompressed_extent(ctx, &len, dst);
+				ret = write_uncompressed_extent(ctx, &len, dst, e);
 			}
 
 			if (ret < 0)
-				return ret;
-			ctx->e.length = ret;
+				goto free_extent;
+			e->length = ret;
 
 			/*
 			 * XXX: For now, we have to leave `ctx->compressedblks
 			 * = 1' since there is no way to generate compressed
 			 * indexes after the time that ztailpacking is decided.
 			 */
-			ctx->e.compressedblks = 1;
-			ctx->e.raw = true;
-		} else if (may_packing && len == ctx->e.length &&
+			e->compressedblks = 1;
+			e->raw = true;
+		} else if (may_packing && len == e->length &&
 			   compressedsize < ctx->pclustersize &&
 			   (!inode->fragment_size || fix_dedupedfrag)) {
 frag_packing:
@@ -479,18 +535,20 @@ frag_packing:
 						     ctx->queue + ctx->head,
 						     len, ctx->tof_chksum);
 			if (ret < 0)
-				return ret;
-			ctx->e.compressedblks = 0; /* indicate a fragment */
-			ctx->e.raw = false;
+				goto free_extent;
+			e->compressedblks = 0; /* indicate a fragment */
+			e->raw = false;
 			ctx->fragemitted = true;
 			fix_dedupedfrag = false;
 		/* tailpcluster should be less than 1 block */
-		} else if (may_inline && len == ctx->e.length &&
+		} else if (may_inline && len == e->length &&
 			   compressedsize < blksz) {
 			if (ctx->clusterofs + len <= blksz) {
 				inode->eof_tailraw = malloc(len);
-				if (!inode->eof_tailraw)
-					return -ENOMEM;
+				if (!inode->eof_tailraw) {
+					ret = -ENOMEM;
+					goto free_extent;
+				}
 
 				memcpy(inode->eof_tailraw,
 				       ctx->queue + ctx->head, len);
@@ -500,9 +558,9 @@ frag_packing:
 			ret = z_erofs_fill_inline_data(inode, dst,
 					compressedsize, false);
 			if (ret < 0)
-				return ret;
-			ctx->e.compressedblks = 1;
-			ctx->e.raw = false;
+				goto free_extent;
+			e->compressedblks = 1;
+			e->raw = false;
 		} else {
 			unsigned int tailused, padding;
 
@@ -513,7 +571,7 @@ frag_packing:
 			 * filled up. Fix up the fragment if succeeds.
 			 * Otherwise, just drop it and go to packing.
 			 */
-			if (may_packing && len == ctx->e.length &&
+			if (may_packing && len == e->length &&
 			    (compressedsize & (blksz - 1)) &&
 			    ctx->tail < sizeof(ctx->queue)) {
 				ctx->pclustersize =
@@ -521,15 +579,15 @@ frag_packing:
 				goto fix_dedupedfrag;
 			}
 
-			if (may_inline && len == ctx->e.length)
+			if (may_inline && len == e->length)
 				tryrecompress_trailing(ctx, h,
 						ctx->queue + ctx->head,
-						&ctx->e.length, dst,
+						&e->length, dst,
 						&compressedsize);
 
-			ctx->e.compressedblks = BLK_ROUND_UP(sbi, compressedsize);
-			DBG_BUGON(ctx->e.compressedblks * blksz >=
-				  ctx->e.length);
+			e->compressedblks = BLK_ROUND_UP(sbi, compressedsize);
+			DBG_BUGON(e->compressedblks * blksz >=
+				  e->length);
 
 			padding = 0;
 			tailused = compressedsize & (blksz - 1);
@@ -544,25 +602,27 @@ frag_packing:
 
 			/* write compressed data */
 			erofs_dbg("Writing %u compressed data to %u of %u blocks",
-				  ctx->e.length, ctx->blkaddr,
-				  ctx->e.compressedblks);
+				  e->length, ctx->blkaddr,
+				  e->compressedblks);
 
 			ret = blk_write(sbi, dst - padding, ctx->blkaddr,
-					ctx->e.compressedblks);
+					e->compressedblks);
 			if (ret)
-				return ret;
-			ctx->e.raw = false;
+				goto free_extent;
+			e->raw = false;
 			may_inline = false;
 			may_packing = false;
 		}
-		ctx->e.partial = false;
-		ctx->e.blkaddr = ctx->blkaddr;
+		e->partial = false;
+		e->blkaddr = ctx->blkaddr;
 		if (!may_inline && !may_packing && !is_packed_inode)
-			(void)z_erofs_dedupe_insert(&ctx->e,
+			(void)z_erofs_dedupe_insert(e,
 						    ctx->queue + ctx->head);
-		ctx->blkaddr += ctx->e.compressedblks;
-		ctx->head += ctx->e.length;
-		len -= ctx->e.length;
+		ctx->blkaddr += e->compressedblks;
+		ctx->head += e->length;
+		len -= e->length;
+
+		list_add_tail(&e->list, &ctx->elist);
 
 		if (fix_dedupedfrag &&
 		    z_erofs_fixup_deduped_fragment(ctx, len))
@@ -585,9 +645,14 @@ frag_packing:
 fix_dedupedfrag:
 	DBG_BUGON(!inode->fragment_size);
 	ctx->remaining += inode->fragment_size;
-	ctx->e.length = 0;
 	ctx->fix_dedupedfrag = true;
+	e->length = 0;
+	list_add_tail(&e->list, &ctx->elist);
 	return 0;
+
+free_extent:
+	free(e);
+	return ret;
 }
 
 struct z_erofs_compressindex_vec {
@@ -874,6 +939,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 	erofs_blk_t blkaddr, compressed_blocks;
 	unsigned int legacymetasize;
 	int ret;
+	struct z_erofs_inmem_extent *e;
 	struct erofs_sb_info *sbi = inode->sbi;
 	u8 *compressmeta = malloc(BLK_ROUND_UP(sbi, inode->i_size) *
 				  sizeof(struct z_erofs_lcluster_index) +
@@ -942,10 +1008,10 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
-	ctx.e.length = 0;
 	ctx.remaining = inode->i_size - inode->fragment_size;
 	ctx.fix_dedupedfrag = false;
 	ctx.fragemitted = false;
+	init_list_head(&ctx.elist);
 	if (cfg.c_all_fragments && !erofs_is_packed_inode(inode) &&
 	    !inode->fragment_size) {
 		ret = z_erofs_pack_file_from_fd(inode, fd, ctx.tof_chksum);
@@ -978,17 +1044,23 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 
 	/* generate an extent for the deduplicated fragment */
 	if (inode->fragment_size && !ctx.fragemitted) {
-		z_erofs_write_indexes(&ctx);
-		ctx.e.length = inode->fragment_size;
-		ctx.e.compressedblks = 0;
-		ctx.e.raw = false;
-		ctx.e.partial = false;
-		ctx.e.blkaddr = ctx.blkaddr;
+		z_erofs_update_clusterofs(&ctx);
+		e = malloc(sizeof(*e));
+		if (!e) {
+			ret = -ENOMEM;
+			goto err_free_idata;
+		}
+		e->length = inode->fragment_size;
+		e->compressedblks = 0;
+		e->raw = false;
+		e->partial = false;
+		e->reset_clusterofs = false;
+		e->blkaddr = ctx.blkaddr;
+		list_add_tail(&e->list, &ctx.elist);
 	}
 	z_erofs_fragments_commit(inode);
 
 	z_erofs_write_indexes(&ctx);
-	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
 	if (!inode->fragment_size &&
diff --git a/lib/dedupe.c b/lib/dedupe.c
index 19a1c8d..993c7a3 100644
--- a/lib/dedupe.c
+++ b/lib/dedupe.c
@@ -138,6 +138,7 @@ int z_erofs_dedupe_match(struct z_erofs_dedupe_ctx *ctx)
 		ctx->e.partial = e->partial ||
 			(window_size + extra < e->original_length);
 		ctx->e.raw = e->raw;
+		ctx->e.reset_clusterofs = false;
 		ctx->e.blkaddr = e->compressed_blkaddr;
 		ctx->e.compressedblks = e->compressed_blks;
 		return 0;
-- 
2.42.1



* [PATCH v2] erofs-utils: mkfs: generate on-disk indexes after compression
  2023-11-06 12:58 [PATCH] erofs-utils: mkfs: write lcluster index after compression Yifan Zhao
@ 2023-11-07  9:23 ` Yifan Zhao
  2023-11-23 11:22   ` Gao Xiang
  2023-12-18 14:57   ` [PATCH v3 1/3] erofs-utils: lib: add z_erofs_need_refill() Gao Xiang
  0 siblings, 2 replies; 7+ messages in thread
From: Yifan Zhao @ 2023-11-07  9:23 UTC
  To: linux-erofs; +Cc: Yifan Zhao

Currently, mkfs generates the on-disk indexes of each compressed extent
on the fly during compression, which is inflexible if we'd like to merge
sub-indexes of a file later for multi-threaded compression scenarios.

Let's generate the on-disk indexes after compression of the file is
completed.

Signed-off-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn>
---
Changes since v1:
1. Polish the commit message.
2. Iterate the extent list in a wrapper function to avoid massive change
   of indentation.
3. Remove the redundant reset_clusterofs field in struct z_erofs_inmem_extent.
   Instead, check whether an uncompressed lcluster is aligned to the block
   boundary and reset clusterofs to 0 during on-disk index generation (see
   the sketch after this list).
   Note that this modification requires `ctx->head >= ctx->clusterofs` to
   always hold (lib/compress.c:328). Please reconfirm, as I may have missed
   some corner cases.
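
A worked sketch of the clusterofs bookkeeping above (editor's
illustration, not patch code; next_clusterofs() is a hypothetical
stand-in mirroring z_erofs_update_clusterofs(), assuming 4096-byte
blocks):

#include <stdio.h>

/* mirrors the per-extent update in z_erofs_update_clusterofs() */
static unsigned int next_clusterofs(unsigned int clusterofs,
				    unsigned int length, unsigned int blksz)
{
	unsigned int offset = length + clusterofs;

	/* an extent ending inside its first block restarts the next
	 * extent at a block boundary */
	return offset < blksz ? 0 : offset % blksz;
}

int main(void)
{
	printf("%u\n", next_clusterofs(1000, 2000, 4096)); /* 0 */
	printf("%u\n", next_clusterofs(1000, 8000, 4096)); /* 9000 % 4096 = 808 */
	return 0;
}

On top of this, for the unaligned uncompressed lclusters mentioned in
item 3, the v2 index writer forces clusterofs back to 0 whenever
0padding is disabled.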

 include/erofs/dedupe.h |   1 +
 lib/compress.c         | 253 ++++++++++++++++++++++++++---------------
 2 files changed, 163 insertions(+), 91 deletions(-)

diff --git a/include/erofs/dedupe.h b/include/erofs/dedupe.h
index 153bd4c..3a413bf 100644
--- a/include/erofs/dedupe.h
+++ b/include/erofs/dedupe.h
@@ -17,6 +17,7 @@ struct z_erofs_inmem_extent {
 	unsigned int compressedblks;
 	unsigned int length;
 	bool raw, partial;
+	struct list_head list;
 };
 
 struct z_erofs_dedupe_ctx {
diff --git a/lib/compress.c b/lib/compress.c
index 4eac363..42aa5ed 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -30,7 +30,7 @@ struct erofs_compress_cfg {
 
 struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
-	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
+	struct list_head elist;		/* (lookahead) extent list */
 
 	struct erofs_inode *inode;
 	struct erofs_compress_cfg *ccfg;
@@ -49,36 +49,36 @@ struct z_erofs_vle_compress_ctx {
 
 #define Z_EROFS_LEGACY_MAP_HEADER_SIZE	Z_EROFS_FULL_INDEX_ALIGN(0)
 
-static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
+static void z_erofs_update_clusterofs(struct z_erofs_vle_compress_ctx *ctx)
 {
-	const unsigned int type = Z_EROFS_LCLUSTER_TYPE_PLAIN;
-	struct z_erofs_lcluster_index di;
+	struct z_erofs_inmem_extent *e;
+	unsigned int blksz = erofs_blksiz(ctx->inode->sbi);
+	unsigned int offset;
 
-	if (!ctx->clusterofs)
+	if (list_empty(&ctx->elist))
 		return;
 
-	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
-	di.di_u.blkaddr = 0;
-	di.di_advise = cpu_to_le16(type << Z_EROFS_LI_LCLUSTER_TYPE_BIT);
+	e = list_last_entry(&ctx->elist, struct z_erofs_inmem_extent, list);
+	if (e->length == 0)
+		return;
 
-	memcpy(ctx->metacur, &di, sizeof(di));
-	ctx->metacur += sizeof(di);
+	offset = e->length + ctx->clusterofs;
+	ctx->clusterofs = (offset < blksz) ? 0 : offset % blksz;
 }
 
-static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
+static void z_erofs_write_index(struct z_erofs_vle_compress_ctx *ctx,
+				struct z_erofs_inmem_extent *e)
 {
 	struct erofs_inode *inode = ctx->inode;
 	struct erofs_sb_info *sbi = inode->sbi;
-	unsigned int clusterofs = ctx->clusterofs;
-	unsigned int count = ctx->e.length;
-	unsigned int d0 = 0, d1 = (clusterofs + count) / erofs_blksiz(sbi);
+	unsigned int count = e->length;
+	unsigned int d0 = 0, d1 = (ctx->clusterofs + count) / erofs_blksiz(sbi);
 	struct z_erofs_lcluster_index di;
 	unsigned int type, advise;
 
 	if (!count)
 		return;
 
-	ctx->e.length = 0;	/* mark as written first */
 	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
 
 	/* whether the tail-end (un)compressed block or not */
@@ -87,18 +87,18 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		 * A lcluster cannot have three parts with the middle one which
 		 * is well-compressed for !ztailpacking cases.
 		 */
-		DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
-		DBG_BUGON(ctx->e.partial);
-		type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+		DBG_BUGON(!e->raw && !cfg.c_ztailpacking && !cfg.c_fragments);
+		DBG_BUGON(e->partial);
+		type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
 			Z_EROFS_LCLUSTER_TYPE_HEAD1;
 		advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
 		di.di_advise = cpu_to_le16(advise);
 
 		if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-		    !ctx->e.compressedblks)
+			!e->compressedblks)
 			di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
 		else
-			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+			di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
@@ -112,7 +112,7 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		/* XXX: big pcluster feature should be per-inode */
 		if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) {
 			type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
-			di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
+			di.di_u.delta[0] = cpu_to_le16(e->compressedblks |
 						       Z_EROFS_LI_D0_CBLKCNT);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else if (d0) {
@@ -136,17 +136,17 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 				di.di_u.delta[0] = cpu_to_le16(d0);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else {
-			type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+			type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
 				Z_EROFS_LCLUSTER_TYPE_HEAD1;
 
 			if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-			    !ctx->e.compressedblks)
+				!e->compressedblks)
 				di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
 			else
-				di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+				di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
 
-			if (ctx->e.partial) {
-				DBG_BUGON(ctx->e.raw);
+			if (e->partial) {
+				DBG_BUGON(e->raw);
 				advise |= Z_EROFS_LI_PARTIAL_REF;
 			}
 		}
@@ -156,14 +156,49 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
-		count -= erofs_blksiz(sbi) - clusterofs;
-		clusterofs = 0;
+		count -= erofs_blksiz(sbi) - ctx->clusterofs;
+		ctx->clusterofs = 0;
 
 		++d0;
 		--d1;
-	} while (clusterofs + count >= erofs_blksiz(sbi));
+	} while (ctx->clusterofs + count >= erofs_blksiz(sbi));
+
+	ctx->clusterofs = count;
+}
+
+static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
+{
+	struct z_erofs_inmem_extent *e, *n;
+	struct z_erofs_lcluster_index di;
+
+	if (list_empty(&ctx->elist))
+		return;
+
+	ctx->clusterofs = 0;
+
+	list_for_each_entry_safe(e, n, &ctx->elist, list) {
+		/*
+		 * Uncompressed lcluster is aligned to lcluster boundary if
+		 * 0padding is disabled. See write_uncompressed_extent().
+		 */
+		if (!erofs_sb_has_lz4_0padding(ctx->inode->sbi) && e->raw &&
+		    ctx->clusterofs)
+			ctx->clusterofs = 0;
+
+		z_erofs_write_index(ctx, e);
+
+		list_del(&e->list);
+		free(e);
+	}
 
-	ctx->clusterofs = clusterofs + count;
+	if (ctx->clusterofs) {
+		di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
+		di.di_u.blkaddr = 0;
+		di.di_advise = cpu_to_le16(Z_EROFS_LCLUSTER_TYPE_PLAIN
+					   << Z_EROFS_LI_LCLUSTER_TYPE_BIT);
+		memcpy(ctx->metacur, &di, sizeof(di));
+		ctx->metacur += sizeof(di);
+	}
 }
 
 static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
@@ -172,8 +207,19 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 	struct erofs_inode *inode = ctx->inode;
 	const unsigned int lclustermask = (1 << inode->z_logical_clusterbits) - 1;
 	struct erofs_sb_info *sbi = inode->sbi;
+	struct z_erofs_inmem_extent *e, *newe;
+	int elen = 0;
 	int ret = 0;
 
+	if (list_empty(&ctx->elist)) {
+		e = NULL;
+		elen = 0;
+	} else {
+		e = list_last_entry(&ctx->elist, struct z_erofs_inmem_extent,
+				    list);
+		elen = e->length;
+	}
+
 	/*
 	 * No need dedupe for packed inode since it is composed of
 	 * fragments which have already been deduplicated.
@@ -184,12 +230,12 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 	do {
 		struct z_erofs_dedupe_ctx dctx = {
 			.start = ctx->queue + ctx->head - ({ int rc;
-				if (ctx->e.length <= erofs_blksiz(sbi))
+				if (elen <= erofs_blksiz(sbi))
 					rc = 0;
-				else if (ctx->e.length - erofs_blksiz(sbi) >= ctx->head)
+				else if (elen - erofs_blksiz(sbi) >= ctx->head)
 					rc = ctx->head;
 				else
-					rc = ctx->e.length - erofs_blksiz(sbi);
+					rc = elen - erofs_blksiz(sbi);
 				rc; }),
 			.end = ctx->queue + ctx->head + *len,
 			.cur = ctx->queue + ctx->head,
@@ -206,25 +252,25 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 		 * decompresssion could be done as another try in practice.
 		 */
 		if (dctx.e.compressedblks > 1 &&
-		    ((ctx->clusterofs + ctx->e.length - delta) & lclustermask) +
+		    ((ctx->clusterofs + elen - delta) & lclustermask) +
 			dctx.e.length < 2 * (lclustermask + 1))
 			break;
 
 		if (delta) {
 			DBG_BUGON(delta < 0);
-			DBG_BUGON(!ctx->e.length);
+			DBG_BUGON(!e);
 
 			/*
 			 * For big pcluster dedupe, if we decide to shorten the
 			 * previous big pcluster, make sure that the previous
 			 * CBLKCNT is still kept.
 			 */
-			if (ctx->e.compressedblks > 1 &&
-			    (ctx->clusterofs & lclustermask) + ctx->e.length
+			if (e->compressedblks > 1 &&
+			    (ctx->clusterofs & lclustermask) + e->length
 				- delta < 2 * (lclustermask + 1))
 				break;
-			ctx->e.partial = true;
-			ctx->e.length -= delta;
+			e->partial = true;
+			e->length -= delta;
 		}
 
 		/* fall back to noncompact indexes for deduplication */
@@ -237,8 +283,16 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 		erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks",
 			  dctx.e.length, dctx.e.raw ? "un" : "",
 			  delta, dctx.e.blkaddr, dctx.e.compressedblks);
-		z_erofs_write_indexes(ctx);
-		ctx->e = dctx.e;
+		z_erofs_update_clusterofs(ctx);
+
+		newe = malloc(sizeof(*newe));
+		if (!newe) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		memcpy(newe, &dctx.e, sizeof(*newe));
+		list_add_tail(&newe->list, &ctx->elist);
+
 		ctx->head += dctx.e.length - delta;
 		DBG_BUGON(*len < dctx.e.length - delta);
 		*len -= dctx.e.length - delta;
@@ -258,7 +312,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 	} while (*len);
 
 out:
-	z_erofs_write_indexes(ctx);
+	z_erofs_update_clusterofs(ctx);
 	return ret;
 }
 
@@ -400,10 +454,11 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 	unsigned int blksz = erofs_blksiz(sbi);
 	char *const dst = dstbuf + blksz;
 	struct erofs_compress *const h = &ctx->ccfg->handle;
+	struct z_erofs_inmem_extent *e = NULL;
 	unsigned int len = ctx->tail - ctx->head;
 	bool is_packed_inode = erofs_is_packed_inode(inode);
 	bool final = !ctx->remaining;
-	int ret;
+	int ret = 0;
 
 	while (len) {
 		bool may_packing = (cfg.c_fragments && final &&
@@ -416,39 +471,43 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 		if (z_erofs_compress_dedupe(ctx, &len) && !final)
 			break;
 
+		e = malloc(sizeof(*e));
+		if (!e)
+			return -ENOMEM;
+
 		if (len <= ctx->pclustersize) {
 			if (!final || !len)
-				break;
+				goto free_extent;
 			if (may_packing) {
 				if (inode->fragment_size && !fix_dedupedfrag) {
 					ctx->pclustersize = roundup(len, blksz);
 					goto fix_dedupedfrag;
 				}
-				ctx->e.length = len;
+				e->length = len;
 				goto frag_packing;
 			}
 			if (!may_inline && len <= blksz)
 				goto nocompression;
 		}
 
-		ctx->e.length = min(len,
+		e->length = min(len,
 				cfg.c_max_decompressed_extent_bytes);
 
 		ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
-				&ctx->e.length, dst, ctx->pclustersize);
+				&e->length, dst, ctx->pclustersize);
 		if (ret <= 0) {
 			erofs_err("failed to compress %s: %s", inode->i_srcpath,
 				  erofs_strerror(ret));
-			return ret;
+			goto free_extent;
 		}
 
 		compressedsize = ret;
 		/* even compressed size is smaller, there is no real gain */
-		if (!(may_inline && ctx->e.length == len && ret < blksz))
+		if (!(may_inline && e->length == len && ret < blksz))
 			ret = roundup(ret, blksz);
 
 		/* check if there is enough gain to keep the compressed data */
-		if (ret * h->compress_threshold / 100 >= ctx->e.length) {
+		if (ret * h->compress_threshold / 100 >= e->length) {
 			if (may_inline && len < blksz) {
 				ret = z_erofs_fill_inline_data(inode,
 						ctx->queue + ctx->head,
@@ -461,17 +520,17 @@ nocompression:
 			}
 
 			if (ret < 0)
-				return ret;
-			ctx->e.length = ret;
+				goto free_extent;
+			e->length = ret;
 
 			/*
 			 * XXX: For now, we have to leave `ctx->compressedblks
 			 * = 1' since there is no way to generate compressed
 			 * indexes after the time that ztailpacking is decided.
 			 */
-			ctx->e.compressedblks = 1;
-			ctx->e.raw = true;
-		} else if (may_packing && len == ctx->e.length &&
+			e->compressedblks = 1;
+			e->raw = true;
+		} else if (may_packing && len == e->length &&
 			   compressedsize < ctx->pclustersize &&
 			   (!inode->fragment_size || fix_dedupedfrag)) {
 frag_packing:
@@ -479,18 +538,20 @@ frag_packing:
 						     ctx->queue + ctx->head,
 						     len, ctx->tof_chksum);
 			if (ret < 0)
-				return ret;
-			ctx->e.compressedblks = 0; /* indicate a fragment */
-			ctx->e.raw = false;
+				goto free_extent;
+			e->compressedblks = 0; /* indicate a fragment */
+			e->raw = false;
 			ctx->fragemitted = true;
 			fix_dedupedfrag = false;
 		/* tailpcluster should be less than 1 block */
-		} else if (may_inline && len == ctx->e.length &&
+		} else if (may_inline && len == e->length &&
 			   compressedsize < blksz) {
 			if (ctx->clusterofs + len <= blksz) {
 				inode->eof_tailraw = malloc(len);
-				if (!inode->eof_tailraw)
-					return -ENOMEM;
+				if (!inode->eof_tailraw) {
+					ret = -ENOMEM;
+					goto free_extent;
+				}
 
 				memcpy(inode->eof_tailraw,
 				       ctx->queue + ctx->head, len);
@@ -500,9 +561,9 @@ frag_packing:
 			ret = z_erofs_fill_inline_data(inode, dst,
 					compressedsize, false);
 			if (ret < 0)
-				return ret;
-			ctx->e.compressedblks = 1;
-			ctx->e.raw = false;
+				goto free_extent;
+			e->compressedblks = 1;
+			e->raw = false;
 		} else {
 			unsigned int tailused, padding;
 
@@ -513,7 +574,7 @@ frag_packing:
 			 * filled up. Fix up the fragment if succeeds.
 			 * Otherwise, just drop it and go to packing.
 			 */
-			if (may_packing && len == ctx->e.length &&
+			if (may_packing && len == e->length &&
 			    (compressedsize & (blksz - 1)) &&
 			    ctx->tail < sizeof(ctx->queue)) {
 				ctx->pclustersize =
@@ -521,15 +582,14 @@ frag_packing:
 				goto fix_dedupedfrag;
 			}
 
-			if (may_inline && len == ctx->e.length)
+			if (may_inline && len == e->length)
 				tryrecompress_trailing(ctx, h,
 						ctx->queue + ctx->head,
-						&ctx->e.length, dst,
+						&e->length, dst,
 						&compressedsize);
 
-			ctx->e.compressedblks = BLK_ROUND_UP(sbi, compressedsize);
-			DBG_BUGON(ctx->e.compressedblks * blksz >=
-				  ctx->e.length);
+			e->compressedblks = BLK_ROUND_UP(sbi, compressedsize);
+			DBG_BUGON(e->compressedblks * blksz >= e->length);
 
 			padding = 0;
 			tailused = compressedsize & (blksz - 1);
@@ -544,25 +604,26 @@ frag_packing:
 
 			/* write compressed data */
 			erofs_dbg("Writing %u compressed data to %u of %u blocks",
-				  ctx->e.length, ctx->blkaddr,
-				  ctx->e.compressedblks);
+				  e->length, ctx->blkaddr,
+				  e->compressedblks);
 
 			ret = blk_write(sbi, dst - padding, ctx->blkaddr,
-					ctx->e.compressedblks);
+					e->compressedblks);
 			if (ret)
-				return ret;
-			ctx->e.raw = false;
+				goto free_extent;
+			e->raw = false;
 			may_inline = false;
 			may_packing = false;
 		}
-		ctx->e.partial = false;
-		ctx->e.blkaddr = ctx->blkaddr;
+		e->partial = false;
+		e->blkaddr = ctx->blkaddr;
 		if (!may_inline && !may_packing && !is_packed_inode)
-			(void)z_erofs_dedupe_insert(&ctx->e,
-						    ctx->queue + ctx->head);
-		ctx->blkaddr += ctx->e.compressedblks;
-		ctx->head += ctx->e.length;
-		len -= ctx->e.length;
+			(void)z_erofs_dedupe_insert(e, ctx->queue + ctx->head);
+		ctx->blkaddr += e->compressedblks;
+		ctx->head += e->length;
+		len -= e->length;
+
+		list_add_tail(&e->list, &ctx->elist);
 
 		if (fix_dedupedfrag &&
 		    z_erofs_fixup_deduped_fragment(ctx, len))
@@ -585,9 +646,14 @@ frag_packing:
 fix_dedupedfrag:
 	DBG_BUGON(!inode->fragment_size);
 	ctx->remaining += inode->fragment_size;
-	ctx->e.length = 0;
 	ctx->fix_dedupedfrag = true;
+	e->length = 0;
+	list_add_tail(&e->list, &ctx->elist);
 	return 0;
+
+free_extent:
+	free(e);
+	return ret;
 }
 
 struct z_erofs_compressindex_vec {
@@ -942,10 +1008,10 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
-	ctx.e.length = 0;
 	ctx.remaining = inode->i_size - inode->fragment_size;
 	ctx.fix_dedupedfrag = false;
 	ctx.fragemitted = false;
+	init_list_head(&ctx.elist);
 	if (cfg.c_all_fragments && !erofs_is_packed_inode(inode) &&
 	    !inode->fragment_size) {
 		ret = z_erofs_pack_file_from_fd(inode, fd, ctx.tof_chksum);
@@ -978,17 +1044,22 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 
 	/* generate an extent for the deduplicated fragment */
 	if (inode->fragment_size && !ctx.fragemitted) {
-		z_erofs_write_indexes(&ctx);
-		ctx.e.length = inode->fragment_size;
-		ctx.e.compressedblks = 0;
-		ctx.e.raw = false;
-		ctx.e.partial = false;
-		ctx.e.blkaddr = ctx.blkaddr;
+		struct z_erofs_inmem_extent *e = malloc(sizeof(*e));
+		if (!e) {
+			ret = -ENOMEM;
+			goto err_free_idata;
+		}
+		z_erofs_update_clusterofs(&ctx);
+		e->length = inode->fragment_size;
+		e->compressedblks = 0;
+		e->raw = false;
+		e->partial = false;
+		e->blkaddr = ctx.blkaddr;
+		list_add_tail(&e->list, &ctx.elist);
 	}
 	z_erofs_fragments_commit(inode);
 
 	z_erofs_write_indexes(&ctx);
-	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
 	if (!inode->fragment_size &&
-- 
2.42.1



* Re: [PATCH v2] erofs-utils: mkfs: generate on-disk indexes after compression
  2023-11-07  9:23 ` [PATCH v2] erofs-utils: mkfs: generate on-disk indexes " Yifan Zhao
@ 2023-11-23 11:22   ` Gao Xiang
  2023-12-18 14:57   ` [PATCH v3 1/3] erofs-utils: lib: add z_erofs_need_refill() Gao Xiang
  1 sibling, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2023-11-23 11:22 UTC
  To: Yifan Zhao, linux-erofs

Hi Yifan,

On 2023/11/7 17:23, Yifan Zhao wrote:
> Currently, mkfs generates the on-disk indexes of each compressed extent
> on the fly during compressing, which is inflexible if we'd like to merge
> sub-indexes of a file later for the multi-threaded compression scenarios.
> 
> Let's generate on-disk indexes after the compression for the file is
> completed.
> 
> Signed-off-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn>
> ---

Sorry for the late reply.  There have been some issues on my side.
In order to make this work efficient, let me refine this commit as well.

Thanks,
Gao Xiang



* [PATCH v3 1/3] erofs-utils: lib: add z_erofs_need_refill()
  2023-11-07  9:23 ` [PATCH v2] erofs-utils: mkfs: generate on-disk indexes " Yifan Zhao
  2023-11-23 11:22   ` Gao Xiang
@ 2023-12-18 14:57   ` Gao Xiang
  2023-12-18 14:57     ` [PATCH v3 2/3] erofs-utils: lib: split vle_compress_one() Gao Xiang
  2023-12-18 14:57     ` [PATCH v3 3/3] erofs-utils: lib: generate compression indexes in memory first Gao Xiang
  1 sibling, 2 replies; 7+ messages in thread
From: Gao Xiang @ 2023-12-18 14:57 UTC
  To: linux-erofs; +Cc: Gao Xiang

Add a z_erofs_need_refill() helper so that the queue refill logic is no
longer duplicated at its two call sites.
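
Both call sites shared the same sliding-window arithmetic.  A standalone
sketch with hypothetical numbers (editor's illustration; it assumes a
4096-byte block size, and the real helper additionally requires that
ctx->head has passed EROFS_CONFIG_COMPR_MAX_SZ):

#include <stdio.h>

int main(void)
{
	unsigned int blksz = 4096, head = 9300, tail = 10000;
	/* round_down() to a block boundary, as z_erofs_need_refill() does */
	unsigned int qh_aligned = head & ~(blksz - 1);	/* 8192 */
	unsigned int qh_after = head - qh_aligned;	/* 1108 */

	/* the real helper then does:
	 * memmove(queue, queue + qh_aligned, tail - qh_aligned); */
	tail -= qh_aligned;	/* 1808: unconsumed bytes slide down */
	head = qh_after;	/* head keeps its offset within the block */
	printf("head=%u tail=%u\n", head, tail);
	return 0;
}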

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/compress.c | 39 +++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index 61328ed..a5ef6e4 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -166,6 +166,22 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 	ctx->clusterofs = clusterofs + count;
 }
 
+static bool z_erofs_need_refill(struct z_erofs_vle_compress_ctx *ctx)
+{
+	const bool final = !ctx->remaining;
+	unsigned int qh_aligned, qh_after;
+
+	if (final || ctx->head < EROFS_CONFIG_COMPR_MAX_SZ)
+		return false;
+
+	qh_aligned = round_down(ctx->head, erofs_blksiz(ctx->inode->sbi));
+	qh_after = ctx->head - qh_aligned;
+	memmove(ctx->queue, ctx->queue + qh_aligned, ctx->tail - qh_aligned);
+	ctx->tail -= qh_aligned;
+	ctx->head = qh_after;
+	return true;
+}
+
 static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 				   unsigned int *len)
 {
@@ -243,15 +259,7 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 		DBG_BUGON(*len < dctx.e.length - delta);
 		*len -= dctx.e.length - delta;
 
-		if (ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
-			const unsigned int qh_aligned =
-				round_down(ctx->head, erofs_blksiz(sbi));
-			const unsigned int qh_after = ctx->head - qh_aligned;
-
-			memmove(ctx->queue, ctx->queue + qh_aligned,
-				*len + qh_after);
-			ctx->head = qh_after;
-			ctx->tail = qh_after + *len;
+		if (z_erofs_need_refill(ctx)) {
 			ret = -EAGAIN;
 			break;
 		}
@@ -413,7 +421,7 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 		bool fix_dedupedfrag = ctx->fix_dedupedfrag;
 		unsigned int compressedsize;
 
-		if (z_erofs_compress_dedupe(ctx, &len) && !final)
+		if (z_erofs_compress_dedupe(ctx, &len))
 			break;
 
 		if (len <= ctx->pclustersize) {
@@ -568,17 +576,8 @@ frag_packing:
 		    z_erofs_fixup_deduped_fragment(ctx, len))
 			break;
 
-		if (!final && ctx->head >= EROFS_CONFIG_COMPR_MAX_SZ) {
-			const unsigned int qh_aligned =
-				round_down(ctx->head, blksz);
-			const unsigned int qh_after = ctx->head - qh_aligned;
-
-			memmove(ctx->queue, ctx->queue + qh_aligned,
-				len + qh_after);
-			ctx->head = qh_after;
-			ctx->tail = qh_after + len;
+		if (z_erofs_need_refill(ctx))
 			break;
-		}
 	}
 	return 0;
 
-- 
2.39.3



* [PATCH v3 2/3] erofs-utils: lib: split vle_compress_one()
  2023-12-18 14:57   ` [PATCH v3 1/3] erofs-utils: lib: add z_erofs_need_refill() Gao Xiang
@ 2023-12-18 14:57     ` Gao Xiang
  2023-12-19  6:00       ` Yue Hu
  2023-12-18 14:57     ` [PATCH v3 3/3] erofs-utils: lib: generate compression indexes in memory first Gao Xiang
  1 sibling, 1 reply; 7+ messages in thread
From: Gao Xiang @ 2023-12-18 14:57 UTC
  To: linux-erofs; +Cc: Gao Xiang

Split the per-extent compression into a new helper for later reworking.
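
The new helper reports its state through a three-way return value.  A
self-contained toy (editor's sketch; compress_one() is a hypothetical
stand-in for __z_erofs_compress_one()) showing how the driver loop
consumes it:

#include <stdio.h>

struct extent { unsigned int length; };

/* >0: need more input data, <0: hard error, 0: one extent produced */
static int compress_one(unsigned int len, struct extent *e)
{
	if (len < 8)
		return 1;
	e->length = 8;
	return 0;
}

int main(void)
{
	struct extent e;
	unsigned int len = 20;
	int ret;

	while (len) {
		ret = compress_one(len, &e);
		if (ret > 0)
			break;		/* go refill the queue */
		if (ret < 0)
			return 1;	/* propagate the error */
		len -= e.length;	/* account the emitted extent */
	}
	printf("leftover=%u\n", len);	/* 4 */
	return 0;
}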

Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/compress.c | 297 ++++++++++++++++++++++++-------------------------
 1 file changed, 147 insertions(+), 150 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index a5ef6e4..eafbad1 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -400,7 +400,8 @@ static bool z_erofs_fixup_deduped_fragment(struct z_erofs_vle_compress_ctx *ctx,
 	return true;
 }
 
-static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
+static int __z_erofs_compress_one(struct z_erofs_vle_compress_ctx *ctx,
+				  struct z_erofs_inmem_extent *e)
 {
 	static char dstbuf[EROFS_CONFIG_COMPR_MAX_SZ + EROFS_MAX_BLOCK_SIZE];
 	struct erofs_inode *inode = ctx->inode;
@@ -411,181 +412,177 @@ static int vle_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 	unsigned int len = ctx->tail - ctx->head;
 	bool is_packed_inode = erofs_is_packed_inode(inode);
 	bool final = !ctx->remaining;
+	bool may_packing = (cfg.c_fragments && final && !is_packed_inode);
+	bool may_inline = (cfg.c_ztailpacking && final && !may_packing);
+	unsigned int compressedsize;
 	int ret;
 
-	while (len) {
-		bool may_packing = (cfg.c_fragments && final &&
-				   !is_packed_inode);
-		bool may_inline = (cfg.c_ztailpacking && final &&
-				  !may_packing);
-		bool fix_dedupedfrag = ctx->fix_dedupedfrag;
-		unsigned int compressedsize;
-
-		if (z_erofs_compress_dedupe(ctx, &len))
-			break;
-
-		if (len <= ctx->pclustersize) {
-			if (!final || !len)
-				break;
-			if (may_packing) {
-				if (inode->fragment_size && !fix_dedupedfrag) {
-					ctx->pclustersize = roundup(len, blksz);
-					goto fix_dedupedfrag;
-				}
-				ctx->e.length = len;
-				goto frag_packing;
+	if (len <= ctx->pclustersize) {
+		if (!final || !len)
+			return 1;
+		if (may_packing) {
+			if (inode->fragment_size && !ctx->fix_dedupedfrag) {
+				ctx->pclustersize = roundup(len, blksz);
+				goto fix_dedupedfrag;
 			}
-			if (!may_inline && len <= blksz)
-				goto nocompression;
+			e->length = len;
+			goto frag_packing;
 		}
+		if (!may_inline && len <= blksz)
+			goto nocompression;
+	}
 
-		ctx->e.length = min(len,
-				cfg.c_max_decompressed_extent_bytes);
+	e->length = min(len, cfg.c_max_decompressed_extent_bytes);
+	ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
+				      &e->length, dst, ctx->pclustersize);
+	if (ret <= 0) {
+		erofs_err("failed to compress %s: %s", inode->i_srcpath,
+			  erofs_strerror(ret));
+		return ret;
+	}
 
-		ret = erofs_compress_destsize(h, ctx->queue + ctx->head,
-				&ctx->e.length, dst, ctx->pclustersize);
-		if (ret <= 0) {
-			erofs_err("failed to compress %s: %s", inode->i_srcpath,
-				  erofs_strerror(ret));
-			return ret;
-		}
+	compressedsize = ret;
+	/* even compressed size is smaller, there is no real gain */
+	if (!(may_inline && e->length == len && ret < blksz))
+		ret = roundup(ret, blksz);
 
-		compressedsize = ret;
-		/* even compressed size is smaller, there is no real gain */
-		if (!(may_inline && ctx->e.length == len && ret < blksz))
-			ret = roundup(ret, blksz);
-
-		/* check if there is enough gain to keep the compressed data */
-		if (ret * h->compress_threshold / 100 >= ctx->e.length) {
-			if (may_inline && len < blksz) {
-				ret = z_erofs_fill_inline_data(inode,
-						ctx->queue + ctx->head,
-						len, true);
-			} else {
-				may_inline = false;
-				may_packing = false;
+	/* check if there is enough gain to keep the compressed data */
+	if (ret * h->compress_threshold / 100 >= e->length) {
+		if (may_inline && len < blksz) {
+			ret = z_erofs_fill_inline_data(inode,
+					ctx->queue + ctx->head, len, true);
+		} else {
+			may_inline = false;
+			may_packing = false;
 nocompression:
-				ret = write_uncompressed_extent(ctx, &len, dst);
-			}
+			ret = write_uncompressed_extent(ctx, &len, dst);
+		}
 
-			if (ret < 0)
-				return ret;
-			ctx->e.length = ret;
+		if (ret < 0)
+			return ret;
+		e->length = ret;
 
-			/*
-			 * XXX: For now, we have to leave `ctx->compressedblks
-			 * = 1' since there is no way to generate compressed
-			 * indexes after the time that ztailpacking is decided.
-			 */
-			ctx->e.compressedblks = 1;
-			ctx->e.raw = true;
-		} else if (may_packing && len == ctx->e.length &&
-			   compressedsize < ctx->pclustersize &&
-			   (!inode->fragment_size || fix_dedupedfrag)) {
+		/*
+		 * XXX: For now, we have to leave `ctx->compressedblks = 1'
+		 * since there is no way to generate compressed indexes after
+		 * the time that ztailpacking is decided.
+		 */
+		e->compressedblks = 1;
+		e->raw = true;
+	} else if (may_packing && len == e->length &&
+		   compressedsize < ctx->pclustersize &&
+		   (!inode->fragment_size || ctx->fix_dedupedfrag)) {
 frag_packing:
-			ret = z_erofs_pack_fragments(inode,
-						     ctx->queue + ctx->head,
-						     len, ctx->tof_chksum);
-			if (ret < 0)
-				return ret;
-			ctx->e.compressedblks = 0; /* indicate a fragment */
-			ctx->e.raw = false;
-			ctx->fragemitted = true;
-			fix_dedupedfrag = false;
-		/* tailpcluster should be less than 1 block */
-		} else if (may_inline && len == ctx->e.length &&
-			   compressedsize < blksz) {
-			if (ctx->clusterofs + len <= blksz) {
-				inode->eof_tailraw = malloc(len);
-				if (!inode->eof_tailraw)
-					return -ENOMEM;
-
-				memcpy(inode->eof_tailraw,
-				       ctx->queue + ctx->head, len);
-				inode->eof_tailrawsize = len;
-			}
-
-			ret = z_erofs_fill_inline_data(inode, dst,
-					compressedsize, false);
-			if (ret < 0)
-				return ret;
-			ctx->e.compressedblks = 1;
-			ctx->e.raw = false;
-		} else {
-			unsigned int tailused, padding;
+		ret = z_erofs_pack_fragments(inode, ctx->queue + ctx->head,
+					     len, ctx->tof_chksum);
+		if (ret < 0)
+			return ret;
+		e->compressedblks = 0; /* indicate a fragment */
+		e->raw = false;
+		ctx->fragemitted = true;
+	/* tailpcluster should be less than 1 block */
+	} else if (may_inline && len == e->length && compressedsize < blksz) {
+		if (ctx->clusterofs + len <= blksz) {
+			inode->eof_tailraw = malloc(len);
+			if (!inode->eof_tailraw)
+				return -ENOMEM;
+
+			memcpy(inode->eof_tailraw, ctx->queue + ctx->head, len);
+			inode->eof_tailrawsize = len;
+		}
 
-			/*
-			 * If there's space left for the last round when
-			 * deduping fragments, try to read the fragment and
-			 * recompress a little more to check whether it can be
-			 * filled up. Fix up the fragment if succeeds.
-			 * Otherwise, just drop it and go to packing.
-			 */
-			if (may_packing && len == ctx->e.length &&
-			    (compressedsize & (blksz - 1)) &&
-			    ctx->tail < sizeof(ctx->queue)) {
-				ctx->pclustersize =
-					roundup(compressedsize, blksz);
-				goto fix_dedupedfrag;
-			}
+		ret = z_erofs_fill_inline_data(inode, dst,
+				compressedsize, false);
+		if (ret < 0)
+			return ret;
+		e->compressedblks = 1;
+		e->raw = false;
+	} else {
+		unsigned int tailused, padding;
 
-			if (may_inline && len == ctx->e.length)
-				tryrecompress_trailing(ctx, h,
-						ctx->queue + ctx->head,
-						&ctx->e.length, dst,
-						&compressedsize);
+		/*
+		 * If there's space left for the last round when deduping
+		 * fragments, try to read the fragment and recompress a little
+		 * more to check whether it can be filled up.  Fix the fragment
+		 * if succeeds.  Otherwise, just drop it and go on packing.
+		 */
+		if (may_packing && len == e->length &&
+		    (compressedsize & (blksz - 1)) &&
+		    ctx->tail < sizeof(ctx->queue)) {
+			ctx->pclustersize = roundup(compressedsize, blksz);
+			goto fix_dedupedfrag;
+		}
 
-			ctx->e.compressedblks = BLK_ROUND_UP(sbi, compressedsize);
-			DBG_BUGON(ctx->e.compressedblks * blksz >=
-				  ctx->e.length);
+		if (may_inline && len == e->length)
+			tryrecompress_trailing(ctx, h, ctx->queue + ctx->head,
+					&e->length, dst, &compressedsize);
 
-			padding = 0;
-			tailused = compressedsize & (blksz - 1);
-			if (tailused)
-				padding = blksz - tailused;
-
-			/* zero out garbage trailing data for non-0padding */
-			if (!erofs_sb_has_lz4_0padding(sbi)) {
-				memset(dst + compressedsize, 0, padding);
-				padding = 0;
-			}
+		e->compressedblks = BLK_ROUND_UP(sbi, compressedsize);
+		DBG_BUGON(e->compressedblks * blksz >= e->length);
 
-			/* write compressed data */
-			erofs_dbg("Writing %u compressed data to %u of %u blocks",
-				  ctx->e.length, ctx->blkaddr,
-				  ctx->e.compressedblks);
+		padding = 0;
+		tailused = compressedsize & (blksz - 1);
+		if (tailused)
+			padding = blksz - tailused;
 
-			ret = blk_write(sbi, dst - padding, ctx->blkaddr,
-					ctx->e.compressedblks);
-			if (ret)
-				return ret;
-			ctx->e.raw = false;
-			may_inline = false;
-			may_packing = false;
+		/* zero out garbage trailing data for non-0padding */
+		if (!erofs_sb_has_lz4_0padding(sbi)) {
+			memset(dst + compressedsize, 0, padding);
+			padding = 0;
 		}
-		ctx->e.partial = false;
-		ctx->e.blkaddr = ctx->blkaddr;
-		if (!may_inline && !may_packing && !is_packed_inode)
-			(void)z_erofs_dedupe_insert(&ctx->e,
-						    ctx->queue + ctx->head);
-		ctx->blkaddr += ctx->e.compressedblks;
-		ctx->head += ctx->e.length;
-		len -= ctx->e.length;
 
-		if (fix_dedupedfrag &&
-		    z_erofs_fixup_deduped_fragment(ctx, len))
-			break;
+		/* write compressed data */
+		erofs_dbg("Writing %u compressed data to %u of %u blocks",
+			  e->length, ctx->blkaddr, e->compressedblks);
 
-		if (z_erofs_need_refill(ctx))
-			break;
+		ret = blk_write(sbi, dst - padding, ctx->blkaddr,
+				e->compressedblks);
+		if (ret)
+			return ret;
+		e->raw = false;
+		may_inline = false;
+		may_packing = false;
 	}
+	e->partial = false;
+	e->blkaddr = ctx->blkaddr;
+	if (!may_inline && !may_packing && !is_packed_inode)
+		(void)z_erofs_dedupe_insert(e, ctx->queue + ctx->head);
+	ctx->blkaddr += e->compressedblks;
+	ctx->head += e->length;
 	return 0;
 
 fix_dedupedfrag:
 	DBG_BUGON(!inode->fragment_size);
 	ctx->remaining += inode->fragment_size;
-	ctx->e.length = 0;
+	e->length = 0;
 	ctx->fix_dedupedfrag = true;
+	return 1;
+}
+
+static int z_erofs_compress_one(struct z_erofs_vle_compress_ctx *ctx)
+{
+	unsigned int len = ctx->tail - ctx->head;
+	int ret;
+
+	while (len) {
+		if (z_erofs_compress_dedupe(ctx, &len))
+			break;
+
+		ret = __z_erofs_compress_one(ctx, &ctx->e);
+		if (ret) {
+			if (ret > 0)
+				break;		/* need more data */
+			return ret;
+		}
+
+		len -= ctx->e.length;
+		if (ctx->fix_dedupedfrag && !ctx->fragemitted &&
+		    z_erofs_fixup_deduped_fragment(ctx, len))
+			break;
+
+		if (z_erofs_need_refill(ctx))
+			break;
+	}
 	return 0;
 }
 
@@ -964,7 +961,7 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 			ctx.remaining -= rx;
 			ctx.tail += rx;
 
-			ret = vle_compress_one(&ctx);
+			ret = z_erofs_compress_one(&ctx);
 			if (ret)
 				goto err_free_idata;
 		}
-- 
2.39.3



* [PATCH v3 3/3] erofs-utils: lib: generate compression indexes in memory first
  2023-12-18 14:57   ` [PATCH v3 1/3] erofs-utils: lib: add z_erofs_need_refill() Gao Xiang
  2023-12-18 14:57     ` [PATCH v3 2/3] erofs-utils: lib: split vle_compress_one() Gao Xiang
@ 2023-12-18 14:57     ` Gao Xiang
  1 sibling, 0 replies; 7+ messages in thread
From: Gao Xiang @ 2023-12-18 14:57 UTC
  To: linux-erofs; +Cc: Gao Xiang, Yifan Zhao

From: Yifan Zhao <zhaoyifan@sjtu.edu.cn>

Currently, mkfs generates the on-disk indexes of each compressed extent
on the fly during compression, which is inflexible if we'd like to merge
sub-indexes of a file later for multi-threaded scenarios.

Let's generate the on-disk indexes after compression is completed.
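
A minimal sketch of the resulting in-memory pipeline (editor's sketch,
not patch code; the structures follow the patch, queue_extent() is a
hypothetical helper, and the list/extent definitions come from
erofs-utils' headers):

#include <stdlib.h>
#include <errno.h>
#include <erofs/list.h>
#include <erofs/dedupe.h>	/* struct z_erofs_inmem_extent */

struct z_erofs_extent_item {
	struct list_head list;
	struct z_erofs_inmem_extent e;
};

/* each finished extent is copied into a list item and queued; the
 * on-disk lcluster indexes are generated from the list afterwards */
static int queue_extent(struct list_head *extents,
			const struct z_erofs_inmem_extent *e)
{
	struct z_erofs_extent_item *ei = malloc(sizeof(*ei));

	if (!ei)
		return -ENOMEM;
	ei->e = *e;
	list_add_tail(&ei->list, extents);
	return 0;
}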

Signed-off-by: Yifan Zhao <zhaoyifan@sjtu.edu.cn>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
---
 lib/compress.c | 185 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 121 insertions(+), 64 deletions(-)

diff --git a/lib/compress.c b/lib/compress.c
index eafbad1..8f61f92 100644
--- a/lib/compress.c
+++ b/lib/compress.c
@@ -28,9 +28,15 @@ struct erofs_compress_cfg {
 	bool enable;
 } erofs_ccfg[EROFS_MAX_COMPR_CFGS];
 
+struct z_erofs_extent_item {
+	struct list_head list;
+	struct z_erofs_inmem_extent e;
+};
+
 struct z_erofs_vle_compress_ctx {
 	u8 queue[EROFS_CONFIG_COMPR_MAX_SZ * 2];
-	struct z_erofs_inmem_extent e;	/* (lookahead) extent */
+	struct list_head extents;
+	struct z_erofs_extent_item *pivot;
 
 	struct erofs_inode *inode;
 	struct erofs_compress_cfg *ccfg;
@@ -65,20 +71,18 @@ static void z_erofs_write_indexes_final(struct z_erofs_vle_compress_ctx *ctx)
 	ctx->metacur += sizeof(di);
 }
 
-static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
+static void z_erofs_write_extent(struct z_erofs_vle_compress_ctx *ctx,
+				 struct z_erofs_inmem_extent *e)
 {
 	struct erofs_inode *inode = ctx->inode;
 	struct erofs_sb_info *sbi = inode->sbi;
 	unsigned int clusterofs = ctx->clusterofs;
-	unsigned int count = ctx->e.length;
+	unsigned int count = e->length;
 	unsigned int d0 = 0, d1 = (clusterofs + count) / erofs_blksiz(sbi);
 	struct z_erofs_lcluster_index di;
 	unsigned int type, advise;
 
-	if (!count)
-		return;
-
-	ctx->e.length = 0;	/* mark as written first */
+	DBG_BUGON(!count);
 	di.di_clusterofs = cpu_to_le16(ctx->clusterofs);
 
 	/* whether the tail-end (un)compressed block or not */
@@ -87,18 +91,18 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		 * A lcluster cannot have three parts with the middle one which
 		 * is well-compressed for !ztailpacking cases.
 		 */
-		DBG_BUGON(!ctx->e.raw && !cfg.c_ztailpacking && !cfg.c_fragments);
-		DBG_BUGON(ctx->e.partial);
-		type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+		DBG_BUGON(!e->raw && !cfg.c_ztailpacking && !cfg.c_fragments);
+		DBG_BUGON(e->partial);
+		type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
 			Z_EROFS_LCLUSTER_TYPE_HEAD1;
 		advise = type << Z_EROFS_LI_LCLUSTER_TYPE_BIT;
 		di.di_advise = cpu_to_le16(advise);
 
 		if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-		    !ctx->e.compressedblks)
+		    !e->compressedblks)
 			di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
 		else
-			di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+			di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
 		memcpy(ctx->metacur, &di, sizeof(di));
 		ctx->metacur += sizeof(di);
 
@@ -112,7 +116,7 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 		/* XXX: big pcluster feature should be per-inode */
 		if (d0 == 1 && erofs_sb_has_big_pcluster(sbi)) {
 			type = Z_EROFS_LCLUSTER_TYPE_NONHEAD;
-			di.di_u.delta[0] = cpu_to_le16(ctx->e.compressedblks |
+			di.di_u.delta[0] = cpu_to_le16(e->compressedblks |
 						       Z_EROFS_LI_D0_CBLKCNT);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else if (d0) {
@@ -136,17 +140,17 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 				di.di_u.delta[0] = cpu_to_le16(d0);
 			di.di_u.delta[1] = cpu_to_le16(d1);
 		} else {
-			type = ctx->e.raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
+			type = e->raw ? Z_EROFS_LCLUSTER_TYPE_PLAIN :
 				Z_EROFS_LCLUSTER_TYPE_HEAD1;
 
 			if (inode->datalayout == EROFS_INODE_COMPRESSED_FULL &&
-			    !ctx->e.compressedblks)
+			    !e->compressedblks)
 				di.di_u.blkaddr = cpu_to_le32(inode->fragmentoff >> 32);
 			else
-				di.di_u.blkaddr = cpu_to_le32(ctx->e.blkaddr);
+				di.di_u.blkaddr = cpu_to_le32(e->blkaddr);
 
-			if (ctx->e.partial) {
-				DBG_BUGON(ctx->e.raw);
+			if (e->partial) {
+				DBG_BUGON(e->raw);
 				advise |= Z_EROFS_LI_PARTIAL_REF;
 			}
 		}
@@ -166,6 +170,20 @@ static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
 	ctx->clusterofs = clusterofs + count;
 }
 
+static void z_erofs_write_indexes(struct z_erofs_vle_compress_ctx *ctx)
+{
+	struct z_erofs_extent_item *ei, *n;
+
+	ctx->clusterofs = 0;
+	list_for_each_entry_safe(ei, n, &ctx->extents, list) {
+		z_erofs_write_extent(ctx, &ei->e);
+
+		list_del(&ei->list);
+		free(ei);
+	}
+	z_erofs_write_indexes_final(ctx);
+}
+
 static bool z_erofs_need_refill(struct z_erofs_vle_compress_ctx *ctx)
 {
 	const bool final = !ctx->remaining;
@@ -182,13 +200,25 @@ static bool z_erofs_need_refill(struct z_erofs_vle_compress_ctx *ctx)
 	return true;
 }
 
+static void z_erofs_commit_extent(struct z_erofs_vle_compress_ctx *ctx,
+				  struct z_erofs_extent_item *ei)
+{
+	list_add_tail(&ei->list, &ctx->extents);
+	ctx->clusterofs = (ctx->clusterofs + ei->e.length) &
+			(erofs_blksiz(ctx->inode->sbi) - 1);
+
+}
+
 static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 				   unsigned int *len)
 {
 	struct erofs_inode *inode = ctx->inode;
 	const unsigned int lclustermask = (1 << inode->z_logical_clusterbits) - 1;
 	struct erofs_sb_info *sbi = inode->sbi;
-	int ret = 0;
+	struct z_erofs_extent_item *ei = ctx->pivot;
+
+	if (!ei)
+		return 0;
 
 	/*
 	 * No need dedupe for packed inode since it is composed of
@@ -200,12 +230,12 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 	do {
 		struct z_erofs_dedupe_ctx dctx = {
 			.start = ctx->queue + ctx->head - ({ int rc;
-				if (ctx->e.length <= erofs_blksiz(sbi))
+				if (ei->e.length <= erofs_blksiz(sbi))
 					rc = 0;
-				else if (ctx->e.length - erofs_blksiz(sbi) >= ctx->head)
+				else if (ei->e.length - erofs_blksiz(sbi) >= ctx->head)
 					rc = ctx->head;
 				else
-					rc = ctx->e.length - erofs_blksiz(sbi);
+					rc = ei->e.length - erofs_blksiz(sbi);
 				rc; }),
 			.end = ctx->queue + ctx->head + *len,
 			.cur = ctx->queue + ctx->head,
@@ -222,25 +252,31 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
		 * decompression could be done as another try in practice.
 		 */
 		if (dctx.e.compressedblks > 1 &&
-		    ((ctx->clusterofs + ctx->e.length - delta) & lclustermask) +
+		    ((ctx->clusterofs + ei->e.length - delta) & lclustermask) +
 			dctx.e.length < 2 * (lclustermask + 1))
 			break;
 
+		ctx->pivot = malloc(sizeof(struct z_erofs_extent_item));
+		if (!ctx->pivot) {
+			z_erofs_commit_extent(ctx, ei);
+			return -ENOMEM;
+		}
+
 		if (delta) {
 			DBG_BUGON(delta < 0);
-			DBG_BUGON(!ctx->e.length);
+			DBG_BUGON(!ei->e.length);
 
 			/*
 			 * For big pcluster dedupe, if we decide to shorten the
 			 * previous big pcluster, make sure that the previous
 			 * CBLKCNT is still kept.
 			 */
-			if (ctx->e.compressedblks > 1 &&
-			    (ctx->clusterofs & lclustermask) + ctx->e.length
+			if (ei->e.compressedblks > 1 &&
+			    (ctx->clusterofs & lclustermask) + ei->e.length
 				- delta < 2 * (lclustermask + 1))
 				break;
-			ctx->e.partial = true;
-			ctx->e.length -= delta;
+			ei->e.partial = true;
+			ei->e.length -= delta;
 		}
 
 		/* fall back to noncompact indexes for deduplication */
@@ -253,39 +289,32 @@ static int z_erofs_compress_dedupe(struct z_erofs_vle_compress_ctx *ctx,
 		erofs_dbg("Dedupe %u %scompressed data (delta %d) to %u of %u blocks",
 			  dctx.e.length, dctx.e.raw ? "un" : "",
 			  delta, dctx.e.blkaddr, dctx.e.compressedblks);
-		z_erofs_write_indexes(ctx);
-		ctx->e = dctx.e;
+
+		z_erofs_commit_extent(ctx, ei);
+		ei = ctx->pivot;
+		init_list_head(&ei->list);
+		ei->e = dctx.e;
+
 		ctx->head += dctx.e.length - delta;
 		DBG_BUGON(*len < dctx.e.length - delta);
 		*len -= dctx.e.length - delta;
 
-		if (z_erofs_need_refill(ctx)) {
-			ret = -EAGAIN;
-			break;
-		}
+		if (z_erofs_need_refill(ctx))
+			return 1;
 	} while (*len);
-
 out:
-	z_erofs_write_indexes(ctx);
-	return ret;
+	z_erofs_commit_extent(ctx, ei);
+	ctx->pivot = NULL;
+	return 0;
 }
 
 static int write_uncompressed_extent(struct z_erofs_vle_compress_ctx *ctx,
-				     unsigned int *len, char *dst)
+				     unsigned int len, char *dst)
 {
-	int ret;
 	struct erofs_sb_info *sbi = ctx->inode->sbi;
-	unsigned int count, interlaced_offset, rightpart;
-
-	/* reset clusterofs to 0 if permitted */
-	if (!erofs_sb_has_lz4_0padding(sbi) && ctx->clusterofs &&
-	    ctx->head >= ctx->clusterofs) {
-		ctx->head -= ctx->clusterofs;
-		*len += ctx->clusterofs;
-		ctx->clusterofs = 0;
-	}
-
-	count = min(erofs_blksiz(sbi), *len);
+	unsigned int count = min(erofs_blksiz(sbi), len);
+	unsigned int interlaced_offset, rightpart;
+	int ret;
 
 	/* write interlaced uncompressed data if needed */
 	if (ctx->inode->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER)
@@ -455,7 +484,8 @@ static int __z_erofs_compress_one(struct z_erofs_vle_compress_ctx *ctx,
 			may_inline = false;
 			may_packing = false;
 nocompression:
-			ret = write_uncompressed_extent(ctx, &len, dst);
+			/* TODO: reset clusterofs to 0 if permitted */
+			ret = write_uncompressed_extent(ctx, len, dst);
 		}
 
 		if (ret < 0)
@@ -554,7 +584,6 @@ frag_packing:
 fix_dedupedfrag:
 	DBG_BUGON(!inode->fragment_size);
 	ctx->remaining += inode->fragment_size;
-	e->length = 0;
 	ctx->fix_dedupedfrag = true;
 	return 1;
 }
@@ -562,20 +591,32 @@ fix_dedupedfrag:
 static int z_erofs_compress_one(struct z_erofs_vle_compress_ctx *ctx)
 {
 	unsigned int len = ctx->tail - ctx->head;
-	int ret;
+	struct z_erofs_extent_item *ei;
 
 	while (len) {
-		if (z_erofs_compress_dedupe(ctx, &len))
+		int ret = z_erofs_compress_dedupe(ctx, &len);
+
+		if (ret > 0)
 			break;
+		else if (ret < 0)
+			return ret;
 
-		ret = __z_erofs_compress_one(ctx, &ctx->e);
+		DBG_BUGON(ctx->pivot);
+		ei = malloc(sizeof(*ei));
+		if (!ei)
+			return -ENOMEM;
+
+		init_list_head(&ei->list);
+		ret = __z_erofs_compress_one(ctx, &ei->e);
 		if (ret) {
+			free(ei);
 			if (ret > 0)
 				break;		/* need more data */
 			return ret;
 		}
 
-		len -= ctx->e.length;
+		len -= ei->e.length;
+		ctx->pivot = ei;
 		if (ctx->fix_dedupedfrag && !ctx->fragemitted &&
 		    z_erofs_fixup_deduped_fragment(ctx, len))
 			break;
@@ -939,7 +980,8 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 	ctx.metacur = compressmeta + Z_EROFS_LEGACY_MAP_HEADER_SIZE;
 	ctx.head = ctx.tail = 0;
 	ctx.clusterofs = 0;
-	ctx.e.length = 0;
+	ctx.pivot = NULL;
+	init_list_head(&ctx.extents);
 	ctx.remaining = inode->i_size - inode->fragment_size;
 	ctx.fix_dedupedfrag = false;
 	ctx.fragemitted = false;
@@ -973,19 +1015,34 @@ int erofs_write_compressed_file(struct erofs_inode *inode, int fd)
 	DBG_BUGON(compressed_blocks < !!inode->idata_size);
 	compressed_blocks -= !!inode->idata_size;
 
+	if (ctx.pivot) {
+		z_erofs_commit_extent(&ctx, ctx.pivot);
+		ctx.pivot = NULL;
+	}
+
 	/* generate an extent for the deduplicated fragment */
 	if (inode->fragment_size && !ctx.fragemitted) {
-		z_erofs_write_indexes(&ctx);
-		ctx.e.length = inode->fragment_size;
-		ctx.e.compressedblks = 0;
-		ctx.e.raw = false;
-		ctx.e.partial = false;
-		ctx.e.blkaddr = ctx.blkaddr;
+		struct z_erofs_extent_item *ei;
+
+		ei = malloc(sizeof(*ei));
+		if (!ei) {
+			ret = -ENOMEM;
+			goto err_free_idata;
+		}
+
+		ei->e = (struct z_erofs_inmem_extent) {
+			.length = inode->fragment_size,
+			.compressedblks = 0,
+			.raw = false,
+			.partial = false,
+			.blkaddr = ctx.blkaddr,
+		};
+		init_list_head(&ei->list);
+		z_erofs_commit_extent(&ctx, ei);
 	}
 	z_erofs_fragments_commit(inode);
 
 	z_erofs_write_indexes(&ctx);
-	z_erofs_write_indexes_final(&ctx);
 	legacymetasize = ctx.metacur - compressmeta;
 	/* estimate if data compression saves space or not */
 	if (!inode->fragment_size &&
-- 
2.39.3
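
One detail from z_erofs_commit_extent() above: the logical cluster
offset is advanced with a mask, (clusterofs + length) & (blksz - 1),
which matches a plain modulo because EROFS block sizes are powers of
two. A quick standalone check with hypothetical extent lengths:

#include <assert.h>

int main(void)
{
	const unsigned int blksz = 4096;	/* power of two, as EROFS requires */
	unsigned int lens[] = { 4096, 6000, 2048 };	/* hypothetical extents */
	unsigned int clusterofs = 0, i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		/* mask and modulo agree for power-of-two block sizes */
		assert(((clusterofs + lens[i]) & (blksz - 1)) ==
		       (clusterofs + lens[i]) % blksz);
		clusterofs = (clusterofs + lens[i]) & (blksz - 1);
	}
	assert(clusterofs == 3952);	/* (4096 + 6000 + 2048) % 4096 */
	return 0;
}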



* Re: [PATCH v3 2/3] erofs-utils: lib: split vle_compress_one()
  2023-12-18 14:57     ` [PATCH v3 2/3] erofs-utils: lib: split vle_compress_one() Gao Xiang
@ 2023-12-19  6:00       ` Yue Hu
  0 siblings, 0 replies; 7+ messages in thread
From: Yue Hu @ 2023-12-19  6:00 UTC (permalink / raw
  To: Gao Xiang; +Cc: huyue2, linux-erofs

On Mon, 18 Dec 2023 22:57:09 +0800
Gao Xiang <hsiangkao@linux.alibaba.com> wrote:

> Split compression for each extent into a new helper for later reworking.
> 
> Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>

Reviewed-by: Yue Hu <huyue2@coolpad.com>

