From a6ccf5ae1bf1e6cb3a95cda39aa893be5a851502 Mon Sep 17 00:00:00 2001
From: Stephan Bosch <stephan@rename-it.nl>
Date: Sun, 10 Aug 2008 20:20:54 +0200
Subject: [PATCH] Body: fixed bug in the :raw transform, added much comment to
 the body extraction code and added a first simple test to the testsuite.

---
 Makefile.am                                  |   1 +
 src/lib-sieve/plugins/body/ext-body-common.c | 100 +++++++++++++++----
 src/lib-sieve/plugins/body/tst-body.c        |   9 +-
 tests/extensions/body/basic.svtest           |  38 +++++++
 4 files changed, 126 insertions(+), 22 deletions(-)
 create mode 100644 tests/extensions/body/basic.svtest

diff --git a/Makefile.am b/Makefile.am
index ea0608c65..c9b9a54b1 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -41,6 +41,7 @@ test_cases = \
 	tests/extensions/include/errors.svtest \
 	tests/extensions/imapflags/basic.svtest \
 	tests/extensions/imapflags/hasflag.svtest \
+	tests/extensions/body/basic.svtest \
 	tests/compile/compile.svtest \
 	tests/compile/compile-examples.svtest \
 	tests/compile/errors.svtest
diff --git a/src/lib-sieve/plugins/body/ext-body-common.c b/src/lib-sieve/plugins/body/ext-body-common.c
index afbeb4f36..754c942fc 100644
--- a/src/lib-sieve/plugins/body/ext-body-common.c
+++ b/src/lib-sieve/plugins/body/ext-body-common.c
@@ -26,6 +26,7 @@ struct ext_body_part_cached {
 	const char *decoded_body;
 	size_t raw_body_size;
 	size_t decoded_body_size;
+	
 	bool have_body; /* there's the empty end-of-headers line */
 };
 
@@ -42,8 +43,10 @@ static bool _is_wanted_content_type
 	const char *subtype = strchr(content_type, '/');
 	size_t type_len;
 
-	type_len = subtype == NULL ? strlen(content_type) :
-		(size_t)(subtype - content_type);
+	type_len = ( subtype == NULL ? strlen(content_type) :
+		(size_t)(subtype - content_type) );
+
+	i_assert( wanted_types != NULL );
 
 	for (; *wanted_types != NULL; wanted_types++) {
 		const char *wanted_subtype = strchr(*wanted_types, '/');
@@ -75,21 +78,34 @@ static bool ext_body_get_return_parts
 	unsigned int i, count;
 	struct ext_body_part *return_part;
 
+	/* Check whether any body parts are cached already */
 	body_parts = array_get(&ctx->cached_body_parts, &count);
-	if (count == 0)
+	if ( count == 0 )
 		return FALSE;
 
+	/* Clear result array */
 	array_clear(&ctx->return_body_parts);
+	
+	/* Fill result array with requested content_types */
 	for (i = 0; i < count; i++) {
 		if (!body_parts[i].have_body) {
-			/* doesn't match anything */
+			/* Part has no body; according to RFC this MUST not match to anything and 
+			 * therefore it is not included in the result.
+			 */
 			continue;
 		}
 
+		/* Skip content types that are not requested */
 		if (!_is_wanted_content_type(wanted_types, body_parts[i].content_type))
 			continue;
 
+		/* Add new item to the result */
 		return_part = array_append_space(&ctx->return_body_parts);
+		
+		/* Depending on whether a decoded body part is requested, the appropriate
+		 * cache item is read. If it is missing, this function fails and the cache 
+		 * needs to be completed by ext_body_parts_add_missing().
+		 */
 		if (decode_to_plain) {
 			if (body_parts[i].decoded_body == NULL)
 				return FALSE;
@@ -102,6 +118,7 @@ static bool ext_body_get_return_parts
 			return_part->size = body_parts[i].raw_body_size;
 		}
 	}
+
 	return TRUE;
 }
 
@@ -111,7 +128,12 @@ static void ext_body_part_save
 {
 	buffer_t *buf = ctx->tmp_buffer;
 
+	/* Add terminating NUL to the body part buffer */
 	buffer_append_c(buf, '\0');
+	
+	/* Depending on whether the part is decoded or not store message body in the
+	 * appropriate cache location.
+	 */
 	if ( !decoded ) {
 		body_part->raw_body = p_strdup(ctx->pool, buf->data);
 		body_part->raw_body_size = buf->used - 1;
@@ -120,6 +142,8 @@ static void ext_body_part_save
 		body_part->decoded_body = p_strdup(ctx->pool, buf->data);
 		body_part->decoded_body_size = buf->used - 1;
 	}
+	
+	/* Clear buffer */
 	buffer_set_used_size(buf, 0);
 }
 
@@ -137,6 +161,9 @@ static const char *_parse_content_type(const struct message_header_line *hdr)
 	return str_c(content_type);
 }
 
+/* ext_body_parts_add_missing():
+ *   Add requested message body parts to the cache that are missing. 
+ */
 static bool ext_body_parts_add_missing
 (const struct sieve_message_data *msgdata, struct ext_body_message_context *ctx, 
 	const char * const *content_types, bool decode_to_plain)
@@ -152,36 +179,51 @@ static bool ext_body_parts_add_missing
 	bool save_body = FALSE, have_all;
 	int ret;
 
-	if (ext_body_get_return_parts(ctx, content_types, decode_to_plain))
+	/* First check whether any are missing */
+	if (ext_body_get_return_parts(ctx, content_types, decode_to_plain)) {
+		/* Cache hit; all are present */
 		return TRUE;
+	}
 
-	if (mail_get_stream(msgdata->mail, NULL, NULL, &input) < 0)
+	/* Get the message stream */
+	if ( mail_get_stream(msgdata->mail, NULL, NULL, &input) < 0 )
 		return FALSE;
-	if (mail_get_parts(msgdata->mail, &const_parts) < 0)
+		
+	/* ? */
+	if ( mail_get_parts(msgdata->mail, &const_parts) < 0 )
 		return FALSE;
+		
+	/* FIXME: this looks ugly and scary */
 	parts = (struct message_part *)const_parts;
 
 	buffer_set_used_size(ctx->tmp_buffer, 0);
+	
+	/* Initialize body decoder */
 	decoder = decode_to_plain ? message_decoder_init(FALSE) : NULL;
+	
 	parser = message_parser_init_from_parts(parts, input, 0, 0);
-	while ((ret = message_parser_parse_next_block(parser, &block)) > 0) {
-		if (block.part != prev_part) {
-			if (body_part != NULL && save_body) {
+	while ( (ret = message_parser_parse_next_block(parser, &block)) > 0 ) {
+		if ( block.part != prev_part ) {
+			/* Save previous body part */
+			if ( body_part != NULL && save_body ) {
 				ext_body_part_save(ctx, prev_part, body_part, decoder != NULL);
 			}
+			
+			/* Start processing next */
 			prev_part = block.part;
 			body_part = array_idx_modifiable(&ctx->cached_body_parts, idx);
 			idx++;
 			body_part->content_type = "text/plain";
 		}
-		if (block.hdr != NULL || block.size == 0) {
+		
+		if ( block.hdr != NULL || block.size == 0 ) {
 			/* reading headers */
-			if (decoder != NULL) {
+			if ( decoder != NULL ) {
 				(void)message_decoder_decode_next_block(decoder,
 					&block, &decoded);
 			}
 
-			if (block.hdr == NULL) {
+			if ( block.hdr == NULL ) {
 				/* save bodies only if we have a wanted
 				   content-type */
 				save_body = _is_wanted_content_type
@@ -189,22 +231,30 @@ static bool ext_body_parts_add_missing
 				continue;
 			}
 			
-			if (block.hdr->eoh)
+			/* Encountered the empty line that indicates the end of the headers and 
+			 * the start of the body
+			 */
+			if ( block.hdr->eoh )
 				body_part->have_body = TRUE;
 				
 			/* We're interested of only Content-Type: header */
-			if (strcasecmp(block.hdr->name, "Content-Type") != 0)
+			if ( strcasecmp(block.hdr->name, "Content-Type" ) != 0)
 				continue;
 
-			if (block.hdr->continues) {
+			/* Header can have folding whitespace. Acquire the full value before 
+			 * continuing
+			 */
+			if ( block.hdr->continues ) {
 				block.hdr->use_full_value = TRUE;
 				continue;
 			}
 		
+			/* Parse the content type from the Content-type header */
 			T_BEGIN {
 				body_part->content_type =
 					p_strdup(ctx->pool, _parse_content_type(block.hdr));
 			} T_END;
+			
 			continue;
 		}
 
@@ -222,15 +272,22 @@ static bool ext_body_parts_add_missing
 		}
 	}
 
+	/* Save last body part if necessary */
 	if (body_part != NULL && save_body)
 		ext_body_part_save(ctx, prev_part, body_part, decoder != NULL);
 
+	/* Try to fill the return_body_parts array once more */
 	have_all = ext_body_get_return_parts(ctx, content_types, decode_to_plain);
+	
+	/* This time, failure is a bug */
 	i_assert(have_all);
 
+	/* Cleanup */
 	(void)message_parser_deinit(&parser, &parts);
 	if (decoder != NULL)
 		message_decoder_deinit(&decoder);
+	
+	/* Return status */
 	return ( input->stream_errno == 0 );
 }
 
@@ -240,9 +297,11 @@ static struct ext_body_message_context *ext_body_get_context
 	pool_t pool = sieve_message_context_pool(msgctx);
 	struct ext_body_message_context *ctx;
 	
+	/* Get message context (contains cached message body information) */
 	ctx = (struct ext_body_message_context *)
 		sieve_message_context_extension_get(msgctx, &body_extension);
 	
+	/* Create it if it does not exist already */
 	if ( ctx == NULL ) {
 		ctx = p_new(pool, struct ext_body_message_context, 1);	
 		ctx->pool = pool;
@@ -250,8 +309,8 @@ static struct ext_body_message_context *ext_body_get_context
 		p_array_init(&ctx->return_body_parts, pool, 8);
 		ctx->tmp_buffer = buffer_create_dynamic(pool, 1024*64);
 		
-		sieve_message_context_extension_set
-			(msgctx, &body_extension, (void *) ctx);
+		/* Register context */
+		sieve_message_context_extension_set(msgctx, &body_extension, (void *) ctx);
 	}
 	
 	return ctx;
@@ -265,14 +324,17 @@ bool ext_body_get_content
 	struct ext_body_message_context *ctx = ext_body_get_context(renv->msgctx);
 
 	T_BEGIN {
+		/* Fill the return_body_parts array */
 		if ( !ext_body_parts_add_missing
 			(renv->msgdata, ctx, content_types, decode_to_plain != 0) )
 			result = FALSE;
 	} T_END;
 	
+	/* Check status */
 	if ( !result ) return FALSE;
 
-	(void)array_append_space(&ctx->return_body_parts); /* NULL-terminate */
+	/* Return the array of body items */
+	(void) array_append_space(&ctx->return_body_parts); /* NULL-terminate */
 	*parts_r = array_idx_modifiable(&ctx->return_body_parts, 0);
 
 	return result;
diff --git a/src/lib-sieve/plugins/body/tst-body.c b/src/lib-sieve/plugins/body/tst-body.c
index b848cba7c..b74107da1 100644
--- a/src/lib-sieve/plugins/body/tst-body.c
+++ b/src/lib-sieve/plugins/body/tst-body.c
@@ -300,6 +300,8 @@ static int ext_body_operation_execute
 (const struct sieve_operation *op ATTR_UNUSED,
 	const struct sieve_runtime_env *renv, sieve_size_t *address)
 {
+	static const char * const _no_content_types[] = { "", NULL };
+	
 	int ret = SIEVE_EXEC_OK;
 	int opt_code = 0;
 	int mret;
@@ -308,7 +310,7 @@ static int ext_body_operation_execute
 	enum tst_body_transform transform;
 	struct sieve_coded_stringlist *key_list, *ctype_list = NULL;
 	struct sieve_match_context *mctx;
-	const char * const *content_types = { NULL };
+	const char * const *content_types = _no_content_types;
 	struct ext_body_part *body_parts;
 	bool matched;
 
@@ -335,7 +337,8 @@ static int ext_body_operation_execute
 			if ( transform == TST_BODY_TRANSFORM_CONTENT ) {				
 				if ( (ctype_list=sieve_opr_stringlist_read(renv, address)) 
 					== NULL ) {
-					sieve_runtime_trace_error(renv, "invalid body transform operand");
+					sieve_runtime_trace_error(renv, 
+						"invalid :content body transform operand");
 					return SIEVE_EXEC_BIN_CORRUPT;
 				}
 			}
@@ -360,7 +363,7 @@ static int ext_body_operation_execute
 	}
 
 	if ( !ext_body_get_content
-		(renv, content_types, transform == TST_BODY_TRANSFORM_RAW, &body_parts) ) {
+		(renv, content_types, transform != TST_BODY_TRANSFORM_RAW, &body_parts) ) {
 		return SIEVE_EXEC_FAILURE;
 	}
 
diff --git a/tests/extensions/body/basic.svtest b/tests/extensions/body/basic.svtest
new file mode 100644
index 000000000..786ab5b2a
--- /dev/null
+++ b/tests/extensions/body/basic.svtest
@@ -0,0 +1,38 @@
+require "vnd.dovecot.testsuite";
+
+require "body";
+
+test_set "message" text:
+From: stephan@rename-it.nl
+To: tss@iki.fi
+Subject: Test message.
+
+Test!
+
+.
+;
+
+test "The empty line" {
+
+	if not body :raw :is text:
+Test!
+
+.
+	{
+		test_fail "invalid message body extracted";
+	}
+
+	if body :raw :is text:
+
+Test!
+
+.
+	{
+		test_fail "invalid message body extracted";
+	}
+
+	if body :raw :is "Test"
+	{
+		test_fail "body test matches nonsense";
+	}
+}
-- 
GitLab