From b9339d5b4b3cb79c0597cc862936b93315167b16 Mon Sep 17 00:00:00 2001 From: Stephan Bosch <stephan@rename-it.nl> Date: Sun, 29 Nov 2015 11:48:21 +0100 Subject: [PATCH] lib-sieve: Reworked message body parsing to store the message parts in a tree structure. --- src/lib-sieve/plugins/body/ext-body-common.c | 6 +- src/lib-sieve/sieve-message.c | 190 ++++++++++--------- src/lib-sieve/sieve-message.h | 8 +- tests/extensions/body/content.svtest | 5 +- 4 files changed, 107 insertions(+), 102 deletions(-) diff --git a/src/lib-sieve/plugins/body/ext-body-common.c b/src/lib-sieve/plugins/body/ext-body-common.c index 7b8062d58..3d74c9ff3 100644 --- a/src/lib-sieve/plugins/body/ext-body-common.c +++ b/src/lib-sieve/plugins/body/ext-body-common.c @@ -30,8 +30,8 @@ static void ext_body_stringlist_reset struct ext_body_stringlist { struct sieve_stringlist strlist; - struct sieve_message_body_part *body_parts; - struct sieve_message_body_part *body_parts_iter; + struct sieve_message_part_data *body_parts; + struct sieve_message_part_data *body_parts_iter; }; int ext_body_get_part_list @@ -40,7 +40,7 @@ int ext_body_get_part_list { static const char * const _no_content_types[] = { "", NULL }; struct ext_body_stringlist *strlist; - struct sieve_message_body_part *body_parts = NULL; + struct sieve_message_part_data *body_parts = NULL; int ret; *strlist_r = NULL; diff --git a/src/lib-sieve/sieve-message.c b/src/lib-sieve/sieve-message.c index df7ebc06e..57ffd36ff 100644 --- a/src/lib-sieve/sieve-message.c +++ b/src/lib-sieve/sieve-message.c @@ -59,7 +59,9 @@ struct sieve_message_version { struct edit_mail *edit_mail; }; -struct sieve_message_body_part_cached { +struct sieve_message_part { + struct sieve_message_part *parent, *next, *children; + const char *content_type; const char *content_disposition; @@ -68,7 +70,8 @@ struct sieve_message_body_part_cached { size_t decoded_body_size; size_t text_body_size; - bool have_body; /* there's the empty end-of-headers line */ + unsigned int have_body:1; /* there's the empty end-of-headers line */ + unsigned int epilogue:1; /* this is a multipart epilogue */ }; struct sieve_message_context { @@ -101,8 +104,8 @@ struct sieve_message_context { /* Body */ - ARRAY(struct sieve_message_body_part_cached) cached_body_parts; - ARRAY(struct sieve_message_body_part) return_body_parts; + ARRAY(struct sieve_message_part *) cached_body_parts; + ARRAY(struct sieve_message_part_data) return_body_parts; buffer_t *raw_body; unsigned int edit_snapshot:1; @@ -842,33 +845,15 @@ static bool _is_wanted_content_type return FALSE; } -static bool _want_multipart_content_type -(const char * const *wanted_types) -{ - for (; *wanted_types != NULL; wanted_types++) { - if (**wanted_types == '\0') { - /* empty string matches everything */ - return TRUE; - } - - /* match only main type */ - if ( strncasecmp(*wanted_types, "multipart", 9) == 0 && - ( strlen(*wanted_types) == 9 || *(*wanted_types+9) == '/' ) ) - return TRUE; - } - - return FALSE; -} - static bool sieve_message_body_get_return_parts (const struct sieve_runtime_env *renv, const char * const *wanted_types, bool extract_text) { struct sieve_message_context *msgctx = renv->msgctx; - const struct sieve_message_body_part_cached *body_parts; + struct sieve_message_part *const *body_parts; unsigned int i, count; - struct sieve_message_body_part *return_part; + struct sieve_message_part_data *return_part; /* Check whether any body parts are cached already */ body_parts = array_get(&msgctx->cached_body_parts, &count); @@ -880,7 +865,7 @@ static bool sieve_message_body_get_return_parts /* Fill result array with requested content_types */ for (i = 0; i < count; i++) { - if (!body_parts[i].have_body) { + if (!body_parts[i]->have_body) { /* Part has no body; according to RFC this MUST not match to anything and * therefore it is not included in the result. */ @@ -888,37 +873,38 @@ static bool sieve_message_body_get_return_parts } /* Skip content types that are not requested */ - if (!_is_wanted_content_type(wanted_types, body_parts[i].content_type)) + if (!_is_wanted_content_type + (wanted_types, body_parts[i]->content_type)) continue; /* Add new item to the result */ return_part = array_append_space(&msgctx->return_body_parts); - return_part->content_type = body_parts[i].content_type; - return_part->content_disposition = body_parts[i].content_disposition; + return_part->content_type = body_parts[i]->content_type; + return_part->content_disposition = body_parts[i]->content_disposition; /* Depending on whether a decoded body part is requested, the appropriate * cache item is read. If it is missing, this function fails and the cache - * needs to be completed by sieve_message_body_parts_add_missing(). + * needs to be completed by sieve_message_parts_add_missing(). */ if (extract_text) { - if (body_parts[i].text_body == NULL) + if (body_parts[i]->text_body == NULL) return FALSE; - return_part->content = body_parts[i].text_body; - return_part->size = body_parts[i].text_body_size; + return_part->content = body_parts[i]->text_body; + return_part->size = body_parts[i]->text_body_size; } else { - if (body_parts[i].decoded_body == NULL) + if (body_parts[i]->decoded_body == NULL) return FALSE; - return_part->content = body_parts[i].decoded_body; - return_part->size = body_parts[i].decoded_body_size; + return_part->content = body_parts[i]->decoded_body; + return_part->size = body_parts[i]->decoded_body_size; } } return TRUE; } -static void sieve_message_body_part_save +static void sieve_message_part_save (const struct sieve_runtime_env *renv, buffer_t *buf, - struct sieve_message_body_part_cached *body_part, + struct sieve_message_part *body_part, bool extract_text) { struct sieve_message_context *msgctx = renv->msgctx; @@ -1017,26 +1003,28 @@ _parse_content_disposition(const struct message_header_line *hdr) return str_c(content_disp); } -/* sieve_message_body_parts_add_missing(): +/* sieve_message_parts_add_missing(): * Add requested message body parts to the cache that are missing. */ -static int sieve_message_body_parts_add_missing +static int sieve_message_parts_add_missing (const struct sieve_runtime_env *renv, - const char *const *content_types, bool extract_text) + const char *const *content_types, + bool extract_text) { struct sieve_message_context *msgctx = renv->msgctx; pool_t pool = msgctx->context_pool; struct mail *mail = sieve_message_get_mail(renv->msgctx); - struct sieve_message_body_part_cached *body_part = NULL, *header_part = NULL; + enum message_parser_flags mparser_flags = + MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS; + struct sieve_message_part *body_part, *header_part, *last_part; struct message_parser_ctx *parser; struct message_decoder_context *decoder; struct message_block block, decoded; - struct message_part *parts, *prev_part = NULL; - ARRAY(struct message_part *) part_index; + struct message_part *mparts, *prev_mpart = NULL; buffer_t *buf; struct istream *input; unsigned int idx = 0; - bool save_body = FALSE, want_multipart, have_all; + bool save_body = FALSE, have_all; int ret; /* First check whether any are missing */ @@ -1051,71 +1039,91 @@ static int sieve_message_body_parts_add_missing return sieve_runtime_mail_error(renv, mail, "failed to open input message"); } - if (mail_get_parts(mail, &parts) < 0) { + if (mail_get_parts(mail, &mparts) < 0) { return sieve_runtime_mail_error(renv, mail, "failed to parse input message parts"); } - if ( (want_multipart=_want_multipart_content_type(content_types)) ) { - t_array_init(&part_index, 8); - } - buf = buffer_create_dynamic(default_pool, 4096); + body_part = header_part = last_part = NULL; /* Initialize body decoder */ decoder = message_decoder_init(NULL, 0); - //parser = message_parser_init_from_parts(parts, input, 0, - //MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS); - parser = message_parser_init(pool_datastack_create(), input, 0, - MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS); + // FIXME: currently not tested with edit-mail. + //parser = message_parser_init_from_parts(parts, input, 0, + //mparser_flags); + parser = message_parser_init + (pool_datastack_create(), input, 0, mparser_flags); while ( (ret=message_parser_parse_next_block (parser, &block)) > 0 ) { + struct sieve_message_part **body_part_idx; - if ( block.part != prev_part ) { + if ( block.part != prev_mpart ) { bool message_rfc822 = FALSE; /* Save previous body part */ if ( body_part != NULL ) { /* Treat message/rfc822 separately; headers become content */ - if ( block.part->parent == prev_part && + if ( block.part->parent == prev_mpart && strcmp(body_part->content_type, "message/rfc822") == 0 ) { message_rfc822 = TRUE; } else { if ( save_body ) { - sieve_message_body_part_save + sieve_message_part_save (renv, buf, body_part, extract_text); } } } - /* Start processing next */ - body_part = array_idx_modifiable + /* Start processing next part */ + body_part_idx = array_idx_modifiable (&msgctx->cached_body_parts, idx); + if (*body_part_idx == NULL) + *body_part_idx = p_new(pool, struct sieve_message_part, 1); + body_part = *body_part_idx; body_part->content_type = "text/plain"; - /* Check whether this is the epilogue block of a wanted multipart part */ - if ( want_multipart ) { - array_idx_set(&part_index, idx, &block.part); - - if ( prev_part != NULL && prev_part->next != block.part && - block.part->parent != prev_part ) { - struct message_part *const *iparts; - unsigned int count, i; - - iparts = array_get(&part_index, &count); - for ( i = 0; i < count; i++ ) { - if ( iparts[i] == block.part ) { - const struct sieve_message_body_part_cached *parent = - array_idx(&msgctx->cached_body_parts, i); - body_part->content_type = parent->content_type; - body_part->have_body = TRUE; - save_body = _is_wanted_content_type - (content_types, body_part->content_type); - break; - } + /* Copy tree structure */ + if ( block.part->context != NULL ) { + struct sieve_message_part *epipart = + (struct sieve_message_part *)block.part->context; + i_assert(epipart != NULL); + + /* multipart epilogue */ + body_part->content_type = epipart->content_type; + body_part->have_body = TRUE; + body_part->epilogue = TRUE; + save_body = _is_wanted_content_type + (content_types, body_part->content_type); + + } else { + struct sieve_message_part *parent = NULL; + + if ( block.part->parent != NULL ) { + body_part->parent = parent = + (struct sieve_message_part *) + block.part->parent->context; + } + + /* new part */ + block.part->context = (void*)body_part; + + if ( last_part != NULL ) { + i_assert( parent != NULL ); + if ( last_part->parent == parent ) { + last_part->next = body_part; + } else if (parent->children == NULL) { + parent->children = body_part; + } else { + struct sieve_message_part *child = parent->children; + while (child->next != NULL && child != body_part) + child = child->next; + if (child->next == NULL) + child->next = body_part; } } + last_part = body_part; } /* If this is message/rfc822 content, retain the enveloping part for @@ -1123,13 +1131,13 @@ static int sieve_message_body_parts_add_missing */ if ( message_rfc822 ) { i_assert(idx > 0); - header_part = array_idx_modifiable + header_part = *array_idx (&msgctx->cached_body_parts, idx-1); } else { header_part = NULL; } - prev_part = block.part; + prev_mpart = block.part; idx++; } @@ -1146,7 +1154,7 @@ static int sieve_message_body_parts_add_missing if ( block.hdr == NULL ) { /* Save headers for message/rfc822 part */ if ( header_part != NULL ) { - sieve_message_body_part_save + sieve_message_part_save (renv, buf, header_part, extract_text); header_part = NULL; } @@ -1218,10 +1226,10 @@ static int sieve_message_body_parts_add_missing /* Save last body part if necessary */ if ( header_part != NULL ) { - sieve_message_body_part_save + sieve_message_part_save (renv, buf, header_part, FALSE); } else if ( body_part != NULL && save_body ) { - sieve_message_body_part_save + sieve_message_part_save (renv, buf, body_part, extract_text); } @@ -1233,7 +1241,7 @@ static int sieve_message_body_parts_add_missing i_assert(have_all); /* Cleanup */ - (void)message_parser_deinit(&parser, &parts); + (void)message_parser_deinit(&parser, &mparts); message_decoder_deinit(&decoder); buffer_free(&buf); @@ -1251,14 +1259,14 @@ static int sieve_message_body_parts_add_missing int sieve_message_body_get_content (const struct sieve_runtime_env *renv, const char * const *content_types, - struct sieve_message_body_part **parts_r) + struct sieve_message_part_data **parts_r) { struct sieve_message_context *msgctx = renv->msgctx; int status; T_BEGIN { /* Fill the return_body_parts array */ - status = sieve_message_body_parts_add_missing + status = sieve_message_parts_add_missing (renv, content_types, FALSE); } T_END; @@ -1275,7 +1283,7 @@ int sieve_message_body_get_content int sieve_message_body_get_text (const struct sieve_runtime_env *renv, - struct sieve_message_body_part **parts_r) + struct sieve_message_part_data **parts_r) { static const char * const _text_content_types[] = { "application/xhtml+xml", "text", NULL }; @@ -1292,7 +1300,7 @@ int sieve_message_body_get_text T_BEGIN { /* Fill the return_body_parts array */ - status = sieve_message_body_parts_add_missing + status = sieve_message_parts_add_missing (renv, _text_content_types, TRUE); } T_END; @@ -1309,10 +1317,10 @@ int sieve_message_body_get_text int sieve_message_body_get_raw (const struct sieve_runtime_env *renv, - struct sieve_message_body_part **parts_r) + struct sieve_message_part_data **parts_r) { struct sieve_message_context *msgctx = renv->msgctx; - struct sieve_message_body_part *return_part; + struct sieve_message_part_data *return_part; buffer_t *buf; if ( msgctx->raw_body == NULL ) { @@ -1373,5 +1381,3 @@ int sieve_message_body_get_raw return SIEVE_EXEC_OK; } - - diff --git a/src/lib-sieve/sieve-message.h b/src/lib-sieve/sieve-message.h index 54c05e89a..511b53b2e 100644 --- a/src/lib-sieve/sieve-message.h +++ b/src/lib-sieve/sieve-message.h @@ -172,7 +172,7 @@ int sieve_message_get_header_fields * Message body */ -struct sieve_message_body_part { +struct sieve_message_part_data { const char *content_type; const char *content_disposition; @@ -183,13 +183,13 @@ struct sieve_message_body_part { int sieve_message_body_get_content (const struct sieve_runtime_env *renv, const char * const *content_types, - struct sieve_message_body_part **parts_r); + struct sieve_message_part_data **parts_r); int sieve_message_body_get_text (const struct sieve_runtime_env *renv, - struct sieve_message_body_part **parts_r); + struct sieve_message_part_data **parts_r); int sieve_message_body_get_raw (const struct sieve_runtime_env *renv, - struct sieve_message_body_part **parts_r); + struct sieve_message_part_data **parts_r); #endif /* __SIEVE_MESSAGE_H */ diff --git a/tests/extensions/body/content.svtest b/tests/extensions/body/content.svtest index b82da9341..2eb383775 100644 --- a/tests/extensions/body/content.svtest +++ b/tests/extensions/body/content.svtest @@ -283,8 +283,6 @@ test "Multipart Content" { test_fail "missed second multipart body part"; } -/* FIXME: FAILS - if not body :content "multipart" :contains "This is the end of the inner MIME multipart" { test_fail "missed third multipart body part"; @@ -302,7 +300,6 @@ test "Multipart Content" { if body :content "multipart" :contains "--outer" { test_fail "outer boundary is part of match"; } -*/ } /* RFC5173, Section 5.2: @@ -331,3 +328,5 @@ Subject: Hello, this is an elaborate request for you to finally say hello } + + -- GitLab