federation-api: allow multipart body without preceding CRLF on first boundary
RFC 2046[1] is somewhat ambiguous on whether or not it's valid to omit the preceding CRLF for the first boundary. The prose on page 19 suggests that it is not: > The boundary delimiter MUST occur at the beginning of a line, i.e., > following a CRLF, and the initial CRLF is considered to be attached > to the boundary delimiter line rather than part of the preceding > part. The boundary may be followed by zero or more characters of > linear whitespace. It is then terminated by either another CRLF and > the header fields for the next part, or by two CRLFs, in which case > there are no header fields for the next part. If no Content-Type > field is present it is assumed to be "message/rfc822" in a > "multipart/digest" and "text/plain" otherwise. > > NOTE: The CRLF preceding the boundary delimiter line is conceptually > attached to the boundary so that it is possible to have a part that > does not end with a CRLF (line break). Body parts that must be > considered to end with line breaks, therefore, must have two CRLFs > preceding the boundary delimiter line, the first of which is part of > the preceding body part, and the second of which is part of the > encapsulation boundary. But the BNF on page 22 suggests that it is, as long as there is no preamble: > dash-boundary := "--" boundary > ; boundary taken from the value of > ; boundary parameter of the > ; Content-Type field. > > multipart-body := [preamble CRLF] > dash-boundary transport-padding CRLF > body-part *encapsulation > close-delimiter transport-padding > [CRLF epilogue] Dendrite currently generates multipart responses without a preceding CRLF for the first boundary[2], which were rejected by the previous ruma parsing logic. [1]: https://datatracker.ietf.org/doc/html/rfc2046 [2]: https://github.com/matrix-org/dendrite/issues/3414
This commit is contained in:
parent
92a35381b5
commit
61f5150358
@ -172,12 +172,20 @@ fn try_from_multipart_mixed_response<T: AsRef<[u8]>>(
|
||||
let mut full_boundary = Vec::with_capacity(boundary.len() + 4);
|
||||
full_boundary.extend_from_slice(b"\r\n--");
|
||||
full_boundary.extend_from_slice(boundary);
|
||||
let full_boundary_no_crlf = full_boundary.strip_prefix(b"\r\n").unwrap();
|
||||
|
||||
let mut boundaries = memchr::memmem::find_iter(body, &full_boundary);
|
||||
|
||||
let metadata_start = boundaries.next().ok_or_else(|| {
|
||||
MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 0 }
|
||||
})? + full_boundary.len();
|
||||
let metadata_start = if body.starts_with(full_boundary_no_crlf) {
|
||||
// If there is no preamble before the first boundary, it may omit the
|
||||
// preceding CRLF.
|
||||
full_boundary_no_crlf.len()
|
||||
} else {
|
||||
boundaries.next().ok_or_else(|| MultipartMixedDeserializationError::MissingBodyParts {
|
||||
expected: 2,
|
||||
found: 0,
|
||||
})? + full_boundary.len()
|
||||
};
|
||||
let metadata_end = boundaries.next().ok_or_else(|| {
|
||||
MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 0 }
|
||||
})?;
|
||||
@ -411,6 +419,15 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
try_from_multipart_mixed_response(response).unwrap_err();
|
||||
|
||||
// Boundary without CRLF with preamble.
|
||||
let body = "foo--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
|
||||
let response = http::Response::builder()
|
||||
.header(http::header::CONTENT_TYPE, "multipart/mixed; boundary=abcdef")
|
||||
.body(body)
|
||||
.unwrap();
|
||||
|
||||
try_from_multipart_mixed_response(response).unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -477,6 +494,36 @@ mod tests {
|
||||
assert_eq!(file_content.content_type.unwrap(), "text/plain");
|
||||
assert_eq!(file_content.content_disposition, None);
|
||||
|
||||
// No leading CRLF (and no preamble)
|
||||
let body = "--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
|
||||
let response = http::Response::builder()
|
||||
.header(http::header::CONTENT_TYPE, "multipart/mixed; boundary=abcdef")
|
||||
.body(body)
|
||||
.unwrap();
|
||||
|
||||
let (_metadata, content) = try_from_multipart_mixed_response(response).unwrap();
|
||||
|
||||
assert_matches!(content, FileOrLocation::File(file_content));
|
||||
assert_eq!(file_content.file, b"some plain text");
|
||||
assert_eq!(file_content.content_type, None);
|
||||
assert_eq!(file_content.content_disposition, None);
|
||||
|
||||
// Boundary text in preamble, but no leading CRLF, so it should be
|
||||
// ignored.
|
||||
let body =
|
||||
"foo--abcdef\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
|
||||
let response = http::Response::builder()
|
||||
.header(http::header::CONTENT_TYPE, "multipart/mixed; boundary=abcdef")
|
||||
.body(body)
|
||||
.unwrap();
|
||||
|
||||
let (_metadata, content) = try_from_multipart_mixed_response(response).unwrap();
|
||||
|
||||
assert_matches!(content, FileOrLocation::File(file_content));
|
||||
assert_eq!(file_content.file, b"some plain text");
|
||||
assert_eq!(file_content.content_type, None);
|
||||
assert_eq!(file_content.content_disposition, None);
|
||||
|
||||
// No body part headers.
|
||||
let body = "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--";
|
||||
let response = http::Response::builder()
|
||||
|
Loading…
x
Reference in New Issue
Block a user