3RNN/Lib/site-packages/werkzeug/sansio/multipart.py

from __future__ import annotations

import re
import typing as t
from dataclasses import dataclass
from enum import auto
from enum import Enum

from ..datastructures import Headers
from ..exceptions import RequestEntityTooLarge
from ..http import parse_options_header


class Event:
    """Common base class of all events produced by the decoder and
    accepted by the encoder in this module.
    """

    pass


@dataclass(frozen=True)
class Preamble(Event):
    """Bytes found before the first boundary of a multipart message."""

    # Raw bytes preceding the first boundary match; empty when the
    # message starts directly with a boundary.
    data: bytes


@dataclass(frozen=True)
class Field(Event):
    """Start of a part whose Content-Disposition has no ``filename``
    option. The part's body follows as :class:`Data` events.
    """

    # Value of the ``name`` option of the Content-Disposition header.
    name: str
    # All headers parsed from the part's header block.
    headers: Headers


@dataclass(frozen=True)
class File(Event):
    """Start of a part whose Content-Disposition carries a ``filename``
    option. The part's body follows as :class:`Data` events.
    """

    # Value of the ``name`` option of the Content-Disposition header.
    name: str
    # Value of the ``filename`` option of the Content-Disposition header.
    filename: str
    # All headers parsed from the part's header block.
    headers: Headers


@dataclass(frozen=True)
class Data(Event):
    """A chunk of body bytes belonging to the most recently started part."""

    # Body bytes of this chunk; may be empty.
    data: bytes
    # True while the boundary terminating the part has not been seen
    # yet, False on the chunk that ends the part.
    more_data: bool


@dataclass(frozen=True)
class Epilogue(Event):
    """Bytes that follow the closing (``--boundary--``) delimiter."""

    # Everything left after the closing boundary; may be empty.
    data: bytes


class NeedData(Event):
    """Event type signalling that no other event could be produced from
    the bytes buffered so far.
    """

    pass


# Shared instance used as the default result of
# ``MultipartDecoder.next_event``.
NEED_DATA = NeedData()


class State(Enum):
    """Position within a multipart message, shared by
    ``MultipartDecoder`` and ``MultipartEncoder``.
    """

    # Nothing but (possibly empty) preamble has been handled so far.
    PREAMBLE = auto()
    # A boundary has been passed; the decoder expects a header block next.
    PART = auto()
    # In the middle of a part's body, after its first chunk.
    DATA = auto()
    # A part's headers have been handled; its first body chunk is next.
    DATA_START = auto()
    # The closing boundary has been seen by the decoder.
    EPILOGUE = auto()
    # The epilogue has been emitted; the message is finished.
    COMPLETE = auto()


# Multipart line breaks MUST be CRLF (\r\n) by RFC-7578, except that
# many implementations break this and either use CR or LF alone.
LINE_BREAK = b"(?:\r\n|\n|\r)"
# Two identical line breaks in a row, i.e. the blank line that ends a
# part's header block.
BLANK_LINE_RE = re.compile(b"(?:\r\n\r\n|\r\r|\n\n)", re.MULTILINE)
# A single line break in any of the accepted forms.
LINE_BREAK_RE = re.compile(LINE_BREAK, re.MULTILINE)
# Header values can be continued via a space or tab after the linebreak, as
# per RFC2231
HEADER_CONTINUATION_RE = re.compile(b"%s[ \t]" % LINE_BREAK, re.MULTILINE)
# This must be long enough to contain any line breaks plus any
# additional boundary markers (--) such that they will be found in a
# subsequent search
SEARCH_EXTRA_LENGTH = 8


class MultipartDecoder:
    """Decodes a multipart message as bytes into Python events.

    The part data is returned as available to allow the caller to save
    the data from memory to disk, if desired.
    """

    def __init__(
        self,
        boundary: bytes,
        max_form_memory_size: int | None = None,
        *,
        max_parts: int | None = None,
    ) -> None:
        """Create a decoder with an empty buffer, positioned at the preamble.

        :param boundary: The boundary token, without the leading ``--``.
        :param max_form_memory_size: Largest number of bytes the internal
            buffer may hold; exceeding it in :meth:`receive_data` raises
            ``RequestEntityTooLarge``. ``None`` disables the check.
        :param max_parts: Largest number of parts that may be decoded
            before :meth:`next_event` raises ``RequestEntityTooLarge``.
            ``None`` disables the check.
        """
        # Caller supplied configuration.
        self.boundary = boundary
        self.max_form_memory_size = max_form_memory_size
        self.max_parts = max_parts

        # Mutable decoding state.
        self.buffer = bytearray()
        self.complete = False
        self.state = State.PREAMBLE
        self._search_position = 0
        self._parts_decoded = 0

        # Both delimiter patterns are filled in from the same pieces.
        # ``[^\S\n\r]`` stands in for horizontal whitespace (``\h``),
        # which Python's ``re`` does not support. The single capture
        # group starts with ``--`` only for the closing delimiter, which
        # is how callers tell it apart from a regular one.
        pieces = (LINE_BREAK, re.escape(boundary), LINE_BREAK, LINE_BREAK)

        # RFC 2046 asks for a line break in front of the first boundary
        # too, but many producers (Werkzeug's own tests included) leave
        # it out, so it is optional here. The first boundary may already
        # be the closing one when the form is empty.
        self.preamble_re = re.compile(
            rb"%s?--%s(--[^\S\n\r]*%s?|[^\S\n\r]*%s)" % pieces,
            re.MULTILINE,
        )
        # Every later boundary needs the leading line break, may carry
        # trailing horizontal whitespace, and ends with a line break
        # (optional after the closing delimiter).
        self.boundary_re = re.compile(
            rb"%s--%s(--[^\S\n\r]*%s?|[^\S\n\r]*%s)" % pieces,
            re.MULTILINE,
        )

    def last_newline(self, data: bytes) -> int:
        """Return the smaller of the last ``\\n`` index and the last
        ``\\r`` index in ``data``.

        A character that does not occur counts as ``len(data)``, so the
        result is ``len(data)`` when ``data`` has no line break at all.
        """
        size = len(data)
        candidates = []
        for marker in (b"\n", b"\r"):
            found = data.rfind(marker)
            candidates.append(size if found == -1 else found)
        return min(candidates)

    def receive_data(self, data: bytes | None) -> None:
        if data is None:
            self.complete = True
        elif (
            self.max_form_memory_size is not None
            and len(self.buffer) + len(data) > self.max_form_memory_size
        ):
            raise RequestEntityTooLarge()
        else:
            self.buffer.extend(data)

    def next_event(self) -> Event:
        """Try to decode one event from the bytes buffered so far.

        Consumed bytes are removed from the buffer and ``self.state`` is
        advanced as delimiters and header blocks are recognised.

        :return: The decoded event, or ``NEED_DATA`` when the buffer does
            not yet allow progress in the current state.
        :raises ValueError: If a part has no Content-Disposition header,
            or if the end of input has been signalled and no event could
            be produced (this includes calls made after the epilogue was
            returned).
        :raises RequestEntityTooLarge: If more than ``max_parts`` parts
            have been decoded.
        """
        event: Event = NEED_DATA

        if self.state == State.PREAMBLE:
            match = self.preamble_re.search(self.buffer, self._search_position)
            if match is not None:
                # Group 1 starts with "--" only for the closing delimiter.
                if match.group(1).startswith(b"--"):
                    self.state = State.EPILOGUE
                else:
                    self.state = State.PART
                # Everything before the delimiter is the preamble; the
                # delimiter itself is dropped together with it.
                data = bytes(self.buffer[: match.start()])
                del self.buffer[: match.end()]
                event = Preamble(data=data)
                self._search_position = 0
            else:
                # Update the search start position to be equal to the
                # current buffer length (already searched) minus a
                # safe buffer for part of the search target.
                self._search_position = max(
                    0, len(self.buffer) - len(self.boundary) - SEARCH_EXTRA_LENGTH
                )

        elif self.state == State.PART:
            match = BLANK_LINE_RE.search(self.buffer, self._search_position)
            if match is not None:
                headers = self._parse_headers(self.buffer[: match.start()])
                # The final header ends with a single CRLF, however a
                # blank line indicates the start of the
                # body. Therefore the end is after the first CRLF.
                # The match is two identical line breaks, so its midpoint
                # leaves the second one in the buffer for
                # ``_parse_data(start=True)`` to skip.
                headers_end = (match.start() + match.end()) // 2
                del self.buffer[:headers_end]

                # Note: the header bytes are already gone from the buffer
                # when this is raised.
                if "content-disposition" not in headers:
                    raise ValueError("Missing Content-Disposition header")

                disposition, extra = parse_options_header(
                    headers["content-disposition"]
                )
                # ``extra.get`` yields None when the option is absent; the
                # cast only affects static typing.
                name = t.cast(str, extra.get("name"))
                filename = extra.get("filename")
                # A ``filename`` option is what distinguishes a file
                # upload from a plain field.
                if filename is not None:
                    event = File(
                        filename=filename,
                        headers=headers,
                        name=name,
                    )
                else:
                    event = Field(
                        headers=headers,
                        name=name,
                    )
                self.state = State.DATA_START
                self._search_position = 0
                self._parts_decoded += 1

                # The limit is checked after counting, so the part that
                # exceeds it is never returned.
                if self.max_parts is not None and self._parts_decoded > self.max_parts:
                    raise RequestEntityTooLarge()
            else:
                # Update the search start position to be equal to the
                # current buffer length (already searched) minus a
                # safe buffer for part of the search target.
                self._search_position = max(0, len(self.buffer) - SEARCH_EXTRA_LENGTH)

        elif self.state == State.DATA_START:
            # The first chunk of a part is always reported, even if empty.
            # ``_parse_data`` itself moves the state to PART or EPILOGUE
            # when it finds the terminating boundary.
            data, del_index, more_data = self._parse_data(self.buffer, start=True)
            del self.buffer[:del_index]
            event = Data(data=data, more_data=more_data)
            if more_data:
                self.state = State.DATA

        elif self.state == State.DATA:
            data, del_index, more_data = self._parse_data(self.buffer, start=False)
            del self.buffer[:del_index]
            # Skip empty intermediate chunks, but always report the chunk
            # that ends the part.
            if data or not more_data:
                event = Data(data=data, more_data=more_data)

        elif self.state == State.EPILOGUE and self.complete:
            # The epilogue is only emitted once the input has ended, as
            # all remaining bytes belong to it.
            event = Epilogue(data=bytes(self.buffer))
            del self.buffer[:]
            self.state = State.COMPLETE

        # No more input will arrive, so being unable to make progress
        # means the message is malformed or truncated.
        if self.complete and isinstance(event, NeedData):
            raise ValueError(f"Invalid form-data cannot parse beyond {self.state}")

        return event

    def _parse_headers(self, data: bytes) -> Headers:
        """Turn the raw header block of a part into a ``Headers`` object.

        Continuation lines (a line break followed by a space or tab) are
        joined to the preceding line with a single space. Each remaining
        non-blank line is decoded and split at the first ``:`` into a
        name and a value, both stripped of surrounding whitespace.

        :param data: The bytes of the header block.
        :return: The headers in the order they appear in ``data``.
        """
        unfolded = HEADER_CONTINUATION_RE.sub(b" ", data)
        pairs: list[tuple[str, str]] = []

        for raw_line in unfolded.splitlines():
            stripped = raw_line.strip()

            if not stripped:
                # Blank lines carry no header.
                continue

            key, _, val = stripped.decode().partition(":")
            pairs.append((key.strip(), val.strip()))

        return Headers(pairs)

    def _parse_data(self, data: bytes, *, start: bool) -> tuple[bytes, int, bool]:
        """Extract the next chunk of body bytes of the current part.

        Sets ``self.state`` to ``State.PART`` or ``State.EPILOGUE`` when
        the boundary terminating the part is found; the buffer itself is
        not modified here.

        :param data: The bytes to inspect; ``next_event`` passes
            ``self.buffer``.
        :param start: ``True`` for the first chunk of a part, in which
            case the line break at the start of ``data`` is skipped.
        :return: A tuple ``(chunk, del_index, more_data)``: the body
            bytes, the number of leading bytes of ``data`` the caller
            should discard, and whether the part continues (``False``
            once its terminating boundary was matched).
        """
        # Body parts must start with CRLF (or CR or LF)
        if start:
            # The cast only silences the type checker: a missing line
            # break would make ``match`` None and ``.end()`` fail.
            match = LINE_BREAK_RE.match(data)
            data_start = t.cast(t.Match[bytes], match).end()
        else:
            data_start = 0

        boundary = b"--" + self.boundary

        # NOTE: this looks at ``self.buffer`` rather than ``data``; in
        # the calls visible in this class both are the same object.
        if self.buffer.find(boundary) == -1:
            # No complete boundary in the buffer, but there may be
            # a partial boundary at the end. As the boundary
            # starts with either a nl or cr find the earliest and
            # return up to that as data.
            data_end = del_index = self.last_newline(data[data_start:]) + data_start
            # If amount of data after last newline is far from
            # possible length of partial boundary, we should
            # assume that there is no partial boundary in the buffer
            # and return all pending data.
            if (len(data) - data_end) > len(b"\n" + boundary):
                data_end = del_index = len(data)
            more_data = True
        else:
            # The boundary text is present, but it only counts as a
            # delimiter if the full pattern (leading line break, optional
            # trailing whitespace, line break) matches.
            match = self.boundary_re.search(data)
            if match is not None:
                # Group 1 starts with "--" only for the closing delimiter.
                if match.group(1).startswith(b"--"):
                    self.state = State.EPILOGUE
                else:
                    self.state = State.PART
                # The chunk stops where the delimiter starts; the
                # delimiter itself is discarded along with the chunk.
                data_end = match.start()
                del_index = match.end()
            else:
                # Not (yet) a complete delimiter: hold back everything
                # from the line break found by ``last_newline`` onwards.
                data_end = del_index = self.last_newline(data[data_start:]) + data_start
            more_data = match is None

        return bytes(data[data_start:data_end]), del_index, more_data


class MultipartEncoder:
    """Encodes multipart events into the bytes of a multipart message.

    Events are expected in message order: an optional :class:`Preamble`,
    then for every part a :class:`Field` or :class:`File` followed by its
    :class:`Data` chunks, and finally an :class:`Epilogue`.
    """

    def __init__(self, boundary: bytes) -> None:
        """
        :param boundary: The boundary token, without the leading ``--``.
        """
        self.boundary = boundary
        self.state = State.PREAMBLE
        # True from the moment a part's headers were written until the
        # blank line separating them from the body has been written.
        self._separator_pending = False

    def send_event(self, event: Event) -> bytes:
        """Return the bytes representing ``event`` and advance the state.

        The blank line between a part's headers and its body is written
        together with the first non-empty :class:`Data` chunk of the
        part, even when empty chunks were sent before it. A part whose
        chunks are all empty gets no extra line break; the line break in
        front of the next delimiter then serves as the blank line.

        :param event: The event to encode.
        :return: The encoded bytes; may be empty.
        :raises ValueError: If ``event`` is not valid in the current
            state. An :class:`Epilogue` is accepted in every state.
        """
        if isinstance(event, Preamble) and self.state == State.PREAMBLE:
            self.state = State.PART
            return event.data
        elif isinstance(event, (Field, File)) and self.state in {
            State.PREAMBLE,
            State.PART,
            State.DATA,
        }:
            data = b"\r\n--" + self.boundary + b"\r\n"
            data += b'Content-Disposition: form-data; name="%s"' % event.name.encode()
            if isinstance(event, File):
                data += b'; filename="%s"' % event.filename.encode()
            data += b"\r\n"
            # Content-Disposition was generated above; copy the rest.
            for name, value in t.cast(Field, event).headers:
                if name.lower() != "content-disposition":
                    data += f"{name}: {value}\r\n".encode()
            self.state = State.DATA_START
            self._separator_pending = True
            return data
        elif isinstance(event, Data) and self.state in {
            State.DATA_START,
            State.DATA,
        }:
            self.state = State.DATA
            # Previously the separator was only written when the very
            # first chunk was non-empty, so an empty first chunk followed
            # by a non-empty one glued the body onto the header block.
            if self._separator_pending and len(event.data) > 0:
                self._separator_pending = False
                return b"\r\n" + event.data
            return event.data
        elif isinstance(event, Epilogue):
            self.state = State.COMPLETE
            self._separator_pending = False
            return b"\r\n--" + self.boundary + b"--\r\n" + event.data
        else:
            raise ValueError(f"Cannot generate {event} in state: {self.state}")
1.0 2024-05-26 19:49:15 +02:00			`from __future__ import annotations`

			`import re`
			`import typing as t`
			`from dataclasses import dataclass`
			`from enum import auto`
			`from enum import Enum`

			`from ..datastructures import Headers`
			`from ..exceptions import RequestEntityTooLarge`
			`from ..http import parse_options_header`


			`class Event:`
			`pass`


			`@dataclass(frozen=True)`
			`class Preamble(Event):`
			`data: bytes`


			`@dataclass(frozen=True)`
			`class Field(Event):`
			`name: str`
			`headers: Headers`


			`@dataclass(frozen=True)`
			`class File(Event):`
			`name: str`
			`filename: str`
			`headers: Headers`


			`@dataclass(frozen=True)`
			`class Data(Event):`
			`data: bytes`
			`more_data: bool`


			`@dataclass(frozen=True)`
			`class Epilogue(Event):`
			`data: bytes`


			`class NeedData(Event):`
			`pass`


			`NEED_DATA = NeedData()`


			`class State(Enum):`
			`PREAMBLE = auto()`
			`PART = auto()`
			`DATA = auto()`
			`DATA_START = auto()`
			`EPILOGUE = auto()`
			`COMPLETE = auto()`


			`# Multipart line breaks MUST be CRLF (\r\n) by RFC-7578, except that`
			`# many implementations break this and either use CR or LF alone.`
			`LINE_BREAK = b"(?:\r\n\|\n\|\r)"`
			`BLANK_LINE_RE = re.compile(b"(?:\r\n\r\n\|\r\r\|\n\n)", re.MULTILINE)`
			`LINE_BREAK_RE = re.compile(LINE_BREAK, re.MULTILINE)`
			`# Header values can be continued via a space or tab after the linebreak, as`
			`# per RFC2231`
			`HEADER_CONTINUATION_RE = re.compile(b"%s[ \t]" % LINE_BREAK, re.MULTILINE)`
			`# This must be long enough to contain any line breaks plus any`
			`# additional boundary markers (--) such that they will be found in a`
			`# subsequent search`
			`SEARCH_EXTRA_LENGTH = 8`


			`class MultipartDecoder:`
			`"""Decodes a multipart message as bytes into Python events.`

			`The part data is returned as available to allow the caller to save`
			`the data from memory to disk, if desired.`
			`"""`

			`def __init__(`
			`self,`
			`boundary: bytes,`
			`max_form_memory_size: int \| None = None,`
			`*,`
			`max_parts: int \| None = None,`
			`) -> None:`
			`self.buffer = bytearray()`
			`self.complete = False`
			`self.max_form_memory_size = max_form_memory_size`
			`self.max_parts = max_parts`
			`self.state = State.PREAMBLE`
			`self.boundary = boundary`

			`# Note in the below \h i.e. horizontal whitespace is used`
			`# as [^\S\n\r] as \h isn't supported in python.`

			`# The preamble must end with a boundary where the boundary is`
			`# prefixed by a line break, RFC2046. Except that many`
			`# implementations including Werkzeug's tests omit the line`
			`# break prefix. In addition the first boundary could be the`
			`# epilogue boundary (for empty form-data) hence the matching`
			`# group to understand if it is an epilogue boundary.`
			`self.preamble_re = re.compile(`
			`rb"%s?--%s(--[^\S\n\r]%s?\|[^\S\n\r]%s)"`
			`% (LINE_BREAK, re.escape(boundary), LINE_BREAK, LINE_BREAK),`
			`re.MULTILINE,`
			`)`
			`# A boundary must include a line break prefix and suffix, and`
			`# may include trailing whitespace. In addition the boundary`
			`# could be the epilogue boundary hence the matching group to`
			`# understand if it is an epilogue boundary.`
			`self.boundary_re = re.compile(`
			`rb"%s--%s(--[^\S\n\r]%s?\|[^\S\n\r]%s)"`
			`% (LINE_BREAK, re.escape(boundary), LINE_BREAK, LINE_BREAK),`
			`re.MULTILINE,`
			`)`
			`self._search_position = 0`
			`self._parts_decoded = 0`

			`def last_newline(self, data: bytes) -> int:`
			`try:`
			`last_nl = data.rindex(b"\n")`
			`except ValueError:`
			`last_nl = len(data)`
			`try:`
			`last_cr = data.rindex(b"\r")`
			`except ValueError:`
			`last_cr = len(data)`

			`return min(last_nl, last_cr)`

			`def receive_data(self, data: bytes \| None) -> None:`
			`if data is None:`
			`self.complete = True`
			`elif (`
			`self.max_form_memory_size is not None`
			`and len(self.buffer) + len(data) > self.max_form_memory_size`
			`):`
			`raise RequestEntityTooLarge()`
			`else:`
			`self.buffer.extend(data)`

			`def next_event(self) -> Event:`
			`event: Event = NEED_DATA`

			`if self.state == State.PREAMBLE:`
			`match = self.preamble_re.search(self.buffer, self._search_position)`
			`if match is not None:`
			`if match.group(1).startswith(b"--"):`
			`self.state = State.EPILOGUE`
			`else:`
			`self.state = State.PART`
			`data = bytes(self.buffer[: match.start()])`
			`del self.buffer[: match.end()]`
			`event = Preamble(data=data)`
			`self._search_position = 0`
			`else:`
			`# Update the search start position to be equal to the`
			`# current buffer length (already searched) minus a`
			`# safe buffer for part of the search target.`
			`self._search_position = max(`
			`0, len(self.buffer) - len(self.boundary) - SEARCH_EXTRA_LENGTH`
			`)`

			`elif self.state == State.PART:`
			`match = BLANK_LINE_RE.search(self.buffer, self._search_position)`
			`if match is not None:`
			`headers = self._parse_headers(self.buffer[: match.start()])`
			`# The final header ends with a single CRLF, however a`
			`# blank line indicates the start of the`
			`# body. Therefore the end is after the first CRLF.`
			`headers_end = (match.start() + match.end()) // 2`
			`del self.buffer[:headers_end]`

			`if "content-disposition" not in headers:`
			`raise ValueError("Missing Content-Disposition header")`

			`disposition, extra = parse_options_header(`
			`headers["content-disposition"]`
			`)`
			`name = t.cast(str, extra.get("name"))`
			`filename = extra.get("filename")`
			`if filename is not None:`
			`event = File(`
			`filename=filename,`
			`headers=headers,`
			`name=name,`
			`)`
			`else:`
			`event = Field(`
			`headers=headers,`
			`name=name,`
			`)`
			`self.state = State.DATA_START`
			`self._search_position = 0`
			`self._parts_decoded += 1`

			`if self.max_parts is not None and self._parts_decoded > self.max_parts:`
			`raise RequestEntityTooLarge()`
			`else:`
			`# Update the search start position to be equal to the`
			`# current buffer length (already searched) minus a`
			`# safe buffer for part of the search target.`
			`self._search_position = max(0, len(self.buffer) - SEARCH_EXTRA_LENGTH)`

			`elif self.state == State.DATA_START:`
			`data, del_index, more_data = self._parse_data(self.buffer, start=True)`
			`del self.buffer[:del_index]`
			`event = Data(data=data, more_data=more_data)`
			`if more_data:`
			`self.state = State.DATA`

			`elif self.state == State.DATA:`
			`data, del_index, more_data = self._parse_data(self.buffer, start=False)`
			`del self.buffer[:del_index]`
			`if data or not more_data:`
			`event = Data(data=data, more_data=more_data)`

			`elif self.state == State.EPILOGUE and self.complete:`
			`event = Epilogue(data=bytes(self.buffer))`
			`del self.buffer[:]`
			`self.state = State.COMPLETE`

			`if self.complete and isinstance(event, NeedData):`
			`raise ValueError(f"Invalid form-data cannot parse beyond {self.state}")`

			`return event`

			`def _parse_headers(self, data: bytes) -> Headers:`
			`headers: list[tuple[str, str]] = []`
			`# Merge the continued headers into one line`
			`data = HEADER_CONTINUATION_RE.sub(b" ", data)`
			`# Now there is one header per line`
			`for line in data.splitlines():`
			`line = line.strip()`

			`if line != b"":`
			`name, _, value = line.decode().partition(":")`
			`headers.append((name.strip(), value.strip()))`
			`return Headers(headers)`

			`def _parse_data(self, data: bytes, *, start: bool) -> tuple[bytes, int, bool]:`
			`# Body parts must start with CRLF (or CR or LF)`
			`if start:`
			`match = LINE_BREAK_RE.match(data)`
			`data_start = t.cast(t.Match[bytes], match).end()`
			`else:`
			`data_start = 0`

			`boundary = b"--" + self.boundary`

			`if self.buffer.find(boundary) == -1:`
			`# No complete boundary in the buffer, but there may be`
			`# a partial boundary at the end. As the boundary`
			`# starts with either a nl or cr find the earliest and`
			`# return up to that as data.`
			`data_end = del_index = self.last_newline(data[data_start:]) + data_start`
			`# If amount of data after last newline is far from`
			`# possible length of partial boundary, we should`
			`# assume that there is no partial boundary in the buffer`
			`# and return all pending data.`
			`if (len(data) - data_end) > len(b"\n" + boundary):`
			`data_end = del_index = len(data)`
			`more_data = True`
			`else:`
			`match = self.boundary_re.search(data)`
			`if match is not None:`
			`if match.group(1).startswith(b"--"):`
			`self.state = State.EPILOGUE`
			`else:`
			`self.state = State.PART`
			`data_end = match.start()`
			`del_index = match.end()`
			`else:`
			`data_end = del_index = self.last_newline(data[data_start:]) + data_start`
			`more_data = match is None`

			`return bytes(data[data_start:data_end]), del_index, more_data`


			`class MultipartEncoder:`
			`def __init__(self, boundary: bytes) -> None:`
			`self.boundary = boundary`
			`self.state = State.PREAMBLE`

			`def send_event(self, event: Event) -> bytes:`
			`if isinstance(event, Preamble) and self.state == State.PREAMBLE:`
			`self.state = State.PART`
			`return event.data`
			`elif isinstance(event, (Field, File)) and self.state in {`
			`State.PREAMBLE,`
			`State.PART,`
			`State.DATA,`
			`}:`
			`data = b"\r\n--" + self.boundary + b"\r\n"`
			`data += b'Content-Disposition: form-data; name="%s"' % event.name.encode()`
			`if isinstance(event, File):`
			`data += b'; filename="%s"' % event.filename.encode()`
			`data += b"\r\n"`
			`for name, value in t.cast(Field, event).headers:`
			`if name.lower() != "content-disposition":`
			`data += f"{name}: {value}\r\n".encode()`
			`self.state = State.DATA_START`
			`return data`
			`elif isinstance(event, Data) and self.state == State.DATA_START:`
			`self.state = State.DATA`
			`if len(event.data) > 0:`
			`return b"\r\n" + event.data`
			`else:`
			`return event.data`
			`elif isinstance(event, Data) and self.state == State.DATA:`
			`return event.data`
			`elif isinstance(event, Epilogue):`
			`self.state = State.COMPLETE`
			`return b"\r\n--" + self.boundary + b"--\r\n" + event.data`
			`else:`
			`raise ValueError(f"Cannot generate {event} in state: {self.state}")`