Source code for pulsar.utils.httpurl

'''This is a substantial module which imports several classes and functions
from the standard library in a Python 2.6 to Python 3.3 compatible fashion.
On top of that, it implements the :class:`HttpClient` for handling
synchronous and asynchronous HTTP requests in a pythonic way.

It is a thin layer on top of ``urllib2`` in Python 2 / ``urllib`` in
Python 3. Several open-source projects have been used as sources of
snippets:

* http-parser_
* request_
* urllib3_
* werkzeug_


.. _tools-http-headers:

HTTP Headers
~~~~~~~~~~~~~~~~~

.. autoclass:: Headers
   :members:
   :member-order: bysource


.. _tools-http-parser:

HTTP Parser
~~~~~~~~~~~~~~~~~

.. autoclass:: HttpParser
   :members:
   :member-order: bysource


.. _http-parser: https://github.com/benoitc/http-parser
.. _urllib3: https://github.com/shazow/urllib3
.. _request: https://github.com/kennethreitz/requests
.. _werkzeug: https://github.com/mitsuhiko/werkzeug
.. _`HTTP cookie`: http://en.wikipedia.org/wiki/HTTP_cookie
'''
import os
import sys
import re
import string
import mimetypes
from hashlib import sha1, md5
from uuid import uuid4
from email.utils import formatdate
from io import BytesIO
import zlib
from collections import deque, OrderedDict
from urllib import request as urllibr
from http import client as httpclient
from urllib.parse import quote, urlsplit, splitport
from http.cookiejar import CookieJar, Cookie
from http.cookies import SimpleCookie

from .structures import mapping_iterator
from .string import to_bytes, to_string
from .html import capfirst
#
# Use the C http_parser extension when available; the pure-python
# HttpParser defined below is set as the default otherwise (see the
# end of this module).
hasextensions = False
CHttpParser = None
try:
    from http_parser.parser import HttpParser as CHttpParser

    hasextensions = True
except ImportError:
    pass

_Http_Parser = CHttpParser


def setDefaultHttpParser(parser):   # pragma    nocover
    global _Http_Parser
    _Http_Parser = parser


def http_parser(**kwargs):
    global _Http_Parser
    return _Http_Parser(**kwargs)


getproxies_environment = urllibr.getproxies_environment
ascii_letters = string.ascii_letters
HTTPError = urllibr.HTTPError
URLError = urllibr.URLError
parse_http_list = urllibr.parse_http_list


# ###################################################    URI & IRI STUFF
#
# The reserved URI characters (RFC 3986 - section 2.2).
# The default charset is "iso-8859-1" (latin-1), from section 3.7.1 of
# http://www.ietf.org/rfc/rfc2616.txt
DEFAULT_CHARSET = 'ISO-8859-1'
URI_GEN_DELIMS = frozenset(':/?#[]@')
URI_SUB_DELIMS = frozenset("!$&'()*+,;=")
URI_RESERVED_SET = URI_GEN_DELIMS.union(URI_SUB_DELIMS)
URI_RESERVED_CHARS = ''.join(URI_RESERVED_SET)
# The unreserved URI characters (RFC 3986 - section 2.3)
URI_UNRESERVED_SET = frozenset(ascii_letters + string.digits + '-._~')
URI_SAFE_CHARS = URI_RESERVED_CHARS + '%~'
HEADER_TOKEN_CHARS = frozenset("!#$%&'*+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                               '^_`abcdefghijklmnopqrstuvwxyz|~')
MAX_CHUNK_SIZE = 65536

# ###################################################    CONTENT TYPES
JSON_CONTENT_TYPES = ('application/json',
                      'application/javascript',
                      'text/json',
                      'text/x-json')
# ###################################################    REQUEST METHODS
ENCODE_URL_METHODS = frozenset(['DELETE', 'GET', 'HEAD', 'OPTIONS'])
ENCODE_BODY_METHODS = frozenset(['PATCH', 'POST', 'PUT', 'TRACE'])
REDIRECT_CODES = (301, 302, 303, 305, 307)
NO_CONTENT_CODES = frozenset((204, 304))

CRLF = '\r\n'
LWS = '\r\n '


def escape(s):
    return quote(s, safe='~')


def urlquote(iri):
    return quote(iri, safe=URI_RESERVED_CHARS)


def _gen_unquote(uri):
    unreserved_set = URI_UNRESERVED_SET
    for n, part in enumerate(to_string(uri, 'latin1').split('%')):
        if not n:
            yield part
        else:
            h = part[0:2]
            if len(h) == 2:
                c = chr(int(h, 16))
                if c in unreserved_set:
                    yield c + part[2:]
                else:
                    yield '%' + part
            else:
                yield '%' + part


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes
    encoded.
    """
    return ''.join(_gen_unquote(uri))


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.
    """
    # Unquote only the unreserved characters
    # Then quote only illegal characters (do not quote reserved, unreserved,
    # or '%')
    return quote(unquote_unreserved(uri), safe=URI_SAFE_CHARS)
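
# Usage sketch (added for illustration; the URLs are hypothetical):
#
#   >>> requote_uri('http://www.example.com/a b')
#   'http://www.example.com/a%20b'
#   >>> requote_uri('http://www.example.com/%7Euser')  # %7E is unreserved '~'
#   'http://www.example.com/~user'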


def iri_to_uri(iri, kwargs=None):
    '''Convert an Internationalised Resource Identifier (IRI) portion
    to a URI portion that is suitable for inclusion in a URL.
    This is the algorithm from section 3.1 of RFC 3987.
    Returns an ASCII native string containing the encoded result.
    '''
    if iri is None:
        return iri
    if kwargs:
        iri = '%s?%s' % (to_string(iri, 'latin1'),
                         '&'.join(('%s=%s' % kv for kv in kwargs.items())))
    return urlquote(unquote_unreserved(iri))
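
# Usage sketch (added for illustration; paths and parameters are
# hypothetical). Non-ASCII characters are percent-encoded as UTF-8:
#
#   >>> iri_to_uri('/caffè')
#   '/caff%C3%A8'
#   >>> iri_to_uri('/search', {'q': 'pulsar'})
#   '/search?q=pulsar'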


def host_and_port(host):
    host, port = splitport(host)
    return host, int(port) if port else None


def default_port(scheme):
    if scheme in ("http", "ws"):
        return '80'
    elif scheme in ("https", "wss"):
        return '443'


def host_and_port_default(scheme, host):
    host, port = splitport(host)
    if not port:
        port = default_port(scheme)
    return host, port


def host_no_default_port(scheme, netloc):
    host, port = splitport(netloc)
    if port and port == default_port(scheme):
        return host
    else:
        return netloc


def get_hostport(scheme, full_host):
    host, port = host_and_port(full_host)
    if port is None:
        i = host.rfind(':')
        j = host.rfind(']')         # ipv6 addresses have [...]
        if i > j:
            try:
                port = int(host[i+1:])
            except ValueError:
                if host[i+1:] == "":  # http://foo.com:/ == http://foo.com/
                    port = default_port(scheme)
                else:
                    raise httpclient.InvalidURL("nonnumeric port: '%s'"
                                                % host[i+1:])
            host = host[:i]
        else:
            port = default_port(scheme)
        if host and host[0] == '[' and host[-1] == ']':
            host = host[1:-1]
    return host, int(port)
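
# Usage sketch (added for illustration; hosts are hypothetical):
#
#   >>> get_hostport('http', 'www.example.com')
#   ('www.example.com', 80)
#   >>> get_hostport('https', 'www.example.com:8443')
#   ('www.example.com', 8443)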


def remove_double_slash(route):
    if '//' in route:
        route = re.sub('/+', '/', route)
    return route


def has_empty_content(status, method=None):
    """204, 304 and 1xx codes have no content, same for HEAD requests"""
    return (status in NO_CONTENT_CODES or
            100 <= status < 200 or
            method == "HEAD")


def is_succesful(status):
    '''A 2xx status code is successful.'''
    return 200 <= status < 300


def capheader(name):
    name = name.replace('_', '-')
    return '-'.join((b for b in (capfirst(n) for n in name.split('-')) if b))


def header_field(name):
    """Return header ``name`` in camel case.

    For example::

        header_field('connection') == 'Connection'
        header_field('accept-charset') == 'Accept-Charset'
    """
    return capheader(name.lower())


#    HEADERS UTILITIES
HEADER_FIELDS_JOINER = {'Cookie': '; ',
                        'Set-Cookie': None,
                        'Set-Cookie2': None}


def split_comma(value):
    return [v for v in (v.strip() for v in value.split(',')) if v]


def parse_cookies(value):
    return [c.OutputString() for c in SimpleCookie(value).values()]


header_parsers = {'Connection': split_comma,
                  'Cookie': parse_cookies}


def header_values(header, value):
    assert isinstance(value, str)
    if header in header_parsers:
        return header_parsers[header](value)
    else:
        return [value]


def quote_header_value(value, extra_chars='', allow_token=True):
    """Quote a header value if necessary.

    :param value: the value to quote.
    :param extra_chars: a list of extra characters to skip quoting.
    :param allow_token: if this is enabled token values are returned
        unchanged.
    """
    value = to_string(value)
    if allow_token:
        token_chars = HEADER_TOKEN_CHARS | set(extra_chars)
        if set(value).issubset(token_chars):
            return value
    return '"%s"' % value.replace('\\', '\\\\').replace('"', '\\"')


def unquote_header_value(value, is_filename=False):
    """Unquote a header value.

    Reversal of :func:`quote_header_value`. This does not use the real
    un-quoting but what browsers are actually using for quoting.

    :param value: the header value to unquote.
    :param is_filename: if ``True``, leading UNC paths are preserved
        (see the comment in the body).
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well.  IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]
        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes.  Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly.  See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value
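
# Usage sketch (added for illustration), reversing the example above:
#
#   >>> unquote_header_value('"a \\"quoted\\" value"')
#   'a "quoted" value'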


def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    """
    result = {}
    for item in parse_http_list(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


_special = re.escape('()<>@,;:\\"/[]?={} \t')
_re_special = re.compile('[%s]' % _special)
_qstr = '"(?:\\\\.|[^"])*"'  # Quoted string
_value = '(?:[^%s]+|%s)' % (_special, _qstr)  # Safe or quoted string
_option = r'(?:;|^)\s*([^%s]+)\s*=\s*(%s)' % (_special, _value)
_re_option = re.compile(_option)  # key=value part of a Content-Type header


def header_unquote(val, filename=False):
    if val[0] == val[-1] == '"':
        val = val[1:-1]
        if val[1:3] == ':\\' or val[:2] == '\\\\':
            val = val.split('\\')[-1]  # fix ie6 bug: full path --> filename
        return val.replace('\\\\', '\\').replace('\\"', '"')
    return val


def parse_options_header(header, options=None):
    if ';' not in header:
        return header.lower().strip(), {}
    ctype, tail = header.split(';', 1)
    options = options or {}
    for match in _re_option.finditer(tail):
        key = match.group(1).lower()
        value = header_unquote(match.group(2), key == 'filename')
        options[key] = value
    return ctype.lower().strip(), options
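
# Usage sketch (added for illustration; field names are hypothetical):
#
#   >>> parse_options_header('form-data; name="file"; filename="a.txt"')
#   ('form-data', {'name': 'file', 'filename': 'a.txt'})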


class Headers:
    '''Utility for managing HTTP headers for both clients and servers.

    It has a dictionary-like interface with a few extra functions to
    facilitate the insertion of multiple header values. Header fields are
    **case insensitive**, therefore doing::

        >>> h = Headers()
        >>> h['Content-Length'] = '1050'

    is equivalent to::

        >>> h['content-length'] = '1050'

    :param headers: optional iterable over header field/value pairs.
    :param kind: optional headers type, one of ``server``, ``client``
        or ``both``.
    :param strict: if ``True`` only valid header fields will be included.

    This :class:`Headers` container maintains an ordering as suggested by
    http://www.w3.org/Protocols/rfc2616/rfc2616.html:

    .. epigraph::

        The order in which header fields with differing field names are
        received is not significant. However, it is "good practice" to
        send general-header fields first, followed by request-header or
        response-header fields, and ending with the entity-header fields.

        -- rfc2616 section 4.2

    The ``strict`` parameter is rarely used; it forces the omission of
    non-standard header fields.
    '''
    @classmethod
    def make(cls, headers):
        if not isinstance(headers, cls):
            headers = cls(headers)
        return headers

    def __init__(self, *args, **kwargs):
        self._headers = OrderedDict()
        if args or kwargs:
            self.update(*args, **kwargs)

    def __repr__(self):
        return self._headers.__repr__()

    def __str__(self):
        return '\r\n'.join(self._ordered())

    def __bytes__(self):
        return str(self).encode(DEFAULT_CHARSET)

    def __len__(self):
        return len(self._headers)

    def update(self, *args, **kwargs):
        """Extend the headers with an iterable.

        :param iterable: a dictionary or an iterable over key/value pairs.
        """
        if len(args) == 1:
            for key, value in mapping_iterator(args[0]):
                self.add_header(key, value)
        elif args:
            raise TypeError('update expected at most 1 argument, got %d'
                            % len(args))
        for key, value in kwargs.items():
            self.add_header(key, value)

    def override(self, iterable):
        '''Extend headers by overriding fields from ``iterable``.

        :param iterable: a dictionary or an iterable over key/value pairs.
        '''
        seen = set()
        for key, value in mapping_iterator(iterable):
            key = key.lower()
            if key in seen:
                self.add_header(key, value)
            else:
                seen.add(key)
                self[key] = value

    def copy(self):
        return self.__class__(self)

    def __contains__(self, key):
        return header_field(key) in self._headers

    def __getitem__(self, key):
        key = header_field(key)
        values = self._headers[key]
        joiner = HEADER_FIELDS_JOINER.get(key, ', ')
        if joiner is None:
            joiner = '; '
        return joiner.join(values)

    def __delitem__(self, key):
        self._headers.__delitem__(header_field(key))

    def __setitem__(self, key, value):
        key = header_field(key)
        if key and value:
            if not isinstance(value, list):
                value = header_values(key, value)
            self._headers[key] = value

    def get(self, key, default=None):
        '''Get the field value at ``key`` as comma-separated values.

        For example::

            >>> from pulsar.utils.httpurl import Headers
            >>> h = Headers(kind='client')
            >>> h.add_header('accept-encoding', 'gzip')
            >>> h.add_header('accept-encoding', 'deflate')
            >>> h.get('accept-encoding')

        results in::

            'gzip, deflate'
        '''
        if key in self:
            return self.__getitem__(key)
        else:
            return default

    def get_all(self, key, default=None):
        '''Get the values at header ``key`` as a list, rather than as a
        comma-separated string (which is what the :meth:`get` method
        returns). For example::

            >>> from pulsar.utils.httpurl import Headers
            >>> h = Headers(kind='client')
            >>> h.add_header('accept-encoding', 'gzip')
            >>> h.add_header('accept-encoding', 'deflate')
            >>> h.get_all('accept-encoding')

        results in::

            ['gzip', 'deflate']
        '''
        return self._headers.get(header_field(key), default)

    def has(self, field, value):
        '''Check if ``value`` is available in header ``field``.'''
        value = value.lower()
        for c in self.get_all(field, ()):
            if c.lower() == value:
                return True
        return False

    def pop(self, key, *args):
        return self._headers.pop(header_field(key), *args)

    def clear(self):
        '''Same as :meth:`dict.clear`, it removes all headers.'''
        self._headers.clear()

    def getheaders(self, key):   # pragma nocover
        '''Required by cookielib in python 2.

        If the key is not available, it returns an empty list.
        '''
        return self._headers.get(header_field(key), [])

    def add_header(self, key, values):
        '''Add ``values`` to the ``key`` header.

        If the header is already available, append the values to the
        existing list.

        :param key: header name.
        :param values: a string value or a list/tuple of string values
            for header ``key``.
        '''
        key = header_field(key)
        if key and values:
            if not isinstance(values, (tuple, list)):
                values = header_values(key, values)
            current = self._headers.get(key, [])
            for value in values:
                if value and value not in current:
                    current.append(value)
            self._headers[key] = current

    def remove_header(self, key, value=None):
        '''Remove the header at ``key``.

        If ``value`` is provided, remove only that value, if found.
        '''
        key = header_field(key)
        if key:
            if value:
                value = value.lower()
                values = self._headers.get(key, [])
                removed = None
                for v in values:
                    if v.lower() == value:
                        removed = v
                        values.remove(v)
                self._headers[key] = values
                return removed
            else:
                return self._headers.pop(key, None)

    def flat(self, version, status):
        '''Full headers bytes representation.'''
        vs = version + (status, self)
        return ('HTTP/%s.%s %s\r\n%s' % vs).encode(DEFAULT_CHARSET)

    def __iter__(self):
        dj = ', '
        for k, values in self._headers.items():
            joiner = HEADER_FIELDS_JOINER.get(k, dj)
            if joiner:
                yield k, joiner.join(values)
            else:
                for value in values:
                    yield k, value

    def _ordered(self):
        for key, header in self:
            yield "%s: %s" % (key, header)
        yield ''
        yield ''
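
# Usage sketch (added for illustration; the header values are hypothetical):
#
#   >>> h = Headers([('Content-Type', 'text/html')])
#   >>> h.add_header('Accept-Encoding', 'gzip')
#   >>> h.add_header('Accept-Encoding', 'deflate')
#   >>> h['accept-encoding']
#   'gzip, deflate'
#   >>> h.get_all('accept-encoding')
#   ['gzip', 'deflate']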

###############################################################################
# HTTP PARSER
###############################################################################
METHOD_RE = re.compile(r"[A-Z0-9$-_.]{3,20}")
VERSION_RE = re.compile(r"HTTP/(\d+)\.(\d+)")
STATUS_RE = re.compile(r"(\d{3})\s*(\w*)")
HEADER_RE = re.compile(r"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\"]")

# errors
BAD_FIRST_LINE = 0
INVALID_HEADER = 1
INVALID_CHUNK = 2


class InvalidRequestLine(Exception):
    """Error raised when the first line is invalid."""


class InvalidHeader(Exception):
    """Error raised on an invalid header."""


class InvalidChunkSize(Exception):
    """Error raised when an invalid chunk size is parsed."""

class HttpParser:
    '''A python HTTP parser.

    Original code from https://github.com/benoitc/http-parser

    2011 (c) Benoit Chesneau <benoitc@e-engura.org>
    '''
    def __init__(self, kind=2, decompress=False):
        self.decompress = decompress
        # error vars
        self.errno = None
        self.errstr = ""
        # protected variables
        self._buf = []
        self._version = None
        self._method = None
        self._status_code = None
        self._status = None
        self._reason = None
        self._url = None
        self._path = None
        self._query_string = None
        self._kind = kind
        self._fragment = None
        self._headers = Headers()
        self._chunked = False
        self._body = []
        self._trailers = None
        self._partial_body = False
        self._clen = None
        self._clen_rest = None
        # private events
        self.__on_firstline = False
        self.__on_headers_complete = False
        self.__on_message_begin = False
        self.__on_message_complete = False
        # decompress
        self.__decompress_obj = None
        self.__decompress_first_try = True

    @property
    def kind(self):
        return self._kind

    def get_version(self):
        return self._version

    def get_method(self):
        return self._method

    def get_status_code(self):
        return self._status_code

    def get_url(self):
        return self._url

    def get_path(self):
        return self._path

    def get_query_string(self):
        return self._query_string

    def get_fragment(self):
        return self._fragment

    def get_headers(self):
        return self._headers

    def recv_body(self):
        """Return the last chunk of the parsed body."""
        body = b''.join(self._body)
        self._body = []
        self._partial_body = False
        return body

    def is_headers_complete(self):
        """Return ``True`` if all headers have been parsed."""
        return self.__on_headers_complete

    def is_partial_body(self):
        """Return ``True`` if a chunk of body has been parsed."""
        return self._partial_body

    def is_message_begin(self):
        """Return ``True`` if parsing has started."""
        return self.__on_message_begin

    def is_message_complete(self):
        """Return ``True`` if parsing is done (we got EOF)."""
        return self.__on_message_complete

    def is_chunked(self):
        """Return ``True`` if the Transfer-Encoding header value is
        chunked."""
        return self._chunked
    def execute(self, data, length):
        # end of body can be passed manually by putting a length of 0
        if length == 0:
            self.__on_message_complete = True
            return length
        # start to parse
        nb_parsed = 0
        while True:
            if not self.__on_firstline:
                idx = data.find(b'\r\n')
                if idx < 0:
                    self._buf.append(data)
                    return len(data)
                else:
                    self.__on_firstline = True
                    self._buf.append(data[:idx])
                    first_line = to_string(b''.join(self._buf),
                                           DEFAULT_CHARSET)
                    rest = data[idx+2:]
                    data = b''
                    if self._parse_firstline(first_line):
                        nb_parsed = nb_parsed + idx + 2
                        self._buf = [rest]
                    else:
                        return nb_parsed
            elif not self.__on_headers_complete:
                if data:
                    self._buf.append(data)
                    data = b''
                try:
                    to_parse = b''.join(self._buf)
                    ret = self._parse_headers(to_parse)
                    if ret is False:
                        return length
                    nb_parsed = nb_parsed + (len(to_parse) - ret)
                except InvalidHeader as e:
                    self.errno = INVALID_HEADER
                    self.errstr = str(e)
                    return nb_parsed
            elif not self.__on_message_complete:
                self.__on_message_begin = True
                if data:
                    self._buf.append(data)
                    data = b''
                ret = self._parse_body()
                if ret is None:
                    return length
                elif ret < 0:
                    return ret
                elif ret == 0:
                    self.__on_message_complete = True
                    return length
                else:
                    nb_parsed = max(length, ret)
            else:
                return 0

    def _parse_firstline(self, line):
        try:
            if self.kind == 2:  # auto detect
                try:
                    self._parse_request_line(line)
                except InvalidRequestLine:
                    self._parse_response_line(line)
            elif self.kind == 1:
                self._parse_response_line(line)
            elif self.kind == 0:
                self._parse_request_line(line)
        except InvalidRequestLine as e:
            self.errno = BAD_FIRST_LINE
            self.errstr = str(e)
            return False
        return True

    def _parse_response_line(self, line):
        bits = line.split(None, 1)
        if len(bits) != 2:
            raise InvalidRequestLine(line)
        # version
        matchv = VERSION_RE.match(bits[0])
        if matchv is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[0])
        self._version = (int(matchv.group(1)), int(matchv.group(2)))
        # status
        matchs = STATUS_RE.match(bits[1])
        if matchs is None:
            raise InvalidRequestLine("Invalid status: %s" % bits[1])
        self._status = bits[1]
        self._status_code = int(matchs.group(1))
        self._reason = matchs.group(2)

    def _parse_request_line(self, line):
        bits = line.split(None, 2)
        if len(bits) != 3:
            raise InvalidRequestLine(line)
        # Method
        if not METHOD_RE.match(bits[0]):
            raise InvalidRequestLine("invalid Method: %s" % bits[0])
        self._method = bits[0].upper()
        # URI
        self._url = bits[1]
        parts = urlsplit('http://dummy.com%s' % bits[1])
        self._path = parts.path or ""
        self._query_string = parts.query or ""
        self._fragment = parts.fragment or ""
        # Version
        match = VERSION_RE.match(bits[2])
        if match is None:
            raise InvalidRequestLine("Invalid HTTP version: %s" % bits[2])
        self._version = (int(match.group(1)), int(match.group(2)))

    def _parse_headers(self, data):
        if data == b'\r\n':
            self.__on_headers_complete = True
            self._buf = []
            return 0
        idx = data.find(b'\r\n\r\n')
        if idx < 0:  # we don't have all the headers yet
            return False
        chunk = to_string(data[:idx], DEFAULT_CHARSET)
        # Split lines on \r\n, keeping the \r\n on each line
        lines = deque(('%s\r\n' % line for line in chunk.split('\r\n')))
        # Parse headers into key/value pairs, paying attention
        # to continuation lines.
        while len(lines):
            # Parse the initial header name: value pair
            curr = lines.popleft()
            if curr.find(":") < 0:
                continue
            name, value = curr.split(":", 1)
            name = name.rstrip(" \t").upper()
            if HEADER_RE.search(name):
                raise InvalidHeader("invalid header name %s" % name)
            name, value = header_field(name.strip()), [value.lstrip()]
            # Consume value continuation lines
            while len(lines) and lines[0].startswith((" ", "\t")):
                value.append(lines.popleft())
            value = ''.join(value).rstrip()
            self._headers.add_header(name, value)
        # detect now if the body is sent in chunks
        clen = self._headers.get('Content-Length')
        if 'Transfer-Encoding' in self._headers:
            te = self._headers['Transfer-Encoding'].lower()
            self._chunked = (te == 'chunked')
        else:
            self._chunked = False
        #
        status = self._status_code
        if status and has_empty_content(status, self._method):
            clen = 0
        elif clen is not None:
            try:
                clen = int(clen)
            except ValueError:
                clen = None
            else:
                if clen < 0:  # ignore nonsensical negative lengths
                    clen = None
        #
        if clen is None:
            self._clen_rest = sys.maxsize
        else:
            self._clen_rest = self._clen = clen
        #
        # detect encoding and set the decompress object
        if self.decompress and 'Content-Encoding' in self._headers:
            encoding = self._headers['Content-Encoding']
            if encoding == "gzip":
                self.__decompress_obj = zlib.decompressobj(16+zlib.MAX_WBITS)
                self.__decompress_first_try = False
            elif encoding == "deflate":
                self.__decompress_obj = zlib.decompressobj()
        rest = data[idx+4:]
        self._buf = [rest]
        self.__on_headers_complete = True
        self.__on_message_begin = True
        return len(rest)

    def _parse_body(self):
        data = b''.join(self._buf)
        #
        if not self._chunked:
            #
            if not data and self._clen is None:
                if not self._status:
                    # message complete only for servers
                    self.__on_message_complete = True
            else:
                if self._clen_rest is not None:
                    self._clen_rest -= len(data)
                # maybe decompress
                data = self._decompress(data)
                self._partial_body = True
                if data:
                    self._body.append(data)
                self._buf = []
                if self._clen_rest <= 0:
                    self.__on_message_complete = True
            return
        else:
            try:
                size, rest = self._parse_chunk_size(data)
            except InvalidChunkSize as e:
                self.errno = INVALID_CHUNK
                self.errstr = "invalid chunk size [%s]" % str(e)
                return -1
            if size == 0:
                return size
            if size is None or len(rest) < size + 2:
                return None
            body_part, rest = rest[:size], rest[size:]
            # maybe decompress
            body_part = self._decompress(body_part)
            self._partial_body = True
            self._body.append(body_part)
            rest = rest[2:]
            self._buf = [rest] if rest else []
            return len(rest) + 2

    def _parse_chunk_size(self, data):
        idx = data.find(b'\r\n')
        if idx < 0:
            return None, None
        line, rest_chunk = data[:idx], data[idx+2:]
        chunk_size = line.split(b';', 1)[0].strip()
        try:
            chunk_size = int(chunk_size, 16)
        except ValueError:
            raise InvalidChunkSize(chunk_size)
        if chunk_size == 0:
            self._parse_trailers(rest_chunk)
            return 0, None
        return chunk_size, rest_chunk

    def _parse_trailers(self, data):
        idx = data.find(b'\r\n\r\n')
        if data[:2] == b'\r\n':
            self._trailers = self._parse_headers(data[:idx])

    def _decompress(self, data):
        deco = self.__decompress_obj
        if deco is not None:
            if not self.__decompress_first_try:
                data = deco.decompress(data)
            else:
                try:
                    data = deco.decompress(data)
                except zlib.error:
                    self.__decompress_obj = zlib.decompressobj(
                        -zlib.MAX_WBITS)
                    deco = self.__decompress_obj
                    data = deco.decompress(data)
                self.__decompress_first_try = False
        return data
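
# Usage sketch (added for illustration; the request bytes are hypothetical):
# feed raw bytes to the parser, then inspect the parsed message.
#
#   >>> p = HttpParser()
#   >>> raw = b'GET /index?x=1 HTTP/1.1\r\nHost: example.com\r\n\r\n'
#   >>> parsed = p.execute(raw, len(raw))
#   >>> p.is_headers_complete()
#   True
#   >>> p.get_path()
#   '/index'
#   >>> p.get_headers()['host']
#   'example.com'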

if not hasextensions:   # pragma nocover
    setDefaultHttpParser(HttpParser)


# ############################################ UTILITIES, ENCODERS, PARSERS
absolute_http_url_re = re.compile(r"^https?://", re.I)


def is_absolute_uri(location):
    '''Check if ``location`` is absolute, i.e. it includes the scheme.'''
    return location and absolute_http_url_re.match(location)


def get_environ_proxies():
    """Return a dict of environment proxies. From requests_."""
    proxy_keys = [
        'all',
        'http',
        'https',
        'ftp',
        'socks',
        'ws',
        'wss',
        'no'
    ]

    def get_proxy(k):
        return os.environ.get(k) or os.environ.get(k.upper())

    proxies = [(key, get_proxy(key + '_proxy')) for key in proxy_keys]
    return dict([(key, val) for (key, val) in proxies if val])


def appendslash(url):
    '''Append a slash to ``url`` if it does not have one.'''
    if not url.endswith('/'):
        url = '%s/' % url
    return url


def choose_boundary():
    """Our embarrassingly simple replacement for mimetools.choose_boundary."""
    return uuid4().hex


def get_content_type(filename):
    return mimetypes.guess_type(filename)[0] or 'application/octet-stream'


def encode_multipart_formdata(fields, boundary=None, charset=None):
    """Encode a dictionary of ``fields`` using the multipart/form-data format.

    :param fields: Dictionary of fields or list of (key, value) field
        tuples. The key is treated as the field name, and the value as
        the body of the form-data bytes. If the value is a tuple of two
        elements, then the first element is treated as the filename of
        the form-data section. Field names and filenames must be unicode.
    :param boundary: If not specified, a random boundary will be
        generated using :func:`choose_boundary`.
    """
    charset = charset or 'utf-8'
    body = BytesIO()
    if boundary is None:
        boundary = choose_boundary()
    for fieldname, value in mapping_iterator(fields):
        body.write(('--%s\r\n' % boundary).encode(charset))
        if isinstance(value, tuple):
            filename, data = value
            body.write(('Content-Disposition: form-data; name="%s"; '
                        'filename="%s"\r\n' % (fieldname, filename))
                       .encode(charset))
            body.write(('Content-Type: %s\r\n\r\n' %
                        (get_content_type(filename))).encode(charset))
        else:
            data = value
            body.write(('Content-Disposition: form-data; name="%s"\r\n'
                        % (fieldname)).encode(charset))
            body.write(b'Content-Type: text/plain\r\n\r\n')
        body.write(to_bytes(data))
        body.write(b'\r\n')
    body.write(('--%s--\r\n' % (boundary)).encode(charset))
    content_type = 'multipart/form-data; boundary=%s' % boundary
    return body.getvalue(), content_type


def hexmd5(x):
    return md5(to_bytes(x)).hexdigest()


def hexsha1(x):
    return sha1(to_bytes(x)).hexdigest()


def http_date(epoch_seconds=None):
    """Format the time to match the RFC 1123 date format as specified by
    HTTP RFC 2616 section 3.3.1.

    Accepts a floating point number expressed in seconds since the epoch,
    in UTC, such as that returned by time.time(). If set to ``None``, it
    defaults to the current time.

    Outputs a string in the format 'Wdy, DD Mon YYYY HH:MM:SS GMT'.
    """
    return formatdate(epoch_seconds, usegmt=True)


# ################################################################# COOKIES
def create_cookie(name, value, **kwargs):
    """Make a cookie from underspecified parameters.

    By default, the pair of ``name`` and ``value`` will be set for the
    domain '' and sent on every request (this is sometimes called a
    "supercookie").
    """
    result = dict(
        version=0,
        name=name,
        value=value,
        port=None,
        domain='',
        path='/',
        secure=False,
        expires=None,
        discard=True,
        comment=None,
        comment_url=None,
        rest={'HttpOnly': None},
        rfc2109=False,)
    badargs = set(kwargs) - set(result)
    if badargs:
        err = 'create_cookie() got unexpected keyword arguments: %s'
        raise TypeError(err % list(badargs))
    result.update(kwargs)
    result['port_specified'] = bool(result['port'])
    result['domain_specified'] = bool(result['domain'])
    result['domain_initial_dot'] = result['domain'].startswith('.')
    result['path_specified'] = bool(result['path'])
    return Cookie(**result)


def cookiejar_from_dict(*cookie_dicts):
    """Return a CookieJar from one or more key/value dictionaries.

    :param cookie_dicts: dicts of key/values to insert into the CookieJar.
    """
    cookie_dicts = tuple((d for d in cookie_dicts if d))
    if len(cookie_dicts) == 1 and isinstance(cookie_dicts[0], CookieJar):
        return cookie_dicts[0]
    cookiejar = CookieJar()
    for cookie_dict in cookie_dicts:
        if isinstance(cookie_dict, CookieJar):
            for cookie in cookie_dict:
                cookiejar.set_cookie(cookie)
        else:
            for name in cookie_dict:
                cookiejar.set_cookie(create_cookie(name, cookie_dict[name]))
    return cookiejar


# ############################################################## VARY HEADER
cc_delim_re = re.compile(r'\s*,\s*')


def patch_vary_headers(response, newheaders):
    """Add (or update) the "Vary" header in the given HttpResponse object.

    ``newheaders`` is a list of header names that should be in "Vary".
    Existing headers in "Vary" aren't removed. For information on the Vary
    header, see:

        http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.44
    """
    # Note that we need to keep the original order intact, because cache
    # implementations may rely on the order of the Vary contents in, say,
    # computing an MD5 hash.
    if 'Vary' in response:
        vary_headers = cc_delim_re.split(response['Vary'])
    else:
        vary_headers = []
    # Use .lower() here so we treat headers as case-insensitive.
    existing_headers = set([header.lower() for header in vary_headers])
    additional_headers = [newheader for newheader in newheaders
                          if newheader.lower() not in existing_headers]
    response['Vary'] = ', '.join(vary_headers + additional_headers)


def has_vary_header(response, header_query):
    """Check if the response has a given header name in its Vary header."""
    if not response.has_header('Vary'):
        return False
    vary_headers = cc_delim_re.split(response['Vary'])
    existing_headers = set([header.lower() for header in vary_headers])
    return header_query.lower() in existing_headers


class CacheControl:
    '''Cache-Control header utility, see http://www.mnot.net/cache_docs/

    .. attribute:: maxage

        Specifies the maximum amount of time that a representation will
        be considered fresh.
    '''
    def __init__(self, maxage=None, private=False,
                 must_revalidate=False, proxy_revalidate=False,
                 nostore=False):
        self.maxage = maxage
        self.private = private
        self.must_revalidate = must_revalidate
        self.proxy_revalidate = proxy_revalidate
        self.nostore = nostore

    def __call__(self, headers, etag=None):
        if self.nostore:
            headers['cache-control'] = ('no-store, no-cache, '
                                        'must-revalidate, max-age=0')
        elif self.maxage:
            headers['cache-control'] = 'max-age=%s' % self.maxage
            if etag:
                headers['etag'] = '"%s"' % etag
            if self.private:
                headers.add_header('cache-control', 'private')
            else:
                headers.add_header('cache-control', 'public')
            if self.must_revalidate:
                headers.add_header('cache-control', 'must-revalidate')
            elif self.proxy_revalidate:
                headers.add_header('cache-control', 'proxy-revalidate')
        else:
            headers['cache-control'] = 'no-cache'


def chunk_encoding(chunk):
    '''Write a chunk::

        chunk-size(hex) CRLF
        chunk-data CRLF

    If the size is 0, this is the last chunk, and an extra CRLF is
    appended.
    '''
    head = ("%X\r\n" % len(chunk)).encode('utf-8')
    return head + chunk + b'\r\n'


def http_chunks(data, finish=False):
    while len(data) >= MAX_CHUNK_SIZE:
        chunk, data = data[:MAX_CHUNK_SIZE], data[MAX_CHUNK_SIZE:]
        yield chunk_encoding(chunk)
    if data:
        yield chunk_encoding(data)
    if finish:
        yield chunk_encoding(b'')


def parse_header_links(value):
    """Return a list of parsed link headers, e.g.

        Link: <http:/.../front.jpeg>; rel=front; type="image/jpeg",
              <http://.../back.jpeg>; rel=back; type="image/jpeg"

    Original code from https://github.com/kennethreitz/requests

    Copyright 2016 Kenneth Reitz
    """
    links = []
    replace_chars = " '\""
    for val in re.split(", *<", value):
        try:
            url, params = val.split(";", 1)
        except ValueError:
            url, params = val, ''
        link = {}
        link["url"] = url.strip("<> '\"")
        for param in params.split(";"):
            try:
                key, value = param.split("=")
            except ValueError:
                break
            link[key.strip(replace_chars)] = value.strip(replace_chars)
        links.append(link)
    return links
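
# Usage sketch (added for illustration): http_chunks frames a payload for
# Transfer-Encoding: chunked; with finish=True the terminating zero-size
# chunk is appended.
#
#   >>> list(http_chunks(b'hello', finish=True))
#   [b'5\r\nhello\r\n', b'0\r\n\r\n']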