1
0
mirror of synced 2025-01-19 22:32:45 +01:00
Jennifer Taylor 509cb4f0d9 Convert most of the format() string calls to f-strings using libcst.
Exact commands run were:

  python3 -m libcst.tool codemod convert_format_to_fstring.ConvertFormatStringCommand . --no-format
  python3 setup.py build_ext --inplace
2020-01-07 21:29:07 +00:00

158 lines
5.7 KiB
Python

from typing import Any, Dict, List, Optional, Tuple
class HTTP:
@staticmethod
def parse(data: bytes, request: bool=False, response: bool=False) -> Optional[Dict[str, Any]]:
"""
A very lazy and hastily coded HTTP parser.
Assumes that data is a valid HTTP stream and just tokenizes relevant pieces.
This can probably be made far more robust by scrapping it and using flask or
another real parser. However, it does the job.
Parameters:
data - A string blob that represents an HTTP request
request - Set to true if this is expected to be a request
response - Set to true if this is expected to be a response
Returns a dictionary containing:
version - HTTP version
headers - Dictionary of headers keyed by header name
data - Post body in unmolested form
uri - Requested URI when this is a request
method - Requested method on URI when this is a request
code - HTTP response code when this is a response
"""
try:
# Try to get the headers and post body as two separate elemenst
binary_headers, data = data.split(b"\r\n\r\n", 1)
except ValueError:
# Can't even separate header from post body
return None
# Split headers individually
headerlist = binary_headers.split(b"\r\n")
try:
if request:
# Remove the first header as this is the HTTP request
method, uri, version = headerlist.pop(0).split(b' ', 2)
elif response:
# Remove the first header as this is the HTTP response
version, code, error = headerlist.pop(0).split(b' ', 2)
else:
raise Exception("Logic error!")
except ValueError:
# Can't parse the headers returned
return None
headers: Dict[str, str] = {}
preserved: List[Tuple[str, str]] = []
# This is lazy because we can have multiple values, but whatever, it works
for header in headerlist:
name, info = header.split(b":", 1)
key = name.decode('ascii').lower()
value = info.decode('ascii').strip()
headers[key] = value
preserved.append((key, value))
# Cap post body to length if we have a content-length header
if 'content-length' in headers:
data = data[:int(headers['content-length'])]
valid = len(data) == int(headers['content-length'])
elif 'transfer-encoding' in headers and headers['transfer-encoding'] == 'chunked':
real_data = b''
while True:
try:
size_bytes, rest = data.split(b"\r\n", 1)
except ValueError:
# Not enough values to unpack
size_bytes = b'0'
size = int(size_bytes, 16)
if size == 0:
# End of chunks
break
# Grab the real data
real_data = real_data + rest[:size]
# Skip past data and \r\n
data = rest[(size + 2):]
data = real_data
valid = True
else:
valid = True
if request:
return {
'method': method.decode('ascii').lower(),
'uri': uri.decode('ascii'),
'version': version.decode('ascii'),
'headers': headers,
'preserved_headers': preserved,
'data': data,
'valid': valid,
}
elif response:
return {
'code': code.decode('ascii'),
'version': version.decode('ascii'),
'error': error.decode('ascii'),
'headers': headers,
'preserved_headers': preserved,
'data': data,
'valid': valid,
}
else:
return None
@staticmethod
def generate(parsed_headers: Dict[str, Any], data: bytes, request: bool=False, response: bool=False) -> bytes:
"""
A very lazy and hastily coded HTTP packet generator.
Parameters:
parsed_headers - A dictionary of headers to include
data - Bytes which should make up the body of the HTTP packet
request - Set to True if this is a request
response - Set to True if this is a response
Returns:
Binary data which can be sent over the wire to a HTTP server.
"""
out = []
# Add first part of header
if request:
out.append(f'{parsed_headers["method"]} {parsed_headers["uri"]} {parsed_headers["version"]}')
elif response:
out.append(f'{parsed_headers["version"]} {parsed_headers["code"]} {parsed_headers["error"]}')
else:
raise Exception("Logic error!")
# Add the rest of the headers
for header in parsed_headers['preserved_headers']:
name, value = header
if name.lower() == 'content-length':
# Fix this
value = len(data)
elif name.lower() == 'transfer-encoding':
# Either we support and strip this, or error!
if value.lower() == 'chunked':
# We support parsing this, but aren't going to re-generate
continue
else:
# Woah, can't figure this out!
raise Exception(f"Unknown transfer-encodign {value}")
out.append(f"{name}: {value}")
# Concatenate it with the binary data
return "\r\n".join(out).encode('ascii') + b'\r\n\r\n' + data