Mini Shell
# -*- encoding: utf-8 -*-
import requests
import ftplib
import base64
from requests.compat import urlparse
from requests.hooks import dispatch_hook
from requests import Response, codes
from io import BytesIO
import cgi
import os
import socket
import logging
from requests.exceptions import ConnectionError, ConnectTimeout, ReadTimeout
from requests.exceptions import RequestException
from requests.utils import prepend_scheme_if_needed
class FTPSession(requests.Session):
def __init__(self):
super(FTPSession, self).__init__()
self.mount('ftp://', FTPAdapter())
# Define our helper methods.
def list(self, url, **kwargs):
'''Sends an FTP LIST. Returns a Response object.'''
return self.request('LIST', url, **kwargs)
def retr(self, url, **kwargs):
'''Sends an FTP RETR for a given url. Returns a Response object whose
content field contains the binary data.'''
return self.request('RETR', url, **kwargs)
def stor(self, url, files=None, **kwargs):
'''Sends an FTP STOR to a given URL. Returns a Response object. Expects
to be given one file by the standard Requests method. The remote
filename will be given by the URL provided.'''
return self.request('STOR', url, files=files, **kwargs)
def nlst(self, url, **kwargs):
'''Sends an FTP NLST. Returns a Response object.'''
return self.request('NLST', url, **kwargs)
def size(self, url, **kwargs):
'''Sends an FTP SIZE. Returns a decimal number.'''
return self.request('SIZE', url, **kwargs)
log = logging.getLogger(__name__)
def monkeypatch_session():
'''Monkeypatch Requests Sessions to provide all the helper
methods needed for use with FTP.'''
requests.Session = FTPSession
return
def parse_multipart_files(request):
'''Given a prepared reqest, return a file-like object containing the
original data. This is pretty hacky.'''
# Start by grabbing the pdict.
_, pdict = cgi.parse_header(request.headers['Content-Type'])
# Now, wrap the multipart data in a BytesIO buffer. This is annoying.
buf = BytesIO()
buf.write(request.body)
buf.seek(0)
# Parse the data. Simply take the first file.
data = cgi.parse_multipart(buf, pdict)
_, filedata = data.popitem()
buf.close()
# Get a BytesIO now, and write the file into it.
buf = BytesIO()
buf.write(''.join(filedata))
buf.seek(0)
return buf
def data_callback_factory(variable):
'''Returns a callback suitable for use by the FTP library. This callback
will repeatedly save data into the variable provided to this function. This
variable should be a file-like structure.'''
def callback(data):
variable.write(data)
if hasattr(variable, "content_len"):
variable.content_len += len(data)
else:
variable.content_len = len(data)
return
return callback
def build_text_response(request, data, code):
'''Build a response for textual data.'''
return build_response(request, data, code, 'ascii')
def build_binary_response(request, data, code):
'''Build a response for data whose encoding is unknown.'''
return build_response(request, data, code, None)
def get_status_code_from_code_response(code):
'''
The idea is to handle complicated code response (even multi lines).
We get the status code in two ways:
- extracting the code from the last valid line in the response
- getting it from the 3 first digits in the code
After a comparison between the two values,
we can safely set the code or raise a warning.
Examples:
- get_status_code_from_code_response('200 Welcome') == 200
- multi_line_code = '226-File successfully transferred\n226 0.000 seconds'
get_status_code_from_code_response(multi_line_code) == 226
- multi_line_with_code_conflicts = '200-File successfully transferred\n226 0.000 seconds'
get_status_code_from_code_response(multi_line_with_code_conflicts) == 226
For more detail see RFC 959, page 36, on multi-line responses:
https://www.ietf.org/rfc/rfc959.txt
"Thus the format for multi-line replies is that the first line
will begin with the exact required reply code, followed
immediately by a Hyphen, "-" (also known as Minus), followed by
text. The last line will begin with the same code, followed
immediately by Space <SP>, optionally some text, and the Telnet
end-of-line code."
'''
last_valid_line_from_code = [line for line in code.split('\n') if line][-1]
status_code_from_last_line = int(last_valid_line_from_code.split()[0])
status_code_from_first_digits = int(code[:3])
if status_code_from_last_line != status_code_from_first_digits:
log.warning(
'FTP response status code seems to be inconsistent.\n'
'Code received: %s, extracted: %s and %s',
code,
status_code_from_last_line,
status_code_from_first_digits
)
return status_code_from_last_line
def build_response(request, data, code, encoding):
'''Builds a response object from the data returned by ftplib, using the
specified encoding.'''
response = Response()
response.encoding = encoding
# Fill in some useful fields.
response.raw = data
response.url = request.url
response.request = request
response.status_code = get_status_code_from_code_response(code)
if hasattr(data, "content_len"):
response.headers['Content-Length'] = str(data.content_len)
# Make sure to seek the file-like raw object back to the start.
response.raw.seek(0)
# Run the response hook.
response = dispatch_hook('response', request.hooks, response)
return response
class FTPAdapter(requests.adapters.BaseAdapter):
'''A Requests Transport Adapter that handles FTP urls.'''
def __init__(self):
super(FTPAdapter, self).__init__()
# Build a dictionary keyed off the methods we support in upper case.
# The values of this dictionary should be the functions we use to
# send the specific queries.
self.func_table = {'LIST': self.list,
'RETR': self.retr,
'STOR': self.stor,
'NLST': self.nlst,
'SIZE': self.size,
'HEAD': self.head,
'GET': self.get,}
def send(self, request, **kwargs):
'''Sends a PreparedRequest object over FTP. Returns a response object.
'''
# Get the authentication from the prepared request, if any.
auth = self.get_username_password_from_header(request)
# Next, get the host and the path.
scheme, host, port, path = self.get_host_and_path_from_url(request)
# Sort out the timeout.
timeout = kwargs.get('timeout', None)
# Look for a proxy
proxies = kwargs.get('proxies', {})
proxy = proxies.get(scheme)
# If there is a proxy, then we actually want to make a HTTP request
if proxy:
return self.send_proxy(request, proxy, **kwargs)
# Establish the connection and login if needed.
self.conn = ftplib.FTP()
# Use a flag to distinguish read vs connection timeouts, and a flat set
# of except blocks instead of a nested try-except, because python 3
# exception chaining makes things weird
connected = False
try:
self.conn.connect(host, port, timeout)
connected = True
if auth is not None:
self.conn.login(auth[0], auth[1])
else:
self.conn.login()
# Get the method and attempt to find the function to call.
resp = self.func_table[request.method](path, request)
except socket.timeout as e:
# requests distinguishes between connection timeouts and others
if connected:
raise ReadTimeout(e, request=request)
else:
raise ConnectTimeout(e, request=request)
# ftplib raises EOFError if the connection is unexpectedly closed.
# Convert that or any other socket error to a ConnectionError.
except (EOFError, socket.error) as e:
raise ConnectionError(e, request=request)
# Raised for 5xx errors. FTP uses 550 for both ENOENT and EPERM type
# errors, so just translate all of these into a http-ish 404
except ftplib.error_perm as e:
# The exception message is probably from the server, so if it's
# non-ascii, who knows what the encoding is. Latin1 has the
# advantage of not being able to fail.
resp = build_text_response(request,
BytesIO(str(e).encode('latin1')), str(codes.not_found))
# 4xx reply, translate to a http 503
except ftplib.error_temp as e:
resp = build_text_response(request,
BytesIO(str(e).encode('latin1')), str(codes.unavailable))
# error_reply is an unexpected status code, and error_proto is an
# invalid status code. Error is the generic ftplib error, usually
# raised when a line is too long. Translate all of them to a generic
# RequestException
except (ftplib.error_reply, ftplib.error_proto, ftplib.Error) as e:
raise RequestException(e, request=request)
# Return the response.
return resp
def close(self):
'''Dispose of any internal state.'''
# Currently this is a no-op.
pass
def send_proxy(self, request, proxy, **kwargs):
'''Send a FTP request through a HTTP proxy'''
# Direct the request through a HTTP adapter instead
proxy_url = prepend_scheme_if_needed(proxy, 'http')
s = requests.Session()
adapter = s.get_adapter(proxy_url)
try:
return adapter.send(request, **kwargs)
finally:
adapter.close()
def list(self, path, request):
'''Executes the FTP LIST command on the given path.'''
data = BytesIO()
# To ensure the BytesIO object gets cleaned up, we need to alias its
# close method to the release_conn() method. This is a dirty hack, but
# there you go.
data.release_conn = data.close
self.conn.cwd(path)
code = self.conn.retrbinary('LIST', data_callback_factory(data))
# When that call has finished executing, we'll have all our data.
response = build_text_response(request, data, code)
# Close the connection.
self.conn.close()
return response
def retr(self, path, request):
'''Executes the FTP RETR command on the given path.'''
data = BytesIO()
# To ensure the BytesIO gets cleaned up, we need to alias its close
# method. See self.list().
data.release_conn = data.close
code = self.conn.retrbinary('RETR ' + path, data_callback_factory(data))
response = build_binary_response(request, data, code)
# Close the connection.
self.conn.close()
return response
def get(self, path, request):
'''Executes the FTP RETR command on the given path.
This is the same as retr except that the FTP server code is
converted to a HTTP 200.
'''
response = self.retr(path, request)
# Errors are handled in send(), so assume everything is ok if we
# made it this far
response.status_code = codes.ok
return response
def size(self, path, request):
'''Executes the FTP SIZE command on the given path.'''
self.conn.voidcmd('TYPE I') # SIZE is not usually allowed in ASCII mode
size = self.conn.size(path)
if not str(size).isdigit():
self.conn.close()
return None
data = BytesIO(bytes(size))
# To ensure the BytesIO gets cleaned up, we need to alias its close
# method to the release_conn() method. This is a dirty hack, but there
# you go.
data.release_conn = data.close
data.content_len = size
response = build_text_response(request, data, '213')
self.conn.close()
return response
def head(self, path, request):
'''Executes the FTP SIZE command on the given path.
This is the same as size except that the FTP server code is
converted to a HTTP 200.
'''
response = self.size(path, request)
response.status_code = codes.ok
return response
def stor(self, path, request):
'''Executes the FTP STOR command on the given path.'''
# First, get the file handle. We assume (bravely)
# that there is only one file to be sent to a given URL. We also
# assume that the filename is sent as part of the URL, not as part of
# the files argument. Both of these assumptions are rarely correct,
# but they are easy.
data = parse_multipart_files(request)
# Split into the path and the filename.
path, filename = os.path.split(path)
# Switch directories and upload the data.
self.conn.cwd(path)
code = self.conn.storbinary('STOR ' + filename, data)
# Close the connection and build the response.
self.conn.close()
response = build_binary_response(request, BytesIO(), code)
return response
def nlst(self, path, request):
'''Executes the FTP NLST command on the given path.'''
data = BytesIO()
# Alias the close method.
data.release_conn = data.close
self.conn.cwd(path)
code = self.conn.retrbinary('NLST', data_callback_factory(data))
# When that call has finished executing, we'll have all our data.
response = build_text_response(request, data, code)
# Close the connection.
self.conn.close()
return response
def get_username_password_from_header(self, request):
'''Given a PreparedRequest object, reverse the process of adding HTTP
Basic auth to obtain the username and password. Allows the FTP adapter
to piggyback on the basic auth notation without changing the control
flow.'''
auth_header = request.headers.get('Authorization')
if auth_header:
# The basic auth header is of the form 'Basic xyz'. We want the
# second part. Check that we have the right kind of auth though.
encoded_components = auth_header.split()[:2]
if encoded_components[0] != 'Basic':
raise AuthError('Invalid form of Authentication used.')
else:
encoded = encoded_components[1]
# Decode the base64 encoded string.
decoded = base64.b64decode(encoded)
# The auth string was encoded to bytes by requests using latin1,
# and will be encoded to bytes by ftplib (in python 3) using
# latin1. In the meantime, use a str
decoded = decoded.decode('latin1')
# The string is of the form 'username:password'. Split on the
# colon.
components = decoded.split(':')
username = components[0]
password = components[1]
return (username, password)
else:
# No auth header. Return None.
return None
def get_host_and_path_from_url(self, request):
'''Given a PreparedRequest object, split the URL in such a manner as to
determine the host and the path. This is a separate method to wrap some
of urlparse's craziness.'''
url = request.url
# scheme, netloc, path, params, query, fragment = urlparse(url)
parsed = urlparse(url)
scheme = parsed.scheme
path = parsed.path
# If there is a slash on the front of the path, chuck it.
if path.startswith('/'):
path = path[1:]
host = parsed.hostname
port = parsed.port or 0
return (scheme, host, port, path)
class AuthError(Exception):
'''Denotes an error with authentication.'''
pass