Add python venv

2022-10-31 10:10:52 -03:00
parent fb1a0435c1
commit a50f49d2c8
913 changed files with 287881 additions and 0 deletions
@@ -0,0 +1,11 @@
+"""CacheControl import Interface.
+
+Make it easy to import from cachecontrol without long namespaces.
+"""
+__author__ = "Eric Larson"
+__email__ = "eric@ionrock.org"
+__version__ = "0.12.6"
+
+from .wrapper import CacheControl
+from .adapter import CacheControlAdapter
+from .controller import CacheController
@@ -0,0 +1,57 @@
+import logging
+
+from pip._vendor import requests
+
+from pip._vendor.cachecontrol.adapter import CacheControlAdapter
+from pip._vendor.cachecontrol.cache import DictCache
+from pip._vendor.cachecontrol.controller import logger
+
+from argparse import ArgumentParser
+
+
+def setup_logging():
+    """Emit the cachecontrol controller's DEBUG output through a stream handler."""
+    stream_handler = logging.StreamHandler()
+    logger.addHandler(stream_handler)
+    logger.setLevel(logging.DEBUG)
+
+
+def get_session():
+    """Return a requests session whose http/https traffic uses a caching adapter.
+
+    The adapter's controller is attached to the session as
+    ``cache_controller`` so callers can drive the cache directly.
+    """
+    caching_adapter = CacheControlAdapter(
+        DictCache(), cache_etags=True, serializer=None, heuristic=None
+    )
+    session = requests.Session()
+    for prefix in ("http://", "https://"):
+        session.mount(prefix, caching_adapter)
+
+    session.cache_controller = caching_adapter.controller
+    return session
+
+
+def get_args():
+    """Parse the command line and return the namespace carrying ``url``."""
+    arg_parser = ArgumentParser()
+    arg_parser.add_argument("url", help="The URL to try and cache")
+    parsed = arg_parser.parse_args()
+    return parsed
+
+
+def main(args=None):
+    """Fetch a URL and report whether its response can be served from the cache.
+
+    ``args`` is an already-parsed namespace exposing a ``url`` attribute.
+    When it is ``None`` the command line is parsed with ``get_args()``.
+    """
+    # The parameter used to be overwritten unconditionally, which made it
+    # impossible for a caller to supply its own arguments.
+    if args is None:
+        args = get_args()
+    sess = get_session()
+
+    # Make a request to get a response
+    resp = sess.get(args.url)
+
+    # Turn on logging only now, so the request above is not logged
+    setup_logging()
+
+    # try setting the cache
+    sess.cache_controller.cache_response(resp.request, resp.raw)
+
+    # Now try to get it
+    if sess.cache_controller.cached_request(resp.request):
+        print("Cached!")
+    else:
+        print("Not cached :(")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,133 @@
+import types
+import functools
+import zlib
+
+from pip._vendor.requests.adapters import HTTPAdapter
+
+from .controller import CacheController
+from .cache import DictCache
+from .filewrapper import CallbackFileWrapper
+
+
+class CacheControlAdapter(HTTPAdapter):
+    """HTTPAdapter that consults a cache controller before sending a request
+    and hands responses to that controller for caching.
+    """
+
+    # A successful (``resp.ok``) response to one of these methods removes the
+    # cache entry for the request URL (see the end of build_response).
+    invalidating_methods = {"PUT", "DELETE"}
+
+    def __init__(
+        self,
+        cache=None,
+        cache_etags=True,
+        controller_class=None,
+        serializer=None,
+        heuristic=None,
+        cacheable_methods=None,
+        *args,
+        **kw
+    ):
+        """Create the adapter and its cache controller.
+
+        ``cache`` defaults to a new DictCache, ``controller_class`` to
+        CacheController and ``cacheable_methods`` to ``("GET",)``.
+        ``cache_etags`` and ``serializer`` are forwarded to the controller;
+        ``heuristic`` (if any) is applied in build_response. Remaining
+        positional and keyword arguments go to HTTPAdapter.
+        """
+        super(CacheControlAdapter, self).__init__(*args, **kw)
+        self.cache = DictCache() if cache is None else cache
+        self.heuristic = heuristic
+        self.cacheable_methods = cacheable_methods or ("GET",)
+
+        controller_factory = controller_class or CacheController
+        self.controller = controller_factory(
+            self.cache, cache_etags=cache_etags, serializer=serializer
+        )
+
+    def send(self, request, cacheable_methods=None, **kw):
+        """
+        Send a request. Use the request information to see if it
+        exists in the cache and cache the response if we need to and can.
+
+        ``cacheable_methods`` overrides the adapter-level setting for this
+        call only; other keyword arguments are passed to HTTPAdapter.send.
+        """
+        cacheable = cacheable_methods or self.cacheable_methods
+        if request.method in cacheable:
+            try:
+                cached_response = self.controller.cached_request(request)
+            except zlib.error:
+                # A zlib failure during the lookup is treated as a cache miss.
+                cached_response = None
+            if cached_response:
+                return self.build_response(request, cached_response, from_cache=True)
+
+            # check for etags and add headers if appropriate
+            request.headers.update(self.controller.conditional_headers(request))
+
+        resp = super(CacheControlAdapter, self).send(request, **kw)
+
+        return resp
+
+    def build_response(
+        self, request, response, from_cache=False, cacheable_methods=None
+    ):
+        """
+        Build the final response object from a raw ``response``.
+
+        For a cacheable method and a response that did not come from the
+        cache, this applies the heuristic, swaps a 304 for the updated
+        cached entry, caches a 301 immediately, and otherwise arranges for
+        the body to be cached once it has been read. The returned response
+        carries a ``from_cache`` attribute.
+        """
+        cacheable = cacheable_methods or self.cacheable_methods
+        if not from_cache and request.method in cacheable:
+            # Check for any heuristics that might update headers
+            # before trying to cache.
+            if self.heuristic:
+                response = self.heuristic.apply(response)
+
+            # apply any expiration heuristics
+            if response.status == 304:
+                # We must have sent an ETag request. This could mean
+                # that we've been expired already or that we simply
+                # have an etag. In either case, we want to try and
+                # update the cache if that is the case.
+                cached_response = self.controller.update_cached_response(
+                    request, response
+                )
+
+                # update_cached_response returns the 304 itself when nothing
+                # was cached, so identity tells us whether the cache was used.
+                if cached_response is not response:
+                    from_cache = True
+
+                # We are done with the server response, read a
+                # possible response body (compliant servers will
+                # not return one, but we cannot be 100% sure) and
+                # release the connection back to the pool.
+                response.read(decode_content=False)
+                response.release_conn()
+
+                response = cached_response
+
+            # We always cache the 301 responses
+            elif response.status == 301:
+                self.controller.cache_response(request, response)
+            else:
+                # Wrap the response file with a wrapper that will cache the
+                #   response when the stream has been consumed.
+                response._fp = CallbackFileWrapper(
+                    response._fp,
+                    functools.partial(
+                        self.controller.cache_response, request, response
+                    ),
+                )
+                if response.chunked:
+                    # Patch this one response so that reaching a chunk of
+                    # length 0 triggers the wrapper's caching callback.
+                    super_update_chunk_length = response._update_chunk_length
+
+                    def _update_chunk_length(self):
+                        super_update_chunk_length()
+                        if self.chunk_left == 0:
+                            self._fp._close()
+
+                    response._update_chunk_length = types.MethodType(
+                        _update_chunk_length, response
+                    )
+
+        resp = super(CacheControlAdapter, self).build_response(request, response)
+
+        # See if we should invalidate the cache.
+        if request.method in self.invalidating_methods and resp.ok:
+            cache_url = self.controller.cache_url(request.url)
+            self.cache.delete(cache_url)
+
+        # Give the request a from_cache attr to let people use it
+        resp.from_cache = from_cache
+
+        return resp
+
+    def close(self):
+        """Close the cache first, then the underlying HTTPAdapter."""
+        self.cache.close()
+        super(CacheControlAdapter, self).close()
@@ -0,0 +1,39 @@
+"""
+The cache object API for implementing caches. The default is a thread
+safe in-memory dictionary.
+"""
+from threading import Lock
+
+
+class BaseCache(object):
+    """Interface that cache backends implement.
+
+    ``get``, ``set`` and ``delete`` must be overridden; ``close`` is an
+    optional hook that does nothing here.
+    """
+
+    def get(self, key):
+        """Return the value stored under ``key``."""
+        raise NotImplementedError()
+
+    def set(self, key, value):
+        """Store ``value`` under ``key``."""
+        raise NotImplementedError()
+
+    def delete(self, key):
+        """Remove the entry stored under ``key``."""
+        raise NotImplementedError()
+
+    def close(self):
+        """Release backend resources; nothing to do by default."""
+        return None
+
+
+class DictCache(BaseCache):
+    """In-memory cache backed by a dict; writes and deletes hold a lock."""
+
+    def __init__(self, init_dict=None):
+        """Use ``init_dict`` as the backing store, or a new dict when omitted."""
+        self.lock = Lock()
+        # Compare against None: ``init_dict or {}`` replaced an *empty* dict
+        # supplied by the caller with a private one, so the caller's mapping
+        # never saw the cached entries.
+        self.data = {} if init_dict is None else init_dict
+
+    def get(self, key):
+        """Return the value stored under ``key``, or None when absent."""
+        return self.data.get(key, None)
+
+    def set(self, key, value):
+        """Store ``value`` under ``key``."""
+        with self.lock:
+            self.data.update({key: value})
+
+    def delete(self, key):
+        """Remove ``key`` if present; a missing key is not an error."""
+        with self.lock:
+            self.data.pop(key, None)
@@ -0,0 +1,2 @@
+from .file_cache import FileCache  # noqa
+from .redis_cache import RedisCache  # noqa
@@ -0,0 +1,146 @@
+import hashlib
+import os
+from textwrap import dedent
+
+from ..cache import BaseCache
+from ..controller import CacheController
+
+try:
+    FileNotFoundError
+except NameError:
+    # py2.X
+    FileNotFoundError = (IOError, OSError)
+
+
+def _secure_open_write(filename, fmode):
+    """Replace ``filename`` with a newly created file and return it opened "wb".
+
+    Any existing file is removed first and the new one is created with
+    mode ``fmode``. ``os.open`` raises if the path exists again by the time
+    it is created.
+    """
+    # Write-only, and O_CREAT | O_EXCL so that only a *new* file can be
+    # opened; that guarantees the mode we pass is the mode of the file.
+    open_flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
+
+    # O_NOFOLLOW: do not follow a symlink planted at the cache path.
+    # O_BINARY: mark the file as binary on Windows.
+    # Either flag is only added on platforms where ``os`` defines it.
+    for optional_flag in ("O_NOFOLLOW", "O_BINARY"):
+        open_flags |= getattr(os, optional_flag, 0)
+
+    # Delete whatever is there already; failure means there was nothing
+    # to delete, so carry on to the open.
+    try:
+        os.remove(filename)
+    except (IOError, OSError):
+        pass
+
+    # If something recreates the path between the remove above and this
+    # call, O_CREAT | O_EXCL turns that race into an error. Because a
+    # lockfile is used this should only happen under attack.
+    descriptor = os.open(filename, open_flags, fmode)
+    try:
+        return os.fdopen(descriptor, "wb")
+
+    except:
+        # Wrapping the descriptor failed: do not leak it.
+        os.close(descriptor)
+        raise
+
+
+class FileCache(BaseCache):
+    """Cache backend that keeps each entry in its own file under a directory."""
+
+    def __init__(
+        self,
+        directory,
+        forever=False,
+        filemode=0o0600,
+        dirmode=0o0700,
+        use_dir_lock=None,
+        lock_class=None,
+    ):
+        """Configure the cache directory, file/dir modes and the lock class.
+
+        Raises ValueError when both ``use_dir_lock`` and ``lock_class`` are
+        given, and ImportError when the ``lockfile`` package is missing.
+        With ``forever`` set, ``delete`` leaves files in place.
+        """
+        both_given = use_dir_lock is not None and lock_class is not None
+        if both_given:
+            raise ValueError("Cannot use use_dir_lock and lock_class together")
+
+        try:
+            from lockfile import LockFile
+            from lockfile.mkdirlockfile import MkdirLockFile
+        except ImportError:
+            notice = dedent(
+                """
+            NOTE: In order to use the FileCache you must have
+            lockfile installed. You can install it via pip:
+              pip install lockfile
+            """
+            )
+            raise ImportError(notice)
+
+        # use_dir_lock wins; otherwise fall back to LockFile unless the
+        # caller supplied a lock class.
+        if use_dir_lock:
+            lock_class = MkdirLockFile
+        elif lock_class is None:
+            lock_class = LockFile
+
+        self.lock_class = lock_class
+        self.dirmode = dirmode
+        self.filemode = filemode
+        self.forever = forever
+        self.directory = directory
+
+    @staticmethod
+    def encode(x):
+        """Return the SHA-224 hex digest of the string ``x``."""
+        digest = hashlib.sha224(x.encode())
+        return digest.hexdigest()
+
+    def _fn(self, name):
+        """Map a cache key to its path: five one-character directory levels
+        taken from the digest, then the full digest as the file name.
+        """
+        # NOTE: This method should not change as some may depend on it.
+        #       See: https://github.com/ionrock/cachecontrol/issues/63
+        digest = self.encode(name)
+        segments = [char for char in digest[:5]]
+        segments.append(digest)
+        return os.path.join(self.directory, *segments)
+
+    def get(self, key):
+        """Return the stored bytes for ``key``, or None if no file exists."""
+        path = self._fn(key)
+        try:
+            with open(path, "rb") as stored:
+                return stored.read()
+
+        except FileNotFoundError:
+            return None
+
+    def set(self, key, value):
+        """Write ``value`` to the file for ``key`` while holding its lock."""
+        path = self._fn(key)
+
+        # Create the parent directories; errors (such as "already exists")
+        # are ignored here.
+        parent = os.path.dirname(path)
+        try:
+            os.makedirs(parent, self.dirmode)
+        except (IOError, OSError):
+            pass
+
+        with self.lock_class(path) as lock:
+            with _secure_open_write(lock.path, self.filemode) as out:
+                out.write(value)
+
+    def delete(self, key):
+        """Remove the file for ``key`` unless the cache is ``forever``."""
+        path = self._fn(key)
+        if self.forever:
+            return
+        try:
+            os.remove(path)
+        except FileNotFoundError:
+            pass
+
+
+def url_to_file_path(url, filecache):
+    """Return the path ``filecache`` would use for ``url``.
+
+    Only the path is computed; the file is not guaranteed to exist.
+    """
+    return filecache._fn(CacheController.cache_url(url))
@@ -0,0 +1,33 @@
+from __future__ import division
+
+from datetime import datetime
+from pip._vendor.cachecontrol.cache import BaseCache
+
+
+class RedisCache(BaseCache):
+    """Cache backend that delegates every operation to a connection object."""
+
+    def __init__(self, conn):
+        """Keep ``conn``, the connection used by all other methods."""
+        self.conn = conn
+
+    def get(self, key):
+        """Return what the connection holds for ``key``."""
+        stored = self.conn.get(key)
+        return stored
+
+    def set(self, key, value, expires=None):
+        """Store ``value`` under ``key``.
+
+        With a truthy ``expires`` the entry is written with ``setex`` and a
+        TTL of ``expires - datetime.utcnow()`` in whole seconds.
+        NOTE(review): this implies ``expires`` is a naive UTC datetime --
+        confirm against callers.
+        """
+        if expires:
+            remaining = expires - datetime.utcnow()
+            self.conn.setex(key, int(remaining.total_seconds()), value)
+        else:
+            self.conn.set(key, value)
+
+    def delete(self, key):
+        """Delete ``key`` through the connection."""
+        self.conn.delete(key)
+
+    def clear(self):
+        """Helper for clearing all the keys in a database. Use with
+        caution!"""
+        for existing_key in self.conn.keys():
+            self.conn.delete(existing_key)
+
+    def close(self):
+        """Redis uses connection pooling, no need to close the connection."""
+        return None
@@ -0,0 +1,29 @@
+try:
+    from urllib.parse import urljoin
+except ImportError:
+    from urlparse import urljoin
+
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+
+
+# Handle the case where the requests module has been patched to not have
+# urllib3 bundled as part of its source.
+try:
+    from pip._vendor.requests.packages.urllib3.response import HTTPResponse
+except ImportError:
+    from pip._vendor.urllib3.response import HTTPResponse
+
+try:
+    from pip._vendor.requests.packages.urllib3.util import is_fp_closed
+except ImportError:
+    from pip._vendor.urllib3.util import is_fp_closed
+
+# Replicate some six behaviour
+try:
+    text_type = unicode
+except NameError:
+    text_type = str
@@ -0,0 +1,376 @@
+"""
+The httplib2 algorithms ported for use with requests.
+"""
+import logging
+import re
+import calendar
+import time
+from email.utils import parsedate_tz
+
+from pip._vendor.requests.structures import CaseInsensitiveDict
+
+from .cache import DictCache
+from .serialize import Serializer
+
+
+logger = logging.getLogger(__name__)
+
+URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
+
+
+def parse_uri(uri):
+    """Split ``uri`` with the regex from Appendix B of RFC 3986.
+
+        (scheme, authority, path, query, fragment) = parse_uri(uri)
+    """
+    # The pattern has nine groups; the unnamed ones are the wrappers that
+    # include the delimiters (":", "//", "?", "#").
+    (
+        _scheme_part,
+        scheme,
+        _authority_part,
+        authority,
+        path,
+        _query_part,
+        query,
+        _fragment_part,
+        fragment,
+    ) = URI.match(uri).groups()
+    return (scheme, authority, path, query, fragment)
+
+
+class CacheController(object):
+    """An interface to see if request should cached or not.
+    """
+
+    def __init__(
+        self, cache=None, cache_etags=True, serializer=None, status_codes=None
+    ):
+        """Set up the controller.
+
+        ``cache`` defaults to a new DictCache, ``serializer`` to a new
+        Serializer and ``status_codes`` to ``(200, 203, 300, 301)``.
+        """
+        if cache is None:
+            cache = DictCache()
+        self.cache = cache
+        self.cache_etags = cache_etags
+        self.serializer = serializer if serializer else Serializer()
+        self.cacheable_status_codes = (
+            status_codes if status_codes else (200, 203, 300, 301)
+        )
+
+    @classmethod
+    def _urlnorm(cls, uri):
+        """Normalize the URL to create a safe key for the cache.
+
+        Scheme and authority are lower-cased, an empty path becomes "/",
+        the query is kept and the fragment is dropped. Raises Exception
+        for a URI without a scheme or authority.
+        """
+        scheme, authority, path, query, fragment = parse_uri(uri)
+        if not (scheme and authority):
+            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)
+
+        # Could do syntax based normalization of the URI before
+        # computing the digest. See Section 6.2.2 of Std 66.
+        request_uri = path if path else "/"
+        if query:
+            request_uri = "?".join([request_uri, query])
+
+        return scheme.lower() + "://" + authority.lower() + request_uri
+
+    @classmethod
+    def cache_url(cls, uri):
+        """Return the cache key for ``uri``: its normalized form."""
+        normalized = cls._urlnorm(uri)
+        return normalized
+
+    def parse_cache_control(self, headers):
+        """Parse the Cache-Control header found in ``headers``.
+
+        Returns a dict mapping each recognised directive (lower-cased) to
+        its integer value, or to None for directives without a value.
+        Unknown directives are ignored. A directive whose value is required
+        is left out when the value is missing or is not an integer.
+        """
+        # directive -> (value type, whether a value is required)
+        known_directives = {
+            # https://tools.ietf.org/html/rfc7234#section-5.2
+            "max-age": (int, True),
+            "max-stale": (int, False),
+            "min-fresh": (int, True),
+            "no-cache": (None, False),
+            "no-store": (None, False),
+            "no-transform": (None, False),
+            "only-if-cached": (None, False),
+            "must-revalidate": (None, False),
+            "public": (None, False),
+            "private": (None, False),
+            "proxy-revalidate": (None, False),
+            "s-maxage": (int, True),
+        }
+
+        cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))
+
+        retval = {}
+
+        for cc_directive in cc_headers.split(","):
+            if not cc_directive.strip():
+                continue
+
+            parts = cc_directive.split("=", 1)
+            # RFC 7234 section 5.2: directive names are compared
+            # case-insensitively, so "Max-Age=60" must not be dropped as
+            # an unknown directive.
+            directive = parts[0].strip().lower()
+
+            try:
+                typ, required = known_directives[directive]
+            except KeyError:
+                logger.debug("Ignoring unknown cache-control directive: %s", directive)
+                continue
+
+            # Valueless directives, and those whose value is optional, are
+            # recorded even when no value can be parsed below.
+            if not typ or not required:
+                retval[directive] = None
+            if typ:
+                try:
+                    retval[directive] = typ(parts[1].strip())
+                except IndexError:
+                    if required:
+                        logger.debug(
+                            "Missing value for cache-control " "directive: %s",
+                            directive,
+                        )
+                except ValueError:
+                    logger.debug(
+                        "Invalid value for cache-control directive " "%s, must be %s",
+                        directive,
+                        typ.__name__,
+                    )
+
+        return retval
+
+    def cached_request(self, request):
+        """
+        Return a cached response if it exists in the cache and is still
+        fresh, otherwise return False.
+
+        The lookup is skipped when the request carries ``no-cache`` or
+        ``max-age=0``. A cached 301 is returned without a freshness check.
+        An entry that is stale, or has no usable ``Date`` header, is
+        deleted from the cache when it also has no ``ETag``.
+        """
+        cache_url = self.cache_url(request.url)
+        logger.debug('Looking up "%s" in the cache', cache_url)
+        cc = self.parse_cache_control(request.headers)
+
+        # Bail out if the request insists on fresh data
+        if "no-cache" in cc:
+            logger.debug('Request header has "no-cache", cache bypassed')
+            return False
+
+        if "max-age" in cc and cc["max-age"] == 0:
+            logger.debug('Request header has "max_age" as 0, cache bypassed')
+            return False
+
+        # Request allows serving from the cache, let's see if we find something
+        cache_data = self.cache.get(cache_url)
+        if cache_data is None:
+            logger.debug("No cache entry available")
+            return False
+
+        # Check whether it can be deserialized
+        resp = self.serializer.loads(request, cache_data)
+        if not resp:
+            logger.warning("Cache entry deserialization failed, entry ignored")
+            return False
+
+        # If we have a cached 301, return it immediately. We don't
+        # need to test our response for other headers b/c it is
+        # intrinsically "cacheable" as it is Permanent.
+        # See:
+        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
+        #
+        # Client can try to refresh the value by repeating the request
+        # with cache busting headers as usual (ie no-cache).
+        if resp.status == 301:
+            msg = (
+                'Returning cached "301 Moved Permanently" response '
+                "(ignoring date and etag information)"
+            )
+            logger.debug(msg)
+            return resp
+
+        headers = CaseInsensitiveDict(resp.headers)
+
+        # parsedate_tz() returns None for a value it cannot parse, and
+        # calendar.timegm(None) raises TypeError. A malformed Date header
+        # is therefore handled exactly like a missing one.
+        parsed_date = None
+        if "date" in headers:
+            parsed_date = parsedate_tz(headers["date"])
+        if parsed_date is None:
+            if "etag" not in headers:
+                # Without date or etag, the cached response can never be used
+                # and should be deleted.
+                logger.debug("Purging cached response: no date or etag")
+                self.cache.delete(cache_url)
+            logger.debug("Ignoring cached response: no date")
+            return False
+
+        now = time.time()
+        date = calendar.timegm(parsed_date)
+        current_age = max(0, now - date)
+        logger.debug("Current age based on date: %i", current_age)
+
+        # TODO: There is an assumption that the result will be a
+        #       urllib3 response object. This may not be best since we
+        #       could probably avoid instantiating or constructing the
+        #       response until we know we need it.
+        resp_cc = self.parse_cache_control(headers)
+
+        # determine freshness
+        freshness_lifetime = 0
+
+        # Check the max-age pragma in the cache control header
+        if "max-age" in resp_cc:
+            freshness_lifetime = resp_cc["max-age"]
+            logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)
+
+        # If there isn't a max-age, check for an expires header
+        elif "expires" in headers:
+            expires = parsedate_tz(headers["expires"])
+            if expires is not None:
+                expire_time = calendar.timegm(expires) - date
+                freshness_lifetime = max(0, expire_time)
+                logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)
+
+        # Determine if we are setting freshness limit in the
+        # request. Note, this overrides what was in the response.
+        if "max-age" in cc:
+            freshness_lifetime = cc["max-age"]
+            logger.debug(
+                "Freshness lifetime from request max-age: %i", freshness_lifetime
+            )
+
+        if "min-fresh" in cc:
+            min_fresh = cc["min-fresh"]
+            # adjust our current age by our min fresh
+            current_age += min_fresh
+            logger.debug("Adjusted current age from min-fresh: %i", current_age)
+
+        # Return entry if it is fresh enough
+        if freshness_lifetime > current_age:
+            logger.debug('The response is "fresh", returning cached response')
+            logger.debug("%i > %i", freshness_lifetime, current_age)
+            return resp
+
+        # we're not fresh. If we don't have an Etag, clear it out
+        if "etag" not in headers:
+            logger.debug('The cached response is "stale" with no etag, purging')
+            self.cache.delete(cache_url)
+
+        # return the original handler
+        return False
+
+    def conditional_headers(self, request):
+        """Return the validator headers to add to ``request``.
+
+        ``If-None-Match`` and ``If-Modified-Since`` are taken from the
+        cached entry's ``ETag`` and ``Last-Modified`` headers. The result
+        is empty when nothing usable is cached for the request URL.
+        """
+        cache_url = self.cache_url(request.url)
+        cached = self.serializer.loads(request, self.cache.get(cache_url))
+        if not cached:
+            return {}
+
+        stored_headers = CaseInsensitiveDict(cached.headers)
+        validators = {}
+        for stored_name, request_name in (
+            ("ETag", "If-None-Match"),
+            ("Last-Modified", "If-Modified-Since"),
+        ):
+            if stored_name in stored_headers:
+                validators[request_name] = stored_headers[stored_name]
+
+        return validators
+
+    def cache_response(self, request, response, body=None, status_codes=None):
+        """
+        Decide whether ``response`` should be stored, and store it if so.
+
+        ``body`` is the already-read payload, used for the Content-Length
+        check and passed to the serializer. ``status_codes`` overrides the
+        controller's cacheable status codes for this call. Nothing is
+        returned; the only effect is on ``self.cache``.
+
+        NOTE(review): the code reads ``response.status`` and
+        ``response.headers``; confirm which response type callers pass.
+        """
+        # From httplib2: Don't cache 206's since we aren't going to
+        #                handle byte range requests
+        cacheable_status_codes = status_codes or self.cacheable_status_codes
+        if response.status not in cacheable_status_codes:
+            logger.debug(
+                "Status code %s not in %s", response.status, cacheable_status_codes
+            )
+            return
+
+        response_headers = CaseInsensitiveDict(response.headers)
+
+        # If we've been given a body, our response has a Content-Length, that
+        # Content-Length is valid then we can check to see if the body we've
+        # been given matches the expected size, and if it doesn't we'll just
+        # skip trying to cache it.
+        if (
+            body is not None
+            and "content-length" in response_headers
+            and response_headers["content-length"].isdigit()
+            and int(response_headers["content-length"]) != len(body)
+        ):
+            return
+
+        cc_req = self.parse_cache_control(request.headers)
+        cc = self.parse_cache_control(response_headers)
+
+        cache_url = self.cache_url(request.url)
+        logger.debug('Updating cache with response from "%s"', cache_url)
+
+        # "no-store" on either the response or the request forbids caching;
+        # delete it from the cache if we happen to have it stored there.
+        no_store = False
+        if "no-store" in cc:
+            no_store = True
+            logger.debug('Response header has "no-store"')
+        if "no-store" in cc_req:
+            no_store = True
+            logger.debug('Request header has "no-store"')
+        if no_store and self.cache.get(cache_url):
+            logger.debug('Purging existing cache entry to honor "no-store"')
+            self.cache.delete(cache_url)
+        if no_store:
+            return
+
+        # https://tools.ietf.org/html/rfc7234#section-4.1:
+        # A Vary header field-value of "*" always fails to match.
+        # Storing such a response leads to a deserialization warning
+        # during cache lookup and is not allowed to ever be served,
+        # so storing it can be avoided.
+        if "*" in response_headers.get("vary", ""):
+            logger.debug('Response header has "Vary: *"')
+            return
+
+        # The branches below are tried in order and at most one stores the
+        # response: etag, then 301, then date-based freshness information.
+
+        # If we've been given an etag, then keep the response
+        if self.cache_etags and "etag" in response_headers:
+            logger.debug("Caching due to etag")
+            self.cache.set(
+                cache_url, self.serializer.dumps(request, response, body=body)
+            )
+
+        # Add to the cache any 301s. We do this before looking at
+        # the Date headers. Note that ``body`` is not passed here.
+        elif response.status == 301:
+            logger.debug("Caching permanant redirect")
+            self.cache.set(cache_url, self.serializer.dumps(request, response))
+
+        # Add to the cache if the response headers demand it. If there
+        # is no date header then we can't do anything about expiring
+        # the cache.
+        elif "date" in response_headers:
+            # cache when there is a max-age > 0
+            if "max-age" in cc and cc["max-age"] > 0:
+                logger.debug("Caching b/c date exists and max-age > 0")
+                self.cache.set(
+                    cache_url, self.serializer.dumps(request, response, body=body)
+                )
+
+            # If the request can expire, it means we should cache it
+            # in the meantime.
+            elif "expires" in response_headers:
+                if response_headers["expires"]:
+                    logger.debug("Caching b/c of expires header")
+                    self.cache.set(
+                        cache_url, self.serializer.dumps(request, response, body=body)
+                    )
+
+    def update_cached_response(self, request, response):
+        """Fold the headers of a 304 ``response`` into the cached entry.
+
+        Returns the refreshed cached response (status forced to 200 and
+        written back to the cache), or ``response`` itself when nothing is
+        cached for the request URL. Meant to be called only after sending
+        an ETag and receiving a 304.
+        """
+        cache_url = self.cache_url(request.url)
+        stored = self.serializer.loads(request, self.cache.get(cache_url))
+        if not stored:
+            # we didn't have a cached response
+            return response
+
+        # Take over the headers of the new response:
+        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
+        #
+        # The server isn't supposed to send headers that would make the
+        # cached body invalid, but just in case, the ones known to be
+        # problematic under typical assumptions are left out.
+        excluded_headers = ["content-length"]
+        refreshed = {}
+        for name, value in response.headers.items():
+            if name.lower() not in excluded_headers:
+                refreshed[name] = value
+        stored.headers.update(refreshed)
+
+        # we want a 200 b/c we have content via the cache
+        stored.status = 200
+
+        # write the refreshed entry back
+        self.cache.set(cache_url, self.serializer.dumps(request, stored))
+
+        return stored
@@ -0,0 +1,80 @@
+from io import BytesIO
+
+
+class CallbackFileWrapper(object):
+    """
+    Small wrapper around a fp object which will tee everything read into a
+    buffer, and when that file is closed it will execute a callback with the
+    contents of that buffer.
+
+    All attributes are proxied to the underlying file object.
+
+    This class uses members with a double underscore (__) leading prefix so as
+    not to accidentally shadow an attribute.
+    """
+
+    def __init__(self, fp, callback):
+        """Wrap ``fp``; ``callback`` later receives the buffered bytes."""
+        self.__buf = BytesIO()
+        self.__fp = fp
+        self.__callback = callback
+
+    def __getattr__(self, name):
+        # The vagaries of garbage collection mean that self.__fp is
+        # not always set.  Using __getattribute__ and the private
+        # name[0] allows looking up the attribute value and raising an
+        # AttributeError when it doesn't exist. This stops things from
+        # infinitely recursing calls to getattr in the case where
+        # self.__fp hasn't been set.
+        #
+        # [0] https://docs.python.org/2/reference/expressions.html#atom-identifiers
+        fp = self.__getattribute__("_CallbackFileWrapper__fp")
+        return getattr(fp, name)
+
+    def __is_fp_closed(self):
+        """Report whether the wrapped object looks closed.
+
+        Tries ``fp.fp is None`` first, then ``fp.closed``; returns False
+        when the wrapped object offers neither attribute.
+        """
+        try:
+            return self.__fp.fp is None
+
+        except AttributeError:
+            pass
+
+        try:
+            return self.__fp.closed
+
+        except AttributeError:
+            pass
+
+        # We just don't cache it then.
+        # TODO: Add some logging here...
+        return False
+
+    def _close(self):
+        """Run the callback (once) with everything buffered so far."""
+        if self.__callback:
+            self.__callback(self.__buf.getvalue())
+
+        # We assign this to None here, because otherwise we can get into
+        # really tricky problems where the CPython interpreter deadlocks
+        # because the callback is holding a reference to something which
+        # has a __del__ method. Setting this to None breaks the cycle
+        # and allows the garbage collector to do its thing normally.
+        self.__callback = None
+
+    def read(self, amt=None):
+        """Read from the wrapped object, buffering the data; fire the
+        callback once the wrapped object reports closed."""
+        data = self.__fp.read(amt)
+        self.__buf.write(data)
+        if self.__is_fp_closed():
+            self._close()
+
+        return data
+
+    def _safe_read(self, amt):
+        """Like read(), but via the wrapped ``_safe_read``; a bare CRLF
+        read of two bytes is passed through without being buffered."""
+        data = self.__fp._safe_read(amt)
+        if amt == 2 and data == b"\r\n":
+            # urllib executes this read to toss the CRLF at the end
+            # of the chunk.
+            return data
+
+        self.__buf.write(data)
+        if self.__is_fp_closed():
+            self._close()
+
+        return data
@@ -0,0 +1,135 @@
+import calendar
+import time
+
+from email.utils import formatdate, parsedate, parsedate_tz
+
+from datetime import datetime, timedelta
+
+# strftime pattern used to render a GMT timestamp as an HTTP date header value.
+TIME_FMT = "%a, %d %b %Y %H:%M:%S GMT"
+
+
+def expire_after(delta, date=None):
+    """Return ``date`` shifted forward by ``delta``.
+
+    When ``date`` is omitted (or otherwise falsy), the current time from
+    ``datetime.utcnow()`` is used as the starting point.
+    """
+    if not date:
+        date = datetime.utcnow()
+    return date + delta
+
+
+def datetime_to_header(dt):
+    """Format ``dt`` as a date string via ``email.utils.formatdate``.
+
+    The fields of ``dt.timetuple()`` are interpreted as UTC when they
+    are converted to a timestamp.
+    """
+    epoch_seconds = calendar.timegm(dt.timetuple())
+    return formatdate(epoch_seconds)
+
+
+class BaseHeuristic(object):
+    """Base class for heuristics that adjust a response's caching headers."""
+
+    def warning(self, response):
+        """
+        Return a valid 1xx warning header value describing the cache
+        adjustments.
+
+        The response is passed in to allow warnings such as 113
+        (http://tools.ietf.org/html/rfc7234#section-5.5.4), where we need
+        to explicitly say the response is over 24 hours old.
+        """
+        return '110 - "Response is Stale"'
+
+    def update_headers(self, response):
+        """Return the headers to merge into the response.
+
+        The base implementation returns an empty dict, i.e. no change.
+
+        NOTE: This SHOULD always include some Warning header to
+              signify that the response was cached by the client, not
+              by way of the provided headers.
+        """
+        return {}
+
+    def apply(self, response):
+        """Merge the heuristic's headers into ``response`` and return it.
+
+        Nothing is touched when ``update_headers`` returns nothing;
+        otherwise the headers are merged and, unless ``warning`` returns
+        ``None``, a ``Warning`` header is set as well.
+        """
+        extra_headers = self.update_headers(response)
+        if not extra_headers:
+            return response
+
+        response.headers.update(extra_headers)
+        warning_value = self.warning(response)
+        if warning_value is not None:
+            response.headers.update({"Warning": warning_value})
+
+        return response
+
+
+class OneDayCache(BaseHeuristic):
+    """
+    Cache the response by providing an expires 1 day in the
+    future.
+    """
+
+    def update_headers(self, response):
+        """Return ``expires``/``cache-control`` headers one day past the Date header.
+
+        An empty dict is returned (leaving the response untouched) when
+        the response already carries an ``expires`` header, or when it
+        has no ``date`` header that parses to a valid date.
+        """
+        if "expires" in response.headers:
+            return {}
+
+        # Without a parseable Date header there is nothing to anchor the
+        # expiry to; parsedate() returns None for malformed values.
+        if "date" not in response.headers:
+            return {}
+        parsed = parsedate(response.headers["date"])
+        if parsed is None:
+            return {}
+
+        try:
+            served_at = datetime(*parsed[:6])
+        except ValueError:
+            # datetime() rejects out-of-range fields that parsedate()
+            # let through.
+            return {}
+
+        expires = expire_after(timedelta(days=1), date=served_at)
+        return {
+            "expires": datetime_to_header(expires),
+            "cache-control": "public",
+        }
+
+
+class ExpiresAfter(BaseHeuristic):
+    """
+    Heuristic that marks **all** responses as cacheable for a fixed,
+    configurable time period.
+    """
+
+    def __init__(self, **kw):
+        """Store the lifetime; ``kw`` is passed straight to ``timedelta``."""
+        self.delta = timedelta(**kw)
+
+    def update_headers(self, response):
+        """Return ``expires``/``cache-control`` headers expiring ``self.delta`` from now."""
+        deadline = expire_after(self.delta)
+        new_headers = {"expires": datetime_to_header(deadline)}
+        new_headers["cache-control"] = "public"
+        return new_headers
+
+    def warning(self, response):
+        """Return a 110 warning value that names the configured lifetime."""
+        message = "110 - Automatically cached for %s. Response might be stale"
+        return message % self.delta
+
+
+class LastModified(BaseHeuristic):
+    """
+    If there is no Expires header already, fall back on Last-Modified
+    using the heuristic from
+    http://tools.ietf.org/html/rfc7234#section-4.2.2
+    to calculate a reasonable value.
+
+    Firefox also does something like this per
+    https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching_FAQ
+    http://lxr.mozilla.org/mozilla-release/source/netwerk/protocol/http/nsHttpResponseHead.cpp#397
+    Unlike mozilla we limit this to 24-hr.
+    """
+    # Response statuses for which this heuristic is applied.
+    cacheable_by_default_statuses = {
+        200, 203, 204, 206, 300, 301, 404, 405, 410, 414, 501
+    }
+
+    def update_headers(self, resp):
+        """Return an ``expires`` header derived from Last-Modified, or ``{}``.
+
+        The freshness lifetime is one tenth of the gap between the Date
+        and Last-Modified headers, capped at 24 hours.  ``{}`` is
+        returned when the response already has ``expires``, has a
+        ``cache-control`` value other than ``public``, has a status
+        outside ``cacheable_by_default_statuses``, lacks (or has an
+        unparseable) ``date`` or ``last-modified`` header, or when the
+        computed lifetime has already elapsed.
+        """
+        headers = resp.headers
+
+        if "expires" in headers:
+            return {}
+
+        if "cache-control" in headers and headers["cache-control"] != "public":
+            return {}
+
+        if resp.status not in self.cacheable_by_default_statuses:
+            return {}
+
+        if "date" not in headers or "last-modified" not in headers:
+            return {}
+
+        # Both parsers return None for malformed values, so validate the
+        # tuples before handing them to calendar.timegm(), which would
+        # raise TypeError on None.
+        date_tuple = parsedate_tz(headers["date"])
+        last_modified = parsedate(headers["last-modified"])
+        if date_tuple is None or last_modified is None:
+            return {}
+
+        # NOTE: timegm() only looks at the date/time fields, so any zone
+        # offset parsed by parsedate_tz() is ignored here.
+        date = calendar.timegm(date_tuple)
+
+        now = time.time()
+        current_age = max(0, now - date)
+        delta = date - calendar.timegm(last_modified)
+        freshness_lifetime = max(0, min(delta / 10, 24 * 3600))
+        if freshness_lifetime <= current_age:
+            return {}
+
+        expires = date + freshness_lifetime
+        return {"expires": time.strftime(TIME_FMT, time.gmtime(expires))}
+
+    def warning(self, resp):
+        """Return ``None`` so that no Warning header is added."""
+        return None
@@ -0,0 +1,188 @@
+import base64
+import io
+import json
+import zlib
+
+from pip._vendor import msgpack
+from pip._vendor.requests.structures import CaseInsensitiveDict
+
+from .compat import HTTPResponse, pickle, text_type
+
+
+def _b64_decode_bytes(b):
+    """Decode the base64 text ``b`` (an ASCII-only string) into raw bytes."""
+    ascii_payload = b.encode("ascii")
+    return base64.b64decode(ascii_payload)
+
+
+def _b64_decode_str(s):
+    """Decode the base64 text ``s`` and return the result as UTF-8 text."""
+    raw = _b64_decode_bytes(s)
+    return raw.decode("utf8")
+
+
+class Serializer(object):
+    """Convert responses to and from the byte strings stored in the cache.
+
+    Entries are written as ``cc=<version>,<payload>``.  ``dumps`` always
+    writes version 4 (msgpack); ``loads`` dispatches on the version
+    prefix to the matching ``_loads_v<N>`` method.
+    """
+
+    def dumps(self, request, response, body=None):
+        """Serialize ``response`` into the bytes to store in the cache.
+
+        ``body`` is the raw response body.  When it is ``None`` the body
+        is read from ``response`` (without content decoding) and
+        ``response._fp`` is replaced by a ``BytesIO`` over those bytes.
+        For every header named in the response's ``Vary`` header, the
+        value from ``request.headers`` (or ``None``) is recorded too.
+
+        Returns ``b"cc=4,"`` followed by the msgpack-encoded payload.
+        """
+        response_headers = CaseInsensitiveDict(response.headers)
+
+        if body is None:
+            body = response.read(decode_content=False)
+
+            # NOTE: 99% sure this is dead code. I'm only leaving it
+            #       here b/c I don't have a test yet to prove
+            #       it. Basically, before using
+            #       `cachecontrol.filewrapper.CallbackFileWrapper`,
+            #       this made an effort to reset the file handle. The
+            #       `CallbackFileWrapper` short circuits this code by
+            #       setting the body as the content is consumed, the
+            #       result being a `body` argument is *always* passed
+            #       into cache_response, and in turn,
+            #       `Serializer.dumps`.
+            response._fp = io.BytesIO(body)
+
+        # NOTE: This is all a bit weird, but it's really important that on
+        #       Python 2.x these objects are unicode and not str, even when
+        #       they contain only ascii. The problem here is that msgpack
+        #       understands the difference between unicode and bytes and we
+        #       have it set to differentiate between them, however Python 2
+        #       doesn't know the difference. Forcing these to unicode will be
+        #       enough to have msgpack know the difference.
+        data = {
+            u"response": {
+                u"body": body,
+                u"headers": dict(
+                    (text_type(k), text_type(v)) for k, v in response.headers.items()
+                ),
+                u"status": response.status,
+                u"version": response.version,
+                u"reason": text_type(response.reason),
+                u"strict": response.strict,
+                u"decode_content": response.decode_content,
+            }
+        }
+
+        # Construct our vary headers
+        data[u"vary"] = {}
+        if u"vary" in response_headers:
+            varied_headers = response_headers[u"vary"].split(",")
+            for header in varied_headers:
+                header = text_type(header).strip()
+                header_value = request.headers.get(header, None)
+                if header_value is not None:
+                    header_value = text_type(header_value)
+                data[u"vary"][header] = header_value
+
+        return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)])
+
+    def loads(self, request, data):
+        """Deserialize cached ``data`` into a response for ``request``.
+
+        Returns ``None`` (a cache miss) when ``data`` is empty, when the
+        version prefix has no matching ``_loads_v<N>`` method, or when
+        the version-specific loader itself returns ``None``.
+        """
+        # Short circuit if we've been given an empty set of data
+        if not data:
+            return
+
+        # Determine what version of the serializer the data was serialized
+        # with
+        try:
+            ver, data = data.split(b",", 1)
+        except ValueError:
+            ver = b"cc=0"
+
+        # Make sure that our "ver" is actually a version and isn't a false
+        # positive from a , being in the data stream.
+        if ver[:3] != b"cc=":
+            data = ver + data
+            ver = b"cc=0"
+
+        # Get the version number out of the cc=N
+        ver = ver.split(b"=", 1)[-1].decode("ascii")
+
+        # Dispatch to the actual load method for the given version
+        # NOTE(review): the loader is *called* inside this try block, so an
+        # AttributeError raised while loading is also reported as a miss.
+        try:
+            return getattr(self, "_loads_v{}".format(ver))(request, data)
+
+        except AttributeError:
+            # This is a version we don't have a loads function for, so we'll
+            # just treat it as a miss and return None
+            return
+
+    def prepare_response(self, request, cached):
+        """Verify our vary headers match and construct a real urllib3
+        HTTPResponse object.
+
+        Returns ``None`` when the cached entry varies on ``*`` or when a
+        recorded vary header value differs from the one in ``request``.
+        Note that the ``body`` key is popped from ``cached["response"]``
+        and its ``headers`` entry is replaced in place.
+        """
+        # Special case the '*' Vary value as it means we cannot actually
+        # determine if the cached response is suitable for this request.
+        # This case is also handled in the controller code when creating
+        # a cache entry, but is left here for backwards compatibility.
+        if "*" in cached.get("vary", {}):
+            return
+
+        # Ensure that the Vary headers for the cached response match our
+        # request
+        for header, value in cached.get("vary", {}).items():
+            if request.headers.get(header, None) != value:
+                return
+
+        body_raw = cached["response"].pop("body")
+
+        # The stored body is handed over as a plain byte stream below, so
+        # a "chunked" transfer-encoding header is dropped from the headers.
+        headers = CaseInsensitiveDict(data=cached["response"]["headers"])
+        if headers.get("transfer-encoding", "") == "chunked":
+            headers.pop("transfer-encoding")
+
+        cached["response"]["headers"] = headers
+
+        try:
+            body = io.BytesIO(body_raw)
+        except TypeError:
+            # This can happen if cachecontrol serialized to v1 format (pickle)
+            # using Python 2. A Python 2 str(byte string) will be unpickled as
+            # a Python 3 str (unicode string), which will cause the above to
+            # fail with:
+            #
+            #     TypeError: 'str' does not support the buffer interface
+            body = io.BytesIO(body_raw.encode("utf8"))
+
+        return HTTPResponse(body=body, preload_content=False, **cached["response"])
+
+    def _loads_v0(self, request, data):
+        """Treat legacy (unversioned) entries as a cache miss."""
+        # The original legacy cache data. This doesn't contain enough
+        # information to construct everything we need, so we'll treat this as
+        # a miss.
+        return
+
+    def _loads_v1(self, request, data):
+        """Load a v1 (pickle) entry; a ``ValueError`` while unpickling is a miss."""
+        # NOTE(review): pickle.loads() can execute arbitrary code if the
+        # cached bytes are attacker-controlled -- confirm the cache store
+        # is trusted before relying on v1 entries.
+        try:
+            cached = pickle.loads(data)
+        except ValueError:
+            return
+
+        return self.prepare_response(request, cached)
+
+    def _loads_v2(self, request, data):
+        """Load a v2 entry: zlib-compressed JSON with base64-encoded fields.
+
+        Data that fails to decompress or parse is treated as a miss.
+        """
+        try:
+            cached = json.loads(zlib.decompress(data).decode("utf8"))
+        except (ValueError, zlib.error):
+            return
+
+        # We need to decode the items that we've base64 encoded
+        cached["response"]["body"] = _b64_decode_bytes(cached["response"]["body"])
+        cached["response"]["headers"] = dict(
+            (_b64_decode_str(k), _b64_decode_str(v))
+            for k, v in cached["response"]["headers"].items()
+        )
+        cached["response"]["reason"] = _b64_decode_str(cached["response"]["reason"])
+        cached["vary"] = dict(
+            (_b64_decode_str(k), _b64_decode_str(v) if v is not None else v)
+            for k, v in cached["vary"].items()
+        )
+
+        return self.prepare_response(request, cached)
+
+    def _loads_v3(self, request, data):
+        """Treat v3 entries as a cache miss."""
+        # Due to Python 2 encoding issues, it's impossible to know for sure
+        # exactly how to load v3 entries, thus we'll treat these as a miss so
+        # that they get rewritten out as v4 entries.
+        return
+
+    def _loads_v4(self, request, data):
+        """Load a v4 (msgpack) entry; a ``ValueError`` while unpacking is a miss."""
+        try:
+            cached = msgpack.loads(data, raw=False)
+        except ValueError:
+            return
+
+        return self.prepare_response(request, cached)
@@ -0,0 +1,29 @@
+from .adapter import CacheControlAdapter
+from .cache import DictCache
+
+
+def CacheControl(
+    sess,
+    cache=None,
+    cache_etags=True,
+    serializer=None,
+    heuristic=None,
+    controller_class=None,
+    adapter_class=None,
+    cacheable_methods=None,
+):
+    """Mount a caching adapter on ``sess`` for HTTP and HTTPS and return ``sess``.
+
+    ``cache`` defaults to a new ``DictCache`` and ``adapter_class`` to
+    ``CacheControlAdapter``; the remaining arguments are passed to the
+    adapter unchanged.  The same adapter instance serves both schemes.
+    """
+    if cache is None:
+        cache = DictCache()
+    if not adapter_class:
+        adapter_class = CacheControlAdapter
+
+    adapter_options = {
+        "cache_etags": cache_etags,
+        "serializer": serializer,
+        "heuristic": heuristic,
+        "controller_class": controller_class,
+        "cacheable_methods": cacheable_methods,
+    }
+    adapter = adapter_class(cache, **adapter_options)
+
+    for prefix in ("http://", "https://"):
+        sess.mount(prefix, adapter)
+
+    return sess