Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an option to remove unused requests from cassette #763

Merged
merged 7 commits into from
Jan 11, 2025
13 changes: 13 additions & 0 deletions docs/advanced.rst
Original file line number Diff line number Diff line change
Expand Up @@ -426,3 +426,16 @@ If you want to save the cassette only when the test succeeds, set the Cassette

# Since there was an exception, the cassette file hasn't been created.
assert not os.path.exists('fixtures/vcr_cassettes/synopsis.yaml')

Drop unused requests
--------------------

Even if an HTTP request is changed or removed from the tests, the previously
recorded interactions remain in the cassette file. If you set the
``drop_unused_requests`` option to ``True``, VCR will not save old HTTP
interactions that were not used.

.. code:: python

my_vcr = VCR(drop_unused_requests=True)
with my_vcr.use_cassette('fixtures/vcr_cassettes/synopsis.yaml'):
... # your HTTP interactions here
19 changes: 19 additions & 0 deletions tests/integration/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

import vcr
from vcr.cassette import Cassette


def test_set_serializer_default_config(tmpdir, httpbin):
Expand Down Expand Up @@ -80,3 +81,21 @@ def some_test():
assert b"Not in content" in urlopen("http://httpbin.org/get").read()

assert not os.path.exists(str(tmpdir.join("dontsave2.yml")))


def test_set_drop_unused_requests(tmpdir, httpbin):
    """With ``drop_unused_requests=True``, a request that is not replayed is pruned on save."""
    cassette_path = str(tmpdir.join("test.yaml"))
    pruning_vcr = vcr.VCR(drop_unused_requests=True)

    # First session: two distinct requests are recorded.
    with pruning_vcr.use_cassette(cassette_path):
        urlopen(httpbin.url)
        urlopen(httpbin.url + "/get")

    recorded = Cassette.load(path=cassette_path)
    assert len(recorded) == 2

    # Second session: only the first request is made again.
    with pruning_vcr.use_cassette(cassette_path):
        urlopen(httpbin.url)

    pruned = Cassette.load(path=cassette_path)
    assert len(pruned) == 1
23 changes: 23 additions & 0 deletions tests/unit/test_cassettes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from vcr.cassette import Cassette
from vcr.errors import UnhandledHTTPRequestError
from vcr.patch import force_reset
from vcr.request import Request
from vcr.stubs import VCRHTTPSConnection


Expand Down Expand Up @@ -394,3 +395,25 @@ def test_find_requests_with_most_matches_many_similar_requests(mock_get_matchers
(1, ["method", "path"], [("query", "failed : query")]),
(3, ["method", "path"], [("query", "failed : query")]),
]


def test_used_interactions(tmpdir):
    """Played interactions plus newly appended ones form the set of used interactions."""
    stored_first = {
        "request": {"body": "", "uri": "foo1", "method": "GET", "headers": {}},
        "response": "bar1",
    }
    stored_second = {
        "request": {"body": "", "uri": "foo2", "method": "GET", "headers": {}},
        "response": "bar2",
    }
    not_stored = {
        "request": {"body": "", "uri": "foo3", "method": "GET", "headers": {}},
        "response": "bar3",
    }

    # Only the first two interactions are written to the cassette file.
    cassette_file = tmpdir.join("test_cassette.yml")
    cassette_file.write(yaml.dump({"interactions": [stored_first, stored_second]}))

    cassette = Cassette.load(path=str(cassette_file))

    # Replay just one of the two stored interactions.
    cassette.play_response(Request._from_dict(stored_second["request"]))
    assert len(cassette._played_interactions) < len(cassette._old_interactions)

    # Append the third interaction, which was never in the file.
    cassette.append(Request._from_dict(not_stored["request"]), not_stored["response"])
    assert len(cassette._new_interactions()) == 1

    used = cassette._played_interactions + cassette._new_interactions()
    assert len(used) == 2
34 changes: 33 additions & 1 deletion vcr/cassette.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ def __init__(
custom_patches=(),
inject=False,
allow_playback_repeats=False,
drop_unused_requests=False,
):
self._persister = persister or FilesystemPersister
self._path = path
Expand All @@ -209,13 +210,18 @@ def __init__(
self.record_mode = record_mode
self.custom_patches = custom_patches
self.allow_playback_repeats = allow_playback_repeats
self.drop_unused_requests = drop_unused_requests

# self.data is the list of (req, resp) tuples
self.data = []
self.play_counts = collections.Counter()
self.dirty = False
self.rewound = False

# Subsets of self.data to store old and played interactions
self._old_interactions = []
self._played_interactions = []

@property
def play_count(self):
    """Sum of all values held in ``self.play_counts``."""
    total = 0
    for plays in self.play_counts.values():
        total += plays
    return total
Expand Down Expand Up @@ -277,6 +283,7 @@ def play_response(self, request):
for index, response in self._responses(request):
if self.play_counts[index] == 0 or self.allow_playback_repeats:
self.play_counts[index] += 1
self._played_interactions.append((request, response))
return response
# The cassette doesn't contain the request asked for.
raise UnhandledHTTPRequestError(
Expand Down Expand Up @@ -337,19 +344,44 @@ def find_requests_with_most_matches(self, request):

return final_best_matches

def _new_interactions(self):
    """Return the (request, response) tuples in ``self.data`` whose request matches no old request.

    A request counts as new when ``requests_match`` is falsy against every
    request stored in ``self._old_interactions`` using ``self._match_on``.
    """

    def matches_an_old_request(candidate):
        return any(
            requests_match(candidate, old_request, self._match_on)
            for old_request, _ in self._old_interactions
        )

    return [
        (request, response)
        for request, response in self.data
        if not matches_an_old_request(request)
    ]

def _as_dict(self):
    """Return the cassette's ``requests`` and ``responses`` collected into one dict."""
    payload = dict(requests=self.requests, responses=self.responses)
    return payload

def _build_used_interactions_dict(self):
    """Return a ``requests``/``responses`` dict limited to played and new interactions.

    The played interactions come first, followed by whatever
    ``_new_interactions()`` returns, preserving the order of each list.
    """
    used = self._played_interactions + self._new_interactions()
    kept_requests = []
    kept_responses = []
    for request, response in used:
        kept_requests.append(request)
        kept_responses.append(response)
    return {"requests": kept_requests, "responses": kept_responses}

def _save(self, force=False):
    """Write the cassette through the persister when a save is required.

    A save happens when ``force`` is true, when the cassette is dirty, or
    when ``drop_unused_requests`` is enabled and fewer interactions were
    played than were loaded. In that last case only the played and new
    interactions are written.

    :param force: write the cassette even if ``self.dirty`` is false.
    """
    if self.drop_unused_requests and len(self._played_interactions) < len(self._old_interactions):
        cassette_dict = self._build_used_interactions_dict()
        # The pruned cassette has to be written even when nothing new was
        # recorded, so the save is forced.
        force = True
    else:
        cassette_dict = self._as_dict()
    if force or self.dirty:
        # Persist exactly once, using the dict selected above. Saving the
        # full ``_as_dict()`` first would write the unused interactions too.
        self._persister.save_cassette(self._path, cassette_dict, serializer=self._serializer)
        self.dirty = False

def _load(self):
try:
requests, responses = self._persister.load_cassette(self._path, serializer=self._serializer)
for request, response in zip(requests, responses):
self.append(request, response)
self._old_interactions.append((request, response))
self.dirty = False
self.rewound = True
except ValueError:
Expand Down
3 changes: 3 additions & 0 deletions vcr/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
func_path_generator=None,
decode_compressed_response=False,
record_on_exception=True,
drop_unused_requests=False,
):
self.serializer = serializer
self.match_on = match_on
Expand Down Expand Up @@ -83,6 +84,7 @@ def __init__(
self.decode_compressed_response = decode_compressed_response
self.record_on_exception = record_on_exception
self._custom_patches = tuple(custom_patches)
self.drop_unused_requests = drop_unused_requests

def _get_serializer(self, serializer_name):
try:
Expand Down Expand Up @@ -153,6 +155,7 @@ def add_cassette_library_dir(path):
"func_path_generator": func_path_generator,
"allow_playback_repeats": kwargs.get("allow_playback_repeats", False),
"record_on_exception": record_on_exception,
"drop_unused_requests": kwargs.get("drop_unused_requests", self.drop_unused_requests),
}
path = kwargs.get("path")
if path:
Expand Down
Loading