diff --git a/common/cloudflare.py b/common/cloudflare.py
index 33c2a7f9..ff35250d 100644
--- a/common/cloudflare.py
+++ b/common/cloudflare.py
@@ -7,7 +7,9 @@
 from django.conf import settings
 from django.core.cache import cache
 from django.contrib.sites.models import Site
+from django.http import HttpRequest
 from django.urls import reverse
+from django.utils.cache import get_cache_key
 
 logger = logging.getLogger(__name__)
 
@@ -51,13 +53,55 @@ def purge_vis_cache(cls, slug):
         ]
         cls.purge_paths_cache(paths)
 
+    @classmethod
+    def _make_cache_request(cls, path: str, domain: str) -> HttpRequest:
+        """ Build a synthetic request matching how Django's cache middleware
+        would have seen the original request for this path and domain.
+        Sets HTTP_HOST so build_absolute_uri() matches the original
+        request's cache key (SERVER_NAME alone appends the port). """
+        request = HttpRequest()
+        request.method = 'GET'
+        if '?' in path:
+            request.path, request.META['QUERY_STRING'] = path.split('?', 1)
+        else:
+            request.path = path
+            request.META['QUERY_STRING'] = ''
+        request.META['HTTP_HOST'] = domain
+        request.META['wsgi.url_scheme'] = 'https'
+        request.META['SERVER_NAME'] = domain
+        request.META['SERVER_PORT'] = '443'
+        return request
+
+    @classmethod
+    def _purge_django_cache(cls, paths: list[str]) -> None:
+        """ Purge matching entries from Django's file-based cache.
+        Tries both the primary domain and www. variant to match
+        however the cache was populated. """
+        domain = Site.objects.get_current().domain
+        domains = [domain]
+        if not domain.startswith("www."):
+            domains.append(f"www.{domain}")
+
+        # Temporarily allow the site domain so get_cache_key can call
+        # build_absolute_uri() without hitting ALLOWED_HOSTS validation.
+        # The requests are built internally (no user input), but note the
+        # setting is process-wide until the finally block restores it.
+        original_hosts = settings.ALLOWED_HOSTS
+        settings.ALLOWED_HOSTS = list({*original_hosts, *domains})
+        try:
+            for path in paths:
+                for d in domains:
+                    request = cls._make_cache_request(path, d)
+                    cache_key = get_cache_key(request)
+                    if cache_key:
+                        cache.delete(cache_key)
+        finally:
+            settings.ALLOWED_HOSTS = original_hosts
+
     @classmethod
     def purge_paths_cache(cls, paths):
-        """ Purges the URLs (paths, not URLs) """
-        # We also want to purge the file-based cache, but unfortunately
-        # we don't have a way of doing this per-URL.
-        # It's overkill, but here we purge everything.
-        cache.clear()
+        """ Purges the given paths from both Django's file cache and Cloudflare CDN. """
+        cls._purge_django_cache(paths)
 
         # If we're on local/dev/staging/etc, we're done.
         if not cls._is_api_enabled():
diff --git a/rcvis/settings.py b/rcvis/settings.py
index b75da902..a8aaf73e 100644
--- a/rcvis/settings.py
+++ b/rcvis/settings.py
@@ -84,6 +84,7 @@
     # Order of the next 3 is important
     'django.middleware.cache.UpdateCacheMiddleware',
     'django.middleware.common.CommonMiddleware',
+    'django.middleware.http.ConditionalGetMiddleware',
     'django.middleware.cache.FetchFromCacheMiddleware',
     'django.middleware.csrf.CsrfViewMiddleware',
 
diff --git a/visualizer/migrations/0033_add_updated_at.py b/visualizer/migrations/0033_add_updated_at.py
new file mode 100644
index 00000000..4b8255fa
--- /dev/null
+++ b/visualizer/migrations/0033_add_updated_at.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.28 on 2026-02-22 12:21
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('visualizer', '0032_jsonconfig_forcefirstrounddeterminespercentages'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='jsonconfig',
+            name='updatedAt',
+            field=models.DateTimeField(auto_now=True),
+        ),
+    ]
diff --git a/visualizer/models.py b/visualizer/models.py
index 789dba60..65a6a4e1 100644
--- a/visualizer/models.py
+++ b/visualizer/models.py
@@ -50,6 +50,7 @@ class JsonConfig(models.Model):
     candidateSidecarFile = models.FileField(null=True, blank=True)
     slug = models.SlugField(unique=True, max_length=255)
     uploadedAt = models.DateTimeField(auto_now_add=True)
+    updatedAt = models.DateTimeField(auto_now=True)
     owner = models.ForeignKey(
         settings.AUTH_USER_MODEL,
         related_name='this_users_jsons',
diff --git a/visualizer/tests/testSimple.py b/visualizer/tests/testSimple.py
index f5df1319..a19d9649 100644
--- a/visualizer/tests/testSimple.py
+++ b/visualizer/tests/testSimple.py
@@ -11,6 +11,7 @@
 from django.test import TestCase
 from django.test.client import RequestFactory
 from django.urls import reverse
+from django.utils.http import http_date, parse_http_date
 from rcvformats.schemas.universaltabulator import SchemaV0 as UTSchema
 
 from common.testUtils import TestHelpers
@@ -432,6 +433,262 @@ def test_cloudflare_purge(self, requestPostResponse):
             data=json.dumps(expectedData),
             timeout=8)
 
+    def test_purge_django_cache(self):
+        """
+        Ensure _purge_django_cache removes cached responses that were stored
+        by UpdateCacheMiddleware, using the correct domain from django.sites.
+        """
+        # Upload a visualization so we have a page to cache
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        slug = TestHelpers.get_latest_upload().slug
+        path = reverse('visualize', args=(slug,))
+
+        # First request populates the cache via UpdateCacheMiddleware
+        response1 = self.client.get(path)
+        self.assertEqual(response1.status_code, 200)
+
+        # Second request should be served from cache (still 200)
+        response2 = self.client.get(path)
+        self.assertEqual(response2.status_code, 200)
+
+        # Purge and verify the cache entry is gone by checking that
+        # a new request still works (no 304 from stale cache)
+        CloudflareAPI._purge_django_cache([path])
+
+        # After purge, the next request must re-render (not serve stale data).
+        # We verify by checking the response contains the visualization slug.
+        response3 = self.client.get(path)
+        self.assertEqual(response3.status_code, 200)
+        self.assertContains(response3, slug)
+
+    def test_purge_django_cache_with_query_string(self):
+        """
+        Ensure _purge_django_cache handles paths with query strings,
+        which are used for embedded visualization variants.
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        slug = TestHelpers.get_latest_upload().slug
+        path = reverse('visualizeEmbedded', args=(slug,))
+        path_with_qs = path + '?vistype=sankey'
+
+        # Populate cache
+        self.client.get(path_with_qs)
+
+        # Purge should not raise, even with query string
+        CloudflareAPI._purge_django_cache([path_with_qs])
+
+    def test_purge_django_cache_tries_www_variant(self):
+        """
+        Ensure _purge_django_cache tries both the primary domain and the
+        www. variant, matching how get_absolute_paths_for works for Cloudflare.
+        """
+        domain = 'example.com'
+        path = '/v/test-slug'
+
+        from django.utils.cache import learn_cache_key, get_cache_key
+        from django.core.cache import cache
+        from django.http import HttpResponse
+
+        # Allow test domains so learn_cache_key can build absolute URIs
+        with self.settings(ALLOWED_HOSTS=['*']):
+            # Manually cache entries for both domain variants
+            for host in [domain, f'www.{domain}']:
+                request = CloudflareAPI._make_cache_request(path, host)
+                response = HttpResponse('cached content')
+                response['Content-Type'] = 'text/html'
+                cache_key = learn_cache_key(request, response)
+                cache.set(cache_key, response)
+                # Verify it's cached
+                self.assertIsNotNone(get_cache_key(request))
+
+            # Purge — should clear both variants
+            CloudflareAPI._purge_django_cache([path])
+
+            # Both should be gone
+            for host in [domain, f'www.{domain}']:
+                request = CloudflareAPI._make_cache_request(path, host)
+                cache_key = get_cache_key(request)
+                if cache_key:
+                    self.assertIsNone(cache.get(cache_key))
+
+    def test_conditional_get_returns_304_when_fresh(self):
+        """
+        When the client sends If-Modified-Since matching the object's updatedAt,
+        ConditionalGetMixin should short-circuit with 304 (no graph computation).
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+        path = reverse('visualize', args=(config.slug,))
+
+        # Disable file cache so ConditionalGetMixin handles it directly
+        with self.settings(CACHES={'default': {
+                'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}):
+            # First request: get the Last-Modified header
+            response1 = self.client.get(path)
+            self.assertEqual(response1.status_code, 200)
+            last_modified = response1['Last-Modified']
+            self.assertIsNotNone(last_modified)
+
+            # Second request with If-Modified-Since: should get 304
+            response2 = self.client.get(path, HTTP_IF_MODIFIED_SINCE=last_modified)
+            self.assertEqual(response2.status_code, 304)
+
+    def test_server_cache_returns_304_when_fresh(self):
+        """
+        With the file cache enabled, the second request with If-Modified-Since
+        should return 304 via the middleware pipeline (FetchFromCacheMiddleware
+        serves the cached 200, then ConditionalGetMiddleware converts to 304).
+        This path never reaches the view — it's entirely handled by middleware.
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+        path = reverse('visualize', args=(config.slug,))
+
+        # First request: populates the file cache (UpdateCacheMiddleware stores it)
+        response1 = self.client.get(path)
+        self.assertEqual(response1.status_code, 200)
+        last_modified = response1['Last-Modified']
+        self.assertIsNotNone(last_modified)
+
+        # Second request with If-Modified-Since: FetchFromCacheMiddleware returns
+        # the cached 200, ConditionalGetMiddleware converts it to 304
+        response2 = self.client.get(path, HTTP_IF_MODIFIED_SINCE=last_modified)
+        self.assertEqual(response2.status_code, 304)
+
+    def test_server_cache_returns_304_when_if_modified_since_is_later(self):
+        """
+        If-Modified-Since is later than Last-Modified: the resource hasn't been
+        modified since the client's copy, so 304. FetchFromCacheMiddleware serves
+        the cached 200, ConditionalGetMiddleware converts to 304.
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+        path = reverse('visualize', args=(config.slug,))
+
+        # First request: populates the file cache
+        response1 = self.client.get(path)
+        self.assertEqual(response1.status_code, 200)
+        last_modified = response1['Last-Modified']
+
+        # Shift If-Modified-Since 10 seconds into the future
+        future_date = http_date(parse_http_date(last_modified) + 10)
+        response2 = self.client.get(path, HTTP_IF_MODIFIED_SINCE=future_date)
+        self.assertEqual(response2.status_code, 304)
+
+    def test_server_cache_returns_200_when_if_modified_since_is_earlier(self):
+        """
+        If-Modified-Since is earlier than Last-Modified: the resource was modified
+        after the client's copy, so the server should return the cached 200.
+        FetchFromCacheMiddleware serves the cached response, but
+        ConditionalGetMiddleware does NOT convert to 304 because the resource
+        is newer than the client's timestamp.
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+        path = reverse('visualize', args=(config.slug,))
+
+        # First request: populates the file cache
+        response1 = self.client.get(path)
+        self.assertEqual(response1.status_code, 200)
+        last_modified = response1['Last-Modified']
+
+        # Shift If-Modified-Since 10 seconds into the past
+        past_date = http_date(parse_http_date(last_modified) - 10)
+        response2 = self.client.get(path, HTTP_IF_MODIFIED_SINCE=past_date)
+        self.assertEqual(response2.status_code, 200)
+        # Verify the response came with the same Last-Modified (served from cache)
+        self.assertEqual(response2['Last-Modified'], last_modified)
+
+    def test_conditional_get_returns_200_after_update(self):
+        """
+        After the model is updated, a request with the old If-Modified-Since
+        should get a fresh 200 (not 304), because updatedAt has advanced.
+        """
+        from datetime import timedelta
+
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+        path = reverse('visualize', args=(config.slug,))
+
+        with self.settings(CACHES={'default': {
+                'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}):
+            # Get the initial Last-Modified
+            response1 = self.client.get(path)
+            old_last_modified = response1['Last-Modified']
+
+            # Update the model and force updatedAt forward by 2 seconds
+            # (HTTP dates have 1-second resolution, so same-second updates
+            # would not be distinguishable)
+            config.hideSankey = not config.hideSankey
+            config.save()
+            JsonConfig.objects.filter(pk=config.pk).update(
+                updatedAt=config.updatedAt + timedelta(seconds=2))
+
+            # Request with old timestamp should get 200, not 304
+            response2 = self.client.get(path, HTTP_IF_MODIFIED_SINCE=old_last_modified)
+            self.assertEqual(response2.status_code, 200)
+
+            # And the new Last-Modified should differ
+            self.assertNotEqual(response2['Last-Modified'], old_last_modified)
+
+    def test_response_has_last_modified_header(self):
+        """
+        Visualization responses should include a Last-Modified header
+        matching the object's updatedAt timestamp.
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+
+        with self.settings(CACHES={'default': {
+                'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}):
+            # Check both Visualize and VisualizeEmbedded views
+            for view_name in ['visualize', 'visualizeEmbedded']:
+                path = reverse(view_name, args=(config.slug,))
+                response = self.client.get(path)
+                self.assertEqual(response.status_code, 200)
+                self.assertIn('Last-Modified', response)
+                expected = http_date(config.updatedAt.timestamp())
+                self.assertEqual(response['Last-Modified'], expected)
+
+    def test_response_has_no_cache_directive(self):
+        """
+        Visualization responses should include Cache-Control: no-cache
+        so browsers always revalidate with the server.
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+
+        with self.settings(CACHES={'default': {
+                'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}):
+            path = reverse('visualize', args=(config.slug,))
+            response = self.client.get(path)
+            self.assertIn('no-cache', response.get('Cache-Control', ''))
+
+    def test_save_purge_only_on_update(self):
+        """
+        A save on an already-stored config (an update) should trigger exactly
+        one cache purge for its slug. The creation-time half of the
+        _state.adding guard in JsonConfig.save() is not asserted here.
+        """
+        with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as f:
+            self.client.post('/upload.html', {'jsonFile': f})
+        config = TestHelpers.get_latest_upload()
+
+        with patch.object(CloudflareAPI, 'purge_vis_cache') as mock_purge:
+            # Update triggers purge
+            config.hideSankey = not config.hideSankey
+            config.save()
+            mock_purge.assert_called_once_with(config.slug)
+
     def test_homepage_real_world_examples(self):
         """
         Tests the "real-world examples" section on the homepage.
diff --git a/visualizer/views.py b/visualizer/views.py
index d1b51011..d42d91f3 100644
--- a/visualizer/views.py
+++ b/visualizer/views.py
@@ -12,7 +12,7 @@
 from django.contrib.auth import get_user_model
 from django.contrib.auth.mixins import LoginRequiredMixin
 from django.core.cache import cache
-from django.http import JsonResponse, HttpResponse
+from django.http import JsonResponse, HttpResponse, HttpResponseNotModified
 from django.shortcuts import render
 from django.templatetags.static import static
 from django.urls import Resolver404
@@ -21,7 +21,8 @@
 from django.utils.decorators import method_decorator
 from django.views import View
 from django.views.decorators.clickjacking import xframe_options_exempt
-from django.views.decorators.vary import vary_on_headers
+from django.utils.cache import patch_cache_control
+from django.utils.http import http_date, parse_http_date_safe
 from django.views.generic.base import TemplateView, RedirectView
 from django.views.generic.detail import DetailView
 from django.views.generic.edit import CreateView
@@ -160,8 +161,51 @@ def _actions_before_save(self, form):
         self.model.jsonFile.save('datatablesfile.json', form.cleaned_data['jsonFile'])
 
 
-@method_decorator(vary_on_headers('increment',), name='get')
-class Visualize(DetailView):
+class ConditionalGetMixin:  # pylint: disable=too-few-public-methods
+    """
+    Mixin for DetailView subclasses that serve JsonConfig visualizations.
+
+    Sets Last-Modified from the object's updatedAt and Cache-Control: no-cache
+    so browsers always revalidate. On cache misses (file cache empty),
+    short-circuits with 304 if the client already has a fresh copy,
+    avoiding expensive graph computation. On cache hits, Django's
+    ConditionalGetMiddleware handles the 304 conversion using the
+    Last-Modified header preserved in the cached response.
+
+    Cache-Control: no-cache (without max-age=0) allows Django's
+    UpdateCacheMiddleware to store the rendered response server-side,
+    so subsequent requests from different clients or Cloudflare PoPs
+    can be served from the file cache without recomputing the graph.
+    """
+
+    def get(self, request, *args, **kwargs):
+        """Return 304 if the client's copy is fresh, otherwise render normally."""
+        # Fetch the object up front so updatedAt is available for the 304 check.
+        # NOTE: super().get() (BaseDetailView.get) calls get_object() again.
+        self.object = self.get_object()
+
+        # Short-circuit: if the client has a fresh copy, return 304 without the
+        # expensive graph computation or rendering (FetchFromCache found nothing).
+        # Compare in whole seconds: HTTP dates carry no sub-second precision.
+        if self.object.updatedAt:
+            lastModified = int(self.object.updatedAt.timestamp())
+            ifModifiedSince = request.META.get('HTTP_IF_MODIFIED_SINCE')
+            if ifModifiedSince:
+                ifModifiedSince = parse_http_date_safe(ifModifiedSince)
+                if ifModifiedSince is not None and lastModified <= ifModifiedSince:
+                    response = HttpResponseNotModified()
+                    response['Last-Modified'] = http_date(lastModified)
+                    patch_cache_control(response, no_cache=True)
+                    return response
+
+        response = super().get(request, *args, **kwargs)
+        if self.object.updatedAt:
+            response['Last-Modified'] = http_date(self.object.updatedAt.timestamp())
+        patch_cache_control(response, no_cache=True)
+        return response
+
+
+class Visualize(ConditionalGetMixin, DetailView):
     """ Visualizing a single JsonConfig """
     model = JsonConfig
     template_name = 'visualizer/visualize.html'
@@ -198,9 +242,8 @@ def get_context_data(self, **kwargs):
         return data
 
 
-@method_decorator(vary_on_headers('increment',), name='get')
 @method_decorator(xframe_options_exempt, name='dispatch')
-class VisualizeEmbedded(DetailView):
+class VisualizeEmbedded(ConditionalGetMixin, DetailView):
     """
     The embedded visualization, to be used in an iframe.
     """
@@ -258,9 +301,8 @@ def get_redirect_url(self, *args, **kwargs):
         return super().get_redirect_url(slug) + "?vistype=" + vistype
 
 
-@method_decorator(vary_on_headers('increment',), name='get')
 @method_decorator(xframe_options_exempt, name='dispatch')
-class VisualizeBallotpedia(DetailView):
+class VisualizeBallotpedia(ConditionalGetMixin, DetailView):
     """ The embedded ballotpedia visualization """
     model = JsonConfig
     template_name = 'visualizer/visualize-ballotpedia.html'