Skip to content
54 changes: 49 additions & 5 deletions common/cloudflare.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from django.conf import settings
from django.core.cache import cache
from django.contrib.sites.models import Site
from django.http import HttpRequest
from django.urls import reverse
from django.utils.cache import get_cache_key

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -51,13 +53,55 @@ def purge_vis_cache(cls, slug):
]
cls.purge_paths_cache(paths)

@classmethod
def _make_cache_request(cls, path: str, domain: str) -> HttpRequest:
    """ Create a bare GET request for ``path`` on ``domain`` that can be
    passed to Django's cache-key helpers.

    Everything after the first '?' in ``path`` becomes the query string;
    a path without '?' gets an empty query string.
    HTTP_HOST is filled in alongside SERVER_NAME so the host is taken
    verbatim from ``domain`` (per the original author's note, SERVER_NAME
    alone would have the port appended).

    NOTE(review): a plain HttpRequest may not consult 'wsgi.url_scheme'
    when building absolute URIs (that hook appears to belong to
    WSGIRequest) - confirm the scheme used in the cache key matches what
    the cache middleware saw for the real request. """
    route, _, query = path.partition('?')

    synthetic = HttpRequest()
    synthetic.method = 'GET'
    synthetic.path = route
    synthetic.META.update({
        'QUERY_STRING': query,
        'HTTP_HOST': domain,
        'wsgi.url_scheme': 'https',
        'SERVER_NAME': domain,
        'SERVER_PORT': '443',
    })
    return synthetic

@classmethod
def _purge_django_cache(cls, paths: list[str]) -> None:
    """ Purge the cached responses for ``paths`` from Django's cache.

    For every path a synthetic request is built for the current Site's
    domain and, unless that domain already starts with "www.", for its
    www. variant, so entries are found however the cache was populated.
    Both the GET and the HEAD cache keys are deleted, because the cache
    middleware stores responses under a key that includes the request
    method.

    Only entries whose key can be rebuilt from the synthetic request are
    removed; nothing else in the cache is touched.

    :param paths: site-relative paths, optionally with a query string.
    """
    if not paths:
        # Nothing to purge: skip the Site lookup and the settings swap.
        return

    domain = Site.objects.get_current().domain
    domains = [domain]
    if not domain.startswith("www."):
        domains.append(f"www.{domain}")

    # Temporarily allow the site domains so get_cache_key can call
    # build_absolute_uri() without hitting ALLOWED_HOSTS validation.
    # This is safe because we're constructing internal requests, not
    # processing user input.
    original_hosts = settings.ALLOWED_HOSTS
    # Unpack rather than concatenate: ALLOWED_HOSTS may be a tuple, and
    # tuple + list raises TypeError.
    settings.ALLOWED_HOSTS = list({*original_hosts, *domains})
    try:
        for path in paths:
            for d in domains:
                request = cls._make_cache_request(path, d)
                for method in ("GET", "HEAD"):
                    cache_key = get_cache_key(request, method=method)
                    if cache_key:
                        cache.delete(cache_key)
    finally:
        # Always restore the original object, even if a lookup raised.
        settings.ALLOWED_HOSTS = original_hosts

@classmethod
def purge_paths_cache(cls, paths):
""" Purges the URLs (paths, not URLs) """
# We also want to purge the file-based cache, but unfortunately
# we don't have a way of doing this per-URL.
# It's overkill, but here we purge everything.
cache.clear()
""" Purges the given paths from both Django's file cache and Cloudflare CDN. """
cls._purge_django_cache(paths)

# If we're on local/dev/staging/etc, we're done.
if not cls._is_api_enabled():
Expand Down
1 change: 1 addition & 0 deletions rcvis/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
# Order of the next 3 is important
'django.middleware.cache.UpdateCacheMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.http.ConditionalGetMiddleware',
'django.middleware.cache.FetchFromCacheMiddleware',

'django.middleware.csrf.CsrfViewMiddleware',
Expand Down
18 changes: 18 additions & 0 deletions visualizer/migrations/0033_add_updated_at.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.28 on 2026-02-22 12:21

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``updatedAt`` timestamp column to ``JsonConfig``."""

    # Must run after the previous visualizer migration.
    dependencies = [
        ('visualizer', '0032_jsonconfig_forcefirstrounddeterminespercentages'),
    ]

    operations = [
        # auto_now=True: the field is refreshed on every model save.
        # NOTE(review): existing rows are presumably backfilled with the
        # time the migration runs - confirm that is acceptable.
        migrations.AddField(
            model_name='jsonconfig',
            name='updatedAt',
            field=models.DateTimeField(auto_now=True),
        ),
    ]
1 change: 1 addition & 0 deletions visualizer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class JsonConfig(models.Model):
candidateSidecarFile = models.FileField(null=True, blank=True)
slug = models.SlugField(unique=True, max_length=255)
uploadedAt = models.DateTimeField(auto_now_add=True)
updatedAt = models.DateTimeField(auto_now=True)
owner = models.ForeignKey(
settings.AUTH_USER_MODEL,
related_name='this_users_jsons',
Expand Down
257 changes: 257 additions & 0 deletions visualizer/tests/testSimple.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from django.test import TestCase
from django.test.client import RequestFactory
from django.urls import reverse
from django.utils.http import http_date, parse_http_date
from rcvformats.schemas.universaltabulator import SchemaV0 as UTSchema

from common.testUtils import TestHelpers
Expand Down Expand Up @@ -432,6 +433,262 @@ def test_cloudflare_purge(self, requestPostResponse):
data=json.dumps(expectedData),
timeout=8)

def test_purge_django_cache(self):
    """
    Smoke-test _purge_django_cache on a page that has already been
    requested twice: after the purge the page must still answer 200 and
    contain its slug.

    NOTE(review): every assertion here would also hold if the purge
    removed nothing; consider asserting on the cache entry itself.
    """
    # Upload a visualization so there is a page to cache
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    slug = TestHelpers.get_latest_upload().slug
    path = reverse('visualize', args=(slug,))

    # The first request is meant to populate the cache and the second to
    # be served from it; both must succeed.
    for _ in range(2):
        self.assertEqual(self.client.get(path).status_code, 200)

    CloudflareAPI._purge_django_cache([path])

    # The page must still be served correctly once the purge has run.
    after_purge = self.client.get(path)
    self.assertEqual(after_purge.status_code, 200)
    self.assertContains(after_purge, slug)

def test_purge_django_cache_with_query_string(self):
    """
    _purge_django_cache must accept a path that carries a query string
    (as used by embedded visualization variants) without raising.
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    slug = TestHelpers.get_latest_upload().slug
    embedded_url = reverse('visualizeEmbedded', args=(slug,)) + '?vistype=sankey'

    # Request the page once so there is something to purge
    self.client.get(embedded_url)

    # The only expectation is that this call completes without an exception
    CloudflareAPI._purge_django_cache([embedded_url])

def test_purge_django_cache_tries_www_variant(self):
    """
    Ensure _purge_django_cache removes the cached entry for both the
    primary domain and its www. variant.

    The test stores one entry per host by hand, remembers the exact cache
    keys, purges, and then asserts that neither key resolves any more.
    It relies on the current Site's domain being 'example.com', since
    that is the domain _purge_django_cache looks up.
    """
    from django.utils.cache import learn_cache_key, get_cache_key
    from django.core.cache import cache
    from django.http import HttpResponse

    domain = 'example.com'
    path = '/v/test-slug'
    hosts = [domain, f'www.{domain}']

    # Allow the test domains so the cache-key helpers can build absolute URIs
    with self.settings(ALLOWED_HOSTS=['*']):
        # Manually cache an entry for each domain variant
        cached_keys = []
        for host in hosts:
            request = CloudflareAPI._make_cache_request(path, host)
            response = HttpResponse('cached content')
            response['Content-Type'] = 'text/html'
            cache_key = learn_cache_key(request, response)
            cache.set(cache_key, response)

            # Precondition: the entry exists and a lookup for the same
            # request resolves to the key we just stored under.
            self.assertEqual(get_cache_key(request), cache_key)
            self.assertIsNotNone(cache.get(cache_key))
            cached_keys.append(cache_key)

        # Purge - should clear both variants
        CloudflareAPI._purge_django_cache([path])

        # Assert on the recorded keys directly so the check can never be
        # skipped by a failed key lookup.
        for cache_key in cached_keys:
            self.assertIsNone(cache.get(cache_key))

def test_conditional_get_returns_304_when_fresh(self):
    """
    Echoing a page's Last-Modified value back as If-Modified-Since must
    produce a 304 while the cache backend is a no-op.
    (The original note attributes the short-circuit to
    ConditionalGetMixin, which is defined outside this test.)
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()
    url = reverse('visualize', args=(config.slug,))

    # Swap in the dummy backend so no response is stored between requests
    dummy_caches = {'default': {
        'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}
    with self.settings(CACHES=dummy_caches):
        # Unconditional request: read the validator the server hands out
        initial = self.client.get(url)
        self.assertEqual(initial.status_code, 200)
        stamp = initial['Last-Modified']
        self.assertIsNotNone(stamp)

        # Conditional request with that validator: nothing has changed
        revalidated = self.client.get(url, HTTP_IF_MODIFIED_SINCE=stamp)
        self.assertEqual(revalidated.status_code, 304)

def test_server_cache_returns_304_when_fresh(self):
    """
    With the project's normal cache settings left in place, a second
    request that sends If-Modified-Since equal to the first response's
    Last-Modified must come back as 304.
    The intended path, per the original note, is FetchFromCacheMiddleware
    serving the cached 200 and ConditionalGetMiddleware turning it into a
    304 without the view running.
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()
    url = reverse('visualize', args=(config.slug,))

    # Warm-up request: expected to be stored by the cache middleware
    warm = self.client.get(url)
    self.assertEqual(warm.status_code, 200)
    stamp = warm['Last-Modified']
    self.assertIsNotNone(stamp)

    # Revalidation with the same timestamp must not return a body
    revalidated = self.client.get(url, HTTP_IF_MODIFIED_SINCE=stamp)
    self.assertEqual(revalidated.status_code, 304)

def test_server_cache_returns_304_when_if_modified_since_is_later(self):
    """
    An If-Modified-Since value ten seconds later than the page's
    Last-Modified means the client's copy is still current, so the
    answer must be 304.
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()
    url = reverse('visualize', args=(config.slug,))

    # Warm-up request: expected to populate the cache
    warm = self.client.get(url)
    self.assertEqual(warm.status_code, 200)
    stamp = warm['Last-Modified']

    # Move the validator 10 seconds forward and revalidate
    later_epoch = parse_http_date(stamp) + 10
    revalidated = self.client.get(
        url, HTTP_IF_MODIFIED_SINCE=http_date(later_epoch))
    self.assertEqual(revalidated.status_code, 304)

def test_server_cache_returns_200_when_if_modified_since_is_earlier(self):
    """
    An If-Modified-Since value ten seconds earlier than the page's
    Last-Modified means the page changed after the client's copy, so the
    full 200 response must be returned, carrying the same Last-Modified
    as the first response.
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()
    url = reverse('visualize', args=(config.slug,))

    # Warm-up request: expected to populate the cache
    warm = self.client.get(url)
    self.assertEqual(warm.status_code, 200)
    stamp = warm['Last-Modified']

    # Move the validator 10 seconds back and revalidate
    earlier_epoch = parse_http_date(stamp) - 10
    second = self.client.get(
        url, HTTP_IF_MODIFIED_SINCE=http_date(earlier_epoch))
    self.assertEqual(second.status_code, 200)
    # An unchanged Last-Modified is taken as the sign of a cached response
    self.assertEqual(second['Last-Modified'], stamp)

def test_conditional_get_returns_200_after_update(self):
    """
    Once the model has been saved again and its updatedAt moved forward,
    a request carrying the previous Last-Modified as If-Modified-Since
    must receive a full 200 with a different Last-Modified.
    """
    from datetime import timedelta

    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()
    url = reverse('visualize', args=(config.slug,))

    dummy_caches = {'default': {
        'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}
    with self.settings(CACHES=dummy_caches):
        # Validator handed out before the update
        stale_stamp = self.client.get(url)['Last-Modified']

        # Change the model, then push updatedAt a further 2 seconds ahead:
        # HTTP dates have 1-second resolution, so an update within the
        # same second would be indistinguishable from no update.
        config.hideSankey = not config.hideSankey
        config.save()
        bumped = config.updatedAt + timedelta(seconds=2)
        JsonConfig.objects.filter(pk=config.pk).update(updatedAt=bumped)

        # The stale validator must no longer short-circuit to 304
        refreshed = self.client.get(url, HTTP_IF_MODIFIED_SINCE=stale_stamp)
        self.assertEqual(refreshed.status_code, 200)

        # The response must advertise a new validator
        self.assertNotEqual(refreshed['Last-Modified'], stale_stamp)

def test_response_has_last_modified_header(self):
    """
    Both the 'visualize' and 'visualizeEmbedded' pages must send a
    Last-Modified header equal to the HTTP-date form of the object's
    updatedAt.
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()

    # The expected header value is the same for both views
    expected = http_date(config.updatedAt.timestamp())

    dummy_caches = {'default': {
        'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}
    with self.settings(CACHES=dummy_caches):
        for view_name in ('visualize', 'visualizeEmbedded'):
            url = reverse(view_name, args=(config.slug,))
            page = self.client.get(url)
            self.assertEqual(page.status_code, 200)
            self.assertIn('Last-Modified', page)
            self.assertEqual(page['Last-Modified'], expected)

def test_response_has_no_cache_directive(self):
    """
    The 'visualize' page must carry 'no-cache' in its Cache-Control
    header, so that browsers revalidate with the server on every use.
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()

    dummy_caches = {'default': {
        'BACKEND': 'django.core.cache.backends.dummy.DummyCache'}}
    with self.settings(CACHES=dummy_caches):
        url = reverse('visualize', args=(config.slug,))
        page = self.client.get(url)
        # A missing header is treated as an empty string and fails the check
        cache_control = page.get('Cache-Control', '')
        self.assertIn('no-cache', cache_control)

def test_save_purge_only_on_update(self):
    """
    Saving an already-existing JsonConfig must call
    CloudflareAPI.purge_vis_cache exactly once, with the object's slug.

    The intended contract (per the original note) is that the first save,
    i.e. creation, does NOT purge, guarded by _state.adding in
    JsonConfig.save().
    NOTE(review): only the update half of that contract is asserted
    here; the upload below happens before the mock is installed.
    """
    with open(filenames.ONE_ROUND, 'r', encoding='utf-8') as upload:
        self.client.post('/upload.html', {'jsonFile': upload})
    config = TestHelpers.get_latest_upload()

    with patch.object(CloudflareAPI, 'purge_vis_cache') as purge_spy:
        # Flip a field and save: this is an update, so a purge is expected
        config.hideSankey = not config.hideSankey
        config.save()
        purge_spy.assert_called_once_with(config.slug)

def test_homepage_real_world_examples(self):
"""
Tests the "real-world examples" section on the homepage.
Expand Down
Loading