From 718837cad7effdf817c0c16af24aab52a0192ecf Mon Sep 17 00:00:00 2001 From: Michael Volo Date: Tue, 25 Feb 2025 12:48:53 -0600 Subject: [PATCH 1/5] add a page template for crawlers to impact stories --- pages/models.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pages/models.py b/pages/models.py index 8296b2fc..06c08185 100644 --- a/pages/models.py +++ b/pages/models.py @@ -1923,6 +1923,7 @@ class ImpactStory(Page): ] parent_page_types = ['pages.Impact'] + template = 'page.html' def get_url_parts(self, *args, **kwargs): return None From 289164ba916838f65d439fffe895da9d8a0d5b8f Mon Sep 17 00:00:00 2001 From: Michael Volo Date: Tue, 25 Feb 2025 12:49:05 -0600 Subject: [PATCH 2/5] move blocks around on impact page --- pages/models.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pages/models.py b/pages/models.py index 06c08185..cbfb3c63 100644 --- a/pages/models.py +++ b/pages/models.py @@ -1933,15 +1933,6 @@ def get_sitemap_urls(self, request=None): class Impact(Page): - improving_access = StreamField( - blocks.StreamBlock([ - ('content', blocks.StructBlock([ - ('image', ImageBlock()), - ('heading', blocks.CharBlock()), - ('description', blocks.RichTextBlock()), - ('button_text', blocks.CharBlock()), - ('button_href', blocks.URLBlock()) - ]))], max_num=1), use_json_field=True) reach = StreamField( blocks.StreamBlock([ ('content', blocks.StructBlock([ @@ -1955,6 +1946,15 @@ class Impact(Page): ('link_href', blocks.URLBlock(required=False)) ]))) ]))], max_num=1), use_json_field=True) + improving_access = StreamField( + blocks.StreamBlock([ + ('content', blocks.StructBlock([ + ('image', ImageBlock()), + ('heading', blocks.CharBlock()), + ('description', blocks.RichTextBlock()), + ('button_text', blocks.CharBlock()), + ('button_href', blocks.URLBlock()) + ]))], max_num=1), use_json_field=True) quote = StreamField( blocks.StreamBlock([ ('content', blocks.StructBlock([ @@ -2012,8 +2012,8 @@ class Impact(Page): api_fields = 
[ APIField('title'), - APIField('improving_access'), APIField('reach'), + APIField('improving_access'), APIField('quote'), APIField('making_a_difference'), APIField('disruption'), From b7e99bb5e801d73681f1ab55caca30412b842908 Mon Sep 17 00:00:00 2001 From: Michael Volo Date: Tue, 25 Feb 2025 13:02:05 -0600 Subject: [PATCH 3/5] ensure all sitemap.xml urls don't end in a slash --- global_settings/views.py | 29 ++++++++++++++++++++++++++++- openstax/urls.py | 4 +--- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/global_settings/views.py b/global_settings/views.py index 4af5a56e..dd5e28a5 100644 --- a/global_settings/views.py +++ b/global_settings/views.py @@ -1,5 +1,7 @@ +import inspect +from django.contrib.sitemaps import views as sitemap_views from django.http import HttpResponseServerError, HttpResponse - +from wagtail.contrib.sitemaps.sitemap_generator import Sitemap from global_settings.functions import invalidate_cloudfront_caches @@ -13,3 +15,28 @@ def clear_entire_cache(request): invalidate_cloudfront_caches() response = '

All Caches Invalidated

' return HttpResponse(response) + + +def index(request, sitemaps, **kwargs): + sitemaps = prepare_sitemaps(request, sitemaps) + return sitemap_views.index(request, sitemaps, **kwargs) + + +def sitemap(request, sitemaps=None, **kwargs): + if sitemaps: + sitemaps = prepare_sitemaps(request, sitemaps) + else: + sitemaps = {"wagtail": Sitemap(request)} + return sitemap_views.sitemap(request, sitemaps, **kwargs) + + +def prepare_sitemaps(request, sitemaps): + initialised_sitemaps = {} + for name, sitemap_cls in sitemaps.items(): + if inspect.isclass(sitemap_cls) and issubclass(sitemap_cls, Sitemap): + sitemap_instance = sitemap_cls(request) + sitemap_instance.sitemap_urls = [url.rstrip('/') for url in sitemap_instance.sitemap_urls] + initialised_sitemaps[name] = sitemap_instance + else: + initialised_sitemaps[name] = sitemap_cls + return initialised_sitemaps diff --git a/openstax/urls.py b/openstax/urls.py index 5c20a431..7c5d4c42 100644 --- a/openstax/urls.py +++ b/openstax/urls.py @@ -5,7 +5,6 @@ from wagtail.admin import urls as wagtailadmin_urls from wagtail import urls as wagtail_urls from wagtail.documents import urls as wagtaildocs_urls -from wagtail.images.views.serve import ServeView from accounts import urls as accounts_urls from .api import api_router @@ -13,8 +12,7 @@ from news.feeds import RssBlogFeed, AtomBlogFeed from api import urls as api_urls -from global_settings.views import throw_error, clear_entire_cache -from wagtail.contrib.sitemaps.views import sitemap +from global_settings.views import throw_error, clear_entire_cache, sitemap admin.site.site_header = 'OpenStax' From cedbdebb45c311749cd29a5b6e678eeed3dbff75 Mon Sep 17 00:00:00 2001 From: Michael Volo Date: Tue, 25 Feb 2025 13:14:24 -0600 Subject: [PATCH 4/5] middleware cleanup - no trailing slashes on canonical urls --- openstax/middleware.py | 228 ++++++++++++++++------------------------- 1 file changed, 88 insertions(+), 140 deletions(-) diff --git a/openstax/middleware.py 
b/openstax/middleware.py index 7fafca86..5a37d88b 100644 --- a/openstax/middleware.py +++ b/openstax/middleware.py @@ -14,181 +14,132 @@ from pages.models import HomePage, Supporters, PrivacyPolicy, K12Subject, Subject, Subjects, RootPage -class HttpSmartRedirectResponse(HttpResponsePermanentRedirect): - pass - - class CommonMiddlewareAppendSlashWithoutRedirect(CommonMiddleware): - """ This class converts HttpSmartRedirectResponse to the common response - of Django view, without redirect. This is necessary to match status_codes - for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302 - code always on pages without slash. + """ This class converts HttpResponsePermanentRedirect to the common response + of Django view, without redirect. This is necessary to match status_codes + for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302 + code always on pages without slash. """ - response_redirect_class = HttpSmartRedirectResponse + response_redirect_class = HttpResponsePermanentRedirect -def __init__(self, *args, **kwargs): - # create django request resolver - self.handler = BaseHandler() + def __init__(self, *args, **kwargs): + # create django request resolver + self.handler = BaseHandler() - # prevent recursive includes - old = settings.MIDDLEWARE - name = self.__module__ + '.' + self.__class__.__name__ - settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name] + # prevent recursive includes + old = settings.MIDDLEWARE + name = self.__module__ + '.' 
+ self.__class__.__name__ + settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name] - self.handler.load_middleware() + self.handler.load_middleware() - settings.MIDDLEWARE = old - super(CommonMiddlewareAppendSlashWithoutRedirect, self).__init__(*args, **kwargs) + settings.MIDDLEWARE = old + super().__init__(*args, **kwargs) -def get_full_path_with_slash(self, request): - """ Return the full path of the request with a trailing slash appended - without Exception in Debug mode - """ - new_path = request.get_full_path(force_append_slash=True) - # Prevent construction of scheme relative urls. - new_path = escape_leading_slashes(new_path) - return new_path + def get_full_path_with_slash(self, request): + """ Return the full path of the request with a trailing slash appended + without Exception in Debug mode + """ + # Prevent construction of scheme relative urls. + new_path = request.get_full_path(force_append_slash=True) + new_path = escape_leading_slashes(new_path) + return new_path -def process_response(self, request, response): - response = super(CommonMiddlewareAppendSlashWithoutRedirect, self).process_response(request, response) + def process_response(self, request, response): + response = super().process_response(request, response) - if isinstance(response, HttpSmartRedirectResponse): - if not request.path.endswith('/'): - request.path = request.path + '/' - # we don't need query string in path_info because it's in request.GET already - request.path_info = request.path - response = self.handler.get_response(request) + if isinstance(response, HttpResponsePermanentRedirect): + if not request.path.endswith('/'): + request.path = request.path + '/' + # we don't need query string in path_info because it's in request.GET already + response = self.handler.get_response(request) - return response + return response class CommonMiddlewareOpenGraphRedirect(CommonMiddleware): - OG_USER_AGENTS = [ - 'baiduspider', - 'bingbot', - 'embedly', - 'facebookbot', - 
'facebookexternalhit/1.1', - 'facebookexternalhit', - 'facebot', - 'google.*snippet', - 'googlebot', - 'linkedinbot', - 'MetadataScraper', - 'outbrain', - 'pinterest', - 'pinterestbot', - 'quora', - 'quora link preview', - 'rogerbot', - 'showyoubot', - 'slackbot', - 'slackbot-linkexpanding', - 'twitterbot', - 'vkShare', - 'W3C_Validator', - 'WhatsApp', - 'MetadataScraper', - 'yandex', - 'yahoo', - ] + OG_USER_AGENTS = { + 'baiduspider', 'bingbot', 'embedly', 'facebookbot', 'facebookexternalhit/1.1', + 'facebookexternalhit', 'facebot', 'google.*snippet', 'googlebot', 'linkedinbot', + 'MetadataScraper', 'outbrain', 'pinterest', 'pinterestbot', 'quora', 'quora link preview', + 'rogerbot', 'showyoubot', 'slackbot', 'slackbot-linkexpanding', 'twitterbot', 'vkShare', + 'W3C_Validator', 'WhatsApp', 'yandex', 'yahoo' + } def __init__(self, get_response): self.get_response = get_response def __call__(self, request, *args, **kwargs): if 'HTTP_USER_AGENT' in request.META: - user_agent = user_agent_parser.ParseUserAgent(request.META["HTTP_USER_AGENT"]) if user_agent['family'].lower() in self.OG_USER_AGENTS: - # url path minus the trailing / url_path = unquote(request.get_full_path()[:-1]) - full_url = unquote(request.build_absolute_uri()) - - # index of last / to find slug, except when there isn't a last / - if url_path == '': - page_slug = "home" - else: - index = url_path.rindex('/') - page_slug = url_path[index+1:] + page_slug = "home" if url_path == '' else url_path.rsplit('/', 1)[-1] if self.redirect_path_found(url_path): - # supporters page has the wrong slug if page_slug == 'foundation': page_slug = 'supporters' - # look up correct object based on path - if '/details/books/' in url_path: - page = Book.objects.filter(slug=page_slug) - elif '/blog/' in url_path: - page = NewsArticle.objects.filter(slug=page_slug) - elif '/privacy' in url_path: - page = PrivacyPolicy.objects.filter(slug='privacy-policy') - elif '/k12' in url_path: - page = 
K12Subject.objects.filter(slug='k12-' + page_slug) - elif '/subjects' in url_path: - flag = FeatureFlag.objects.filter(name='new_subjects') - if flag[0].feature_active: - if page_slug == 'subjects': - page_slug = 'new-subjects' - page = Subjects.objects.filter(slug=page_slug) - else: - page = Subject.objects.filter(slug=page_slug+'-books') - else: - page_slug = 'subjects' - page = BookIndex.objects.filter(slug=page_slug) - else: - page = self.page_by_slug(page_slug) - + page = self.get_page(url_path, page_slug) if page: template = self.build_template(page[0], full_url) return HttpResponse(template) - else: - return self.get_response(request) - else: - return self.get_response(request) return self.get_response(request) + def get_page(self, url_path, page_slug): + if '/details/books/' in url_path: + return Book.objects.filter(slug=page_slug) + elif '/blog/' in url_path: + return NewsArticle.objects.filter(slug=page_slug) + elif '/privacy' in url_path: + return PrivacyPolicy.objects.filter(slug='privacy-policy') + elif '/k12' in url_path: + return K12Subject.objects.filter(slug='k12-' + page_slug) + elif '/subjects' in url_path: + flag = FeatureFlag.objects.filter(name='new_subjects') + if flag[0].feature_active: + if page_slug == 'subjects': + page_slug = 'new-subjects' + return Subjects.objects.filter(slug=page_slug) + else: + return Subject.objects.filter(slug=page_slug + '-books') + else: + return BookIndex.objects.filter(slug='subjects') + else: + return self.page_by_slug(page_slug) + def build_template(self, page, page_url): + page_url = page_url.rstrip('/') image_url = self.image_url(page.promote_image) - template = ' \n' - template += ' ' + str(page.title) + '\n' - template += ' \n'.format(page.search_description) - template += ' \n'.format(page_url) - template += ' \n'.format(page_url) - template += ' \n' - template += ' \n'.format(page.title) - template += ' \n'.format(page.search_description) - template += ' \n'.format(image_url) - template += ' 
\n'.format(page.title) - template += ' \n' - template += ' \n' - template += ' \n'.format(page.title) - template += ' \n'.format(page.search_description) - template += ' \n'.format(image_url) - template += ' \n' - template += '' - return template + return f''' + + + + {page.title} + + + + + + + + + + + + + + + + + ''' def redirect_path_found(self, url_path): - if '/blog/' in url_path or '/details/books/' in url_path or '/foundation' in url_path or '/privacy' in url_path or '/subjects' in url_path or '' == url_path: - return True - elif '/k12' in url_path: - last_slash = url_path.rfind('/') - k12_index = url_path.rfind('k12') - if last_slash < k12_index: - return False - else: - return True - else: - return False + return any(substring in url_path for substring in ['/blog/', '/details/books/', '/foundation', '/privacy', '/subjects', '']) or '/k12' in url_path def image_url(self, image): - image_url = build_image_url(image) - if not image_url: - return '' - return image_url + return build_image_url(image) or '' def page_by_slug(self, page_slug): if page_slug == 'supporters': @@ -196,7 +147,4 @@ def page_by_slug(self, page_slug): if page_slug == 'openstax-homepage': return HomePage.objects.filter(locale=1) if page_slug == 'home': - return RootPage.objects.filter(locale=1) - - - + return RootPage.objects.filter(locale=1) \ No newline at end of file From cf4bfa96f7497a5a1ab88e15a057a1b20ed76757 Mon Sep 17 00:00:00 2001 From: Michael Volo Date: Tue, 25 Feb 2025 22:43:09 -0600 Subject: [PATCH 5/5] add cards to impact section of page --- .../0154_alter_impact_improving_access.py | 61 +++++++++++++++++++ pages/models.py | 8 ++- pages/tests.py | 2 +- 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 pages/migrations/0154_alter_impact_improving_access.py diff --git a/pages/migrations/0154_alter_impact_improving_access.py b/pages/migrations/0154_alter_impact_improving_access.py new file mode 100644 index 00000000..79386932 --- /dev/null +++ 
b/pages/migrations/0154_alter_impact_improving_access.py @@ -0,0 +1,61 @@ +# Generated by Django 5.0.12 on 2025-02-26 04:42 + +import wagtail.fields +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("pages", "0153_alter_homepage_options"), + ] + + operations = [ + migrations.AlterField( + model_name="impact", + name="improving_access", + field=wagtail.fields.StreamField( + [("content", 12)], + block_lookup={ + 0: ("wagtail.images.blocks.ImageChooserBlock", (), {"required": False}), + 1: ("wagtail.blocks.CharBlock", (), {"required": False}), + 2: ("wagtail.blocks.URLBlock", (), {"required": False}), + 3: ("pages.custom_blocks.ImageFormatChoiceBlock", (), {}), + 4: ( + "wagtail.blocks.CharBlock", + (), + {"help_text": "Used by the frontend for Google Analytics.", "required": False}, + ), + 5: ( + "wagtail.blocks.StructBlock", + [[("image", 0), ("alt_text", 1), ("link", 2), ("alignment", 3), ("identifier", 4)]], + {}, + ), + 6: ("wagtail.blocks.CharBlock", (), {}), + 7: ("wagtail.blocks.RichTextBlock", (), {}), + 8: ("wagtail.blocks.URLBlock", (), {}), + 9: ("pages.custom_blocks.APIImageChooserBlock", (), {"required": False}), + 10: ( + "wagtail.blocks.StructBlock", + [[("icon", 9), ("description", 6), ("link_text", 1), ("link_href", 2)]], + {}, + ), + 11: ("wagtail.blocks.ListBlock", (10,), {}), + 12: ( + "wagtail.blocks.StructBlock", + [ + [ + ("image", 5), + ("heading", 6), + ("description", 7), + ("button_text", 6), + ("button_href", 8), + ("cards", 11), + ] + ], + {}, + ), + }, + ), + ), + ] diff --git a/pages/models.py b/pages/models.py index cbfb3c63..58d04188 100644 --- a/pages/models.py +++ b/pages/models.py @@ -1953,7 +1953,13 @@ class Impact(Page): ('heading', blocks.CharBlock()), ('description', blocks.RichTextBlock()), ('button_text', blocks.CharBlock()), - ('button_href', blocks.URLBlock()) + ('button_href', blocks.URLBlock()), + ('cards', blocks.ListBlock(blocks.StructBlock([ + ('icon', 
APIImageChooserBlock(required=False)), + ('description', blocks.CharBlock()), + ('link_text', blocks.CharBlock(required=False)), + ('link_href', blocks.URLBlock(required=False)) + ]))) ]))], max_num=1), use_json_field=True) quote = StreamField( blocks.StreamBlock([ diff --git a/pages/tests.py b/pages/tests.py index 006e23ee..5f3f9a0f 100644 --- a/pages/tests.py +++ b/pages/tests.py @@ -8,7 +8,7 @@ from wagtail.models import Page from pages import models as page_models -from shared.test_utilities import assertPathDoesNotRedirectToTrailingSlash, mock_user_login +from shared.test_utilities import mock_user_login from http import cookies class TestRootPage(unittest.TestCase):