Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

URL slash fixes #1614

Merged
merged 5 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion global_settings/views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import inspect
from django.contrib.sitemaps import views as sitemap_views
from django.http import HttpResponseServerError, HttpResponse

from wagtail.contrib.sitemaps.sitemap_generator import Sitemap
from global_settings.functions import invalidate_cloudfront_caches


Expand All @@ -13,3 +15,28 @@
invalidate_cloudfront_caches()
response = '<html><body><p>All Caches Invalidated</p></body></html>'
return HttpResponse(response)


def index(request, sitemaps, **kwargs):
sitemaps = prepare_sitemaps(request, sitemaps)
return sitemap_views.index(request, sitemaps, **kwargs)

Check warning on line 22 in global_settings/views.py

View check run for this annotation

Codecov / codecov/patch

global_settings/views.py#L21-L22

Added lines #L21 - L22 were not covered by tests


def sitemap(request, sitemaps=None, **kwargs):
if sitemaps:
sitemaps = prepare_sitemaps(request, sitemaps)

Check warning on line 27 in global_settings/views.py

View check run for this annotation

Codecov / codecov/patch

global_settings/views.py#L26-L27

Added lines #L26 - L27 were not covered by tests
else:
sitemaps = {"wagtail": Sitemap(request)}
return sitemap_views.sitemap(request, sitemaps, **kwargs)

Check warning on line 30 in global_settings/views.py

View check run for this annotation

Codecov / codecov/patch

global_settings/views.py#L29-L30

Added lines #L29 - L30 were not covered by tests


def prepare_sitemaps(request, sitemaps):
initialised_sitemaps = {}
for name, sitemap_cls in sitemaps.items():
if inspect.isclass(sitemap_cls) and issubclass(sitemap_cls, Sitemap):
sitemap_instance = sitemap_cls(request)
sitemap_instance.sitemap_urls = [url.rstrip('/') for url in sitemap_instance.sitemap_urls]
initialised_sitemaps[name] = sitemap_instance

Check warning on line 39 in global_settings/views.py

View check run for this annotation

Codecov / codecov/patch

global_settings/views.py#L34-L39

Added lines #L34 - L39 were not covered by tests
else:
initialised_sitemaps[name] = sitemap_cls
return initialised_sitemaps

Check warning on line 42 in global_settings/views.py

View check run for this annotation

Codecov / codecov/patch

global_settings/views.py#L41-L42

Added lines #L41 - L42 were not covered by tests
228 changes: 88 additions & 140 deletions openstax/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,189 +14,137 @@
from pages.models import HomePage, Supporters, PrivacyPolicy, K12Subject, Subject, Subjects, RootPage


class HttpSmartRedirectResponse(HttpResponsePermanentRedirect):
pass


class CommonMiddlewareAppendSlashWithoutRedirect(CommonMiddleware):
""" This class converts HttpSmartRedirectResponse to the common response
of Django view, without redirect. This is necessary to match status_codes
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
code always on pages without slash.
""" This class converts HttpResponsePermanentRedirect to the common response
of Django view, without redirect. This is necessary to match status_codes
for urls like /url?q=1 and /url/?q=1. If you don't use it, you will have 302
code always on pages without slash.
"""
response_redirect_class = HttpSmartRedirectResponse
response_redirect_class = HttpResponsePermanentRedirect

def __init__(self, *args, **kwargs):
# create django request resolver
self.handler = BaseHandler()
def __init__(self, *args, **kwargs):
# create django request resolver
self.handler = BaseHandler()

# prevent recursive includes
old = settings.MIDDLEWARE
name = self.__module__ + '.' + self.__class__.__name__
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]
# prevent recursive includes
old = settings.MIDDLEWARE
name = self.__module__ + '.' + self.__class__.__name__
settings.MIDDLEWARE = [i for i in settings.MIDDLEWARE if i != name]

self.handler.load_middleware()
self.handler.load_middleware()

settings.MIDDLEWARE = old
super(CommonMiddlewareAppendSlashWithoutRedirect, self).__init__(*args, **kwargs)
settings.MIDDLEWARE = old
super().__init__(*args, **kwargs)

def get_full_path_with_slash(self, request):
""" Return the full path of the request with a trailing slash appended
without Exception in Debug mode
"""
new_path = request.get_full_path(force_append_slash=True)
# Prevent construction of scheme relative urls.
new_path = escape_leading_slashes(new_path)
return new_path
def get_full_path_with_slash(self, request):
""" Return the full path of the request with a trailing slash appended
without Exception in Debug mode
"""
# Prevent construction of scheme relative urls.
new_path = request.get_full_path(force_append_slash=True)
new_path = escape_leading_slashes(new_path)
return new_path

Check warning on line 46 in openstax/middleware.py

View check run for this annotation

Codecov / codecov/patch

openstax/middleware.py#L44-L46

Added lines #L44 - L46 were not covered by tests

def process_response(self, request, response):
response = super(CommonMiddlewareAppendSlashWithoutRedirect, self).process_response(request, response)
def process_response(self, request, response):
response = super().process_response(request, response)

if isinstance(response, HttpSmartRedirectResponse):
if not request.path.endswith('/'):
request.path = request.path + '/'
# we don't need query string in path_info because it's in request.GET already
request.path_info = request.path
response = self.handler.get_response(request)
if isinstance(response, HttpResponsePermanentRedirect):
if not request.path.endswith('/'):
request.path = request.path + '/'
# we don't need query string in path_info because it's in request.GET already
response = self.handler.get_response(request)

return response
return response


class CommonMiddlewareOpenGraphRedirect(CommonMiddleware):
OG_USER_AGENTS = [
'baiduspider',
'bingbot',
'embedly',
'facebookbot',
'facebookexternalhit/1.1',
'facebookexternalhit',
'facebot',
'google.*snippet',
'googlebot',
'linkedinbot',
'MetadataScraper',
'outbrain',
'pinterest',
'pinterestbot',
'quora',
'quora link preview',
'rogerbot',
'showyoubot',
'slackbot',
'slackbot-linkexpanding',
'twitterbot',
'vkShare',
'W3C_Validator',
'WhatsApp',
'MetadataScraper',
'yandex',
'yahoo',
]
OG_USER_AGENTS = {
'baiduspider', 'bingbot', 'embedly', 'facebookbot', 'facebookexternalhit/1.1',
'facebookexternalhit', 'facebot', 'google.*snippet', 'googlebot', 'linkedinbot',
'MetadataScraper', 'outbrain', 'pinterest', 'pinterestbot', 'quora', 'quora link preview',
'rogerbot', 'showyoubot', 'slackbot', 'slackbot-linkexpanding', 'twitterbot', 'vkShare',
'W3C_Validator', 'WhatsApp', 'yandex', 'yahoo'
}

def __init__(self, get_response):
self.get_response = get_response

def __call__(self, request, *args, **kwargs):
if 'HTTP_USER_AGENT' in request.META:

user_agent = user_agent_parser.ParseUserAgent(request.META["HTTP_USER_AGENT"])
if user_agent['family'].lower() in self.OG_USER_AGENTS:
# url path minus the trailing /
url_path = unquote(request.get_full_path()[:-1])

full_url = unquote(request.build_absolute_uri())

# index of last / to find slug, except when there isn't a last /
if url_path == '':
page_slug = "home"
else:
index = url_path.rindex('/')
page_slug = url_path[index+1:]
page_slug = "home" if url_path == '' else url_path.rsplit('/', 1)[-1]

if self.redirect_path_found(url_path):
# supporters page has the wrong slug
if page_slug == 'foundation':
page_slug = 'supporters'

# look up correct object based on path
if '/details/books/' in url_path:
page = Book.objects.filter(slug=page_slug)
elif '/blog/' in url_path:
page = NewsArticle.objects.filter(slug=page_slug)
elif '/privacy' in url_path:
page = PrivacyPolicy.objects.filter(slug='privacy-policy')
elif '/k12' in url_path:
page = K12Subject.objects.filter(slug='k12-' + page_slug)
elif '/subjects' in url_path:
flag = FeatureFlag.objects.filter(name='new_subjects')
if flag[0].feature_active:
if page_slug == 'subjects':
page_slug = 'new-subjects'
page = Subjects.objects.filter(slug=page_slug)
else:
page = Subject.objects.filter(slug=page_slug+'-books')
else:
page_slug = 'subjects'
page = BookIndex.objects.filter(slug=page_slug)
else:
page = self.page_by_slug(page_slug)

page = self.get_page(url_path, page_slug)
if page:
template = self.build_template(page[0], full_url)
return HttpResponse(template)
else:
return self.get_response(request)
else:
return self.get_response(request)
return self.get_response(request)

def get_page(self, url_path, page_slug):
if '/details/books/' in url_path:
return Book.objects.filter(slug=page_slug)
elif '/blog/' in url_path:
return NewsArticle.objects.filter(slug=page_slug)
elif '/privacy' in url_path:
return PrivacyPolicy.objects.filter(slug='privacy-policy')

Check warning on line 96 in openstax/middleware.py

View check run for this annotation

Codecov / codecov/patch

openstax/middleware.py#L96

Added line #L96 was not covered by tests
elif '/k12' in url_path:
return K12Subject.objects.filter(slug='k12-' + page_slug)

Check warning on line 98 in openstax/middleware.py

View check run for this annotation

Codecov / codecov/patch

openstax/middleware.py#L98

Added line #L98 was not covered by tests
elif '/subjects' in url_path:
flag = FeatureFlag.objects.filter(name='new_subjects')
if flag[0].feature_active:
if page_slug == 'subjects':
page_slug = 'new-subjects'
return Subjects.objects.filter(slug=page_slug)

Check warning on line 104 in openstax/middleware.py

View check run for this annotation

Codecov / codecov/patch

openstax/middleware.py#L100-L104

Added lines #L100 - L104 were not covered by tests
else:
return Subject.objects.filter(slug=page_slug + '-books')

Check warning on line 106 in openstax/middleware.py

View check run for this annotation

Codecov / codecov/patch

openstax/middleware.py#L106

Added line #L106 was not covered by tests
else:
return BookIndex.objects.filter(slug='subjects')

Check warning on line 108 in openstax/middleware.py

View check run for this annotation

Codecov / codecov/patch

openstax/middleware.py#L108

Added line #L108 was not covered by tests
else:
return self.page_by_slug(page_slug)

def build_template(self, page, page_url):
page_url = page_url.rstrip('/')
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove the last / from the path

image_url = self.image_url(page.promote_image)
template = '<!DOCTYPE html> <html> <head> <meta charset="utf-8">\n'
template += ' <title>' + str(page.title) + '</title>\n'
template += ' <meta name="description" content="{}" >\n'.format(page.search_description)
template += ' <link rel="canonical" href="{}" />\n'.format(page_url)
template += ' <meta property="og:url" content="{}" />\n'.format(page_url)
template += ' <meta property="og:type" content="article" />\n'
template += ' <meta property="og:title" content="{}" />\n'.format(page.title)
template += ' <meta property="og:description" content="{}" />\n'.format(page.search_description)
template += ' <meta property="og:image" content="{}" />\n'.format(image_url)
template += ' <meta property="og:image:alt" content="{}" />\n'.format(page.title)
template += ' <meta name="twitter:card" content="summary_large_image">\n'
template += ' <meta name="twitter:site" content="@OpenStax">\n'
template += ' <meta name="twitter:title" content="{}">\n'.format(page.title)
template += ' <meta name="twitter:description" content="{}">\n'.format(page.search_description)
template += ' <meta name="twitter:image" content="{}">\n'.format(image_url)
template += ' <meta name="twitter:image:alt" content="OpenStax">\n'
template += '</head><body></body></html>'
return template
return f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{page.title}</title>
<meta name="description" content="{page.search_description}">
<link rel="canonical" href="{page_url}">
<meta property="og:url" content="{page_url}">
<meta property="og:type" content="article">
<meta property="og:title" content="{page.title}">
<meta property="og:description" content="{page.search_description}">
<meta property="og:image" content="{image_url}">
<meta property="og:image:alt" content="{page.title}">
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:site" content="@OpenStax">
<meta name="twitter:title" content="{page.title}">
<meta name="twitter:description" content="{page.search_description}">
<meta name="twitter:image" content="{image_url}">
<meta name="twitter:image:alt" content="OpenStax">
</head>
<body></body>
</html>'''

def redirect_path_found(self, url_path):
if '/blog/' in url_path or '/details/books/' in url_path or '/foundation' in url_path or '/privacy' in url_path or '/subjects' in url_path or '' == url_path:
return True
elif '/k12' in url_path:
last_slash = url_path.rfind('/')
k12_index = url_path.rfind('k12')
if last_slash < k12_index:
return False
else:
return True
else:
return False
return any(substring in url_path for substring in ['/blog/', '/details/books/', '/foundation', '/privacy', '/subjects', '']) or '/k12' in url_path

def image_url(self, image):
image_url = build_image_url(image)
if not image_url:
return ''
return image_url
return build_image_url(image) or ''

def page_by_slug(self, page_slug):
if page_slug == 'supporters':
return Supporters.objects.all()
if page_slug == 'openstax-homepage':
return HomePage.objects.filter(locale=1)
if page_slug == 'home':
return RootPage.objects.filter(locale=1)



return RootPage.objects.filter(locale=1)
4 changes: 1 addition & 3 deletions openstax/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,14 @@
from wagtail.admin import urls as wagtailadmin_urls
from wagtail import urls as wagtail_urls
from wagtail.documents import urls as wagtaildocs_urls
from wagtail.images.views.serve import ServeView
from accounts import urls as accounts_urls

from .api import api_router
from news.search import search
from news.feeds import RssBlogFeed, AtomBlogFeed

from api import urls as api_urls
from global_settings.views import throw_error, clear_entire_cache
from wagtail.contrib.sitemaps.views import sitemap
from global_settings.views import throw_error, clear_entire_cache, sitemap

admin.site.site_header = 'OpenStax'

Expand Down
21 changes: 11 additions & 10 deletions pages/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1923,6 +1923,7 @@ class ImpactStory(Page):
]

parent_page_types = ['pages.Impact']
template = 'page.html'

def get_url_parts(self, *args, **kwargs):
return None
Expand All @@ -1932,15 +1933,6 @@ def get_sitemap_urls(self, request=None):


class Impact(Page):
improving_access = StreamField(
blocks.StreamBlock([
('content', blocks.StructBlock([
('image', ImageBlock()),
('heading', blocks.CharBlock()),
('description', blocks.RichTextBlock()),
('button_text', blocks.CharBlock()),
('button_href', blocks.URLBlock())
]))], max_num=1), use_json_field=True)
reach = StreamField(
blocks.StreamBlock([
('content', blocks.StructBlock([
Expand All @@ -1954,6 +1946,15 @@ class Impact(Page):
('link_href', blocks.URLBlock(required=False))
])))
]))], max_num=1), use_json_field=True)
improving_access = StreamField(
blocks.StreamBlock([
('content', blocks.StructBlock([
('image', ImageBlock()),
('heading', blocks.CharBlock()),
('description', blocks.RichTextBlock()),
('button_text', blocks.CharBlock()),
('button_href', blocks.URLBlock())
]))], max_num=1), use_json_field=True)
quote = StreamField(
blocks.StreamBlock([
('content', blocks.StructBlock([
Expand Down Expand Up @@ -2011,8 +2012,8 @@ class Impact(Page):

api_fields = [
APIField('title'),
APIField('improving_access'),
APIField('reach'),
APIField('improving_access'),
APIField('quote'),
APIField('making_a_difference'),
APIField('disruption'),
Expand Down