Initial commit. Basic models mostly done.

This commit is contained in:
tcaxle
2020-04-11 13:03:48 +01:00
commit 840e3c86f9
5761 changed files with 650959 additions and 0 deletions

View File

@@ -0,0 +1,163 @@
from urllib.parse import urlencode
from urllib.request import urlopen
from django.apps import apps as django_apps
from django.conf import settings
from django.core import paginator
from django.core.exceptions import ImproperlyConfigured
from django.urls import NoReverseMatch, reverse
from django.utils import translation
PING_URL = "https://www.google.com/webmasters/tools/ping"
class SitemapNotFound(Exception):
pass
def ping_google(sitemap_url=None, ping_url=PING_URL, sitemap_uses_https=True):
"""
Alert Google that the sitemap for the current site has been updated.
If sitemap_url is provided, it should be an absolute path to the sitemap
for this site -- e.g., '/sitemap.xml'. If sitemap_url is not provided, this
function will attempt to deduce it by using urls.reverse().
"""
sitemap_full_url = _get_sitemap_full_url(sitemap_url, sitemap_uses_https)
params = urlencode({'sitemap': sitemap_full_url})
urlopen('%s?%s' % (ping_url, params))
def _get_sitemap_full_url(sitemap_url, sitemap_uses_https=True):
if not django_apps.is_installed('django.contrib.sites'):
raise ImproperlyConfigured("ping_google requires django.contrib.sites, which isn't installed.")
if sitemap_url is None:
try:
# First, try to get the "index" sitemap URL.
sitemap_url = reverse('django.contrib.sitemaps.views.index')
except NoReverseMatch:
try:
# Next, try for the "global" sitemap URL.
sitemap_url = reverse('django.contrib.sitemaps.views.sitemap')
except NoReverseMatch:
pass
if sitemap_url is None:
raise SitemapNotFound("You didn't provide a sitemap_url, and the sitemap URL couldn't be auto-detected.")
Site = django_apps.get_model('sites.Site')
current_site = Site.objects.get_current()
scheme = 'https' if sitemap_uses_https else 'http'
return '%s://%s%s' % (scheme, current_site.domain, sitemap_url)
class Sitemap:
# This limit is defined by Google. See the index documentation at
# https://www.sitemaps.org/protocol.html#index.
limit = 50000
# If protocol is None, the URLs in the sitemap will use the protocol
# with which the sitemap was requested.
protocol = None
def __get(self, name, obj, default=None):
try:
attr = getattr(self, name)
except AttributeError:
return default
if callable(attr):
return attr(obj)
return attr
def items(self):
return []
def location(self, obj):
return obj.get_absolute_url()
@property
def paginator(self):
return paginator.Paginator(self.items(), self.limit)
def get_urls(self, page=1, site=None, protocol=None):
# Determine protocol
if self.protocol is not None:
protocol = self.protocol
if protocol is None:
protocol = 'http'
# Determine domain
if site is None:
if django_apps.is_installed('django.contrib.sites'):
Site = django_apps.get_model('sites.Site')
try:
site = Site.objects.get_current()
except Site.DoesNotExist:
pass
if site is None:
raise ImproperlyConfigured(
"To use sitemaps, either enable the sites framework or pass "
"a Site/RequestSite object in your view."
)
domain = site.domain
if getattr(self, 'i18n', False):
urls = []
current_lang_code = translation.get_language()
for lang_code, lang_name in settings.LANGUAGES:
translation.activate(lang_code)
urls += self._urls(page, protocol, domain)
translation.activate(current_lang_code)
else:
urls = self._urls(page, protocol, domain)
return urls
def _urls(self, page, protocol, domain):
urls = []
latest_lastmod = None
all_items_lastmod = True # track if all items have a lastmod
for item in self.paginator.page(page).object_list:
loc = "%s://%s%s" % (protocol, domain, self.__get('location', item))
priority = self.__get('priority', item)
lastmod = self.__get('lastmod', item)
if all_items_lastmod:
all_items_lastmod = lastmod is not None
if (all_items_lastmod and
(latest_lastmod is None or lastmod > latest_lastmod)):
latest_lastmod = lastmod
url_info = {
'item': item,
'location': loc,
'lastmod': lastmod,
'changefreq': self.__get('changefreq', item),
'priority': str(priority if priority is not None else ''),
}
urls.append(url_info)
if all_items_lastmod and latest_lastmod:
self.latest_lastmod = latest_lastmod
return urls
class GenericSitemap(Sitemap):
priority = None
changefreq = None
def __init__(self, info_dict, priority=None, changefreq=None, protocol=None):
self.queryset = info_dict['queryset']
self.date_field = info_dict.get('date_field')
self.priority = priority
self.changefreq = changefreq
self.protocol = protocol
def items(self):
# Make sure to return a clone; we don't want premature evaluation.
return self.queryset.filter()
def lastmod(self, item):
if self.date_field is not None:
return getattr(item, self.date_field)
return None
default_app_config = 'django.contrib.sitemaps.apps.SiteMapsConfig'

View File

@@ -0,0 +1,7 @@
from django.apps import AppConfig
from django.utils.translation import gettext_lazy as _
class SiteMapsConfig(AppConfig):
name = 'django.contrib.sitemaps'
verbose_name = _("Site Maps")

View File

@@ -0,0 +1,16 @@
from django.contrib.sitemaps import ping_google
from django.core.management.base import BaseCommand
class Command(BaseCommand):
help = "Ping Google with an updated sitemap, pass optional url of sitemap"
def add_arguments(self, parser):
parser.add_argument('sitemap_url', nargs='?')
parser.add_argument('--sitemap-uses-http', action='store_true')
def handle(self, *args, **options):
ping_google(
sitemap_url=options['sitemap_url'],
sitemap_uses_https=not options['sitemap_uses_http'],
)

View File

@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% spaceless %}
{% for url in urlset %}
<url>
<loc>{{ url.location }}</loc>
{% if url.lastmod %}<lastmod>{{ url.lastmod|date:"Y-m-d" }}</lastmod>{% endif %}
{% if url.changefreq %}<changefreq>{{ url.changefreq }}</changefreq>{% endif %}
{% if url.priority %}<priority>{{ url.priority }}</priority>{% endif %}
</url>
{% endfor %}
{% endspaceless %}
</urlset>

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for location in sitemaps %}<sitemap><loc>{{ location }}</loc></sitemap>{% endfor %}
</sitemapindex>

View File

@@ -0,0 +1,92 @@
import datetime
from calendar import timegm
from functools import wraps
from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils.http import http_date
def x_robots_tag(func):
@wraps(func)
def inner(request, *args, **kwargs):
response = func(request, *args, **kwargs)
response['X-Robots-Tag'] = 'noindex, noodp, noarchive'
return response
return inner
@x_robots_tag
def index(request, sitemaps,
template_name='sitemap_index.xml', content_type='application/xml',
sitemap_url_name='django.contrib.sitemaps.views.sitemap'):
req_protocol = request.scheme
req_site = get_current_site(request)
sites = [] # all sections' sitemap URLs
for section, site in sitemaps.items():
# For each section label, add links of all pages of its sitemap
# (usually generated by the `sitemap` view).
if callable(site):
site = site()
protocol = req_protocol if site.protocol is None else site.protocol
sitemap_url = reverse(sitemap_url_name, kwargs={'section': section})
absolute_url = '%s://%s%s' % (protocol, req_site.domain, sitemap_url)
sites.append(absolute_url)
# Add links to all pages of the sitemap.
for page in range(2, site.paginator.num_pages + 1):
sites.append('%s?p=%s' % (absolute_url, page))
return TemplateResponse(request, template_name, {'sitemaps': sites},
content_type=content_type)
@x_robots_tag
def sitemap(request, sitemaps, section=None,
template_name='sitemap.xml', content_type='application/xml'):
req_protocol = request.scheme
req_site = get_current_site(request)
if section is not None:
if section not in sitemaps:
raise Http404("No sitemap available for section: %r" % section)
maps = [sitemaps[section]]
else:
maps = sitemaps.values()
page = request.GET.get("p", 1)
lastmod = None
all_sites_lastmod = True
urls = []
for site in maps:
try:
if callable(site):
site = site()
urls.extend(site.get_urls(page=page, site=req_site,
protocol=req_protocol))
if all_sites_lastmod:
site_lastmod = getattr(site, 'latest_lastmod', None)
if site_lastmod is not None:
site_lastmod = (
site_lastmod.utctimetuple() if isinstance(site_lastmod, datetime.datetime)
else site_lastmod.timetuple()
)
lastmod = site_lastmod if lastmod is None else max(lastmod, site_lastmod)
else:
all_sites_lastmod = False
except EmptyPage:
raise Http404("Page %s empty" % page)
except PageNotAnInteger:
raise Http404("No page '%s'" % page)
response = TemplateResponse(request, template_name, {'urlset': urls},
content_type=content_type)
if all_sites_lastmod and lastmod is not None:
# if lastmod is defined for all sites, set header so as
# ConditionalGetMiddleware is able to send 304 NOT MODIFIED
response['Last-Modified'] = http_date(timegm(lastmod))
return response