diff --git a/README.md b/README.md
index a192e15..78fcdba 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@ django-simple-robots
 
 Most web applications shouldn't be indexed by Google. This app just provides a view that serves a "deny all" robots.txt.
 
-In some cases, you do want your app to be indexed - but only in your production environment (not any staging environments). For this case, you can set `ROBOTS_ALLOW_HOST`. If the incoming hostname matches this setting, an "allow all" robots.txt will be served. Otherwise, the "deny all" will be served.
+In some cases, you do want your app to be indexed - but only in your production environment (not any staging environments). For this case, you can set `ROBOTS_ALLOW_HOSTS`. If the incoming hostname matches this setting, an "allow all" robots.txt will be served. Otherwise, the "deny all" will be served.
 
 Tested against Django 2.2, 3.2 and 4.0 on Python 3.6, 3.7, 3.8, 3.9 and 3.10
 
@@ -14,26 +14,41 @@ Tested against Django 2.2, 3.2 and 4.0 on Python 3.6, 3.7, 3.8, 3.9 and 3.10
 
 Install from PIP
 
-    pip install django-simple-robots
+```bash
+pip install django-simple-robots
+```
 
 In your root urlconf, add an entry as follows:
 
-    from django.conf.urls import url
-    from simple_robots.views import serve_robots
+```python
+from django.urls import path
+from simple_robots.views import serve_robots
 
-    urlpatterns = [
-        path("robots.txt", serve_robots),
-        # ..... other stuff
-    ]
+urlpatterns = [
+    path("robots.txt", serve_robots),
+    # ..... other stuff
+]
+```
 
-Then, add `simple_robots` to `INSTALLED_APPS` in your `settings.py`
+Then, add `simple_robots` to `INSTALLED_APPS` in your `settings.py`.
 
-Optionally, set the `ROBOTS_ALLOW_HOST` settings variable.
+Optionally, set the `ROBOTS_ALLOW_HOSTS` settings variable.
 
-    ROBOTS_ALLOW_HOST = "myproductionurl.com"
+```python
+ROBOTS_ALLOW_HOSTS = ["myproductionurl.com"]
+```
+
+`ROBOTS_ALLOW_HOSTS` accepts multiple entries, with the same matching rules as [`ALLOWED_HOSTS`](https://docs.djangoproject.com/en/stable/ref/settings/#allowed-hosts):
+
+```python
+# Allow all subdomains of `myproductionurl.com` (including the apex) and exactly `myotherproductionurl.com` (no subdomains)
+ROBOTS_ALLOW_HOSTS = [".myproductionurl.com", "myotherproductionurl.com"]
+```
 
 That's it!
 
+Note: Previous versions used `ROBOTS_ALLOW_HOST` to specify a single allowed host. This setting still exists for backwards compatibility.
+
 ### Customization
 
 The allow and disallow templates are stored at `robots.txt` and `robots-disallow.txt` respectively. You can override these in your project's templates directory to customize the responses.
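A note on the README change above: the matching semantics it promises are Django's own `ALLOWED_HOSTS` rules, because the view now delegates to `django.http.request.validate_host` (see the `simple_robots/views.py` hunk below). A minimal sketch of those rules, reusing the README's example hostnames, in case a reviewer wants to sanity-check them:

```python
# Sanity-check of Django's host-matching rules, which the new
# ROBOTS_ALLOW_HOSTS setting inherits. validate_host(host, allowed_hosts)
# is Django's real helper and returns a bool; it needs no settings module.
from django.http.request import validate_host

# A leading dot matches the domain itself and any subdomain.
assert validate_host("myproductionurl.com", [".myproductionurl.com"])
assert validate_host("www.myproductionurl.com", [".myproductionurl.com"])

# A bare hostname matches only exactly; subdomains are rejected.
assert validate_host("myotherproductionurl.com", ["myotherproductionurl.com"])
assert not validate_host("www.myotherproductionurl.com", ["myotherproductionurl.com"])
```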
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 39b8955..c4d94f1 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -1,3 +1,3 @@
-black==21.11b1
+black==22.8.0
 flake8==4.0.1
 isort==5.10.1
diff --git a/simple_robots/tests/tests.py b/simple_robots/tests/tests.py
index 83a527b..c13ce92 100644
--- a/simple_robots/tests/tests.py
+++ b/simple_robots/tests/tests.py
@@ -13,6 +13,19 @@ def test_allow_if_host_matches(self):
         response = self.client.get("/robots.txt", HTTP_HOST="test.com")
         self.assertEqual(response.content, b"User-agent: *\nAllow: /\n")
 
+    @override_settings(ROBOTS_ALLOW_HOST=".test.com", ALLOWED_HOSTS=[".test.com"])
+    def test_allow_if_host_matches_wildcard(self):
+        response = self.client.get("/robots.txt", HTTP_HOST="example.test.com")
+        self.assertEqual(response.content, b"User-agent: *\nAllow: /\n")
+
+    @override_settings(
+        ROBOTS_ALLOW_HOSTS=["example.test.com", "example2.test.com"],
+        ALLOWED_HOSTS=[".test.com"],
+    )
+    def test_allow_if_host_matches_multiple(self):
+        response = self.client.get("/robots.txt", HTTP_HOST="example2.test.com")
+        self.assertEqual(response.content, b"User-agent: *\nAllow: /\n")
+
     @override_settings(
         ROBOTS_ALLOW_HOST="test.com", ALLOWED_HOSTS=["test.com", "somethingelse.com"]
     )
diff --git a/simple_robots/views.py b/simple_robots/views.py
index 87c039d..bdf4930 100644
--- a/simple_robots/views.py
+++ b/simple_robots/views.py
@@ -1,7 +1,7 @@
 from django.conf import settings
+from django.http.request import validate_host
 from django.views.generic import TemplateView
 
-ROBOTS_ALLOW_HOST_SETTING = "ROBOTS_ALLOW_HOST"
 ROBOTS_ALLOW_TEMPLATE = "robots.txt"
 ROBOTS_DISALLOW_TEMPLATE = "robots-disallow.txt"
 
@@ -9,11 +9,15 @@ class ServeRobotsView(TemplateView):
     content_type = "text/plain"
 
+    def get_allowed_hosts(self):
+        # Maintain singular setting for backwards compatibility
+        if getattr(settings, "ROBOTS_ALLOW_HOST", ""):
+            return [settings.ROBOTS_ALLOW_HOST]
+
+        return getattr(settings, "ROBOTS_ALLOW_HOSTS", [])
+
     def get_template_names(self):
-        if (
-            getattr(settings, ROBOTS_ALLOW_HOST_SETTING, None)
-            == self.request.get_host()
-        ):
+        if validate_host(self.request.get_host(), self.get_allowed_hosts()):
             return ROBOTS_ALLOW_TEMPLATE
         return ROBOTS_DISALLOW_TEMPLATE
 
 serve_robots = ServeRobotsView.as_view()
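One behavioural detail of the `views.py` change worth spelling out: when both settings are defined, the legacy singular `ROBOTS_ALLOW_HOST` wins and `ROBOTS_ALLOW_HOSTS` is ignored. The sketch below (not part of the diff) illustrates that precedence; the hostnames are invented and it assumes a configured Django settings module:

```python
# Precedence implemented by get_allowed_hosts(): the legacy
# ROBOTS_ALLOW_HOST setting, when truthy, shadows ROBOTS_ALLOW_HOSTS.
from django.test import override_settings

from simple_robots.views import ServeRobotsView

view = ServeRobotsView()

with override_settings(
    ROBOTS_ALLOW_HOST="legacy.example.com",
    ROBOTS_ALLOW_HOSTS=["new.example.com"],
):
    assert view.get_allowed_hosts() == ["legacy.example.com"]

# With the legacy setting empty, the list setting takes effect.
with override_settings(ROBOTS_ALLOW_HOST="", ROBOTS_ALLOW_HOSTS=["new.example.com"]):
    assert view.get_allowed_hosts() == ["new.example.com"]
```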