From c8da53d4b079398c947ce7eb76ed323e40adb704 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Fri, 7 May 2021 11:45:53 -0400 Subject: [PATCH] Block websites from search results via user config (#304) * Block websites in search results via user config Adds a new config field "Block" to specify a comma-separated list of websites to block in search results. This is applied to all searches. * Add test for blocking sites from search results * Document WHOOGLE_CONFIG_BLOCK usage * Strip '-site:' filters from query in header template The 'behind the scenes' site filter applied for blocked sites was appearing in the query field when navigating between search categories (all -> images -> news, etc). This prevents the filter from appearing in all categories except "images", since the image category uses a separate header. This should eventually be addressed when the image page can begin using the standard whoogle header, but until then, the filter will still appear for image searches. --- README.md | 1 + app.json | 5 +++++ app/models/config.py | 1 + app/request.py | 4 ++++ app/routes.py | 3 +-- app/templates/header.html | 4 ++-- app/templates/index.html | 4 ++++ app/utils/search.py | 1 - test/test_results.py | 26 ++++++++++++++++++++++++++ whoogle.env | 14 ++++++++++++++ 10 files changed, 58 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 34b5b1d..99edce4 100644 --- a/README.md +++ b/README.md @@ -267,6 +267,7 @@ These environment variables allow setting default config values, but can be over | WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country | | WHOOGLE_CONFIG_LANGUAGE | Set interface language | | WHOOGLE_CONFIG_SEARCH_LANGUAGE | Set search result language | +| WHOOGLE_CONFIG_BLOCK | Block websites from search results (use comma-separated list) | | WHOOGLE_CONFIG_DARK | Enable dark theme | | WHOOGLE_CONFIG_SAFE | Enable safe searches | | WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) | diff --git a/app.json b/app.json 
index 6889e5b..691f8fc 100644 --- a/app.json +++ b/app.json @@ -85,6 +85,11 @@ "value": "", "required": false }, + "WHOOGLE_CONFIG_BLOCK": { + "description": "[CONFIG] Block websites from search results (comma-separated list)", + "value": "", + "required": false + }, "WHOOGLE_CONFIG_DARK": { "description": "[CONFIG] Enable dark mode (set to 1 or leave blank)", "value": "", diff --git a/app/models/config.py b/app/models/config.py index 84ad0e4..3898ae7 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -18,6 +18,7 @@ class Config: 'WHOOGLE_CONFIG_STYLE', open(os.path.join(app_config['STATIC_FOLDER'], 'css/variables.css')).read()) + self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '') self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '') self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE') self.dark = read_config_bool('WHOOGLE_CONFIG_DARK') diff --git a/app/request.py b/app/request.py index b4c67dd..6fabe46 100644 --- a/app/request.py +++ b/app/request.py @@ -120,6 +120,10 @@ def gen_query(query, args, config, near_city=None) -> str: ) if config.lang_interface else '' param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off') + # Block all sites in the user config (entries trimmed, blanks skipped) + for blocked in map(str.strip, config.block.split(',')): + query += (' -site:' + blocked) if blocked else '' + for val in param_dict.values(): if not val: continue diff --git a/app/routes.py b/app/routes.py index 053cb72..7867442 100644 --- a/app/routes.py +++ b/app/routes.py @@ -2,7 +2,6 @@ import argparse import base64 import io import json -import os import pickle import urllib.parse as urlparse import uuid @@ -17,7 +16,7 @@ from app import app from app.models.config import Config from app.request import Request, TorError from app.utils.bangs import resolve_bang -from app.utils.session import valid_user_session +from app.utils.session import generate_user_key, valid_user_session from app.utils.search import * # Load DDG bang json files only on init diff --git a/app/templates/header.html 
b/app/templates/header.html index 4eb7822..b353d92 100644 --- a/app/templates/header.html +++ b/app/templates/header.html @@ -22,7 +22,7 @@ style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important; color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}; type="text" - value="{{ query }}"> + value="{{ query.partition('-site:')[0] }}"> @@ -54,7 +54,7 @@ name="q" spellcheck="false" type="text" - value="{{ query }}" + value="{{ query.partition('-site:')[0] }}" style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important; color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}; border-bottom: {{ '2px solid var(--whoogle-dark-element-bg)' if config.dark else '0px' }};"> diff --git a/app/templates/index.html b/app/templates/index.html index 3625719..9d43988 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -106,6 +106,10 @@ +
+ + +
diff --git a/app/utils/search.py b/app/utils/search.py index b71e6dd..a856bf6 100644 --- a/app/utils/search.py +++ b/app/utils/search.py @@ -1,5 +1,4 @@ from app.filter import Filter, get_first_link -from app.utils.session import generate_user_key from app.request import gen_query from bs4 import BeautifulSoup as bsoup from cryptography.fernet import Fernet, InvalidToken diff --git a/test/test_results.py b/test/test_results.py index 38b9936..ff6fe4b 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -3,6 +3,9 @@ from app.filter import Filter from app.utils.session import generate_user_key from datetime import datetime from dateutil.parser import * +from urllib.parse import urlparse + +from test.conftest import demo_config def get_search_results(data): @@ -46,6 +49,29 @@ def test_post_results(client): assert len(get_search_results(rv.data)) <= 15 +def test_block_results(client): + rv = client.post('/search', data=dict(q='pinterest')) + assert rv._status_code == 200 + + has_pinterest = False + for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True): + if 'pinterest.com' in urlparse(link['href']).netloc: + has_pinterest = True + break + + assert has_pinterest + + demo_config['block'] = 'pinterest.com' + rv = client.post('/config', data=demo_config) + assert rv._status_code == 302 + + rv = client.post('/search', data=dict(q='pinterest')) + assert rv._status_code == 200 + + for link in BeautifulSoup(rv.data, 'html.parser').find_all('a', href=True): + assert 'pinterest.com' not in urlparse(link['href']).netloc + + # TODO: Unit test the site alt method instead -- the results returned # are too unreliable for this test in particular. 
# def test_site_alts(client): diff --git a/whoogle.env b/whoogle.env index 0ee372f..3a0f88b 100644 --- a/whoogle.env +++ b/whoogle.env @@ -18,25 +18,39 @@ # See app/static/settings/countries.json for values #WHOOGLE_CONFIG_COUNTRY=countryUK + # See app/static/settings/languages.json for values #WHOOGLE_CONFIG_LANGUAGE=lang_en + # See app/static/settings/languages.json for values #WHOOGLE_CONFIG_SEARCH_LANGUAGE=lang_en + # Disable changing of config from client #WHOOGLE_CONFIG_DISABLE=1 + +# Block websites from search results (comma-separated list) +#WHOOGLE_CONFIG_BLOCK=pinterest.com,whitehouse.gov + # Dark mode #WHOOGLE_CONFIG_DARK=1 + # Safe search mode #WHOOGLE_CONFIG_SAFE=1 + # Use social media site alternatives (nitter, bibliogram, etc) #WHOOGLE_CONFIG_ALTS=1 + # Use Tor if available #WHOOGLE_CONFIG_TOR=1 + # Open results in new tab #WHOOGLE_CONFIG_NEW_TAB=1 + # Search using GET requests only (exposes query in logs) #WHOOGLE_CONFIG_GET_ONLY=1 + # Set instance URL #WHOOGLE_CONFIG_URL=https:/// + # Set custom CSS styling/theming #WHOOGLE_CONFIG_STYLE=":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }"