Block websites from search results via user config (#304)

* Block websites in search results via user config

Adds a new config field "Block" that takes a comma-separated list of
websites to block in search results. This is applied to all searches.

* Add test for blocking sites from search results

* Document WHOOGLE_CONFIG_BLOCK usage

* Strip '-site:' filters from query in header template

The 'behind the scenes' site filter applied for blocked sites was
appearing in the query field when navigating between search categories
(all -> images -> news, etc). This prevents the filter from appearing in
any category except "images", since the image category uses a separate header.
This should eventually be addressed when the image page can begin using
the standard whoogle header, but until then, the filter will still
appear for image searches.
このコミットが含まれているのは:
Ben Busby 2021-05-07 11:45:53 -04:00 committed by GitHub
コミット c8da53d4b0
この署名に対応する既知のキーがデータベースに存在しません
GPGキーID: 4AEE18F83AFDEB23
10個のファイルの変更58行の追加5行の削除

ファイルの表示

@ -267,6 +267,7 @@ These environment variables allow setting default config values, but can be over
| WHOOGLE_CONFIG_COUNTRY | Filter results by hosting country |
| WHOOGLE_CONFIG_LANGUAGE | Set interface language |
| WHOOGLE_CONFIG_SEARCH_LANGUAGE | Set search result language |
| WHOOGLE_CONFIG_BLOCK | Block websites from search results (use comma-separated list) |
| WHOOGLE_CONFIG_DARK | Enable dark theme |
| WHOOGLE_CONFIG_SAFE | Enable safe searches |
| WHOOGLE_CONFIG_ALTS | Use social media site alternatives (nitter, invidious, etc) |

ファイルの表示

@ -85,6 +85,11 @@
"value": "",
"required": false
},
"WHOOGLE_CONFIG_BLOCK": {
"description": "[CONFIG] Block websites from search results (comma-separated list)",
"value": "",
"required": false
},
"WHOOGLE_CONFIG_DARK": {
"description": "[CONFIG] Enable dark mode (set to 1 or leave blank)",
"value": "",

ファイルの表示

@ -18,6 +18,7 @@ class Config:
'WHOOGLE_CONFIG_STYLE',
open(os.path.join(app_config['STATIC_FOLDER'],
'css/variables.css')).read())
self.block = os.getenv('WHOOGLE_CONFIG_BLOCK', '')
self.ctry = os.getenv('WHOOGLE_CONFIG_COUNTRY', '')
self.safe = read_config_bool('WHOOGLE_CONFIG_SAFE')
self.dark = read_config_bool('WHOOGLE_CONFIG_DARK')

ファイルの表示

@ -120,6 +120,10 @@ def gen_query(query, args, config, near_city=None) -> str:
) if config.lang_interface else ''
param_dict['safe'] = '&safe=' + ('active' if config.safe else 'off')
# Block all sites specified in the user config
for blocked in config.block.split(','):
query += (' -site:' + blocked) if blocked else ''
for val in param_dict.values():
if not val:
continue

ファイルの表示

@ -2,7 +2,6 @@ import argparse
import base64
import io
import json
import os
import pickle
import urllib.parse as urlparse
import uuid
@ -17,7 +16,7 @@ from app import app
from app.models.config import Config
from app.request import Request, TorError
from app.utils.bangs import resolve_bang
from app.utils.session import valid_user_session
from app.utils.session import generate_user_key, valid_user_session
from app.utils.search import *
# Load DDG bang json files only on init

ファイルの表示

@ -22,7 +22,7 @@
style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important;
color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};
type="text"
value="{{ query }}">
{# Hide the appended '-site:' block filters from the search field. partition() returns the whole query when no filter is present, whereas slicing with find() == -1 dropped the last character. #}
value="{{ query.partition('-site:')[0] }}">
<input style="color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }}" id="search-reset" type="reset" value="x">
<input name="tbm" value="{{ search_type }}" style="display: none">
<input type="submit" style="display: none;">
@ -54,7 +54,7 @@
name="q"
spellcheck="false"
type="text"
value="{{ query }}"
{# Hide the appended '-site:' block filters from the search field. partition() returns the whole query when no filter is present, whereas slicing with find() == -1 dropped the last character. #}
value="{{ query.partition('-site:')[0] }}"
style="background-color: {{ 'var(--whoogle-dark-result-bg)' if config.dark else 'var(--whoogle-result-bg)' }} !important;
color: {{ 'var(--whoogle-dark-text)' if config.dark else 'var(--whoogle-text)' }};
border-bottom: {{ '2px solid var(--whoogle-dark-element-bg)' if config.dark else '0px' }};">

ファイルの表示

@ -106,6 +106,10 @@
<label for="config-near">Near: </label>
<input type="text" name="near" id="config-near" placeholder="City Name" value="{{ config.near }}">
</div>
<div class="config-div config-div-block">
<label for="config-block">Block: </label>
<input type="text" name="block" id="config-block" placeholder="Comma-separated site list" value="{{ config.block }}">
</div>
<div class="config-div config-div-nojs">
<label for="config-nojs">Show NoJS Links: </label>
<input type="checkbox" name="nojs" id="config-nojs" {{ 'checked' if config.nojs else '' }}>

ファイルの表示

@ -1,5 +1,4 @@
from app.filter import Filter, get_first_link
from app.utils.session import generate_user_key
from app.request import gen_query
from bs4 import BeautifulSoup as bsoup
from cryptography.fernet import Fernet, InvalidToken

ファイルの表示

@ -3,6 +3,9 @@ from app.filter import Filter
from app.utils.session import generate_user_key
from datetime import datetime
from dateutil.parser import *
from urllib.parse import urlparse
from test.conftest import demo_config
def get_search_results(data):
@ -46,6 +49,29 @@ def test_post_results(client):
assert len(get_search_results(rv.data)) <= 15
def test_block_results(client):
    """Verify that a site listed in the "block" config is filtered out.

    An unfiltered search for "pinterest" must contain at least one
    pinterest.com link; after posting a config that blocks pinterest.com,
    the same search must contain none.
    """
    def result_hosts(response):
        # Network location of every anchor with an href in the response body
        soup = BeautifulSoup(response.data, 'html.parser')
        return [urlparse(anchor['href']).netloc
                for anchor in soup.find_all('a', href=True)]

    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv.status_code == 200
    assert any('pinterest.com' in host for host in result_hosts(rv))

    # Post a copy: demo_config is imported from test.conftest, so mutating
    # it in place would carry the block setting into later tests.
    blocked_config = dict(demo_config, block='pinterest.com')
    rv = client.post('/config', data=blocked_config)
    assert rv.status_code == 302

    rv = client.post('/search', data=dict(q='pinterest'))
    assert rv.status_code == 200
    for host in result_hosts(rv):
        assert 'pinterest.com' not in host
# TODO: Unit test the site alt method instead -- the results returned
# are too unreliable for this test in particular.
# def test_site_alts(client):

ファイルの表示

@ -18,25 +18,39 @@
# See app/static/settings/countries.json for values
#WHOOGLE_CONFIG_COUNTRY=countryUK
# See app/static/settings/languages.json for values
#WHOOGLE_CONFIG_LANGUAGE=lang_en
# See app/static/settings/languages.json for values
#WHOOGLE_CONFIG_SEARCH_LANGUAGE=lang_en
# Disable changing of config from client
#WHOOGLE_CONFIG_DISABLE=1
# Block websites from search results (comma-separated list)
#WHOOGLE_CONFIG_BLOCK=pinterest.com,whitehouse.gov
# Dark mode
#WHOOGLE_CONFIG_DARK=1
# Safe search mode
#WHOOGLE_CONFIG_SAFE=1
# Use social media site alternatives (nitter, bibliogram, etc)
#WHOOGLE_CONFIG_ALTS=1
# Use Tor if available
#WHOOGLE_CONFIG_TOR=1
# Open results in new tab
#WHOOGLE_CONFIG_NEW_TAB=1
# Search using GET requests only (exposes query in logs)
#WHOOGLE_CONFIG_GET_ONLY=1
# Set instance URL
#WHOOGLE_CONFIG_URL=https://<whoogle url>/
# Set custom CSS styling/theming
#WHOOGLE_CONFIG_STYLE=":root { /* LIGHT THEME COLORS */ --whoogle-background: #d8dee9; --whoogle-accent: #2e3440; --whoogle-text: #3B4252; --whoogle-contrast-text: #eceff4; --whoogle-secondary-text: #70757a; --whoogle-result-bg: #fff; --whoogle-result-title: #4c566a; --whoogle-result-url: #81a1c1; --whoogle-result-visited: #a3be8c; /* DARK THEME COLORS */ --whoogle-dark-background: #222; --whoogle-dark-accent: #685e79; --whoogle-dark-text: #fff; --whoogle-dark-contrast-text: #000; --whoogle-dark-secondary-text: #bbb; --whoogle-dark-result-bg: #000; --whoogle-dark-result-title: #1967d2; --whoogle-dark-result-url: #4b11a8; --whoogle-dark-result-visited: #bbbbff; }"