diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 24bf2f6..9da6d04 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -10,8 +10,5 @@ assignees: '' **Describe the feature you'd like to see added** A short description of the feature, and what it would accomplish. -**Describe which parts of the project this would modify (front end/back end/configuration/etc)** -A short description of which aspects of Whoogle Search would need modification - **Additional context** Add any other context or screenshots about the feature request here. diff --git a/.replit b/.replit new file mode 100644 index 0000000..909eee8 --- /dev/null +++ b/.replit @@ -0,0 +1,2 @@ +language = "python3" +run = "pip install -r requirements.txt && ./run" diff --git a/README.md b/README.md index 803e3d8..3e9c823 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![Build Status](https://travis-ci.com/benbusby/whoogle-search.svg?branch=master)](https://travis-ci.com/benbusby/whoogle-search) [![codebeat badge](https://codebeat.co/badges/e96cada2-fb6f-4528-8285-7d72abd74e8d)](https://codebeat.co/projects/github-com-benbusby-shoogle-master) [![Docker Pulls](https://img.shields.io/docker/pulls/benbusby/whoogle-search)](https://hub.docker.com/r/benbusby/whoogle-search) +[![Gitter](https://img.shields.io/gitter/room/benbusby/whoogle-search)](https://gitter.im/whoogle-search/community) Get Google search results, but without any ads, javascript, AMP links, cookies, or IP address tracking. Easily deployable in one click as a Docker app, and customizable with a single config file. Quick and simple to implement as a primary search engine replacement on both desktop and mobile. 
@@ -21,7 +22,7 @@ Contents - No ads or sponsored content - No javascript - No cookies -- No tracking/linking of your personal IP address +- No tracking/linking of your personal IP address\* - No AMP links - No URL tracking tags (i.e. utm=%s) - No referrer header @@ -34,6 +35,8 @@ Contents - Optional location-based searching (i.e. results near \) - Optional NoJS mode to disable all Javascript in results +*If deployed to a remote server + ## Dependencies If using Heroku Quick Deploy, **you can skip this section**. @@ -55,19 +58,28 @@ There are a few different ways to begin using the app, depending on your prefere Provides: - Free deployment of app -- Free https url (https://\.herokuapp.com) +- Free HTTPS url (https://\.herokuapp.com) - Downtime after periods of inactivity \([solution](https://github.com/benbusby/whoogle-search#prevent-downtime-heroku-only)\) -### B) [pipx](https://github.com/pipxproject/pipx#install-pipx) +### B) [Repl.it](https://repl.it) +[![Run on Repl.it](https://repl.it/badge/github/benbusby/whoogle-search)](https://repl.it/github/benbusby/whoogle-search) + +Provides: +- Free deployment of app (can be run without an account) +- Free HTTPS url (https://\.\\.repl\.co) + - Supports custom domains +- Downtime after periods of inactivity \([solution 1](https://repl.it/talk/ask/use-this-pingmat1replco-just-enter/28821/101298), [solution 2](https://repl.it/talk/learn/How-to-use-and-setup-UptimeRobot/9003)\) + +### C) [pipx](https://github.com/pipxproject/pipx#install-pipx) Persistent install: `pipx install git+https://github.com/benbusby/whoogle-search.git` Sandboxed temporary instance: -`pipx run git+https://github.com/benbusby/whoogle-search.git whoogle-search` +`pipx run --spec git+https://github.com/benbusby/whoogle-search.git whoogle-search` -### C) pip +### D) pip `pip install whoogle-search` ```bash @@ -85,7 +97,7 @@ optional arguments: --https-only Enforces HTTPS redirects for all requests (default False) ``` -### D) Manual +### E) Manual Clone 
the repo and run the following commands to start the app in a local-only environment: ```bash @@ -124,7 +136,7 @@ sudo systemctl enable whoogle sudo systemctl start whoogle ``` -### E) Manual (Docker) +### F) Manual (Docker) 1. Ensure the Docker daemon is running, and is accessible by your user account - To add user permissions, you can execute `sudo usermod -aG docker yourusername` - Running `docker ps` should return something besides an error. If you encounter an error saying the daemon isn't running, try `sudo systemctl start docker` (Linux) or ensure the docker tool is running (Windows/macOS). @@ -194,15 +206,23 @@ Update browser settings: - Firefox (iOS) - In the mobile app Settings page, tap "Search" within the "General" section. There should be an option titled "Add Search Engine" to select. It should prompt you to enter a title and search query url - use the following elements to fill out the form: - Title: "Whoogle" - - URL: "https://\/search?q=%s" + - URL: `http[s]://\/search?q=%s` - Firefox (Android) - - Navigate to your app's url - - Long-press on the search text field - - Click the "Add Search Engine" menu item - - Select a name and click ok - - Click the 3 dot menu in the top right - - Navigate to the settings menu and select the "search" sub-menu - - Select Whoogle and press "Set as default" + - Version <79.0.0 + - Navigate to your app's url + - Long-press on the search text field + - Click the "Add Search Engine" menu item + - Select a name and click ok + - Click the 3 dot menu in the top right + - Navigate to the settings menu and select the "Search" sub-menu + - Select Whoogle and press "Set as default" + - Version >=79.0.0 + - Click the 3 dot menu in the top right + - Navigate to the settings menu and select the "Search" sub-menu + - Click "Add search engine" + - Select the 'Other' radio button + - Name: "Whoogle" + - Search string to use: `https://\/search?q=%s` - [Alfred](https://www.alfredapp.com/) (Mac OS X) 1. 
Go to `Alfred Preferences` > `Features` > `Web Search` and click `Add Custom Search`. Then configure these settings - Search URL: `https://\/search?q={query} diff --git a/app/__init__.py b/app/__init__.py index 22e436d..8293c44 100644 --- a/app/__init__.py +++ b/app/__init__.py @@ -1,4 +1,4 @@ -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from flask import Flask from flask_session import Session import os @@ -9,7 +9,7 @@ app.default_key_set = generate_user_keys() app.no_cookie_ips = [] app.config['SECRET_KEY'] = os.urandom(32) app.config['SESSION_TYPE'] = 'filesystem' -app.config['VERSION_NUMBER'] = '0.2.0' +app.config['VERSION_NUMBER'] = '0.2.1' app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) app.config['CONFIG_PATH'] = os.getenv('CONFIG_VOLUME', os.path.join(app.config['STATIC_FOLDER'], 'config')) diff --git a/app/filter.py b/app/filter.py index 1cc9f87..e56dc67 100644 --- a/app/filter.py +++ b/app/filter.py @@ -1,56 +1,11 @@ from app.request import VALID_PARAMS -from app.utils.misc import BLACKLIST -from bs4 import BeautifulSoup +from app.utils.filter_utils import * from bs4.element import ResultSet from cryptography.fernet import Fernet import re import urllib.parse as urlparse from urllib.parse import parse_qs -SKIP_ARGS = ['ref_src', 'utm'] -FULL_RES_IMG = '
Full Image' -GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' -LOGO_URL = GOOG_IMG + '_desk' -BLANK_B64 = ''' -data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC -''' - - -def get_first_link(soup): - # Replace hrefs with only the intended destination (no "utm" type tags) - for a in soup.find_all('a', href=True): - # Return the first search result URL - if 'url?q=' in a['href']: - return filter_link_args(a['href']) - - -def filter_link_args(query_link): - parsed_link = urlparse.urlparse(query_link) - link_args = parse_qs(parsed_link.query) - safe_args = {} - - if len(link_args) == 0 and len(parsed_link) > 0: - return query_link - - for arg in link_args.keys(): - if arg in SKIP_ARGS: - continue - - safe_args[arg] = link_args[arg] - - # Remove original link query and replace with filtered args - query_link = query_link.replace(parsed_link.query, '') - if len(safe_args) > 0: - query_link = query_link + urlparse.urlencode(safe_args, doseq=True) - else: - query_link = query_link.replace('?', '') - - return query_link - - -def has_ad_content(element: str): - return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element - class Filter: def __init__(self, user_keys: dict, mobile=False, config=None): @@ -61,6 +16,7 @@ class Filter: self.dark = config['dark'] if 'dark' in config else False self.nojs = config['nojs'] if 'nojs' in config else False self.new_tab = config['new_tab'] if 'new_tab' in config else False + self.alt_redirect = config['alts'] if 'alts' in config else False self.mobile = mobile self.user_keys = user_keys self.main_divs = ResultSet('') @@ -188,18 +144,6 @@ class Filter: except AttributeError: pass - # Set up dark mode if active - if self.dark: - soup.find('html')['style'] = 'scrollbar-color: #333 #111;color:#fff !important;background:#000 !important' - for input_element in soup.findAll('input'): - input_element['style'] = 'color:#fff;background:#000;' - - for 
span_element in soup.findAll('span'): - span_element['style'] = 'color: white;' - - for href_element in soup.findAll('a'): - href_element['style'] = 'color: white' if href_element['href'].startswith('/search') else '' - def update_link(self, link): # Replace href with only the intended destination (no "utm" type tags) href = link['href'].replace('https://www.google.com', '') @@ -213,8 +157,12 @@ class Filter: query_link = parse_qs(result_link.query)['q'][0] if '?q=' in href else '' if query_link.startswith('/'): + # Internal google links (i.e. mail, maps, etc) should still be forwarded to Google link['href'] = 'https://google.com' + query_link elif '/search?q=' in href: + # "li:1" implies the query should be interpreted verbatim, so we wrap it in double quotes + if 'li:1' in href: + query_link = '"' + query_link + '"' new_search = '/search?q=' + self.encrypt_path(query_link) query_params = parse_qs(urlparse.urlparse(href).query) @@ -232,11 +180,13 @@ class Filter: else: link['href'] = href + # Replace link location if "alts" config is enabled + if self.alt_redirect: + # Search and replace all link descriptions with alternative location + link['href'] = get_site_alt(link['href']) + link_desc = link.find_all(text=re.compile('|'.join(SITE_ALTS.keys()))) + if len(link_desc) == 0: + return -def gen_nojs(sibling): - nojs_link = BeautifulSoup().new_tag('a') - nojs_link['href'] = '/window?location=' + sibling['href'] - nojs_link['style'] = 'display:block;width:100%;' - nojs_link.string = 'NoJS Link: ' + nojs_link['href'] - sibling.append(BeautifulSoup('


', 'html.parser')) - sibling.append(nojs_link) + # Replace link destination + link_desc[0].replace_with(get_site_alt(link_desc[0])) diff --git a/app/models/config.py b/app/models/config.py index 45b1b65..2fb4088 100644 --- a/app/models/config.py +++ b/app/models/config.py @@ -2,7 +2,7 @@ class Config: # Derived from here: # https://sites.google.com/site/tomihasa/google-language-codes#searchlanguage LANGUAGES = [ - {'name': 'Default (use server location)', 'value': ''}, + {'name': 'Default (none specified)', 'value': ''}, {'name': 'English', 'value': 'lang_en'}, {'name': 'Afrikaans', 'value': 'lang_af'}, {'name': 'Arabic', 'value': 'lang_ar'}, @@ -52,7 +52,7 @@ class Config: ] COUNTRIES = [ - {'name': 'Default (use server location)', 'value': ''}, + {'name': 'Default (none)', 'value': ''}, {'name': 'Afghanistan', 'value': 'countryAF'}, {'name': 'Albania', 'value': 'countryAL'}, {'name': 'Algeria', 'value': 'countryDZ'}, @@ -306,6 +306,7 @@ class Config: self.dark = False self.nojs = False self.near = '' + self.alts = False self.new_tab = False self.get_only = False diff --git a/app/request.py b/app/request.py index 192eedc..4abb9b3 100644 --- a/app/request.py +++ b/app/request.py @@ -12,7 +12,7 @@ MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' # Valid query params -VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source'] +VALID_PARAMS = ['tbs', 'tbm', 'start', 'near', 'source', 'nfpr'] def gen_user_agent(is_mobile): @@ -68,6 +68,10 @@ def gen_query(query, args, config, near_city=None): else: param_dict['lr'] = ('&lr=' + config.lang_search) if config.lang_search else '' + # Set autocorrected search ignore + if 'nfpr' in args: + param_dict['nfpr'] = '&nfpr=' + args.get('nfpr') + param_dict['cr'] = ('&cr=' + config.ctry) if config.ctry else '' param_dict['hl'] = ('&hl=' + config.lang_interface.replace('lang_', '')) if config.lang_interface else '' param_dict['safe'] = '&safe=' 
+ ('active' if config.safe else 'off') diff --git a/app/routes.py b/app/routes.py index f4f3bbf..198dc4f 100644 --- a/app/routes.py +++ b/app/routes.py @@ -15,7 +15,7 @@ from requests import exceptions from app import app from app.models.config import Config from app.request import Request -from app.utils.misc import valid_user_session +from app.utils.session_utils import valid_user_session from app.utils.routing_utils import * @@ -115,12 +115,11 @@ def opensearch(): if opensearch_url.endswith('/'): opensearch_url = opensearch_url[:-1] - template = render_template('opensearch.xml', - main_url=opensearch_url, - request_type='get' if g.user_config.get_only else 'post') - response = make_response(template) - response.headers['Content-Type'] = 'application/xml' - return response + return render_template( + 'opensearch.xml', + main_url=opensearch_url, + request_type='' if g.user_config.get_only else 'method="post"' + ), 200, {'Content-Disposition': 'attachment; filename="opensearch.xml"'} @app.route('/autocomplete', methods=['GET', 'POST']) diff --git a/app/static/css/dark-theme.css b/app/static/css/dark-theme.css new file mode 100644 index 0000000..36cfada --- /dev/null +++ b/app/static/css/dark-theme.css @@ -0,0 +1,42 @@ +html { + background-color: #000 !important; +} + +body { + background-color: #222 !important; +} + +div { + /*background-color: #111 !important;*/ + color: #fff !important; +} + +a:visited h3 div { + color: #bbbbff !important; +} + +a:link h3 div { + color: #4b8eea !important; +} + +a:link div { + color: #aaffaa !important; +} + +div span { + color: #bbb !important; +} + +input { + background-color: #111 !important; + color: #fff !important; +} + +#search-bar { + color: #fff !important; + background-color: #000 !important; +} + +.search-container { + background-color: #000 !important; +} diff --git a/app/static/css/main.css b/app/static/css/main.css index ef4b557..5b35bf6 100644 --- a/app/static/css/main.css +++ b/app/static/css/main.css @@ -16,6 
+16,7 @@ body { left: 50%; transform: translate(-50%, -50%); max-width: 600px; + z-index: 15; } .search-items { @@ -34,10 +35,10 @@ body { color: #685e79; border-radius: 10px 10px 0 0; max-width: 600px; - background: rgba(0,0,0,0); + background: rgba(0, 0, 0, 0); } -#search-bar:focus{ +#search-bar:focus { color: #685e79; } @@ -45,7 +46,7 @@ body { width: 100%; height: 40px; border: 1px solid #685e79; - background: #685e79; + background: #685e79 !important; text-align: center; color: #fff; cursor: pointer; @@ -68,7 +69,7 @@ button::-moz-focus-inner { .collapsible { outline: 0; - background-color: rgba(0,0,0,0); + background-color: rgba(0, 0, 0, 0); color: #685e79; cursor: pointer; padding: 18px; @@ -127,5 +128,10 @@ footer { bottom: 0%; text-align: center; width: 100%; - z-index: -1; + z-index: 10; +} + +.info-text { + font-style: italic; + font-size: 12px; } diff --git a/app/static/js/autocomplete.js b/app/static/js/autocomplete.js index 84e9b23..3d179ca 100644 --- a/app/static/js/autocomplete.js +++ b/app/static/js/autocomplete.js @@ -2,7 +2,7 @@ const handleUserInput = searchBar => { let xhrRequest = new XMLHttpRequest(); xhrRequest.open("POST", "/autocomplete"); xhrRequest.setRequestHeader("Content-type", "application/x-www-form-urlencoded"); - xhrRequest.onload = function() { + xhrRequest.onload = function () { if (xhrRequest.readyState === 4 && xhrRequest.status !== 200) { // Do nothing if failed to fetch autocomplete results return; @@ -18,6 +18,7 @@ const handleUserInput = searchBar => { const autocomplete = (searchInput, autocompleteResults) => { let currentFocus; + let originalSearch; searchInput.addEventListener("input", function () { let autocompleteList, autocompleteItem, i, val = this.value; @@ -53,9 +54,11 @@ const autocomplete = (searchInput, autocompleteResults) => { let suggestion = document.getElementById(this.id + "-autocomplete-list"); if (suggestion) suggestion = suggestion.getElementsByTagName("div"); if (e.keyCode === 40) { // down + 
e.preventDefault(); currentFocus++; addActive(suggestion); } else if (e.keyCode === 38) { //up + e.preventDefault(); currentFocus--; addActive(suggestion); } else if (e.keyCode === 13) { // enter @@ -63,17 +66,36 @@ const autocomplete = (searchInput, autocompleteResults) => { if (currentFocus > -1) { if (suggestion) suggestion[currentFocus].click(); } + } else { + originalSearch = document.getElementById("search-bar").value; } }); const addActive = suggestion => { - if (!suggestion || !suggestion[currentFocus]) return false; + let searchBar = document.getElementById("search-bar"); + + // Handle navigation outside of suggestion list + if (!suggestion || !suggestion[currentFocus]) { + if (currentFocus >= suggestion.length) { + // Move selection back to the beginning + currentFocus = 0; + } else if (currentFocus < 0) { + // Retrieve original search and remove active suggestion selection + currentFocus = -1; + searchBar.value = originalSearch; + removeActive(suggestion); + return; + } else { + return; + } + } + removeActive(suggestion); - - if (currentFocus >= suggestion.length) currentFocus = 0; - if (currentFocus < 0) currentFocus = (suggestion.length - 1); - suggestion[currentFocus].classList.add("autocomplete-active"); + + // Autofill search bar with suggestion content + searchBar.value = suggestion[currentFocus].textContent; + searchBar.focus(); }; const removeActive = suggestion => { diff --git a/app/static/js/controller.js b/app/static/js/controller.js index 95d917b..156a84d 100644 --- a/app/static/js/controller.js +++ b/app/static/js/controller.js @@ -1,3 +1,14 @@ +// Whoogle configurations that use boolean values and checkboxes +CONFIG_BOOLS = [ + "nojs", "dark", "safe", "alts", "new_tab", "get_only" +]; + +// Whoogle configurations that use string values and input fields +CONFIG_STRS = [ + "near", "url" +]; + + const setupSearchLayout = () => { // Setup search field const searchBar = document.getElementById("search-bar"); @@ -18,15 +29,6 @@ const 
setupSearchLayout = () => { }; const fillConfigValues = () => { - // Establish all config value elements - const near = document.getElementById("config-near"); - const noJS = document.getElementById("config-nojs"); - const dark = document.getElementById("config-dark"); - const safe = document.getElementById("config-safe"); - const url = document.getElementById("config-url"); - const newTab = document.getElementById("config-new-tab"); - const getOnly = document.getElementById("config-get-only"); - // Request existing config info let xhrGET = new XMLHttpRequest(); xhrGET.open("GET", "/config"); @@ -39,15 +41,15 @@ const fillConfigValues = () => { // Allow for updating/saving config values let configSettings = JSON.parse(xhrGET.responseText); - near.value = configSettings["near"] ? configSettings["near"] : ""; - noJS.checked = !!configSettings["nojs"]; - dark.checked = !!configSettings["dark"]; - safe.checked = !!configSettings["safe"]; - getOnly.checked = !!configSettings["get_only"]; - newTab.checked = !!configSettings["new_tab"]; + CONFIG_STRS.forEach(function(item) { + let configElement = document.getElementById("config-" + item.replace("_", "-")); + configElement.value = configSettings[item] ? configSettings[item] : ""; + }); - // Addresses the issue of incorrect URL being used behind reverse proxy - url.value = configSettings["url"] ? 
configSettings["url"] : ""; + CONFIG_BOOLS.forEach(function(item) { + let configElement = document.getElementById("config-" + item.replace("_", "-")); + configElement.checked = !!configSettings[item]; + }); }; xhrGET.send(); @@ -113,4 +115,8 @@ document.addEventListener("DOMContentLoaded", function() { setupSearchLayout(); setupConfigLayout(); + + // Focusing on the search input field requires a delay for elements to finish + // loading (seemingly only on FF) + setTimeout(function() { document.getElementById("search-bar").focus(); }, 250); }); diff --git a/app/templates/display.html b/app/templates/display.html index bd18838..6a8a609 100644 --- a/app/templates/display.html +++ b/app/templates/display.html @@ -8,6 +8,9 @@ + {% if dark_mode %} + + {% endif %} {{ query }} - Whoogle Search diff --git a/app/templates/index.html b/app/templates/index.html index a541413..4980316 100644 --- a/app/templates/index.html +++ b/app/templates/index.html @@ -23,6 +23,9 @@ + {% if config.dark %} + + {% endif %} Whoogle Search @@ -31,7 +34,7 @@
- +
@@ -42,7 +45,7 @@
- + +
— Note: If enabled, a website will only appear in the results if it is *hosted* in the selected country.
@@ -96,6 +100,12 @@
+
+ + +
— Replaces Twitter/YouTube/Instagram links + with Nitter/Invidious/Bibliogram links.
+
diff --git a/app/templates/opensearch.xml b/app/templates/opensearch.xml index b737be7..8e2e7b2 100644 --- a/app/templates/opensearch.xml +++ b/app/templates/opensearch.xml @@ -1,13 +1,14 @@ + Whoogle Whoogle: A lightweight, deployable Google search proxy for desktop/mobile that removes Javascript, AMP links, and ads UTF-8 - /static/img/favicon/favicon-32x32.png - + data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAADwAAAA8CAYAAAA6/NlyAAAABGdBTUEAALGPC/xhBQAAAAFzUkdCAK7OHOkAAAAgY0hSTQAAeiYAAICEAAD6AAAAgOgAAHUwAADqYAAAOpgAABdwnLpRPAAAAAZiS0dEAP8A/wD/oL2nkwAAAAlwSFlzAAAASAAAAEgARslrPgAADS9JREFUaN7Nm2lsXNd1x3/nzQxnOMNdJEeUuYjUZu2Lh7IrWZZsy46btN7ioEgRSHZapEybLgHStPDSAqntNg7yIahdyAjgJg4aB66tJDBsy4ocW5QrWzYtUQslUdx3cV9nONt7tx/ekNSQQ84MOTLzB0hg3nbv/55zzzn33HOFm4Sqx58GEcEwHIALpXIQcaNUFmCJPKYQ8aJULyJDgBcRH4ZhHHn1+ZvSL0kZwX/9KTQ3g6bZgFKU2ghsBnYDxUAOkAnYAW2aMAQBLzAK9ALngLPANUQa8Psncbk48soP/jAIV33rBzA8DE5nHkp9CXggQnI14FjkZ8NAT4T4B8BvsNnaMQx15L//bXkIVz3+NFgsQihUDDwEPALcDriWOoizEAIuAb9B5KiIXFFK6Ud+/twXR7jq0JMAuRGSfwXsBGwpJjobBtACvAr8Ap+vhcxMkpV4UoSrDj8FIhpK7UKpfwK+DDhvMtHZ0IEziPwQOAYEk5F2QoSr/u4FGBkBkUyUOgz8A7DmCyY6G4PATxF5EcPowmLhyM+ejfuSJf53wbOhEqAA+Hfge0DhMpMFU7P2AFsQqUWpPs/2fdScP7U0wlWHngSRMuA54DCmW/lDgQBrgW3ANUpLOzwVO6iprV7whfhklXoR+Eq856eglPlPoeZvWASRlIUBAHWIfBvDOLWQes/bYsQSFwAvYEo2bu+UoUAg3elgRX4uK1cVkJuXjcNhR0QIhcJMjHvp7emn9/ogE+NewuEwmqbF+3Si+Bj4C5S6Mh/pmCSqDj8FkIlSLwB/CVgXJKoUIsLKogI2bV3Htp0bcRflk5HhxGqLftUwDPyTAUaGx7h2tYXaz+toberE7w+kSuLHEPlrTBfGbAs+p4Wqw0+B1SqEQn+DKd30eGRdGU7uPOBh710eCtwrkuq4zzvJlUuNHH+nmraWrlSRfgX4W8A3OyafY7Q82/eBUh7gR4A7Htni0iK+9udf4c4DlWRlZyTdYVuajVXFbtbdWk4oFKanqw9dN5ZKfC3Qgq5f8Ow6EGW5owjfEEH9B7BvoS8ahqJibQnf+ObDbNi0Bosl9jwMh3UCgSChYAgFaJoWk0xGposNGyswDIPWli4MXbEEznZgPZr2MXD9RsLTn6x64hkIBsFi+SbwIguoslKK4pIivn74QdasL5tzPxAI0t7aRXNDO91dfYyOjKHrBi5XOgWFeZRVFLN2XRnZuVlzyPt8kxz91TE+PvU5hjG/lU8Q/4WmfRcIThmwGYui62CxlADfikc2MyuDh752/xyyhmHQ0tjB74+fpv5KMxPjXpSa22m7PQ13UQF793vYvWc7TudMc05nOg9+9T7Gx7ycP3t5qar9VQzjdeBk1aEnOfLq86ZKVz3+NBgGiBwCnmBmvRoTB+69nbvu3o1oM
53RdZ1TH3zG//7P2zQ1tBEKhqZ97ew/wzAYGR6n/koTA33DlK5ehdM1Q9ruSCNvRTaXLzbgn1yS9c4ADDTtXcCoOX8qQkwp0LQ8zNXPvC5IKcXKogLuuHMX2g1zVinFJx+d462jJxgaHJ13nt4ITRP0sM5nn5zn6K+OMToyHnV/dUUxO27btBTpTuEgSm2e+mGpeuIZU53hYaAKSJvvTRHhnvv3sLNycxShK5caefO1dxkbGUfTkpOGiHC9ux/RhPW3lk8HIZqmkZ7uSIWUs4FOLSPj1G2b7kDDMKbSMg9gqkBMKKVwudJZv7E8qvHJST8fnviYwYHhKBVPBqaGnOXa1Zao6yWrV7FmfVlMO5Ak/tjweotgZq6WApXxOlVYVMDKouiFUktjBw31rYgsPjwUEUZHxqk5cwE9rE9fT0uzUV5RkorQcwewA6XQUIpIwq083lv5Bbk4XdFpqsZrbUz6/EvxmdOkG6+1MTIyFnXdXVSA3Z4GSxOyC6V2EQ6jqXAYzOziggk3ESErOyNqtM3IqDcVKgfA+NgEY6MTUddy87KwO+wLrrwSxG5sNpcmNls6ZpYxLtLTo8dED+tzOrhYiJhWO+APRLfpdGCzJZSniIe1QL6GmWUsTuSNOZIUUr2mZfbcUIYiRQqUCxRrKJUb+RGHLUz6/FGXrFYruXlZKemNUgpbmhWnM1qLvN5JQqFwKprIQKlSDZFCFnBHM3wVw0Oj6PqMFbVaLZSUrUqJlJWCvBU5ZOdED+DgwDD+SX8q2nAgcosW2etJKE812D/MxLgv6tqadWVkZrlSYLgU6zaUk5UVPfa9PQMEg6ElDyhgQ6lcDXOJGNfRaaLR1ztId2dv1PXS1avYvG39ktyGUopCdz6Vf7QtKniZ9PlpbmhLmRcALIl7dDGjqrqL19B1Y2bY0mzcfd8eVpW4MQwj4c/dSNZms7L/3tspK4+2nU2N7TQ3daTUMGqYskl4CGs+uUhrc0fUtbLyW3j0zx6gwL0iKdJKKaxWC/vu3s3eA54oYsFAiE9P1zIx7kslYaUhMoG5ZRkXIsLw0CgnT3xCIBD9yuZt6/nGE4+wZp0Z+6oFFu9KKQzDICPTyZ88cpA/ffTgHB9/6UI9F89dTSVZHRizolQ/5v5sQhCBC+eusvHTi9xx523TblNEuHXzGvLyc/jow884//llhoZGCU4NjALEXAVlZWVQVlHMXffsZuOWtVitc1ek3Z29kUxmqvjiB7qtiAyi1GjihAW/P8BbR9/H7rCzq3JL1P1C9woefux+9t29m7bmTrq7+vBO+DAMA4fDTl5+Dqsriim6pRCHY37nsHe/h9bmTi7W1ie95JwHPkS6rJjS7U3mTRFhcGCEX7/+HjablS3bN0SpnmbRKCjMo6Awb/qaUiQlrdy8bB567D4mxr20NnUueul5A0aADkvl7oMhdH0TcbKUsUh7J3w01reaBN0rSEuzLfB88j3MzslkZVEBTQ1tTEws2XjVIvKKxbNlD5ih5aMk4I9nk56c9FN/uZn21i5cGU5y8rKxWBIL9pVS9PYMMDoyTlZ27GAvLz+HrOwM6i83E4zkyRaJ13E43rF4tu8DM4/1IGY6JCmICEop+q4PcrWukbbmTgKBoLmMFDN3NZXj0nWdgD/IyPAYrc0dnHz/DO/+9gPqLjRQvqaYrOzMmG0UuvPRRGht6iQcDi+GdBB4iXC4zhrRtQaU+hwoWezwaZowPublXE0dF8/Xk+Eypb0iPwdXhhNN0wj4zT2lwYFhxsa85lIwYr3feO1dvn7oQdxF+XO+bbVauOdLe/H7Axx/+9RiApxriJwFsHh27gfDCCNSBNxLgpvk80l7SuL+CLmerl7amrtoae6ks6OHgf4hfD4/RmQ7ZepvoG8I74SX9RsrYtoCi0WjuGQlA31DdHf1JivlX6JpbwCGpaa2Gs+Ou8AsE7oHKFos4VjkRQTRBE2Lzk3Hev56zwCGbrB2/Wos1rnjnmZPo2z1LfT1D
tJ3fSBR0oPAsyjVhIgpTc+uA6jR0VGx293AAVJYsJYMDMOgq+M6TqeD0vLimP7X6UqnpGwVLU0djAyPJUL6OCL/iUjoyM+fMwnX1FZTufsgiAwC9wN58b5yMyAihMM67W095OZlsap4ZUx3lpnlIic3m4b61nhr5QngWUyXRM35UzPz1bNzP2K19mMYOcB+lknKIkLAH6S9rZvi0pXkF8Qe+0L3CjIznTQ3thPwB+cj/Q4iP0YkMLUxPk24prYaz9a9AO2YSb1FW+xUkPZ6J+nrHaSsojimjxYRClfm09PdT1fH9ViEe4F/AS7Z3W7OnD4GzA40LBYwjJZI0dfgchEG0801N7bz5mvvMDgwPOd+wB+k+vdnqL/chMxVxhDwEpr2O0T4yY/+fobijU/V1Fbj2bkfoBkzCNnDMqk2mFIc6B/G651kw8YKbJF6EZ/Pz9u/fp/33q7G552MJd33gGdQajxuyUPN+VN4tu8zEKkHtmDmc5cVvT39WKwWVlcUMzo8zltv/o6PPqwhHIoZddUh8o+I1E8ZqgUJA1TuOgC6PoZILbAVKGMZoesG7S1d+LyTfHSyhnM1ddOVQ7PQBnxn5I03Tjq2bk28bAmm6z0A9gIvY27HLDvmIQowgMj3gZ+hlJqvon7e1dGRV58HTYOXn/s/RL6NWfS17JiHbDsi30fkF4iohY4PLBg319RW4+nSQal24DRmBe0altGQxUAd8B3gKEoZ8UqJ4y4Uamqrqdx1AKXrA6Jpp4EsTEO23EWmIcyw8Xtit1ejlEqkbjq5AnFzXrswkwX/DKSkCGMR6AVeAo4A/RCZggkgadW84azDDsw6zMf44uqnJ4D3gZfRtBMoFUr27MPiD3mYxwHsGMYdwCHgIGY4ejPm9xBQDfwSkeNK10cdbjc/+fF3k/7Q0o/xPP40mBtVm1Hqy5jnIHaw9NMtQaABOAH8FpFPEfFiGAmr700hPE185gCIG7OA5DbMQpm1mEnCDMyyCuusdnXMJLkPM5XaDnwGnEXkLCLti1Hdm044irwpdUEpJ5APFKNUKSK3RDbgZ47iwRjQjUgX0AFcR2QcMBI5tJEs/h/GMBxGKn9DKwAAACV0RVh0ZGF0ZTpjcmVhdGUAMjAyMC0wNC0xMlQyMDoyMDo0OSswMDowME0is3UAAAAldEVYdGRhdGU6bW9kaWZ5ADIwMjAtMDQtMTJUMjA6MjA6NDkrMDA6MDA8fwvJAAAARnRFWHRzb2Z0d2FyZQBJbWFnZU1hZ2ljayA2LjcuOC05IDIwMTQtMDUtMTIgUTE2IGh0dHA6Ly93d3cuaW1hZ2VtYWdpY2sub3Jn3IbtAAAAABh0RVh0VGh1bWI6OkRvY3VtZW50OjpQYWdlcwAxp/+7LwAAABh0RVh0VGh1bWI6OkltYWdlOjpoZWlnaHQAMTkyDwByhQAAABd0RVh0VGh1bWI6OkltYWdlOjpXaWR0aAAxOTLTrCEIAAAAGXRFWHRUaHVtYjo6TWltZXR5cGUAaW1hZ2UvcG5nP7JWTgAAABd0RVh0VGh1bWI6Ok1UaW1lADE1ODY3MjI4NDlV2OpiAAAAD3RFWHRUaHVtYjo6U2l6ZQAwQkKUoj7sAAAAVnRFWHRUaHVtYjo6VVJJAGZpbGU6Ly8vbW50bG9nL2Zhdmljb25zLzIwMjAtMDQtMTIvNTdhMDYyNGFhNzAyYzk3ZWU1YTE5MjgwYWEwNTkwZDMuaWNvLnBuZ1EXWHMAAAAASUVORK5CYII= + - + {{ main_url }}/search diff --git a/app/utils/filter_utils.py b/app/utils/filter_utils.py new file mode 100644 index 0000000..7f9e9a5 --- /dev/null +++ b/app/utils/filter_utils.py @@ -0,0 +1,79 @@ +from bs4 import BeautifulSoup 
+import urllib.parse as urlparse +from urllib.parse import parse_qs + +SKIP_ARGS = ['ref_src', 'utm'] +FULL_RES_IMG = '
Full Image' +GOOG_IMG = '/images/branding/searchlogo/1x/googlelogo' +LOGO_URL = GOOG_IMG + '_desk' +BLANK_B64 = ''' +data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkwAIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC +''' + +BLACKLIST = [ + 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고', + 'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', + 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés' +] + +SITE_ALTS = { + 'twitter.com': 'nitter.net', + 'youtube.com': 'invidiou.site', + 'instagram.com': 'bibliogram.art/u' +} + + +def has_ad_content(element: str): + return element.upper() in (value.upper() for value in BLACKLIST) or 'ⓘ' in element + + +def get_first_link(soup): + # Replace hrefs with only the intended destination (no "utm" type tags) + for a in soup.find_all('a', href=True): + # Return the first search result URL + if 'url?q=' in a['href']: + return filter_link_args(a['href']) + + +def get_site_alt(link: str): + for site_key in SITE_ALTS.keys(): + if site_key not in link: + continue + + link = link.replace(site_key, SITE_ALTS[site_key]) + break + + return link + + +def filter_link_args(query_link): + parsed_link = urlparse.urlparse(query_link) + link_args = parse_qs(parsed_link.query) + safe_args = {} + + if len(link_args) == 0 and len(parsed_link) > 0: + return query_link + + for arg in link_args.keys(): + if arg in SKIP_ARGS: + continue + + safe_args[arg] = link_args[arg] + + # Remove original link query and replace with filtered args + query_link = query_link.replace(parsed_link.query, '') + if len(safe_args) > 0: + query_link = query_link + urlparse.urlencode(safe_args, doseq=True) + else: + query_link = query_link.replace('?', '') + + return query_link + + +def gen_nojs(sibling): + nojs_link = BeautifulSoup().new_tag('a') + nojs_link['href'] = '/window?location=' + sibling['href'] + nojs_link['style'] = 
'display:block;width:100%;' + nojs_link.string = 'NoJS Link: ' + nojs_link['href'] + sibling.append(BeautifulSoup('


', 'html.parser')) + sibling.append(nojs_link) \ No newline at end of file diff --git a/app/utils/routing_utils.py b/app/utils/routing_utils.py index deb0444..3dac09a 100644 --- a/app/utils/routing_utils.py +++ b/app/utils/routing_utils.py @@ -1,5 +1,5 @@ from app.filter import Filter, get_first_link -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from app.request import gen_query from bs4 import BeautifulSoup from cryptography.fernet import Fernet, InvalidToken diff --git a/app/utils/misc.py b/app/utils/session_utils.py similarity index 62% rename from app/utils/misc.py rename to app/utils/session_utils.py index b87941d..f959abe 100644 --- a/app/utils/misc.py +++ b/app/utils/session_utils.py @@ -2,11 +2,6 @@ from cryptography.fernet import Fernet from flask import current_app as app REQUIRED_SESSION_VALUES = ['uuid', 'config', 'fernet_keys'] -BLACKLIST = [ - 'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama', 'Реклама', 'Anunț', '광고', - 'annons', 'Annonse', 'Iklan', '広告', 'Augl.', 'Mainos', 'Advertentie', 'إعلان', 'Գովազդ', 'विज्ञापन', 'Reklam', - 'آگهی', 'Reklāma', 'Reklaam', 'Διαφήμιση', 'מודעה', 'Hirdetés' -] def generate_user_keys(cookies_disabled=False) -> dict: diff --git a/setup.py b/setup.py index 08652bc..b2cddd1 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ setuptools.setup( author='Ben Busby', author_email='benbusby@protonmail.com', name='whoogle-search', - version='0.2.0', + version='0.2.1', include_package_data=True, install_requires=requirements, description='Self-hosted, ad-free, privacy-respecting Google metasearch engine', diff --git a/test/conftest.py b/test/conftest.py index 63aec3e..7a15f00 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -1,5 +1,5 @@ from app import app -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys import pytest diff --git a/test/test_misc.py b/test/test_misc.py index 
8eb1d78..92fcadb 100644 --- a/test/test_misc.py +++ b/test/test_misc.py @@ -1,4 +1,4 @@ -from app.utils.misc import generate_user_keys, valid_user_session +from app.utils.session_utils import generate_user_keys, valid_user_session def test_generate_user_keys(): diff --git a/test/test_results.py b/test/test_results.py index a943de6..a7aa771 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -1,6 +1,6 @@ from bs4 import BeautifulSoup from app.filter import Filter -from app.utils.misc import generate_user_keys +from app.utils.session_utils import generate_user_keys from datetime import datetime from dateutil.parser import * @@ -55,7 +55,7 @@ def test_recent_results(client): result_divs = get_search_results(rv.data) current_date = datetime.now() - for div in result_divs: + for div in [_ for _ in result_divs if _.find('span')]: date_span = div.find('span').decode_contents() if not date_span or len(date_span) > 15 or len(date_span) < 7: continue