diff --git a/app/filter.py b/app/filter.py index 64360f0..4958efe 100644 --- a/app/filter.py +++ b/app/filter.py @@ -14,6 +14,9 @@ class Filter: self.nojs = config['nojs'] if 'nojs' in config else False self.mobile = mobile + def __getitem__(self, name): + return getattr(self, name) + def reskin(self, page): # Aesthetic only re-skinning page = page.replace('>G<', '>Sh<') @@ -24,34 +27,6 @@ class Filter: return page - def gen_query(self, q, args): - # Use :past(hour/day/week/month/year) if available - # example search "new restaurants :past month" - tbs = '' - if ':past' in q: - time_range = str.strip(q.split(':past', 1)[-1]) - tbs = '&tbs=qdr:' + str.lower(time_range[0]) - - # Ensure search query is parsable - q = urlparse.quote(q) - - # Pass along type of results (news, images, books, etc) - tbm = '' - if 'tbm' in args: - tbm = '&tbm=' + args.get('tbm') - - # Get results page start value (10 per page, ie page 2 start val = 20) - start = '' - if 'start' in args: - start = '&start=' + args.get('start') - - # Grab city from config, if available - near = '' - if self.near: - near = '&near=' + urlparse.quote(self.near) - - return q + tbs + tbm + start + near - def clean(self, soup): # Remove all ads main_divs = soup.find('div', {'id': 'main'}) @@ -76,6 +51,10 @@ class Filter: if logo and self.mobile: logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;' + # Fix search bar length on mobile + search_bar = soup.find('header').find('form').find('div') + search_bar['style'] = 'width: 100%;' + # Replace hrefs with only the intended destination (no "utm" type tags) for a in soup.find_all('a', href=True): href = a['href'] diff --git a/app/request.py b/app/request.py new file mode 100644 index 0000000..6325def --- /dev/null +++ b/app/request.py @@ -0,0 +1,76 @@ +from app import rhyme +from app.filter import Filter +from io import BytesIO +import pycurl +import urllib.parse as urlparse + +# Base search url +SEARCH_URL = 
# Base search url
SEARCH_URL = 'https://www.google.com/search?gbv=1&q='

# User agent templates; the placeholders are filled in by gen_user_agent
MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0'
DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0'


def gen_user_agent(normal_ua):
    """Build the user agent string sent with outgoing requests.

    The browser/OS names in the template are replaced with values produced
    by rhyme.get_rhyme (presumably rhyming stand-ins -- verify against the
    rhyme module).

    Args:
        normal_ua: The client's original User-Agent header value. May be
            None or empty when the client sent no such header, in which
            case the desktop template is used.

    Returns:
        The mobile template if normal_ua mentions Android or iPhone,
        otherwise the desktop template, with placeholders filled in.
    """
    # A missing header arrives as None; "in None" would raise TypeError
    normal_ua = normal_ua or ''
    is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua

    mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla')
    firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox')
    linux = rhyme.get_rhyme('Lin') + 'ux'

    if is_mobile:
        return MOBILE_UA.format(mozilla, firefox)
    return DESKTOP_UA.format(mozilla, linux, firefox)


def gen_query(q, args, near_city=None):
    """Build the query string that is appended to the base search url.

    Args:
        q: The raw search query typed by the user.
        args: Mapping of request arguments; the optional 'tbm' and 'start'
            entries are passed along.
        near_city: Optional city name to search near. None or an empty
            string adds no 'near' parameter.

    Returns:
        The url-quoted query followed by any '&tbs=', '&tbm=', '&start='
        and '&near=' parameters, in that order.
    """
    # Use :past(hour/day/week/month/year) if available
    # example search "new restaurants :past month"
    tbs = ''
    if ':past' in q:
        time_range = q.split(':past', 1)[-1].strip()
        # Nothing (or something unexpected) after ":past" means no time
        # filter, rather than an IndexError or a bogus qdr value
        if time_range and time_range[0].lower() in 'hdwmy':
            tbs = '&tbs=qdr:' + time_range[0].lower()

    # Ensure search query is parsable
    q = urlparse.quote(q)

    # Pass along type of results (news, images, books, etc).
    # Quoted so the value cannot smuggle extra parameters into the url.
    tbm = ''
    if 'tbm' in args:
        tbm = '&tbm=' + urlparse.quote(str(args.get('tbm')))

    # Get results page start value (10 per page, ie page 2 start val = 20)
    start = ''
    if 'start' in args:
        start = '&start=' + urlparse.quote(str(args.get('start')))

    # Search for results near a particular city, if available
    near = ''
    if near_city:
        near = '&near=' + urlparse.quote(near_city)

    return q + tbs + tbm + start + near


class Request:
    """Sends outgoing requests using a generated user agent."""

    def __init__(self, normal_ua):
        """Store the user agent generated from the client's own one.

        Args:
            normal_ua: The client's original User-Agent header value
                (may be None).
        """
        self.modified_user_agent = gen_user_agent(normal_ua)

    def __getitem__(self, name):
        """Allow attribute access with subscript syntax (obj['attr'])."""
        return getattr(self, name)

    def send(self, base_url=SEARCH_URL, query=''):
        """Fetch base_url + query and return the response body as text.

        Redirects are followed. Bytes that are not valid utf-8 are dropped
        from the returned string.

        Args:
            base_url: Url prefix of the request; defaults to SEARCH_URL.
            query: String appended to base_url.

        Returns:
            The decoded response body.

        Raises:
            pycurl.error: If the transfer fails.
        """
        b_obj = BytesIO()
        crl = pycurl.Curl()
        try:
            crl.setopt(crl.URL, base_url + query)
            crl.setopt(crl.USERAGENT, self.modified_user_agent)
            crl.setopt(crl.WRITEDATA, b_obj)
            crl.setopt(pycurl.FOLLOWLOCATION, 1)
            crl.perform()
        finally:
            # Release the curl handle even when the transfer fails
            crl.close()

        return b_obj.getvalue().decode('utf-8', 'ignore')
b/app/routes.py index 39e4089..f56459a 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,58 +1,27 @@ -from app import app, rhyme +from app import app from app.filter import Filter +from app.request import Request, gen_query from bs4 import BeautifulSoup -from flask import request, redirect, render_template -from io import BytesIO +from flask import g, request, redirect, render_template import json import os -import pycurl import urllib.parse as urlparse app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) -MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' -DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' - -# Base search url -SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' - user_config = json.load(open(app.config['STATIC_FOLDER'] + '/config.json')) -def get_ua(user_agent): - is_mobile = 'Android' in user_agent or 'iPhone' in user_agent - - mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla') - firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox') - linux = rhyme.get_rhyme('Lin') + 'ux' - - if is_mobile: - return MOBILE_UA.format(mozilla, firefox) - else: - return DESKTOP_UA.format(mozilla, linux, firefox) - - -def send_request(curl_url, ua): - response_header = [] - - b_obj = BytesIO() - crl = pycurl.Curl() - crl.setopt(crl.URL, curl_url) - crl.setopt(crl.USERAGENT, ua) - crl.setopt(crl.WRITEDATA, b_obj) - crl.setopt(crl.HEADERFUNCTION, response_header.append) - crl.setopt(pycurl.FOLLOWLOCATION, 1) - crl.perform() - crl.close() - - return b_obj.getvalue().decode('utf-8', 'ignore') +@app.before_request +def before_request_func(): + g.user_request = Request(request.headers.get('User-Agent')) @app.route('/', methods=['GET']) def index(): bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff' - return render_template('index.html', bg=bg, 
ua=get_ua(request.headers.get('User-Agent'))) + return render_template('index.html', bg=bg, ua=g.user_request.modified_user_agent) @app.route('/search', methods=['GET']) @@ -65,12 +34,13 @@ def search(): mobile = 'Android' in user_agent or 'iPhone' in user_agent content_filter = Filter(mobile, user_config) - full_query = content_filter.gen_query(q, request.args) - get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent)) - get_body = content_filter.reskin(get_body) - soup = content_filter.clean(BeautifulSoup(get_body, 'html.parser')) + full_query = gen_query(q, request.args, content_filter.near) + get_body = g.user_request.send(query=full_query) - return render_template('display.html', query=urlparse.unquote(q), response=soup) + shoogle_results = content_filter.reskin(get_body) + formatted_results = content_filter.clean(BeautifulSoup(shoogle_results, 'html.parser')) + + return render_template('display.html', query=urlparse.unquote(q), response=formatted_results) @app.route('/config', methods=['GET', 'POST']) @@ -107,19 +77,19 @@ def imgres(): @app.route('/window') def window(): - get_body = send_request(request.args.get('location'), get_ua(request.headers.get('User-Agent'))) + get_body = g.user_request.send(base_url=request.args.get('location')) get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"') get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"') - soup = BeautifulSoup(get_body, 'html.parser') + results = BeautifulSoup(get_body, 'html.parser') try: - for script in soup('script'): + for script in results('script'): script.decompose() except Exception: pass - return render_template('display.html', response=soup) + return render_template('display.html', response=results) if __name__ == '__main__':