From a7005c012efb3f822b230fab64e992b0a19b6593 Mon Sep 17 00:00:00 2001 From: Ben Busby Date: Thu, 23 Apr 2020 20:59:43 -0600 Subject: [PATCH] Refactoring of user requests and routing Curl requests and user agent related functionality was moved to its own request class. Routes was refactored to only include strictly routing related functionality. Filter class was cleaned up (had routing/request related logic in here, which didn't make sense) --- app/filter.py | 35 +++++------------------ app/request.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ app/routes.py | 64 +++++++++++------------------------------- 3 files changed, 100 insertions(+), 75 deletions(-) create mode 100644 app/request.py diff --git a/app/filter.py b/app/filter.py index 64360f0..4958efe 100644 --- a/app/filter.py +++ b/app/filter.py @@ -14,6 +14,9 @@ class Filter: self.nojs = config['nojs'] if 'nojs' in config else False self.mobile = mobile + def __getitem__(self, name): + return getattr(self, name) + def reskin(self, page): # Aesthetic only re-skinning page = page.replace('>G<', '>Sh<') @@ -24,34 +27,6 @@ class Filter: return page - def gen_query(self, q, args): - # Use :past(hour/day/week/month/year) if available - # example search "new restaurants :past month" - tbs = '' - if ':past' in q: - time_range = str.strip(q.split(':past', 1)[-1]) - tbs = '&tbs=qdr:' + str.lower(time_range[0]) - - # Ensure search query is parsable - q = urlparse.quote(q) - - # Pass along type of results (news, images, books, etc) - tbm = '' - if 'tbm' in args: - tbm = '&tbm=' + args.get('tbm') - - # Get results page start value (10 per page, ie page 2 start val = 20) - start = '' - if 'start' in args: - start = '&start=' + args.get('start') - - # Grab city from config, if available - near = '' - if self.near: - near = '&near=' + urlparse.quote(self.near) - - return q + tbs + tbm + start + near - def clean(self, soup): # Remove all ads main_divs = soup.find('div', {'id': 'main'}) @@ -76,6 +51,10 @@ class Filter: if logo and self.mobile: logo['style'] = 'display:flex; justify-content:center; align-items:center; color:#685e79; font-size:18px;' + # Fix search bar length on mobile + search_bar = soup.find('header').find('form').find('div') + search_bar['style'] = 'width: 100%;' + # Replace hrefs with only the intended destination (no "utm" type tags) for a in soup.find_all('a', href=True): href = a['href'] diff --git a/app/request.py b/app/request.py new file mode 100644 index 0000000..6325def --- /dev/null +++ b/app/request.py @@ -0,0 +1,76 @@ +from app import rhyme +from app.filter import Filter +from io import BytesIO +import pycurl +import urllib.parse as urlparse + +# Base search url +SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' + +MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' +DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' + + +def gen_user_agent(normal_ua): + is_mobile = 'Android' in normal_ua or 'iPhone' in normal_ua + + mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla') + firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox') + linux = rhyme.get_rhyme('Lin') + 'ux' + + if is_mobile: + return MOBILE_UA.format(mozilla, firefox) + else: + return DESKTOP_UA.format(mozilla, linux, firefox) + + +def gen_query(q, args, near_city=None): + # Use :past(hour/day/week/month/year) if available + # example search "new restaurants :past month" + tbs = '' + if ':past' in q: + time_range = str.strip(q.split(':past', 1)[-1]) + tbs = '&tbs=qdr:' + str.lower(time_range[0]) + + # Ensure search query is parsable + q = urlparse.quote(q) + + # Pass along type of results (news, images, books, etc) + tbm = '' + if 'tbm' in args: + tbm = '&tbm=' + args.get('tbm') + + # Get results page start value (10 per page, ie page 2 start val = 20) + start = '' + if 'start' in args: + start = '&start=' + args.get('start') + + # Search for results near a particular city, if available + near = '' + if near_city is not None: + near = '&near=' + urlparse.quote(near_city) + + return q + tbs + tbm + start + near + + +class Request: + def __init__(self, normal_ua): + self.modified_user_agent = gen_user_agent(normal_ua) + + def __getitem__(self, name): + return getattr(self, name) + + def send(self, base_url=SEARCH_URL, query=''): + response_header = [] + + b_obj = BytesIO() + crl = pycurl.Curl() + crl.setopt(crl.URL, base_url + query) + crl.setopt(crl.USERAGENT, self.modified_user_agent) + crl.setopt(crl.WRITEDATA, b_obj) + crl.setopt(crl.HEADERFUNCTION, response_header.append) + crl.setopt(pycurl.FOLLOWLOCATION, 1) + crl.perform() + crl.close() + + return b_obj.getvalue().decode('utf-8', 'ignore') diff --git a/app/routes.py b/app/routes.py index 39e4089..f56459a 100644 --- a/app/routes.py +++ b/app/routes.py @@ -1,58 +1,27 @@ -from app import app, rhyme +from app import app from app.filter import Filter +from app.request import Request, gen_query from bs4 import BeautifulSoup -from flask import request, redirect, render_template -from io import BytesIO +from flask import g, request, redirect, render_template import json import os -import pycurl import urllib.parse as urlparse app.config['APP_ROOT'] = os.getenv('APP_ROOT', os.path.dirname(os.path.abspath(__file__))) app.config['STATIC_FOLDER'] = os.getenv('STATIC_FOLDER', os.path.join(app.config['APP_ROOT'], 'static')) -MOBILE_UA = '{}/5.0 (Android 0; Mobile; rv:54.0) Gecko/54.0 {}/59.0' -DESKTOP_UA = '{}/5.0 (X11; {} x86_64; rv:75.0) Gecko/20100101 {}/75.0' - -# Base search url -SEARCH_URL = 'https://www.google.com/search?gbv=1&q=' - user_config = json.load(open(app.config['STATIC_FOLDER'] + '/config.json')) -def get_ua(user_agent): - is_mobile = 'Android' in user_agent or 'iPhone' in user_agent - - mozilla = rhyme.get_rhyme('Mo') + rhyme.get_rhyme('zilla') - firefox = rhyme.get_rhyme('Fire') + rhyme.get_rhyme('fox') - linux = rhyme.get_rhyme('Lin') + 'ux' - - if is_mobile: - return MOBILE_UA.format(mozilla, firefox) - else: - return DESKTOP_UA.format(mozilla, linux, firefox) - - -def send_request(curl_url, ua): - response_header = [] - - b_obj = BytesIO() - crl = pycurl.Curl() - crl.setopt(crl.URL, curl_url) - crl.setopt(crl.USERAGENT, ua) - crl.setopt(crl.WRITEDATA, b_obj) - crl.setopt(crl.HEADERFUNCTION, response_header.append) - crl.setopt(pycurl.FOLLOWLOCATION, 1) - crl.perform() - crl.close() - - return b_obj.getvalue().decode('utf-8', 'ignore') +@app.before_request +def before_request_func(): + g.user_request = Request(request.headers.get('User-Agent')) @app.route('/', methods=['GET']) def index(): bg = '#000' if 'dark' in user_config and user_config['dark'] else '#fff' - return render_template('index.html', bg=bg, ua=get_ua(request.headers.get('User-Agent'))) + return render_template('index.html', bg=bg, ua=g.user_request.modified_user_agent) @app.route('/search', methods=['GET']) @@ -65,12 +34,13 @@ def search(): mobile = 'Android' in user_agent or 'iPhone' in user_agent content_filter = Filter(mobile, user_config) - full_query = content_filter.gen_query(q, request.args) - get_body = send_request(SEARCH_URL + full_query, get_ua(user_agent)) - get_body = content_filter.reskin(get_body) - soup = content_filter.clean(BeautifulSoup(get_body, 'html.parser')) + full_query = gen_query(q, request.args, content_filter.near) + get_body = g.user_request.send(query=full_query) - return render_template('display.html', query=urlparse.unquote(q), response=soup) + shoogle_results = content_filter.reskin(get_body) + formatted_results = content_filter.clean(BeautifulSoup(shoogle_results, 'html.parser')) + + return render_template('display.html', query=urlparse.unquote(q), response=formatted_results) @app.route('/config', methods=['GET', 'POST']) @@ -107,19 +77,19 @@ def imgres(): @app.route('/window') def window(): - get_body = send_request(request.args.get('location'), get_ua(request.headers.get('User-Agent'))) + get_body = g.user_request.send(base_url=request.args.get('location')) get_body = get_body.replace('src="/', 'src="' + request.args.get('location') + '"') get_body = get_body.replace('href="/', 'href="' + request.args.get('location') + '"') - soup = BeautifulSoup(get_body, 'html.parser') + results = BeautifulSoup(get_body, 'html.parser') try: - for script in soup('script'): + for script in results('script'): script.decompose() except Exception: pass - return render_template('display.html', response=soup) + return render_template('display.html', response=results) if __name__ == '__main__':