Ensure searches with a leading slash are treated as queries

A user reported a bug where searches with a leading slash (in this case:
"/e/OS apps") were interpreted as a Google-specific link when clicking
the next page of results.

This was due to the behavior that Google's search results exhibit, where
internal links for pages like support.google.com are delivered with
params like "?q=/support" rather than a direct link. This fixes that
scenario by checking the "q" param value against the user's original
query to ensure they don't match before assuming that the result is
intended as a redirect.

Fixes #776
このコミットが含まれているのは:
Ben Busby 2022-06-03 14:03:57 -06:00
コミット ef98d85dc5
この署名に対応する既知のキーがデータベースに存在しません
GPGキーID: B9B7231E01D924A1
3個のファイルの変更25行の追加2行の削除

ファイルの表示

@ -89,11 +89,13 @@ class Filter:
config: Config,
root_url='',
page_url='',
query='',
mobile=False) -> None:
self.config = config
self.mobile = mobile
self.user_key = user_key
self.page_url = page_url
self.query = query
self.main_divs = ResultSet('')
self._elements = 0
self._av = set()
@ -429,7 +431,7 @@ class Filter:
result_link = urlparse.urlparse(href)
q = extract_q(result_link.query, href)
if q.startswith('/'):
if q.startswith('/') and q not in self.query:
# Internal google links (i.e. mail, maps, etc) should still
# be forwarded to Google
link['href'] = 'https://google.com' + q

ファイルの表示

@ -118,7 +118,8 @@ class Search:
content_filter = Filter(self.session_key,
root_url=self.request.url_root,
mobile=mobile,
config=self.config)
config=self.config,
query=self.query)
full_query = gen_query(self.query,
self.request_params,
self.config)

ファイルの表示

@ -122,3 +122,23 @@ def test_recent_results(client):
assert (current_date - date).days <= (num_days + 5)
except ParserError:
pass
def test_leading_slash_search(client):
    # A query that begins with "/" must be handled as a search term,
    # not mistaken for an endpoint or path.
    query = '/test'
    response = client.get(f'/{Endpoint.search}?q={query}')
    assert response._status_code == 200

    # Run the response through the result filter, supplying the same
    # query that was searched for.
    result_filter = Filter(
        user_key=generate_user_key(),
        config=Config(**demo_config),
        query=query
    )
    soup = result_filter.clean(BeautifulSoup(response.data, 'html.parser'))

    # Only hrefs containing "start=" are checked (presumably the
    # pagination links); each must begin with the search endpoint.
    for anchor in soup.find_all('a', href=True):
        target = anchor['href']
        if 'start=' in target:
            assert target.startswith(f'{Endpoint.search}')