From 71ba00785f45333d47ce5751cd19168a069d3759 Mon Sep 17 00:00:00 2001
From: Ben Busby <33362396+benbusby@users.noreply.github.com>
Date: Fri, 29 May 2020 13:21:53 -0600
Subject: [PATCH] Quick improvement to ad removal

---
 app/filter.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/app/filter.py b/app/filter.py
index 5ff46b7..8c25fe4 100644
--- a/app/filter.py
+++ b/app/filter.py
@@ -111,10 +111,9 @@ class Filter:
             return
         result_divs = main_divs.find_all('div', recursive=False)
 
-        # Only ads/sponsored content use classes in the list of result divs
-        ad_divs = [ad_div for ad_div in result_divs if 'class' in ad_div.attrs]
-        for div in ad_divs:
-            div.decompose()
+        for div in [_ for _ in result_divs]:
+            has_ad = len([_ for _ in div.find_all('span', recursive=True) if 'ad' == _.text.lower()])
+            _ = div.decompose() if has_ad else None
 
     def update_image_paths(self, soup):
         for img in [_ for _ in soup.find_all('img') if 'src' in _.attrs]: