From d0ca6b8e9349e163cd66d321e8ef7a6d98988988 Mon Sep 17 00:00:00 2001 From: Fabio Manganiello Date: Sun, 13 May 2018 14:29:27 +0200 Subject: [PATCH] Replaced the YouTube search results parsing logic that relied on BeautifulSoup with a simpler logic that only uses regexes to parse video results. It greatly improves the performance of YouTube video search and removes the dependency on BeautifulSoup and lxml --- platypush/plugins/video/omxplayer.py | 16 +++++++++------- setup.py | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/platypush/plugins/video/omxplayer.py b/platypush/plugins/video/omxplayer.py index 6f2b7bbe..20d1f13f 100644 --- a/platypush/plugins/video/omxplayer.py +++ b/platypush/plugins/video/omxplayer.py @@ -8,7 +8,6 @@ import time import urllib.request import urllib.parse -from bs4 import BeautifulSoup from dbus.exceptions import DBusException from omxplayer import OMXPlayer @@ -292,18 +291,21 @@ class VideoOmxplayerPlugin(Plugin): query = urllib.parse.quote(query) url = "https://www.youtube.com/results?search_query=" + query response = urllib.request.urlopen(url) - html = response.read() - soup = BeautifulSoup(html, 'lxml') + html = response.read().decode('utf-8') results = [] - for vid in soup.findAll(attrs={'class':'yt-uix-tile-link'}): - m = re.match('(/watch\?v=[^&]+)', vid['href']) + while html: + m = re.search('()', html) if m: results.append({ - 'url': 'https://www.youtube.com' + m.group(1), - 'title': vid['title'], + 'url': 'https://www.youtube.com' + m.group(2), + 'title': m.group(3) }) + html = html.split(m.group(1))[1] + else: + html = '' + logging.info('{} YouTube video results for the search query "{}"' .format(len(results), query)) diff --git a/setup.py b/setup.py index b1064629..f8420079 100755 --- a/setup.py +++ b/setup.py @@ -74,7 +74,7 @@ setup( 'Support for Belkin WeMo Switch plugin': ['ouimeaux'], 'Support for text2speech plugin': ['mplayer'], 'Support for OMXPlayer plugin': ['omxplayer'], - 'Support for YouTube 
in the OMXPlayer plugin': ['youtube-dl', 'beautifulsoup4', 'lxml'], + 'Support for YouTube in the OMXPlayer plugin': ['youtube-dl'], 'Support for torrents download': ['python-libtorrent'], 'Support for Google Assistant': ['google-assistant-library'], 'Support for the Google APIs': ['google-api-python-client'],