Replaced the YouTube search results parsing logic that relied on BeautifulSoup with simpler logic that uses only regexes to parse video results. This greatly improves the performance of YouTube video search and removes the dependency on BeautifulSoup and lxml.

2018-05-13 14:29:27 +02:00 · 2018-05-13 14:29:27 +02:00 · d0ca6b8e93
parent 01c5bbadcd
commit d0ca6b8e93
2 changed files with 10 additions and 8 deletions
--- a/platypush/plugins/video/omxplayer.py
+++ b/platypush/plugins/video/omxplayer.py
@@ -8,7 +8,6 @@ import time
 import urllib.request
 import urllib.parse

-from bs4 import BeautifulSoup
 from dbus.exceptions import DBusException
 from omxplayer import OMXPlayer

@@ -292,18 +291,21 @@ class VideoOmxplayerPlugin(Plugin):
        query = urllib.parse.quote(query)
        url = "https://www.youtube.com/results?search_query=" + query
        response = urllib.request.urlopen(url)
-        html = response.read()
-        soup = BeautifulSoup(html, 'lxml')
+        html = response.read().decode('utf-8')
        results = []

-        for vid in soup.findAll(attrs={'class':'yt-uix-tile-link'}):
-            m = re.match('(/watch\?v=[^&]+)', vid['href'])
+        while html:
+            m = re.search('(<a href="(/watch\?v=.+?)".+?yt-uix-tile-link.+?title="(.+?)".+?>)', html)
            if m:
                results.append({
-                    'url': 'https://www.youtube.com' + m.group(1),
-                    'title': vid['title'],
+                    'url': 'https://www.youtube.com' + m.group(2),
+                    'title': m.group(3)
                })

+                html = html.split(m.group(1))[1]
+            else:
+                html = ''
+
        logging.info('{} YouTube video results for the search query "{}"'
                     .format(len(results), query))

--- a/setup.py
+++ b/setup.py
@@ -74,7 +74,7 @@ setup(
        'Support for Belkin WeMo Switch plugin': ['ouimeaux'],
        'Support for text2speech plugin': ['mplayer'],
        'Support for OMXPlayer plugin': ['omxplayer'],
-        'Support for YouTube in the OMXPlayer plugin': ['youtube-dl', 'beautifulsoup4', 'lxml'],
+        'Support for YouTube in the OMXPlayer plugin': ['youtube-dl'],
        'Support for torrents download': ['python-libtorrent'],
        'Support for Google Assistant': ['google-assistant-library'],
        'Support for the Google APIs': ['google-api-python-client'],