Replaced the YouTube search results parsing logic that relied on BeautifulSoup with simpler logic that uses only regexes to parse video results. This greatly improves the performance of YouTube video search and removes the dependency on BeautifulSoup and lxml.

This commit is contained in:
Fabio Manganiello 2018-05-13 14:29:27 +02:00
parent 01c5bbadcd
commit d0ca6b8e93
2 changed files with 10 additions and 8 deletions

View file

@ -8,7 +8,6 @@ import time
import urllib.request import urllib.request
import urllib.parse import urllib.parse
from bs4 import BeautifulSoup
from dbus.exceptions import DBusException from dbus.exceptions import DBusException
from omxplayer import OMXPlayer from omxplayer import OMXPlayer
@ -292,18 +291,21 @@ class VideoOmxplayerPlugin(Plugin):
query = urllib.parse.quote(query) query = urllib.parse.quote(query)
url = "https://www.youtube.com/results?search_query=" + query url = "https://www.youtube.com/results?search_query=" + query
response = urllib.request.urlopen(url) response = urllib.request.urlopen(url)
html = response.read() html = response.read().decode('utf-8')
soup = BeautifulSoup(html, 'lxml')
results = [] results = []
for vid in soup.findAll(attrs={'class':'yt-uix-tile-link'}): while html:
m = re.match('(/watch\?v=[^&]+)', vid['href']) m = re.search('(<a href="(/watch\?v=.+?)".+?yt-uix-tile-link.+?title="(.+?)".+?>)', html)
if m: if m:
results.append({ results.append({
'url': 'https://www.youtube.com' + m.group(1), 'url': 'https://www.youtube.com' + m.group(2),
'title': vid['title'], 'title': m.group(3)
}) })
html = html.split(m.group(1))[1]
else:
html = ''
logging.info('{} YouTube video results for the search query "{}"' logging.info('{} YouTube video results for the search query "{}"'
.format(len(results), query)) .format(len(results), query))

View file

@ -74,7 +74,7 @@ setup(
'Support for Belkin WeMo Switch plugin': ['ouimeaux'], 'Support for Belkin WeMo Switch plugin': ['ouimeaux'],
'Support for text2speech plugin': ['mplayer'], 'Support for text2speech plugin': ['mplayer'],
'Support for OMXPlayer plugin': ['omxplayer'], 'Support for OMXPlayer plugin': ['omxplayer'],
'Support for YouTube in the OMXPlayer plugin': ['youtube-dl', 'beautifulsoup4', 'lxml'], 'Support for YouTube in the OMXPlayer plugin': ['youtube-dl'],
'Support for torrents download': ['python-libtorrent'], 'Support for torrents download': ['python-libtorrent'],
'Support for Google Assistant': ['google-assistant-library'], 'Support for Google Assistant': ['google-assistant-library'],
'Support for the Google APIs': ['google-api-python-client'], 'Support for the Google APIs': ['google-api-python-client'],