forked from platypush/platypush
Replaced the YouTube search results parsing logic that relied on BeautifulSoup with simpler logic that only uses regexes to parse video results. It greatly improves the performance of YouTube video search and removes the dependency on BeautifulSoup and lxml.
This commit is contained in:
parent
01c5bbadcd
commit
d0ca6b8e93
2 changed files with 10 additions and 8 deletions
|
@@ -8,7 +8,6 @@ import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from dbus.exceptions import DBusException
|
from dbus.exceptions import DBusException
|
||||||
from omxplayer import OMXPlayer
|
from omxplayer import OMXPlayer
|
||||||
|
|
||||||
|
@@ -292,18 +291,21 @@ class VideoOmxplayerPlugin(Plugin):
|
||||||
query = urllib.parse.quote(query)
|
query = urllib.parse.quote(query)
|
||||||
url = "https://www.youtube.com/results?search_query=" + query
|
url = "https://www.youtube.com/results?search_query=" + query
|
||||||
response = urllib.request.urlopen(url)
|
response = urllib.request.urlopen(url)
|
||||||
html = response.read()
|
html = response.read().decode('utf-8')
|
||||||
soup = BeautifulSoup(html, 'lxml')
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
for vid in soup.findAll(attrs={'class':'yt-uix-tile-link'}):
|
while html:
|
||||||
m = re.match('(/watch\?v=[^&]+)', vid['href'])
|
m = re.search('(<a href="(/watch\?v=.+?)".+?yt-uix-tile-link.+?title="(.+?)".+?>)', html)
|
||||||
if m:
|
if m:
|
||||||
results.append({
|
results.append({
|
||||||
'url': 'https://www.youtube.com' + m.group(1),
|
'url': 'https://www.youtube.com' + m.group(2),
|
||||||
'title': vid['title'],
|
'title': m.group(3)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
html = html.split(m.group(1))[1]
|
||||||
|
else:
|
||||||
|
html = ''
|
||||||
|
|
||||||
logging.info('{} YouTube video results for the search query "{}"'
|
logging.info('{} YouTube video results for the search query "{}"'
|
||||||
.format(len(results), query))
|
.format(len(results), query))
|
||||||
|
|
||||||
|
|
2
setup.py
2
setup.py
|
@@ -74,7 +74,7 @@ setup(
|
||||||
'Support for Belkin WeMo Switch plugin': ['ouimeaux'],
|
'Support for Belkin WeMo Switch plugin': ['ouimeaux'],
|
||||||
'Support for text2speech plugin': ['mplayer'],
|
'Support for text2speech plugin': ['mplayer'],
|
||||||
'Support for OMXPlayer plugin': ['omxplayer'],
|
'Support for OMXPlayer plugin': ['omxplayer'],
|
||||||
'Support for YouTube in the OMXPlayer plugin': ['youtube-dl', 'beautifulsoup4', 'lxml'],
|
'Support for YouTube in the OMXPlayer plugin': ['youtube-dl'],
|
||||||
'Support for torrents download': ['python-libtorrent'],
|
'Support for torrents download': ['python-libtorrent'],
|
||||||
'Support for Google Assistant': ['google-assistant-library'],
|
'Support for Google Assistant': ['google-assistant-library'],
|
||||||
'Support for the Google APIs': ['google-api-python-client'],
|
'Support for the Google APIs': ['google-api-python-client'],
|
||||||
|
|
Loading…
Reference in a new issue