1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-15 11:33:30 +01:00

[youtube] Fix categories and improve tags extraction

This commit is contained in:
Sergey M․ 2020-06-16 03:13:39 +07:00
parent ed604ce7bc
commit dbeafce5d5
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -2356,17 +2356,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
m_cat_container = self._search_regex( m_cat_container = self._search_regex(
r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>', r'(?s)<h4[^>]*>\s*Category\s*</h4>\s*<ul[^>]*>(.*?)</ul>',
video_webpage, 'categories', default=None) video_webpage, 'categories', default=None)
category = None
if m_cat_container: if m_cat_container:
category = self._html_search_regex( category = self._html_search_regex(
r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category', r'(?s)<a[^<]+>(.*?)</a>', m_cat_container, 'category',
default=None) default=None)
video_categories = None if category is None else [category] if not category:
else: category = try_get(
video_categories = None microformat, lambda x: x['category'], compat_str)
video_categories = None if category is None else [category]
video_tags = [ video_tags = [
unescapeHTML(m.group('content')) unescapeHTML(m.group('content'))
for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)] for m in re.finditer(self._meta_regex('og:video:tag'), video_webpage)]
if not video_tags:
video_tags = try_get(video_details, lambda x: x['keywords'], list)
def _extract_count(count_name): def _extract_count(count_name):
return str_to_int(self._search_regex( return str_to_int(self._search_regex(