[extractor/generic] Support Dublin Core in RSS

This commit is contained in:
Damiano Amatruda 2022-11-20 14:09:37 +01:00
parent 7748ed1eef
commit b52c912a45
No known key found for this signature in database
GPG Key ID: 246FB54CAB37CD38

View File

@ -2192,6 +2192,7 @@ def report_detected(self, name, num=1, note=None):
def _extract_rss(self, url, video_id, doc): def _extract_rss(self, url, video_id, doc):
NS_MAP = { NS_MAP = {
'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd', 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd',
'dc': 'http://dublincore.org/specifications/dublin-core/dcmes-xml/2001-04-11/dcmes-xml-dtd.dtd',
} }
entries = [] entries = []
@ -2208,12 +2209,15 @@ def _extract_rss(self, url, video_id, doc):
def itunes(key): def itunes(key):
return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None) return xpath_text(it, xpath_with_ns(f'./itunes:{key}', NS_MAP), default=None)
def dc(key):
    """Return the text of the current item's Dublin Core child element `key`, or None.

    The lookup is first made through the 'dc' prefix registered in NS_MAP.
    If that finds nothing, the element is looked up again under the DCMI
    element-set namespace, which is the URI feeds normally bind to
    `xmlns:dc`.
    """
    # ElementTree matches elements by exact namespace URI, not by prefix, so a
    # NS_MAP entry that differs from the feed's xmlns:dc declaration never
    # matches. Fall back to the standard namespace in Clark notation.
    return (
        xpath_text(it, xpath_with_ns(f'./dc:{key}', NS_MAP), default=None)
        or xpath_text(it, f'./{{http://purl.org/dc/elements/1.1/}}{key}', default=None))
entries.append({ entries.append({
'_type': 'url_transparent', '_type': 'url_transparent',
'url': next_url_new, 'url': next_url_new,
'title': try_call(lambda: it.find('title').text), 'title': try_call(lambda: it.find('title').text),
'description': xpath_text(it, 'description', default=None), 'description': xpath_text(it, 'description', default=None),
'uploader': xpath_text(it, 'author', default=None), 'uploader': xpath_text(it, 'author', default=None) or itunes('author') or dc('creator'),
'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)), 'timestamp': unified_timestamp(xpath_text(it, 'pubDate', default=None)),
'duration': parse_duration(itunes('duration')), 'duration': parse_duration(itunes('duration')),
'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')), 'thumbnail': url_or_none(xpath_attr(it, xpath_with_ns('./itunes:image', NS_MAP), 'href')),