[utils] Place sanitize url function near other sanitizing functions

2024-11-13 18:50:59 +01:00 · 2015-03-17 21:34:22 +06:00 · 2015-03-17 21:34:22 +06:00 · 92a4793b3c
commit 92a4793b3c
parent dc03a42537
2 changed files with 28 additions and 31 deletions
--- a/test/test_utils.py
+++ b/test/test_utils.py
@ -39,6 +39,7 @@ from youtube_dl.utils import (
    read_batch_urls,
    sanitize_filename,
    sanitize_path,
    sanitize_url_path_consecutive_slashes,
    shell_quote,
    smuggle_url,
    str_to_int,
@ -55,7 +56,6 @@ from youtube_dl.utils import (
    xpath_with_ns,
    render_table,
    match_str,
    url_sanitize_consecutive_slashes,
 )
@ -169,6 +169,26 @@ class TestUtil(unittest.TestCase):
        self.assertEqual(sanitize_path('./abc'), 'abc')
        self.assertEqual(sanitize_path('./../abc'), '..\\abc')
    def test_sanitize_url_path_consecutive_slashes(self):
        # Each pair is (input URL, URL expected back). The first, second,
        # third and last inputs contain a run of slashes in the path; the
        # remaining ones are already clean and must come back unchanged.
        cases = (
            ('http://hostname/foo//bar/filename.html', 'http://hostname/foo/bar/filename.html'),
            ('http://hostname//foo/bar/filename.html', 'http://hostname/foo/bar/filename.html'),
            ('http://hostname//', 'http://hostname/'),
            ('http://hostname/foo/bar/filename.html', 'http://hostname/foo/bar/filename.html'),
            ('http://hostname/', 'http://hostname/'),
            ('http://hostname/abc//', 'http://hostname/abc/'),
        )
        for url, expected in cases:
            self.assertEqual(sanitize_url_path_consecutive_slashes(url), expected)
    def test_ordered_set(self):
        self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
        self.assertEqual(orderedSet([]), [])
@ -539,21 +559,6 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
            'like_count > 100 & dislike_count <? 50 & description',
            {'like_count': 190, 'dislike_count': 10}))
    def test_url_sanitize_consecutive_slashes(self):
        # Inputs paired with the URL the sanitizer is expected to return;
        # checked in order, so the first mismatch fails the test.
        expectations = [
            ('http://hostname/foo//bar/filename.html', 'http://hostname/foo/bar/filename.html'),
            ('http://hostname//foo/bar/filename.html', 'http://hostname/foo/bar/filename.html'),
            ('http://hostname//', 'http://hostname/'),
            ('http://hostname/foo/bar/filename.html', 'http://hostname/foo/bar/filename.html'),
            ('http://hostname/', 'http://hostname/'),
        ]
        for raw_url, clean_url in expectations:
            sanitized = url_sanitize_consecutive_slashes(raw_url)
            self.assertEqual(sanitized, clean_url)
 # Run the tests through unittest when this file is executed as a script.
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -326,6 +326,13 @@ def sanitize_path(s):
    return os.path.join(*sanitized_path)
 def sanitize_url_path_consecutive_slashes(url):
    """Return url with each run of slashes in its path reduced to one slash.

    The URL is split with urlparse, only the path component is rewritten,
    and the six components are put back together with urlunparse, e.g.
    'http://hostname//foo//bar' becomes 'http://hostname/foo/bar'.
    """
    scheme, netloc, path, params, query, fragment = compat_urlparse.urlparse(url)
    # Two or more adjacent slashes collapse to one; single slashes are kept.
    collapsed_path = re.sub(r'/{2,}', '/', path)
    return compat_urlparse.urlunparse(
        (scheme, netloc, collapsed_path, params, query, fragment))
 def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    res = []
@ -1804,18 +1811,3 @@ class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
            return None  # No Proxy
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
 def url_sanitize_consecutive_slashes(url):
    """Collapse repeated slashes in the path component of a URL.

    Both
        http://hostname/foo//bar/filename.html
    and
        http://hostname//foo/bar/filename.html
    come back as
        http://hostname/foo/bar/filename.html

    Only the path is rewritten; the remaining parsed components are handed
    back to urlunparse as they were parsed.
    """
    parts = compat_urlparse.urlparse(url)
    # Swap in the cleaned path while keeping the other parsed fields.
    cleaned = parts._replace(path=re.sub(r'/{2,}', '/', parts.path))
    return compat_urlparse.urlunparse(cleaned)