From 36e6f62cd0883f0f486d1666d010e5d9e6d515bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sun, 25 Oct 2015 20:04:55 +0100
Subject: [PATCH 1/4] Use a wrapper around xml.etree.ElementTree.fromstring in
 python 2.x (#7178)

Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes).
---
 test/test_compat.py                 |  7 +++++++
 test/test_utils.py                  | 11 +++++++----
 youtube_dl/compat.py                | 25 +++++++++++++++++++++++++
 youtube_dl/downloader/f4m.py        |  4 ++--
 youtube_dl/extractor/bbc.py         |  8 +++++---
 youtube_dl/extractor/bilibili.py    |  6 ++++--
 youtube_dl/extractor/brightcove.py  |  4 ++--
 youtube_dl/extractor/common.py      |  4 ++--
 youtube_dl/extractor/crunchyroll.py |  4 ++--
 youtube_dl/extractor/vevo.py        |  6 +++---
 youtube_dl/utils.py                 |  3 ++-
 11 files changed, 61 insertions(+), 21 deletions(-)
diff --git a/test/test_compat.py b/test/test_compat.py
index 4ee0dc99d0..2b0860479e 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -13,8 +13,10 @@
 from youtube_dl.utils import get_filesystem_encoding
 from youtube_dl.compat import (
     compat_getenv,
+    compat_etree_fromstring,
     compat_expanduser,
     compat_shlex_split,
+    compat_str,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
 )
@@ -71,5 +73,10 @@ def test_compat_urllib_parse_unquote_plus(self):
     def test_compat_shlex_split(self):
         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
 
+    def test_compat_etree_fromstring(self):
+        xml = '<el foo="bar"></el>'
+        doc = compat_etree_fromstring(xml.encode('utf-8'))
+        self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/test_utils.py b/test/test_utils.py
index 918a7a9ef8..a9e0fed7e9 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -68,6 +68,9 @@
     cli_valueless_option,
     cli_bool_option,
 )
+from youtube_dl.compat import (
+    compat_etree_fromstring,
+)
 
 
 class TestUtil(unittest.TestCase):
@@ -242,7 +245,7 @@ def test_find_xpath_attr(self):
             <node x="b" y="d" />
             <node x="" />
         </root>'''
-        doc = xml.etree.ElementTree.fromstring(testxml)
+        doc = compat_etree_fromstring(testxml)
 
         self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
         self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
@@ -263,7 +266,7 @@ def test_xpath_with_ns(self):
                 <url>http://server.com/download.mp3</url>
             </media:song>
         </root>'''
-        doc = xml.etree.ElementTree.fromstring(testxml)
+        doc = compat_etree_fromstring(testxml)
         find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
         self.assertTrue(find('media:song') is not None)
         self.assertEqual(find('media:song/media:author').text, 'The Author')
@@ -285,7 +288,7 @@ def test_xpath_text(self):
                 <p>Foo</p>
             </div>
         </root>'''
-        doc = xml.etree.ElementTree.fromstring(testxml)
+        doc = compat_etree_fromstring(testxml)
         self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
         self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
         self.assertTrue(xpath_text(doc, 'div/bar') is None)
@@ -297,7 +300,7 @@ def test_xpath_attr(self):
                 <p x="a">Foo</p>
             </div>
         </root>'''
-        doc = xml.etree.ElementTree.fromstring(testxml)
+        doc = compat_etree_fromstring(testxml)
         self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
         self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
         self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index d103ab9adf..cf10835ca5 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -14,6 +14,7 @@
 import subprocess
 import sys
 import itertools
+import xml.etree.ElementTree
 
 
 try:
@@ -212,6 +213,29 @@ def data_open(self, req):
 except ImportError:  # Python 2.6
     from xml.parsers.expat import ExpatError as compat_xml_parse_error
 
+if sys.version_info[0] >= 3:
+    compat_etree_fromstring = xml.etree.ElementTree.fromstring
+else:
+    # on python 2.x the the attributes of a node are str objects instead of
+    # unicode
+    etree = xml.etree.ElementTree
+
+    # on 2.6 XML doesn't have a parser argument, function copied from CPython
+    # 2.7 source
+    def _XML(text, parser=None):
+        if not parser:
+            parser = etree.XMLParser(target=etree.TreeBuilder())
+        parser.feed(text)
+        return parser.close()
+
+    def _element_factory(*args, **kwargs):
+        el = etree.Element(*args, **kwargs)
+        for k, v in el.items():
+            el.set(k, v.decode('utf-8'))
+        return el
+
+    def compat_etree_fromstring(text):
+        return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
 
 try:
     from urllib.parse import parse_qs as compat_parse_qs
@@ -507,6 +531,7 @@ def compat_itertools_count(start=0, step=1):
     'compat_chr',
     'compat_cookiejar',
     'compat_cookies',
+    'compat_etree_fromstring',
     'compat_expanduser',
     'compat_get_terminal_size',
     'compat_getenv',
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 7f6143954d..6170cc1552 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -5,10 +5,10 @@
 import itertools
 import os
 import time
-import xml.etree.ElementTree as etree
 
 from .fragment import FragmentFD
 from ..compat import (
+    compat_etree_fromstring,
     compat_urlparse,
     compat_urllib_error,
     compat_urllib_parse_urlparse,
@@ -290,7 +290,7 @@ def real_download(self, filename, info_dict):
         man_url = urlh.geturl()
         manifest = urlh.read()
 
-        doc = etree.fromstring(manifest)
+        doc = compat_etree_fromstring(manifest)
         formats = [(int(f.attrib.get('bitrate', -1)), f)
                    for f in self._get_unencrypted_media(doc)]
         if requested_bitrate is None:
diff --git a/youtube_dl/extractor/bbc.py b/youtube_dl/extractor/bbc.py
index 2cdce1eb95..a55a6dbc9d 100644
--- a/youtube_dl/extractor/bbc.py
+++ b/youtube_dl/extractor/bbc.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..utils import (
@@ -14,7 +13,10 @@
     remove_end,
     unescapeHTML,
 )
-from ..compat import compat_HTTPError
+from ..compat import (
+    compat_etree_fromstring,
+    compat_HTTPError,
+)
 
 
 class BBCCoUkIE(InfoExtractor):
@@ -344,7 +346,7 @@ def _download_media_selector_url(self, url, programme_id=None):
                 url, programme_id, 'Downloading media selection XML')
         except ExtractorError as ee:
             if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
-                media_selection = xml.etree.ElementTree.fromstring(ee.cause.read().decode('utf-8'))
+                media_selection = compat_etree_fromstring(ee.cause.read().decode('utf-8'))
             else:
                 raise
         return self._process_media_selector(media_selection, programme_id)
diff --git a/youtube_dl/extractor/bilibili.py b/youtube_dl/extractor/bilibili.py
index ecc17ebebc..6c66a12368 100644
--- a/youtube_dl/extractor/bilibili.py
+++ b/youtube_dl/extractor/bilibili.py
@@ -4,9 +4,11 @@
 import re
 import itertools
 import json
-import xml.etree.ElementTree as ET
 
 from .common import InfoExtractor
+from ..compat import (
+    compat_etree_fromstring,
+)
 from ..utils import (
     int_or_none,
     unified_strdate,
@@ -88,7 +90,7 @@ def _real_extract(self, url):
         except ValueError:
             pass
 
-        lq_doc = ET.fromstring(lq_page)
+        lq_doc = compat_etree_fromstring(lq_page)
         lq_durls = lq_doc.findall('./durl')
 
         hq_doc = self._download_xml(
diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py
index 4721c22930..1686cdde14 100644
--- a/youtube_dl/extractor/brightcove.py
+++ b/youtube_dl/extractor/brightcove.py
@@ -3,10 +3,10 @@
 
 import re
 import json
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_parse_qs,
     compat_str,
     compat_urllib_parse,
@@ -119,7 +119,7 @@ def _build_brighcove_url(cls, object_str):
         object_str = fix_xml_ampersands(object_str)
 
         try:
-            object_doc = xml.etree.ElementTree.fromstring(object_str.encode('utf-8'))
+            object_doc = compat_etree_fromstring(object_str.encode('utf-8'))
         except compat_xml_parse_error:
             return
 
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 10c0d5d1f9..52523d7b24 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -10,7 +10,6 @@
 import socket
 import sys
 import time
-import xml.etree.ElementTree
 
 from ..compat import (
     compat_cookiejar,
@@ -23,6 +22,7 @@
     compat_urllib_request,
     compat_urlparse,
     compat_str,
+    compat_etree_fromstring,
 )
 from ..utils import (
     NO_DEFAULT,
@@ -461,7 +461,7 @@ def _download_xml(self, url_or_request, video_id,
             return xml_string
         if transform_source:
             xml_string = transform_source(xml_string)
-        return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8'))
+        return compat_etree_fromstring(xml_string.encode('utf-8'))
 
     def _download_json(self, url_or_request, video_id,
                        note='Downloading JSON metadata',
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index f8ce10111d..0c9b8ca024 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -5,12 +5,12 @@
 import json
 import base64
 import zlib
-import xml.etree.ElementTree
 
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_request,
@@ -234,7 +234,7 @@ def ass_bool(strvalue):
         return output
 
     def _extract_subtitles(self, subtitle):
-        sub_root = xml.etree.ElementTree.fromstring(subtitle)
+        sub_root = compat_etree_fromstring(subtitle)
         return [{
             'ext': 'srt',
             'data': self._convert_subtitles_to_srt(sub_root),
diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py
index c17094f819..4c0de354f7 100644
--- a/youtube_dl/extractor/vevo.py
+++ b/youtube_dl/extractor/vevo.py
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
 import re
-import xml.etree.ElementTree
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_etree_fromstring,
     compat_urllib_request,
 )
 from ..utils import (
@@ -97,7 +97,7 @@ def _formats_from_json(self, video_info):
         if last_version['version'] == -1:
             raise ExtractorError('Unable to extract last version of the video')
 
-        renditions = xml.etree.ElementTree.fromstring(last_version['data'])
+        renditions = compat_etree_fromstring(last_version['data'])
         formats = []
         # Already sorted from worst to best quality
         for rend in renditions.findall('rendition'):
@@ -114,7 +114,7 @@ def _formats_from_json(self, video_info):
 
     def _formats_from_smil(self, smil_xml):
         formats = []
-        smil_doc = xml.etree.ElementTree.fromstring(smil_xml.encode('utf-8'))
+        smil_doc = compat_etree_fromstring(smil_xml.encode('utf-8'))
         els = smil_doc.findall('.//{http://www.w3.org/2001/SMIL20/Language}video')
         for el in els:
             src = el.attrib['src']
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index a61e476467..7d846d6808 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -36,6 +36,7 @@
 from .compat import (
     compat_basestring,
     compat_chr,
+    compat_etree_fromstring,
     compat_html_entities,
     compat_http_client,
     compat_kwargs,
@@ -1974,7 +1975,7 @@ def parse_node(node):
 
         return out
 
-    dfxp = xml.etree.ElementTree.fromstring(dfxp_data.encode('utf-8'))
+    dfxp = compat_etree_fromstring(dfxp_data.encode('utf-8'))
     out = []
     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall(_x('.//ttaf1:p')) or dfxp.findall('.//p')
 

From 387db16a789fea25795433538d80513c18d0f699 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Sun, 25 Oct 2015 20:30:54 +0100
Subject: [PATCH 2/4] [compat] compat_etree_fromstring: only decode bytes
 objects

---
 test/test_compat.py  | 3 ++-
 youtube_dl/compat.py | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 2b0860479e..834f4bc552 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -74,9 +74,10 @@ def test_compat_shlex_split(self):
         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
 
     def test_compat_etree_fromstring(self):
-        xml = '<el foo="bar"></el>'
+        xml = '<el foo="bar" spam="中文"></el>'
         doc = compat_etree_fromstring(xml.encode('utf-8'))
         self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
+        self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index cf10835ca5..f39d4e9a9e 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -216,8 +216,7 @@ def data_open(self, req):
 if sys.version_info[0] >= 3:
     compat_etree_fromstring = xml.etree.ElementTree.fromstring
 else:
-    # on python 2.x the the attributes of a node are str objects instead of
-    # unicode
+    # on python 2.x the the attributes of a node aren't always unicode objects
     etree = xml.etree.ElementTree
 
     # on 2.6 XML doesn't have a parser argument, function copied from CPython
@@ -231,7 +230,8 @@ def _XML(text, parser=None):
     def _element_factory(*args, **kwargs):
         el = etree.Element(*args, **kwargs)
         for k, v in el.items():
-            el.set(k, v.decode('utf-8'))
+            if isinstance(v, bytes):
+                el.set(k, v.decode('utf-8'))
         return el
 
     def compat_etree_fromstring(text):

From f78546272cf7c4b10c8003870728ab69bec982fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Mon, 26 Oct 2015 16:41:24 +0100
Subject: [PATCH 3/4] [compat] compat_etree_fromstring: also decode the text
 attribute

Deletes parse_xml from utils, because it also does it.
---
 test/test_compat.py             | 11 ++++++++++-
 youtube_dl/compat.py            | 18 ++++++++++++++++--
 youtube_dl/extractor/ard.py     |  4 ++--
 youtube_dl/extractor/generic.py |  4 ++--
 youtube_dl/utils.py             | 23 -----------------------
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/test/test_compat.py b/test/test_compat.py
index 834f4bc552..b6bfad05e3 100644
--- a/test/test_compat.py
+++ b/test/test_compat.py
@@ -74,10 +74,19 @@ def test_compat_shlex_split(self):
         self.assertEqual(compat_shlex_split('-option "one two"'), ['-option', 'one two'])
 
     def test_compat_etree_fromstring(self):
-        xml = '<el foo="bar" spam="中文"></el>'
+        xml = '''
+            <root foo="bar" spam="中文">
+                <normal>foo</normal>
+                <chinese>中文</chinese>
+                <foo><bar>spam</bar></foo>
+            </root>
+        '''
         doc = compat_etree_fromstring(xml.encode('utf-8'))
         self.assertTrue(isinstance(doc.attrib['foo'], compat_str))
         self.assertTrue(isinstance(doc.attrib['spam'], compat_str))
+        self.assertTrue(isinstance(doc.find('normal').text, compat_str))
+        self.assertTrue(isinstance(doc.find('chinese').text, compat_str))
+        self.assertTrue(isinstance(doc.find('foo/bar').text, compat_str))
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index f39d4e9a9e..2d43ec852c 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -216,9 +216,19 @@ def data_open(self, req):
 if sys.version_info[0] >= 3:
     compat_etree_fromstring = xml.etree.ElementTree.fromstring
 else:
-    # on python 2.x the the attributes of a node aren't always unicode objects
+    # on python 2.x the attributes and text of a node aren't always unicode
+    # objects
     etree = xml.etree.ElementTree
 
+    try:
+        _etree_iter = etree.Element.iter
+    except AttributeError:  # Python <=2.6
+        def _etree_iter(root):
+            for el in root.findall('*'):
+                yield el
+                for sub in _etree_iter(el):
+                    yield sub
+
     # on 2.6 XML doesn't have a parser argument, function copied from CPython
     # 2.7 source
     def _XML(text, parser=None):
@@ -235,7 +245,11 @@ def _element_factory(*args, **kwargs):
         return el
 
     def compat_etree_fromstring(text):
-        return _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        doc = _XML(text, parser=etree.XMLParser(target=etree.TreeBuilder(element_factory=_element_factory)))
+        for el in _etree_iter(doc):
+            if el.text is not None and isinstance(el.text, bytes):
+                el.text = el.text.decode('utf-8')
+        return doc
 
 try:
     from urllib.parse import parse_qs as compat_parse_qs
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
index 6f465789b4..73be6d2040 100644
--- a/youtube_dl/extractor/ard.py
+++ b/youtube_dl/extractor/ard.py
@@ -14,8 +14,8 @@
     parse_duration,
     unified_strdate,
     xpath_text,
-    parse_xml,
 )
+from ..compat import compat_etree_fromstring
 
 
 class ARDMediathekIE(InfoExtractor):
@@ -161,7 +161,7 @@ def _real_extract(self, url):
             raise ExtractorError('This program is only suitable for those aged 12 and older. Video %s is therefore only available between 20 pm and 6 am.' % video_id, expected=True)
 
         if re.search(r'[\?&]rss($|[=&])', url):
-            doc = parse_xml(webpage)
+            doc = compat_etree_fromstring(webpage.encode('utf-8'))
             if doc.tag == 'rss':
                 return GenericIE()._extract_rss(url, video_id, doc)
 
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index ca5fbafb26..1de96b268c 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -9,6 +9,7 @@
 from .common import InfoExtractor
 from .youtube import YoutubeIE
 from ..compat import (
+    compat_etree_fromstring,
     compat_urllib_parse_unquote,
     compat_urllib_request,
     compat_urlparse,
@@ -21,7 +22,6 @@
     HEADRequest,
     is_html,
     orderedSet,
-    parse_xml,
     smuggle_url,
     unescapeHTML,
     unified_strdate,
@@ -1237,7 +1237,7 @@ def _real_extract(self, url):
 
         # Is it an RSS feed, a SMIL file or a XSPF playlist?
         try:
-            doc = parse_xml(webpage)
+            doc = compat_etree_fromstring(webpage.encode('utf-8'))
             if doc.tag == 'rss':
                 return self._extract_rss(url, video_id, doc)
             elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7d846d6808..c761ea22a4 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -1652,29 +1652,6 @@ def encode_dict(d, encoding='utf-8'):
     return dict((k.encode(encoding), v.encode(encoding)) for k, v in d.items())
 
 
-try:
-    etree_iter = xml.etree.ElementTree.Element.iter
-except AttributeError:  # Python <=2.6
-    etree_iter = lambda n: n.findall('.//*')
-
-
-def parse_xml(s):
-    class TreeBuilder(xml.etree.ElementTree.TreeBuilder):
-        def doctype(self, name, pubid, system):
-            pass  # Ignore doctypes
-
-    parser = xml.etree.ElementTree.XMLParser(target=TreeBuilder())
-    kwargs = {'parser': parser} if sys.version_info >= (2, 7) else {}
-    tree = xml.etree.ElementTree.XML(s.encode('utf-8'), **kwargs)
-    # Fix up XML parser in Python 2.x
-    if sys.version_info < (3, 0):
-        for n in etree_iter(tree):
-            if n.text is not None:
-                if not isinstance(n.text, compat_str):
-                    n.text = n.text.decode('utf-8')
-    return tree
-
-
 US_RATINGS = {
     'G': 0,
     'PG': 10,

From ae37338e681319a28d98dc551253d9fa1830969a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?=
 <jaime.marquinez.ferrandiz@gmail.com>
Date: Thu, 29 Oct 2015 13:58:40 +0100
Subject: [PATCH 4/4] [compat] compat_etree_fromstring: clarify comment

---
 youtube_dl/compat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
index 2d43ec852c..a3e85264ac 100644
--- a/youtube_dl/compat.py
+++ b/youtube_dl/compat.py
@@ -216,8 +216,8 @@ def data_open(self, req):
 if sys.version_info[0] >= 3:
     compat_etree_fromstring = xml.etree.ElementTree.fromstring
 else:
-    # on python 2.x the attributes and text of a node aren't always unicode
-    # objects
+    # python 2.x tries to encode unicode strings with ascii (see the
+    # XMLParser._fixtext method)
     etree = xml.etree.ElementTree
 
     try: