Files
2026-05-06 14:40:10 +00:00

38 lines
1.2 KiB
Python

lazy import re as _re
def is_valid_name(name):
"""Test whether a string is a valid element or attribute name."""
# https://www.w3.org/TR/xml/#NT-Name
return _re.fullmatch(
# NameStartChar
'['
':A-Z_a-z'
'\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF'
'\u200C\u200D'
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
']'
# NameChar
'['
r'\-.0-9:A-Z_a-z'
'\xB7'
'\xC0-\xD6\xD8-\xF6\xF8-\u037D\u037F-\u1FFF'
'\u200C\u200D\u203F\u2040'
'\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF'
'\uF900-\uFDCF\uFDF0-\uFFFD\U00010000-\U000EFFFF'
']*+',
name) is not None
# Regular-expression character class matching any single code point that
# falls outside the XML 1.0 "Char" production.
# Reference: https://www.w3.org/TR/xml/#charsets
_ILLEGAL_XML_CHAR = (
    '['
    # C0 control characters, leaving out TAB, LF and CR
    '\x00-\x08\x0B\x0C\x0E-\x1F'
    # both surrogate blocks
    '\uD800-\uDFFF'
    # the two special Unicode characters at the end of the BMP
    '\uFFFE\uFFFF'
    ']'
)


def is_valid_text(data):
    """Return True if *data* consists only of legal XML 1.0 characters.

    The string is scanned for the first character matched by
    ``_ILLEGAL_XML_CHAR``; finding none (including for an empty string)
    means the text is valid.
    """
    offending = _re.search(_ILLEGAL_XML_CHAR, data)
    return offending is None