mirror of https://github.com/python/cpython.git
gh-138907: Support RFC 9309 in robotparser (GH-138908)
* empty lines are always ignored instead of separating groups
* the "user-agent" line after a rule starts a new group
* groups matching the same user agent are now merged
* the rule with the longest match wins instead of the first matching rule
* in case of equal matches, the “Allow” rule wins over “Disallow”
* special characters “$” and “*” are now supported in rules
* prefer full match for user agent
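
As a quick illustration of the new matching rules through the public API (a sketch; the agent name and URLs below are made up):

from urllib import robotparser

rp = robotparser.RobotFileParser()
rp.parse("""\
User-agent: *
Allow: /example/page/
Disallow: *.gif$
""".splitlines())

# Longest match wins: the Disallow wildcard matches the whole 21-character
# path, which beats the 14-character Allow prefix.
print(rp.can_fetch("somebot", "https://example.com/example/page/pic.gif"))     # False

# Only the Allow rule matches here.
print(rp.can_fetch("somebot", "https://example.com/example/page/index.html"))  # True

Under the previous first-match behavior the first call would have returned True, because the Allow rule appears before the Disallow rule.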
Doc/library/urllib.robotparser.rst

@@ -18,7 +18,7 @@
 This module provides a single class, :class:`RobotFileParser`, which answers
 questions about whether or not a particular user agent can fetch a URL on the
 website that published the :file:`robots.txt` file. For more details on the
-structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
+structure of :file:`robots.txt` files, see :rfc:`9309`.


 .. class:: RobotFileParser(url='')
Lib/test/test_robotparser.py (+301 -39)
@@ -15,14 +15,18 @@ class BaseRobotTest:
     good = []
     bad = []
     site_maps = None
+    expected_output = None

     def __init_subclass__(cls):
         super().__init_subclass__()
         # Remove tests that do nothing.
-        if not cls.good:
-            cls.test_good_urls = None
-        if not cls.bad:
-            cls.test_bad_urls = None
+        if issubclass(cls, unittest.TestCase):
+            if not cls.good:
+                cls.test_good_urls = None
+            if not cls.bad:
+                cls.test_bad_urls = None
+            if cls.expected_output is None:
+                cls.test_string_formatting = None

     def setUp(self):
         lines = io.StringIO(self.robots_txt).readlines()
@@ -50,6 +54,8 @@ class BaseRobotTest:
     def test_site_maps(self):
         self.assertEqual(self.parser.site_maps(), self.site_maps)

+    def test_string_formatting(self):
+        self.assertEqual(str(self.parser), self.expected_output)

 class UserAgentWildcardTest(BaseRobotTest, unittest.TestCase):
     robots_txt = """\
@@ -61,6 +67,56 @@ Disallow: /foo.html
     good = ['/', '/test.html']
     bad = ['/cyberworld/map/index.html', '/tmp/xxx', '/foo.html']


+class SimpleExampleTest(BaseRobotTest, unittest.TestCase):
+    # Example from RFC 9309, section 5.1.
+    robots_txt = """\
+User-Agent: *
+Disallow: *.gif$
+Disallow: /example/
+Allow: /publications/
+
+User-Agent: foobot
+Disallow:/
+Allow:/example/page.html
+Allow:/example/allowed.gif
+
+User-Agent: barbot
+User-Agent: bazbot
+Disallow: /example/page.html
+
+User-Agent: quxbot
+"""
+    good = [
+        '/', '/publications/',
+        ('foobot', '/example/page.html'), ('foobot', '/example/allowed.gif'),
+        ('barbot', '/'), ('barbot', '/example/'),
+        ('barbot', '/example/allowed.gif'),
+        ('barbot', '/example/disallowed.gif'),
+        ('barbot', '/publications/'),
+        ('barbot', '/publications/allowed.gif'),
+        ('bazbot', '/'), ('bazbot', '/example/'),
+        ('bazbot', '/example/allowed.gif'),
+        ('bazbot', '/example/disallowed.gif'),
+        ('bazbot', '/publications/'),
+        ('bazbot', '/publications/allowed.gif'),
+        ('quxbot', '/'), ('quxbot', '/example/'),
+        ('quxbot', '/example/page.html'), ('quxbot', '/example/allowed.gif'),
+        ('quxbot', '/example/disallowed.gif'),
+        ('quxbot', '/publications/'),
+        ('quxbot', '/publications/allowed.gif'),
+    ]
+    bad = [
+        '/example/', '/example/page.html', '/example/allowed.gif',
+        '/example/disallowed.gif',
+        '/publications/allowed.gif',
+        ('foobot', '/'), ('foobot', '/example/'),
+        ('foobot', '/example/disallowed.gif'),
+        ('foobot', '/publications/'),
+        ('foobot', '/publications/allowed.gif'),
+        ('barbot', '/example/page.html'),
+        ('bazbot', '/example/page.html'),
+    ]
+
+
 class CrawlDelayAndCustomAgentTest(BaseRobotTest, unittest.TestCase):
     robots_txt = """\
@@ -102,7 +158,7 @@ class RejectAllRobotsTest(BaseRobotTest, unittest.TestCase):
 User-agent: *
 Disallow: /
 """
-    good = []
+    good = ['/robots.txt']
     bad = ['/cyberworld/map/index.html', '/', '/tmp/']

@@ -137,6 +193,7 @@ class BaseRequestRateTest(BaseRobotTest):
 class EmptyFileTest(BaseRequestRateTest, unittest.TestCase):
     robots_txt = ''
     good = ['/foo']
+    expected_output = ''


 class CrawlDelayAndRequestRateTest(BaseRequestRateTest, unittest.TestCase):
@@ -203,35 +260,209 @@ Request-rate: whale/banana


 class UserAgentOrderingTest(BaseRobotTest, unittest.TestCase):
-    # the order of User-agent should be correct. note
-    # that this file is incorrect because "Googlebot" is a
-    # substring of "Googlebot-Mobile"
+    # the order of User-agent should not matter
     robots_txt = """\
 User-agent: Googlebot
 Disallow: /
+Allow: /folder1/

 User-agent: Googlebot-Mobile
 Allow: /
+Disallow: /folder1/
 """
     agent = 'Googlebot'
     bad = ['/something.jpg']
+    good = ['/folder1/myfile.html']


 class UserAgentGoogleMobileTest(UserAgentOrderingTest):
-    agent = 'Googlebot-Mobile'
+    agent = 'Googlebot-mobile'
+    bad = ['/folder1/myfile.html']
+    good = ['/something.jpg']


-class GoogleURLOrderingTest(BaseRobotTest, unittest.TestCase):
-    # Google also got the order wrong. You need
-    # to specify the URLs from more specific to more general
+class LongestMatchTest(BaseRobotTest, unittest.TestCase):
+    # Based on example from RFC 9309, section 5.2.
     robots_txt = """\
-User-agent: Googlebot
-Allow: /folder1/myfile.html
-Disallow: /folder1/
+User-agent: *
+Allow: /example/page/
+Disallow: /example/page/disallowed.gif
+Allow: /example/
 """
-    agent = 'googlebot'
-    good = ['/folder1/myfile.html']
-    bad = ['/folder1/anotherfile.html']
+    good = ['/example/', '/example/page/']
+    bad = ['/example/page/disallowed.gif']
+
+
+class LongestMatchWildcardTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: *
+Allow: /example/page/
+Disallow: *.gif
+Allow: /example/
+"""
+    good = ['/example/', '/example/page/']
+    bad = ['/example/page/disallowed.gif', '/x.gif']
+
+
+class AllowWinsEqualMatchTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: *
+Disallow: /spam
+Allow: /spam
+Disallow: /spam
+"""
+    good = ['/spam', '/spam/']
+
+
+class AllowWinsEqualFullMatchTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: *
+Disallow: /spam
+Allow: /spam$
+Disallow: /spam
+Disallow: /eggs$
+Allow: /eggs
+Disallow: /eggs$
+"""
+    good = ['/spam', '/eggs', '/eggs/']
+    bad = ['/spam/']
+
+
+class AllowWinsEqualMatchWildcardTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: *
+Disallow: /spam
+Allow: *am
+Disallow: /spam
+Disallow: *gs
+Allow: /eggs
+Disallow: *gs
+"""
+    good = ['/spam', '/eggs', '/spam/', '/eggs/']
+
+
+class MergeGroupsTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: spambot
+Disallow: /some/path
+
+User-agent: spambot
+Disallow: /another/path
+"""
+    agent = 'spambot'
+    bad = ['/some/path', '/another/path']
+
+
+class UserAgentStartsGroupTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: spambot
+Disallow: /some/path
+User-agent: eggsbot
+Disallow: /another/path
+"""
+    good = [('spambot', '/'), ('spambot', '/another/path'),
+            ('eggsbot', '/'), ('eggsbot', '/some/path')]
+    bad = [('spambot', '/some/path'), ('eggsbot', '/another/path')]
+    expected_output = """\
+User-agent: spambot
+Disallow: /some/path
+
+User-agent: eggsbot
+Disallow: /another/path\
+"""
+
+
+class IgnoreEmptyLinesTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: spambot
+
+User-agent: eggsbot
+Disallow: /some/path
+
+Disallow: /another/path
+"""
+    good = [('spambot', '/'), ('eggsbot', '/')]
+    bad = [
+        ('spambot', '/some/path'), ('spambot', '/another/path'),
+        ('eggsbot', '/some/path'), ('eggsbot', '/another/path'),
+    ]
+    expected_output = """\
+User-agent: spambot
+User-agent: eggsbot
+Disallow: /some/path
+Disallow: /another/path\
+"""
+
+
+class IgnoreRulesWithoutUserAgentTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+    good = ['/', '/some/path']
+    bad = ['/another/path']
+    expected_output = """\
+User-agent: *
+Disallow: /another/path\
+"""
+
+
+class EmptyGroupTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = """\
+User-agent: *
+Disallow: /some/path
+
+User-agent: spambot
+"""
+    agent = 'spambot'
+    good = ['/', '/some/path']
+    expected_output = """\
+User-agent: *
+Disallow: /some/path
+
+User-agent: spambot
+Allow:\
+"""
+
+
+class WeirdPathTest(BaseRobotTest, unittest.TestCase):
+    robots_txt = f"""\
+User-agent: *
+Disallow: /a$$$
+Disallow: /b$z
+Disallow: /c***
+Disallow: /d***z
+Disallow: /e*$**$$
+Disallow: /f*$**$$z
+Disallow: /g$*$$**
+Disallow: /h$*$$**z
+"""
+    good = ['/ax', '/a$$', '/b', '/bz', '/b$z', '/d', '/f', '/fz',
+            '/f$$$z', '/fx$y$$z', '/gx', '/g$$$', '/g$x$$y', '/h', '/hz',
+            '/h$$$z', '/h$x$$yz']
+    bad = ['/a', '/c', '/cxy', '/dz', '/dxyz', '/dxzy', '/e', '/exy',
+           '/e$$', '/ex$y$', '/g']
+    expected_output = """\
+User-agent: *
+Disallow: /a$
+Disallow: /c*
+Disallow: /d*z
+Disallow: /e*$
+Disallow: /g$\
+"""
+
+
+class PathWithManyWildcardsTest(BaseRobotTest, unittest.TestCase):
+    # This test would take many years if use naive translation to regular
+    # expression (* -> .*).
+    N = 50
+    robots_txt = f"""\
+User-agent: *
+Disallow: /{'*a'*N}*b
+"""
+    good = ['/' + 'a'*N + 'a']
+    bad = ['/' + 'a'*N + 'b']


 class DisallowQueryStringTest(BaseRobotTest, unittest.TestCase):
@@ -245,25 +476,13 @@ Disallow: /yet/one/path?name=value&more
     good = ['/some/path', '/some/path?',
             '/some/path%3Fname=value', '/some/path?name%3Dvalue',
             '/another/path', '/another/path%3F',
-            '/yet/one/path?name=value%26more']
+            '/yet/one/path?name=value%26more',
+            '/some/pathxname=value']
     bad = ['/some/path?name=value'
            '/another/path?', '/another/path?name=value',
            '/yet/one/path?name=value&more']


-class UseFirstUserAgentWildcardTest(BaseRobotTest, unittest.TestCase):
-    # obey first * entry (#4108)
-    robots_txt = """\
-User-agent: *
-Disallow: /some/path
-
-User-agent: *
-Disallow: /another/path
-"""
-    good = ['/another/path']
-    bad = ['/some/path']
-
-
 class PercentEncodingTest(BaseRobotTest, unittest.TestCase):
     robots_txt = """\
 User-agent: *
@@ -365,17 +584,60 @@ Disallow: /some/path
 """

     expected_output = """\
-User-agent: cybermapper
-Disallow: /some/path
-
 User-agent: *
 Crawl-delay: 1
 Request-rate: 3/15
-Disallow: /cyberworld/map/\
+Disallow: /cyberworld/map/
+
+User-agent: cybermapper
+Disallow: /some/path\
 """

-    def test_string_formatting(self):
-        self.assertEqual(str(self.parser), self.expected_output)
+
+class ConstructedStringFormattingTest(unittest.TestCase):
+    def test_empty(self):
+        parser = urllib.robotparser.RobotFileParser()
+        self.assertEqual(str(parser), '')
+
+    def test_group_without_rules(self):
+        parser = urllib.robotparser.RobotFileParser()
+        entry = urllib.robotparser.Entry()
+        entry.useragents = ['spambot']
+        parser._add_entry(entry)
+        entry = urllib.robotparser.Entry()
+        entry.useragents = ['hambot']
+        entry.rulelines = [urllib.robotparser.RuleLine('/ham', False)]
+        parser._add_entry(entry)
+        entry = urllib.robotparser.Entry()
+        entry.useragents = ['eggsbot']
+        parser._add_entry(entry)
+        self.assertEqual(str(parser), """\
+User-agent: spambot
+Allow:
+
+User-agent: hambot
+Disallow: /ham
+
+User-agent: eggsbot
+Allow:\
+""")
+
+    def test_group_without_user_agent(self):
+        parser = urllib.robotparser.RobotFileParser()
+        entry = urllib.robotparser.Entry()
+        entry.rulelines = [urllib.robotparser.RuleLine('/ham', False)]
+        parser._add_entry(entry)
+        entry = urllib.robotparser.Entry()
+        entry.useragents = ['spambot']
+        entry.rulelines = [urllib.robotparser.RuleLine('/spam', False)]
+        parser._add_entry(entry)
+        entry = urllib.robotparser.Entry()
+        entry.rulelines = [urllib.robotparser.RuleLine('/eggs', False)]
+        parser._add_entry(entry)
+        self.assertEqual(str(parser), """\
+User-agent: spambot
+Disallow: /spam\
+""")


 @unittest.skipUnless(
@@ -495,7 +757,7 @@ class NetworkTestCase(unittest.TestCase):
     def test_can_fetch(self):
         self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere')))
         self.assertFalse(self.parser.can_fetch('Nutch', self.base_url))
-        self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian')))
+        self.assertTrue(self.parser.can_fetch('Nutch', self.url('brian')))
         self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats')))
         self.assertFalse(self.parser.can_fetch('*', self.url('webstats')))
         self.assertTrue(self.parser.can_fetch('*', self.base_url))
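
The merged-group behavior pinned down by MergeGroupsTest above can also be checked interactively through the public API (host name made up):

from urllib import robotparser

rp = robotparser.RobotFileParser()
rp.parse("""\
User-agent: spambot
Disallow: /some/path

User-agent: spambot
Disallow: /another/path
""".splitlines())

# RFC 9309 merges the two "spambot" groups, so both rules apply; the old
# parser honored only the first matching group.
print(rp.can_fetch("spambot", "https://example.com/some/path"))     # False
print(rp.can_fetch("spambot", "https://example.com/another/path"))  # False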
Lib/urllib/robotparser.py (+138 -71)
@@ -7,7 +7,7 @@
     2) PSF license for Python 2.2

     The robots.txt Exclusion Protocol is implemented as specified in
-    http://www.robotstxt.org/norobots-rfc.txt
+    RFC 9309
 """

 import collections
@@ -21,19 +21,6 @@ __all__ = ["RobotFileParser"]
 RequestRate = collections.namedtuple("RequestRate", "requests seconds")


-def normalize(path):
-    unquoted = urllib.parse.unquote(path, errors='surrogateescape')
-    return urllib.parse.quote(unquoted, errors='surrogateescape')
-
-
-def normalize_path(path):
-    path, sep, query = path.partition('?')
-    path = normalize(path)
-    if sep:
-        query = re.sub(r'[^=&]+', lambda m: normalize(m[0]), query)
-        path += '?' + query
-    return path
-
-
 class RobotFileParser:
     """ This class provides a set of methods to read, parse and answer
     questions about a single robots.txt file.
@@ -42,6 +29,7 @@ class RobotFileParser:

     def __init__(self, url=''):
         self.entries = []
+        self.groups = {}
         self.sitemaps = []
         self.default_entry = None
         self.disallow_all = False
@@ -86,13 +74,13 @@ class RobotFileParser:
         self.parse(raw.decode("utf-8", "surrogateescape").splitlines())

     def _add_entry(self, entry):
-        if "*" in entry.useragents:
-            # the default entry is considered last
-            if self.default_entry is None:
-                # the first default entry wins
-                self.default_entry = entry
-        else:
-            self.entries.append(entry)
+        self.entries.append(entry)
+        for agent in entry.useragents:
+            agent = agent.lower()
+            if agent not in self.groups:
+                self.groups[agent] = entry
+            else:
+                self.groups[agent] = merge_entries(self.groups[agent], entry)

     def parse(self, lines):
         """Parse the input lines from a robots.txt file.
@@ -100,6 +88,7 @@ class RobotFileParser:
         We allow that a user-agent: line is not preceded by
         one or more blank lines.
        """
+        entries = []
         # states:
         #   0: start state
         #   1: saw user-agent line
@@ -109,14 +98,6 @@ class RobotFileParser:

         self.modified()
         for line in lines:
-            if not line:
-                if state == 1:
-                    entry = Entry()
-                    state = 0
-                elif state == 2:
-                    self._add_entry(entry)
-                    entry = Entry()
-                    state = 0
             # remove optional comment and strip line
             i = line.find('#')
             if i >= 0:
@@ -132,16 +113,23 @@ class RobotFileParser:
                 if state == 2:
                     self._add_entry(entry)
                     entry = Entry()
-                entry.useragents.append(line[1])
+                product_token = line[1]
+                entry.useragents.append(product_token)
                 state = 1
             elif line[0] == "disallow":
                 if state != 0:
-                    entry.rulelines.append(RuleLine(line[1], False))
                     state = 2
+                    try:
+                        entry.rulelines.append(RuleLine(line[1], False))
+                    except ValueError:
+                        pass
             elif line[0] == "allow":
                 if state != 0:
-                    entry.rulelines.append(RuleLine(line[1], True))
                     state = 2
+                    try:
+                        entry.rulelines.append(RuleLine(line[1], True))
+                    except ValueError:
+                        pass
             elif line[0] == "crawl-delay":
                 if state != 0:
                     # before trying to convert to int we need to make
@@ -164,9 +152,18 @@ class RobotFileParser:
                 # so it doesn't matter where you place it in your file."
                 # Therefore we do not change the state of the parser.
                 self.sitemaps.append(line[1])
-        if state == 2:
+        if state != 0:
             self._add_entry(entry)

+    def _find_entry(self, useragent):
+        entry = self.groups.get(useragent.lower())
+        if entry is not None:
+            return entry
+        for entry in self.groups.values():
+            if entry.applies_to(useragent):
+                return entry
+        return self.groups.get('*')
+
     def can_fetch(self, useragent, url):
         """using the parsed robots.txt decide if useragent can fetch url"""
         if self.disallow_all:
@@ -179,43 +176,36 @@ class RobotFileParser:
         # calls can_fetch() before calling read().
         if not self.last_checked:
             return False
-        # search for given user agent matches
-        # the first match counts
         # TODO: The private API is used in order to preserve an empty query.
         # This is temporary until the public API starts supporting this feature.
         parsed_url = urllib.parse._urlsplit(url, '')
         url = urllib.parse._urlunsplit(None, None, *parsed_url[2:])
-        url = normalize_path(url)
+        url = normalize_uri(url)
         if not url:
             url = "/"
-        for entry in self.entries:
-            if entry.applies_to(useragent):
-                return entry.allowance(url)
-        # try the default entry last
-        if self.default_entry:
-            return self.default_entry.allowance(url)
-        # agent not found ==> access granted
-        return True
+        if url == '/robots.txt':
+            # The /robots.txt URI is implicitly allowed.
+            return True
+        entry = self._find_entry(useragent)
+        if entry is None:
+            return True
+        return entry.allowance(url)

     def crawl_delay(self, useragent):
         if not self.mtime():
             return None
-        for entry in self.entries:
-            if entry.applies_to(useragent):
-                return entry.delay
-        if self.default_entry:
-            return self.default_entry.delay
-        return None
+        entry = self._find_entry(useragent)
+        if entry is None:
+            return None
+        return entry.delay

     def request_rate(self, useragent):
         if not self.mtime():
             return None
-        for entry in self.entries:
-            if entry.applies_to(useragent):
-                return entry.req_rate
-        if self.default_entry:
-            return self.default_entry.req_rate
-        return None
+        entry = self._find_entry(useragent)
+        if entry is None:
+            return None
+        return entry.req_rate

     def site_maps(self):
         if not self.sitemaps:
@@ -226,7 +216,7 @@ class RobotFileParser:
         entries = self.entries
         if self.default_entry is not None:
             entries = entries + [self.default_entry]
-        return '\n\n'.join(map(str, entries))
+        return '\n\n'.join(filter(None, map(str, entries)))


 class RuleLine:
     """A rule line is a single "Allow:" (allowance==True) or "Disallow:"
@@ -235,14 +225,42 @@ class RuleLine:
         if path == '' and not allowance:
             # an empty value means allow all
             allowance = True
-        self.path = normalize_path(path)
+        path = re.sub(r'[*]{2,}', '*', path)
+        path = re.sub(r'[$][$*]+', '$', path)
+        path = normalize_pattern(path)
+        self.fullmatch = path.endswith('$')
+        path = path.rstrip('$')
+        if '$' in path:
+            raise ValueError('$ not at the end of path')
+        self.matcher = None
+        if '*' in path:
+            pattern = re.compile(translate_pattern(path), re.DOTALL)
+            if self.fullmatch:
+                self.matcher = pattern.fullmatch
+            else:
+                self.matcher = pattern.match
+        self.path = path
         self.allowance = allowance

     def applies_to(self, filename):
-        return self.path == "*" or filename.startswith(self.path)
+        # If the filename matches the rule, return the matching length plus 1.
+        # If it does not match, return 0.
+        if self.matcher is not None:
+            m = self.matcher(filename)
+            if m:
+                return m.end() + 1
+        else:
+            if self.fullmatch:
+                if filename == self.path:
+                    return len(self.path) + 1
+            else:
+                if filename.startswith(self.path):
+                    return len(self.path) + 1
+        return 0

     def __str__(self):
-        return ("Allow" if self.allowance else "Disallow") + ": " + self.path
+        return (("Allow" if self.allowance else "Disallow") + ": " + self.path
+                + ('$' if self.fullmatch else ''))


 class Entry:
@@ -254,6 +272,8 @@ class Entry:
         self.req_rate = None

     def __str__(self):
+        if not self.useragents:
+            return ''
         ret = []
         for agent in self.useragents:
             ret.append(f"User-agent: {agent}")
@@ -262,27 +282,74 @@ class Entry:
         if self.req_rate is not None:
             rate = self.req_rate
             ret.append(f"Request-rate: {rate.requests}/{rate.seconds}")
-        ret.extend(map(str, self.rulelines))
+        if self.rulelines:
+            ret.extend(map(str, self.rulelines))
+        else:
+            ret.append("Allow:")
         return '\n'.join(ret)

     def applies_to(self, useragent):
         """check if this entry applies to the specified agent"""
+        if useragent is None:
+            return '*' in self.useragents
         # split the name token and make it lower case
         useragent = useragent.split("/")[0].lower()
         for agent in self.useragents:
-            if agent == '*':
-                # we have the catch-all agent
-                return True
-            agent = agent.lower()
-            if agent in useragent:
-                return True
+            if agent != '*':
+                agent = agent.lower()
+                if agent in useragent:
+                    return True
         return False

     def allowance(self, filename):
         """Preconditions:
         - our agent applies to this entry
-        - filename is URL encoded"""
+        - filename is URL encoded
+        """
+        best_match = -1
+        allowance = True
         for line in self.rulelines:
-            if line.applies_to(filename):
-                return line.allowance
-        return True
+            m = line.applies_to(filename)
+            if m:
+                if m > best_match:
+                    best_match = m
+                    allowance = line.allowance
+                elif m == best_match and not allowance:
+                    allowance = line.allowance
+        return allowance
+
+
+def normalize(path):
+    unquoted = urllib.parse.unquote(path, errors='surrogateescape')
+    return urllib.parse.quote(unquoted, errors='surrogateescape')
+
+
+def normalize_uri(path):
+    path, sep, query = path.partition('?')
+    path = normalize(path)
+    if sep:
+        query = re.sub(r'[^=&]+', lambda m: normalize(m[0]), query)
+        path += '?' + query
+    return path
+
+
+def normalize_pattern(path):
+    path, sep, query = path.partition('?')
+    path = re.sub(r'[^*$]+', lambda m: normalize(m[0]), path)
+    if sep:
+        query = re.sub(r'[^=&*$]+', lambda m: normalize(m[0]), query)
+        path += '?' + query
+    return path
+
+
+def translate_pattern(path):
+    parts = list(map(re.escape, path.split('*')))
+    for i in range(1, len(parts)-1):
+        parts[i] = f'(?>.*?{parts[i]})'
+    parts[-1] = f'.*{parts[-1]}'
+    return ''.join(parts)
+
+
+def merge_entries(e1, e2):
+    entry = Entry()
+    entry.useragents = list(filter(set(e2.useragents).__contains__, e1.useragents))
+    entry.rulelines = e1.rulelines + e2.rulelines
+    entry.delay = e1.delay if e2.delay is None else e2.delay
+    entry.req_rate = e1.req_rate if e2.req_rate is None else e2.req_rate
+    return entry
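
A note on translate_pattern: inner "*" segments are compiled to atomic groups "(?>.*?...)" instead of a naive ".*", which keeps matching fast on adversarial patterns (this is what PathWithManyWildcardsTest exercises). A small self-contained demonstration of the same idea, assuming Python 3.11+ where the re module supports atomic groups:

import re

N = 25
pattern_text = '/' + '*a' * N + '*b'   # a robots.txt-style path pattern

# A naive translation ('*' -> '.*') of this pattern explores on the order of
# 2**N backtracking paths against '/aaa...a'; do not try to run that version.

# Translation in the style of the patch: once an atomic group has matched,
# the regex engine never re-enters it to try alternatives.
parts = list(map(re.escape, pattern_text.split('*')))
for i in range(1, len(parts) - 1):
    parts[i] = f'(?>.*?{parts[i]})'
parts[-1] = f'.*{parts[-1]}'
regex = re.compile(''.join(parts), re.DOTALL)

print(regex.match('/' + 'a' * N + 'a'))  # None, returned immediately
print(regex.match('/' + 'a' * N + 'b'))  # a match object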
Misc/NEWS.d (new news entry)

@@ -0,0 +1 @@
+Support :rfc:`9309` in :mod:`urllib.robotparser`.