InvalidLinkBear: XML Namespaces

satwikkansal · satwikkansal · commit 7f99537f706d · 2017-04-05T16:31:29.000+05:30
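Report links detected as XML namespaces with INFO severity when they do not return a 2xx status code, instead of running them through the regular broken-link checks. Related to #1239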
diff --git a/bears/general/InvalidLinkBear.py b/bears/general/InvalidLinkBear.py
@@ -80,17 +80,25 @@ def extract_links_from_file(file, link_ignore_regex, link_ignore_list):
                                         # Unbalanced parenthesis
             (?<!\.)(?<!,)               # Exclude trailing `.` or `,` from URL
             """, re.VERBOSE)
-
+        file_context = {}
         for line_number, line in enumerate(file):
+            xmlns_regex = re.compile(r'xmlns:?\w*="(.*)"')
             for match in re.findall(regex, line):
                 link = match[0]
-                context = enum(
-                    pip_vcs_url=False)
-                if link.startswith(('hg+', 'bzr+', 'git+', 'svn+')):
-                    context.pip_vcs_url = True
+                link_context = file_context.get(link)
+                if not link_context:
+                    link_context = enum(
+                        xml_namespace=False,
+                        pip_vcs_url=False)
+                    xmlns_match = xmlns_regex.search(line)
+                    if xmlns_match and link in xmlns_match.groups():
+                        link_context.xml_namespace = True
+                    if link.startswith(('hg+', 'bzr+', 'git+', 'svn+')):
+                        link_context.pip_vcs_url = True
+                    file_context[link] = link_context
                 if not (link_ignore_regex.search(link) or
                         fnmatch(link, link_ignore_list)):
-                    yield link, line_number, context
+                    yield link, line_number, link_context
 
     def analyze_links_in_file(self, file, network_timeout, link_ignore_regex,
                               link_ignore_list):
@@ -149,7 +157,18 @@ def run(self, filename, file,
 
         for line_number, link, code, context in self.analyze_links_in_file(
                 file, network_timeout, link_ignore_regex, link_ignore_list):
-            if code is None:
+            if context.xml_namespace:
+                if code and 200 <= code < 300:
+                    pass
+                else:
+                    yield Result.from_values(
+                        origin=self,
+                        message=('XML Namespace - '
+                                 '{url}').format(url=link),
+                        file=filename,
+                        line=line_number,
+                        severity=RESULT_SEVERITY.INFO)
+            elif code is None:
                 yield Result.from_values(
                     origin=self,
                     message=('Broken link - unable to connect to '
diff --git a/tests/general/InvalidLinkBearTest.py b/tests/general/InvalidLinkBearTest.py
@@ -230,6 +230,28 @@ def test_pip_vcs_url(self):
         for line in brokenlink_at_hash.splitlines():
             self.assertResult(invalid_file=[line])
 
+    def test_xml_namespaces(self):
+        valid_file = """
+        #Namespace and also a valid link
+        <ruleset name="test" xmlns="http://httpbin.org/status/200">
+
+        # xml where xmlns: and xsi:schema are valid links
+        <ruleset name="test" xmlns="http://xmlnamespace.org/status/200"
+        xmlns:xsi="http://xmlnamespace.org/status/200"
+        xsi:schemaLocation="http://xmlnamespace.org/status/200">
+        """.splitlines()
+
+        self.assertResult(valid_file=valid_file)
+
+        invalid_file = """
+        <ruleset name="test" xmlns="http://this.isa.namespace/ruleset/7.0.0"
+        xmlns:xsi="http://this.is.another/kindof/namespace"
+        xsi:schemaLocation="http://this.namespace.dosent/exists/7.0.0"
+        xsi:schemaLocation="http://httpbin.com/404">""".splitlines()
+
+        for line in invalid_file[1:]:
+            self.assertResult(invalid_file=[line])
+
     def test_links_to_ignore(self):
         valid_file = """http://httpbin.org/status/200
         http://httpbin.org/status/201