Expensify · marcaaron · Mar 19, 2024 · Mar 12, 2024 · Mar 19, 2024 · Mar 19, 2024
@@ -1779,14 +1779,14 @@ describe('when should keep raw input flag is enabled', () => {
         });
     });
 });
-    
+
 test('Test code fence within inline code', () => {
     let testString = 'Hello world `(```test```)` Hello world';
     expect(parser.replace(testString)).toBe('Hello world &#x60;(<pre>test</pre>)&#x60; Hello world');
-    
+
     testString = 'Hello world `(```test\ntest```)` Hello world';
     expect(parser.replace(testString)).toBe('Hello world &#x60;(<pre>test<br />test</pre>)&#x60; Hello world');
-    
+
     testString = 'Hello world ```(`test`)``` Hello world';
     expect(parser.replace(testString)).toBe('Hello world <pre>(&#x60;test&#x60;)</pre> Hello world');
 
@@ -1893,12 +1893,9 @@ describe('Image markdown conversion to html tag', () => {
         expect(parser.replace(testString)).toBe(resultString);
     });
 
-    // Currently any markdown used inside the square brackets is converted to html string in the alt attribute
-    // The attributes should only contain plain text, but it doesn't seem possible to convert markdown to plain text
-    // or let the parser know not to convert markdown to html for html attributes
-    xtest('Image with alt text containing markdown', () => {
-        const testString = '![*bold* _italic_ ~strike~](https://example.com/image.png)';
-        const resultString = '<img src="https://example.com/image.png" alt="*bold* _italic_ ~strike~" />';
+    test('Image with alt text containing markdown', () => { // emphasis markers and brackets in the alt text are expected as HTML entities, not tags
+        const testString = '![# fake-heading *bold* _italic_ ~strike~ [:-)]](https://example.com/image.png)';
+        const resultString = '<img src="https://example.com/image.png" alt="# fake-heading &ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126; &lbrack;:-)&rbrack;" />';
         expect(parser.replace(testString)).toBe(resultString);
     });
 
@@ -1934,4 +1931,10 @@ describe('Image markdown conversion to html tag', () => {
         const resultString = '<img src=\"https://example.com/image.png\" alt=\"test&quot; onerror=&quot;alert(&#x27;xss&#x27;)\" />';
         expect(parser.replace(testString)).toBe(resultString);
     });
+
+    test('No html inside the src attribute', () => {
+        const testString = '![`code`](https://example.com/image.png)';
+        const resultString = '<img src="https://example.com/image.png" alt="<code>code</code>" />'; // src holds the bare URL; the inline code in the alt text is still rendered as <code>
+        expect(parser.replace(testString)).toBe(resultString);
+    });
 });
@@ -769,4 +769,10 @@ describe('Image tag conversion to markdown', () => {
         const resultString = '![https://example.com/image.png](https://example.com/image.png)';
         expect(parser.htmlToMarkdown(testString)).toBe(resultString);
    });
+
+    test('Image with alt text containing escaped markdown', () => {
+        const imgWithEscapedAlt = '<img src="https://example.com/image.png" alt="&ast;bold&ast; &lowbar;italic&lowbar; &#126;strike&#126;" />';
+        const expectedMarkdown = '![*bold* _italic_ ~strike~](https://example.com/image.png)'; // entities in the alt text decode back to the raw markdown characters
+        expect(parser.htmlToMarkdown(imgWithEscapedAlt)).toBe(expectedMarkdown);
+    });
 });
@@ -113,12 +113,13 @@
              * Converts markdown style images to img tags e.g. ![Expensify](https://www.expensify.com/attachment.png)
              * We need to convert before linking rules since they will not try to create a link from an existing img
              * tag.
+             * The alt attribute is additionally sanitized so that later rules do not parse its content into HTML.
              */
             {
                 name: 'image',
                 regex: MARKDOWN_IMAGE_REGEX,
-                replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${g1}" />`,
-                rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${g1}" data-raw-href="${g2}" data-link-variant="labeled" />`
+                replacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" />`,
+                rawInputReplacement: (match, g1, g2) => `<img src="${Str.sanitizeURL(g2)}" alt="${this.escapeMarkdownEntities(g1)}" data-raw-href="${g2}" data-link-variant="labeled" />`
             },
 
             /**
@@ -152,7 +153,7 @@
             */
            {
                name: 'hereMentions',
                regex: /([a-zA-Z0-9.!$%&+/=?^`{|}_-]?)(@here)([.!$%&+/=?^`{|}_-]?)(?=\b)(?!([\w'#%+-]*@(?:[a-z\d-]+\.)+[a-z]{2,}(?:\s|$|@here))|((?:(?!<a).)+)?<\/a>|[^<]*(<\/pre>|<\/code>))/gm,
                replacement: (match, g1, g2, g3) => {
                    if (!Str.isValidMention(match)) {
                        return match;
@@ -197,7 +198,7 @@
 
                 process: (textToProcess, replacement) => {
                     const regex = new RegExp(
-                        `(?![^<]*>|[^<>]*<\\/(?!h1>))([_*~]*?)${MARKDOWN_URL_REGEX}\\1(?!((?:(?!<a).)+)?<\\/a>|[^<]*(<\\/pre>|<\\/code>))`,
+                        `(?![^<]*>|[^<>]*<\\/(?!h1>))([_*~]*?)${MARKDOWN_URL_REGEX}\\1(?!((?:(?!<a).)+)?<\\/a>|[^<]*(<\\/pre>|<\\/code>|.+\\/>))`,
                         'gi',
                     );
                     return this.modifyTextForUrlLinks(regex, textToProcess, replacement);
@@ -945,4 +946,27 @@
         const linksInNew = this.extractLinksInMarkdownComment(newComment);
         return linksInOld === undefined || linksInNew === undefined ? [] : _.difference(linksInOld, linksInNew);
     }
+
+    /**
+     * Swap markdown control characters for their HTML entities so later rules leave the text alone
+     * @param {String} text
+     * @return {String}
+     */
+    escapeMarkdownEntities(text) {
+        // HTML entity to emit for each markdown character that must not be parsed again
+        const entityByCharacter = {
+            '*': '&ast;',
+            _: '&lowbar;',
+            '{': '&lbrace;',
+            '}': '&rbrace;',
+            '[': '&lbrack;',
+            ']': '&rbrack;',
+            '~': '&#126;',
+        };
+
+        // Character class listing exactly the keys of the map above
+        const markdownCharacters = /[*_{}[\]~]/g;
+
+        return text.replace(markdownCharacters, character => entityByCharacter[character] || character);
+    }
 }