1
+ import os
2
+ import unittest
3
+ import pytest
4
+
5
+ from unittest .mock import patch , MagicMock
6
+ from contentmap .sitemap import SitemapToContentDatabase
7
+
8
+
9
class TestSitemapToContentDatabase(unittest.TestCase):
    """Exercise ``SitemapToContentDatabase.get_urls`` for URL and disk sources.

    URL-based cases patch ``contentmap.sitemap.requests.get`` so each test
    controls the response handed to the code under test; disk-based cases
    point at folders under ``fixtures/`` located next to this test module.
    """

    def create_mock_response(self, content):
        """Build a stand-in for the object returned by ``requests.get``.

        Args:
            content: Payload to expose through the mock's ``content`` attribute.

        Returns:
            A ``MagicMock`` whose ``content`` attribute is ``content``.
        """
        # NOTE(review): the sample payload passed in by these tests is a
        # ``str``; presumably the code under test accepts that -- confirm.
        mock_response = MagicMock()
        mock_response.content = content
        return mock_response

    def generate_sample_sitemap_xml(self, url):
        """Return a minimal sitemap document listing a single page.

        Args:
            url: Address placed inside the document's only ``<loc>`` element.

        Returns:
            The sitemap XML as a string.
        """
        # Keep the <loc> text free of padding so the extracted value equals
        # ``url`` exactly, whether or not the parser strips whitespace.
        return f'''
        <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
            <url>
                <loc>{url}</loc>
            </url>
        </urlset>'''

    def _fixture_dir(self, folder_name):
        """Return the path of ``fixtures/<folder_name>`` beside this module."""
        return os.path.join(os.path.dirname(__file__), 'fixtures', folder_name)

    @patch('contentmap.sitemap.requests.get')
    def test_get_urls_given_one_sitemap_url(self, mock_get):
        """A single sitemap URL yields the page URL it lists."""
        sitemap_url = 'https://example.com/sitemap.xml'
        page_url = 'https://www.example.com/docs/en/example/?topic=testing'
        mock_get.return_value = self.create_mock_response(
            self.generate_sample_sitemap_xml(page_url)
        )

        sitemap_db = SitemapToContentDatabase(
            sitemap_sources=[sitemap_url], source_type='url'
        )
        urls = sitemap_db.get_urls()

        self.assertEqual(urls, [page_url])
        mock_get.assert_called_once_with(sitemap_url)

    @patch('contentmap.sitemap.requests.get')
    def test_get_urls_given_multiple_sitemap_urls(self, mock_get):
        """Several sitemap URLs yield their page URLs in source order."""
        sitemap_urls = [
            'https://example.com/sitemap.xml',
            'https://anotherexample.com/sitemap.xml',
        ]
        page_urls = [
            'https://www.example.com/docs/en/example/?topic=testing',
            'https://www.anotherexample.com/docs/en/example/?topic=contact-us',
        ]
        # One canned response per expected fetch, handed out in call order.
        mock_get.side_effect = [
            self.create_mock_response(self.generate_sample_sitemap_xml(page_url))
            for page_url in page_urls
        ]

        sitemap_db = SitemapToContentDatabase(
            sitemap_sources=sitemap_urls, source_type='url'
        )
        urls = sitemap_db.get_urls()

        self.assertEqual(urls, page_urls)
        for sitemap_url in sitemap_urls:
            mock_get.assert_any_call(sitemap_url)
        self.assertEqual(mock_get.call_count, 2)

    def test_get_urls_given_one_location_on_disk(self):
        """A single fixture folder yields the URLs of the sitemaps inside it."""
        sitemap_db = SitemapToContentDatabase(
            sitemap_sources=[self._fixture_dir('sitemap_folder_a')],
            source_type='disk',
        )
        urls = sitemap_db.get_urls()

        self.assertEqual(urls, [
            'https://www.example.com/docs/en/example/?topic=testing',
            'https://www.example.com/docs/en/example/?topic=contact-us',
        ])

    def test_get_urls_given_multiple_locations_on_disk(self):
        """Several fixture folders yield their URLs, folder by folder."""
        sitemap_db = SitemapToContentDatabase(
            sitemap_sources=[
                self._fixture_dir('sitemap_folder_a'),
                self._fixture_dir('sitemap_folder_b'),
            ],
            source_type='disk',
        )
        urls = sitemap_db.get_urls()

        self.assertEqual(urls, [
            'https://www.example.com/docs/en/example/?topic=testing',
            'https://www.example.com/docs/en/example/?topic=contact-us',
            'https://www.example.com/docs/en/example/?topic=library-overview',
            'https://www.example.com/docs/en/example/?topic=about-this-content',
        ])
0 commit comments