Skip to content

Commit 6165b66

Browse files
Script for aggregating guides and analytics (linode#6983)
* Test * Updates to script * reset changes made to the update-frontmatter.py script
1 parent 3175e44 commit 6165b66

File tree

1 file changed

+219
-0
lines changed

1 file changed

+219
-0
lines changed
+219
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
import os
2+
import frontmatter
3+
import csv
4+
import argparse
5+
6+
# Command-line interface: one optional boolean flag, --process / --no-process.
parser = argparse.ArgumentParser(
    description='Script that generatees a CSV file of guides and processes actions added to that file.'
)
parser.add_argument(
    '--process',
    action=argparse.BooleanOptionalAction,
    default=False,
    help="Process actions added to the CSV file",
)
ARGS = parser.parse_args()

# PROCESS_MODE mirrors the --process flag: True only when the flag was passed.
PROCESS_MODE = bool(ARGS.process)
15+
16+
# Define the Guide class
17+
class Guide:
    """Plain record holding the details collected for a single guide.

    Each constructor argument is stored unchanged on the instance under an
    attribute of the same name.
    """

    def __init__(self, slug, root, path, title, uri, published, modified, deprecated, deprecated_link):
        # Identity and location of the guide.
        self.slug, self.root, self.path = slug, root, path
        # How the guide is presented and linked.
        self.title, self.uri = title, uri
        # Publication dates.
        self.published, self.modified = published, modified
        # Deprecation status and the link to use instead, if any.
        self.deprecated, self.deprecated_link = deprecated, deprecated_link
28+
29+
# Directory prefix shared by every input and output CSV file
data_path = "../data/"

# Generated file that lists all guides together with their analytics data
aggregate_file = f"{data_path}guides.csv"
# Input file containing Google Search Console data
search_file = f"{data_path}search_data.csv"
# Input file containing Adobe Analytics data
analytics_file = f"{data_path}analytics_data.csv"
# Input file listing the actions to perform on guides (a modified copy of aggregate_file)
process_file = f"{data_path}guides_process.csv"
40+
41+
# ------------------
42+
# Generate an array of all guides
43+
# ------------------
44+
def get_guides(guides_dir="docs/guides"):
    """Build a list of Guide objects for the markdown guides below *guides_dir*.

    Every ``.md`` file found while walking *guides_dir* is parsed with
    ``frontmatter.load``. A file is turned into a Guide only when its front
    matter defines ``slug`` and does not set ``headless`` to true.

    Args:
        guides_dir: Directory to walk. Defaults to ``"docs/guides"``, the
            location that was previously hard-coded.

    Returns:
        A list of Guide objects. ``modified`` falls back to ``published``,
        ``deprecated`` to ``False`` and ``deprecated_link`` to ``""`` when the
        front matter does not define them.

    Files that cannot be loaded, or that define ``slug`` but lack ``title`` or
    ``published``, are reported on standard output and skipped; they never
    abort the scan.
    """
    guides = []

    for root, dirs, files in os.walk(guides_dir):
        # Sorting in place fixes the traversal order, so the resulting list
        # (and any CSV generated from it) does not depend on the filesystem.
        dirs.sort()

        for file in sorted(files):
            # Only markdown files can be guides
            if not file.endswith('.md'):
                continue

            # The relative file path of the file
            file_path = os.path.join(root, file)

            try:
                # Loads the entire guide (including front matter)
                expanded_guide = frontmatter.load(file_path)

                # Ignores the guide if it's headless
                if expanded_guide.get("headless") == True:  # noqa: E712 - same comparison as before
                    continue

                # Files without a slug are not guides. No separate check of the
                # path is needed: os.walk only yields paths under guides_dir,
                # and matching on a "/" separator never succeeds on Windows.
                if "slug" not in expanded_guide.keys():
                    continue

                # Get various front matter parameters from the guide
                slug = expanded_guide['slug']
                title = expanded_guide['title']
                published = expanded_guide['published']
                modified = expanded_guide.get('modified', published)
                deprecated = expanded_guide.get('deprecated', False)
                deprecated_link = expanded_guide.get('deprecated_link', "")

                # Construct the URI for the guide
                uri = "/docs/guides/" + slug + "/"

                guides.append(Guide(slug, root, file_path, title, uri, published, modified, deprecated, deprecated_link))
            except Exception as e:
                # Best effort: one broken guide must not stop the whole scan,
                # but the message has to say which file was skipped.
                print(f"Skipping {file_path}: {e!r}")

    return guides
99+
100+
# ------------------
101+
# Generate a CSV for guides, aggregating data from other sources
102+
# ------------------
103+
def _read_metrics(path, min_columns):
    """Index the rows of a metrics CSV file by the value of their first column.

    Rows with fewer than *min_columns* cells are ignored. When several rows
    share the same first column the last one wins.
    """
    rows_by_page = {}
    with open(path, "r", encoding='utf-8', newline='') as handle:
        for row in csv.reader(handle, delimiter=","):
            if len(row) >= min_columns:
                rows_by_page[row[0]] = row
    return rows_by_page


def _hyperlink_formula(uri, title):
    """Return the spreadsheet HYPERLINK formula that links *title* to the guide."""
    # Inside a spreadsheet string literal a double quote is written as two
    # double quotes; without this a quoted title would break the formula.
    safe_title = str(title).replace('"', '""')
    return "=HYPERLINK(\"https://www.linode.com" + uri + "\",\" " + safe_title + "\")"


def generate_csv(guides):
    """Write the aggregate CSV file: one row per guide plus its metrics.

    Analytics rows are matched on ``"www.linode.com" + guide.uri`` and search
    rows on ``"https://www.linode.com" + guide.uri``. Guides without an
    analytics row get 0 visitors and page views; guides without a search row
    get empty search columns.

    Args:
        guides: Iterable of objects exposing ``uri``, ``title``, ``published``,
            ``modified``, ``slug``, ``deprecated`` and ``deprecated_link``.

    Raises:
        FileNotFoundError: If the analytics or search input file is missing.
            Both inputs are read before the output file is opened, so a
            missing input no longer leaves a header-only output file behind.
    """
    # An array that contains the keys (first row headers) for the CSV file to be generated.
    # Data rows only fill the first twelve columns; "Action", "Redirect Link"
    # and "Notes" are written as headers only.
    guide_keys = ["Title", "Published", "Modified", "Slug", "Deprecated", "Deprecated Link", "Visitors", "Page Views", "Clicks", "Impressions", "CTR", "Position", "Action", "Redirect Link", "Notes"]

    # Read each input once instead of reopening it for every guide
    analytics = _read_metrics(analytics_file, 3)
    search = _read_metrics(search_file, 5)

    # newline='' lets the csv module control line endings itself
    with open(aggregate_file, 'w', encoding='utf-8', newline='') as file:
        writer = csv.writer(file)

        # Write the first row of the CSV file (header row)
        writer.writerow(guide_keys)

        for guide in guides:
            analytics_row = analytics.get("www.linode.com" + guide.uri)
            visitors, page_views = analytics_row[1:3] if analytics_row else (0, 0)

            search_row = search.get("https://www.linode.com" + guide.uri)
            clicks, impressions, ctr, position = search_row[1:5] if search_row else ("", "", "", "")

            # Write a row to the CSV file containing all information for this guide.
            writer.writerow([_hyperlink_formula(guide.uri, guide.title), guide.published, guide.modified, guide.slug, guide.deprecated, guide.deprecated_link, visitors, page_views, clicks, impressions, ctr, position])
144+
145+
# ------------------
146+
# Process CSV file and perform the "Deprecate" action as needed
147+
# ------------------
148+
def _deprecate_front_matter(lines, redirect_link):
    """Return *lines* with the front matter rewritten to mark the guide deprecated.

    Existing ``deprecated:`` and ``deprecated_link:`` lines inside the front
    matter are dropped. ``deprecated: true`` (and ``deprecated_link`` when
    *redirect_link* is not empty) is inserted just before the line that closes
    the front matter. Lines outside the front matter are returned unchanged.
    """
    yaml_token = "---"
    token_count = 0
    in_front_matter = False
    updated = []

    for line in lines:
        is_token = line.startswith(yaml_token)
        if is_token:
            # The first token opens the front matter and the second closes it;
            # any later "---" line belongs to the body of the guide.
            token_count += 1
            in_front_matter = token_count == 1

        # Existing values are dropped here and written back below
        if in_front_matter and line.startswith(("deprecated:", "deprecated_link:")):
            continue

        if is_token and token_count == 2:
            updated.append("deprecated: true\n")
            if redirect_link:
                # A single quote inside a single-quoted YAML scalar is escaped by doubling it
                updated.append("deprecated_link: '" + redirect_link.replace("'", "''") + "'\n")
            updated.append("---\n")
        else:
            updated.append(line)

    return updated


def process_csv(guides):
    """Apply every "Deprecate" action listed in the process CSV file.

    For each row whose ``Action`` column is ``Deprecate``, every guide with the
    same slug that is not already deprecated has its front matter rewritten so
    that it contains ``deprecated: true`` and, when the row's ``Redirect Link``
    is not empty, a matching ``deprecated_link``.

    Args:
        guides: Iterable of objects exposing ``slug``, ``path`` and
            ``deprecated``.

    Raises:
        KeyError: If the CSV file has no ``Action``, ``Slug`` or
            ``Redirect Link`` column.
    """
    # Open modified aggregated data file and read in the data
    with open(process_file, newline='', encoding='utf-8') as csvfile:
        for row in csv.DictReader(csvfile):
            # DictReader fills the missing cells of a short row with None
            if (row['Action'] or "").strip() != "Deprecate":
                continue

            # Capture the intended redirect link (deprecated_link) from the CSV file
            redirect_link = (row["Redirect Link"] or "").strip()

            # Find the corresponding guide(s) in the guides array
            for guide in guides:
                if row["Slug"] != guide.slug or guide.deprecated == True:  # noqa: E712 - same comparison as before
                    continue

                with open(guide.path, "r", encoding='utf-8') as fp:
                    lines = fp.readlines()

                # Build the complete new content before reopening the file for
                # writing, so a failure can never leave a half-written guide.
                updated = _deprecate_front_matter(lines, redirect_link)

                with open(guide.path, "w", encoding='utf-8') as fp:
                    fp.writelines(updated)
203+
204+
# ------------------
205+
# Main function
206+
# ------------------
207+
def main():
    """Collect all guides, then either process the action CSV or generate the aggregate CSV."""
    guides = get_guides()

    # With --process the actions from the CSV file are applied; without it the CSV is (re)generated.
    if PROCESS_MODE:
        process_csv(guides)
    else:
        generate_csv(guides)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)