Police-Data-Accessibility-Project · CaptainStabs · Dec 7, 2021 · Oct 25, 2021
@@ -19,13 +19,13 @@
 
 # this function is used for gathering time stats
 def function_timer(stats):
-    if stats != False:
+    if stats:
         return time.perf_counter()
 
 
 # this function simply calculates and prints the difference between the end and start times
 def time_dif(stats, string, start, end):
-    if stats != False:
+    if stats:
         print(f"{string}: {end - start} seconds")
 
 

@@ -22,13 +22,13 @@
 
 # this function is used for gathering time stats
 def function_timer(stats):
-    if stats != False:
+    if stats:
         return time.perf_counter()
 
 
 # this function simply calculates and prints the difference between the end and start times
 def time_dif(stats, string, start, end):
-    if stats != False:
+    if stats:
         print(f"{string}: {end - start} seconds")
 
 

@@ -20,13 +20,13 @@
 
 # this function is used for gathering time stats
 def function_timer(stats):
-    if stats != False:
+    if stats:
         return time.perf_counter()
 
 
 # this function simply calculates and prints the difference between the end and start times
 def time_dif(stats, string, start, end):
-    if stats != False:
+    if stats:
         print(f"{string}: {end - start} seconds")
 
 

@@ -32,7 +32,7 @@ def file_compare(save_dir, file_1, file_2, try_overwite=False, no_overwrite=Fals
     else:
         # I tried to just put the code to write it here, but it would've required too many arguments
         print("File has changed")
-        if try_overwite == True:
+        if try_overwite:
             os.remove(file_1)
             # Renames the new file to the old_file's name (without the new_)
             os.rename(file_2, file_1)
@@ -64,18 +64,31 @@ def check_if_exists(save_dir, file_name, add_date):
     else:
         return False
 
-
+# These can likely get merged into a single function
 def get_pdf(
     save_dir, file_name, url_2, sleep_time, debug=False, try_overwite=False, no_overwrite=False, add_date=False,
 ):
+    """
+    Download PDFs
+    :param save_dir: path where files should be saved, string
+    :param file_name: name of file,  string
+    :param url_2: url of file, string
+    :param sleep_time: time to sleep between requests, integer
+    :param debug: more verbose printing, should be replaced with logging module, bool
+    :param try_overwite: mostly deprecated. ask before using
+    :param no_overwrite: replaces try_overwrite. Use with add_date for best results. Prevent overwriting of data files. (default false)
+    :param add_date: adds the date scraped to the filename, bool
+    """
     file_name = file_name.lstrip("/")
     print(file_name)
 
-    if add_date is True:
+    if add_date:
         print(" [?] add_date is True")
+
         if not os.path.isfile("last_run.txt"):
             print(" [!] last_run.txt did not exist... Is this your first time running?")
             print("    [*] Creating last_run.txt and adding data...")
+
             with open("last_run.txt", "w") as last_run:
                 date_name = str(date.today()).replace("-", "_")
                 print(date_name)
@@ -87,11 +100,12 @@ def get_pdf(
 
     # Default run mode, simply checks that the file does not already exist.
     # Don't need to check if
-    if os.path.exists(save_dir + file_name) == False and check_if_exists(save_dir, file_name, add_date=add_date) == False:
+    if not os.path.exists(save_dir + file_name) and check_if_exists(save_dir, file_name, add_date=add_date) is False:
         print(" [*] File does not exist")
         try:
             print(" [*] Requesting file....")
             pdf = urllib.request.urlopen(url_2.replace(" ", "%20"))
+
         except urllib.error.HTTPError as exception:
             print(f"   [!] {exception}")
             print("   [!] URL: " + str(url_2))
@@ -100,7 +114,7 @@ def get_pdf(
                 traceback.print_exc()
             sys.exit()
 
-        if add_date == True:
+        if add_date:
             print(" [?] add_date is True")
             date_name = date.today()
             file_name = file_name.strip(".pdf") + "_" + str(date_name).replace("-", "_") + ".pdf"
@@ -116,9 +130,9 @@ def get_pdf(
 
         # If the file exists, and no_overwrite is true, then:
     elif (
-        os.path.exists(save_dir + file_name) == True
-        and check_if_exists(save_dir, file_name, add_date=add_date) == False
-        and no_overwrite == True
+        os.path.exists(save_dir + file_name) is True
+        and check_if_exists(save_dir, file_name, add_date=add_date) is False
+        and no_overwrite is True
     ):
         # Tries to get the file and set it to pdf
         try:
@@ -131,7 +145,7 @@ def get_pdf(
             if debug:
                 traceback.print_exc()
             sys.exit()
-        print("Comparing")
+        print("   [*] Comparing")
 
         # Saves the pdf while prepending with "new_"
         print(" [*] Saving as new_" + file_name)
@@ -141,7 +155,8 @@ def get_pdf(
         new_filename = "new_" + file_name
 
         print(" [*] Comparing...")
-        if file_compare(save_dir, file_name, new_filename, no_overwrite=True) == False:
+
+        if not file_compare(save_dir, file_name, new_filename, no_overwrite=True):
             print("    [?] Files are different")
             date_name = date.today()
             # print(date_name)
@@ -152,7 +167,7 @@ def get_pdf(
                 file.write(pdf.read())
             file.close()
     # Checks if the files exists, and that `try_overwite` is True
-    elif os.path.exists(save_dir + file_name) == True and try_overwite == True:
+    elif os.path.exists(save_dir + file_name) is True and try_overwite is True:
         print(" [!!!] try_overwite is set to True, verify that you want this before continuing")
         # Tries to get the file and set it to pdf
         try:
@@ -166,7 +181,7 @@ def get_pdf(
             sys.exit()
         print("Comparing")
 
-        if add_date == True:
+        if add_date:
             date_name = date.today()
             file_name = file_name.strip(".pdf") + "_" + str(date_name).replace("-", "_") + ".pdf"
             print(" [*] Date appended name: " + file_name)
@@ -186,9 +201,9 @@ def get_xls(save_dir, file_name, url_2, sleep_time, debug=False):
     if ".xls" not in file_name:
         # Allows saving as xls even if it's not in the file_name (saves in proper format)
         file_name = file_name + ".xls"
-    if os.path.exists(save_dir + file_name) == False:
+    if not os.path.exists(save_dir + file_name):
         try:
-            print(" [*] Requesting file...")
+            print("   [*] Requesting file...")
             pdf = urllib.request.urlopen(url_2.replace(" ", "%20"))
         except urllib.error.HTTPError as exception:
             print(f"    [!] {exception} ")
@@ -197,18 +212,22 @@ def get_xls(save_dir, file_name, url_2, sleep_time, debug=False):
             if debug:
                 traceback.print_exc()
             exit()
+
         with open(save_dir + file_name, "wb") as file:
             file.write(pdf.read())
+
         file.close()
         time.sleep(sleep_time)
-        print("Sleep")
+        print("   [*] Sleeping for: " + str(sleep_time))
 
 
 def get_doc(save_dir, file_name, url_2, sleep_time):
+    """
+    Download a document as text and save it, skipping files that already exist
+    :param save_dir: path prefix that is concatenated directly with file_name, string
+    :param file_name: name of file, string
+    :param url_2: url of file (spaces are requested as %20), string
+    :param sleep_time: time to sleep after the download, passed to time.sleep
+    """
-    if os.path.exists(save_dir + file_name) == False:
+    if not os.path.exists(save_dir + file_name):
-        document = requests.get(url_2.replace(" ", "%20", allow_redirects=True))
+        # allow_redirects is an argument of requests.get; str.replace rejects it with a TypeError
+        document = requests.get(url_2.replace(" ", "%20"), allow_redirects=True)
+
-        with open(file_name, "w") as data_file:
+        # Write to the same path the existence check above looks at
+        with open(save_dir + file_name, "w") as data_file:
             data_file.write(document.text)  # Writes using requests text 	function thing
+
-        data_file.close()
         time.sleep(sleep_time)
-        print("Sleep")
+        print("   [*] Sleeping for: " + str(sleep_time))
@@ -36,9 +36,11 @@ def extract_info(soup, configs, extract_name=False, name_in_url=True, configs_fi
 
         url = str(link["href"])
         print(url)
-        if extract_name == False:
+
+        if not extract_name:
             # print(" [?] extract_name is False")
             name = url[url.rindex("/") :]
+
         else:
             name = link.string
             # print(" [?] extract_name is True")

@@ -161,5 +161,5 @@ def get_files(
     input_file.close()
 
     # Used for debugging
-    if delete is not False:
+    if delete:
         os.remove("url_name.txt")