# Source code for causeinfer.data.download_utils

"""
Download Utilities
------------------

Utility functions for downloading data.

Based on
    Kuchumov, A. pyuplift: Lightweight uplift modeling framework for Python. (2019).
    URL: https://github.com/duketemon/pyuplift.
    License: https://github.com/duketemon/pyuplift/blob/master/LICENSE.

Contents
    download_file,
    get_download_paths
"""

import os
import urllib
import zipfile

import requests


def download_file(url: str, output_path: str, zip_file: bool = False) -> None:
    """
    Downloads a file from a url to a specified path.

    Parameters
    ----------
        url : str
            The URL from which the file can be downloaded.

        output_path : str
            The path the downloaded file is written to.

        zip_file : bool (default=False)
            Whether the download is a zip archive. If so, it is extracted into
            the directory given by ``output_path`` up to its first ".zip", and
            the archive itself is deleted afterwards.

    Raises
    ------
        Exception
            If the server answers with any status code other than 200.
    """
    print("Attempting to download file to '{}'...".format(output_path))

    # Set header for requests.get(), which is required for some websites.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/50.0.2661.102 Safari/537.36"
        )
    }
    res = requests.get(url, headers=headers)

    # Fail loudly on every non-200 response so that callers never continue
    # with a file that was not written.
    status_code = int(res.status_code)
    if status_code == 404:
        raise Exception("Wrong URL: " + url)
    if status_code == 403:
        raise Exception("Forbidden URL: " + url)
    if status_code != 200:
        raise Exception(
            "Download failed with status code {}: {}".format(status_code, url)
        )

    # Persist the body that was already fetched above. Re-downloading it with
    # a second request would drop the User-Agent header set for this one.
    with open(output_path, "wb") as file:
        file.write(res.content)

    if zip_file:
        with zipfile.ZipFile(output_path, "r") as zip_ref:
            print("Unzipping '{}'...".format(output_path))
            zip_ref.extractall(output_path.split(".zip")[0])

        # Remove the archive only after the ZipFile handle has been closed.
        os.remove(output_path)
        print("File unzipped - deleting .zip file")

    print("Download complete")
def get_download_paths(file_path, file_directory="files", file_name="file"):
    """
    Derives paths for a file folder and a file.

    Parameters
    ----------
        file_path : str or None
            A user specified path that the data should go to. If None, the
            path is built from the current working directory,
            ``file_directory`` and ``file_name``.

        file_directory : str (default=files)
            A user specified directory, used only when ``file_path`` is None.

        file_name : str (default=file)
            The name to call the file, used only when ``file_path`` is None.

    Returns
    -------
        directory_path, file_path : tuple of str
            The directory that contains the file and the path of the file.
    """
    if file_path is None:
        directory_path = os.path.join(os.getcwd(), file_directory)
        return directory_path, os.path.join(directory_path, file_name)

    # Use the full parent directory rather than only the first path component,
    # so nested and absolute paths resolve to the folder that holds the file.
    # A bare file name has no directory part and lives in the cwd.
    directory_path = os.path.dirname(file_path) or os.getcwd()
    return directory_path, file_path