# Source code for causeinfer.standard_algorithms.two_model

"""
Two Model
---------

The Two Model Approach (Double Model, Separate Model).

Based on
    Kuchumov, A. pyuplift: Lightweight uplift modeling framework for Python. (2019).
    URL: https://github.com/duketemon/pyuplift.
    License: https://github.com/duketemon/pyuplift/blob/master/LICENSE.

    Hansotia, B. and B. Rukstales (2002). “Incremental value modeling”.
    In: Journal of Interactive Marketing 16(3), pp. 35–46.
    URL: https://search.proquest.com/openview/1f86b52432f7d80e46101b2b4b7629c0/1?cbl=32002&pq-origsite=gscholar

    Devriendt, F. et al. (2018). A Literature Survey and Experimental Evaluation of the State-of-the-Art in Uplift
    Modeling: A Stepping Stone Toward the Development of Prescriptive Analytics. Big Data, Vol. 6, No. 1, March 1,
    2018, pp. 1-29. Codes found at: data-lab.be/downloads.php.

Contents
    TwoModel Class
        fit,
        predict,
        predict_proba
"""

import numpy as np
from causeinfer.standard_algorithms.base_models import BaseModel


class TwoModel(BaseModel):
    """
    Two Model approach: one model is fit on the treated units and a second,
    separate model is fit on the control units. Predictions from both models
    are returned side by side.
    """

    def __init__(self, control_model=None, treatment_model=None):
        """
        Checks the attributes of the control and treatment models before assignment.

        Parameters
        ----------
            control_model : object
                Model fit on the control units; must provide fit and predict.

            treatment_model : object
                Model fit on the treated units; must provide fit and predict.

        Raises
        ------
            AttributeError
                If either model (including the default None) lacks a callable
                fit or predict. The control model is checked first.
        """
        # getattr (rather than calling __getattribute__ directly) also honours
        # attributes supplied through __getattr__, e.g. by delegating wrappers.
        for role, model in (("control", control_model), ("treatment", treatment_model)):
            for method_name in ("fit", "predict"):
                if not callable(getattr(model, method_name, None)):
                    raise AttributeError(
                        "The passed {} model should contain both fit and predict methods.".format(
                            role
                        )
                    )

        self.control_model = control_model
        self.treatment_model = treatment_model

    def fit(self, X, y, w):
        """
        Trains a model given covariates, responses and assignments.

        Parameters
        ----------
            X : numpy.ndarray : (num_units, num_features) : int, float
                Matrix of covariates.

            y : numpy.ndarray : (num_units,) : int, float
                Vector of unit responses.

            w : numpy.ndarray : (num_units,) : int, float
                Vector of original treatment allocations across units.
                Truthy entries mark treated units, falsy entries control units.

        Returns
        -------
            self : causeinfer.standard_algorithms.TwoModel
                The instance itself, holding the two trained models
                (one for the treatment group, one for the control group).
        """
        # Convert once so that rows are always selected by position, whatever
        # array-like container the caller passed in.
        X = np.asarray(X)
        y = np.asarray(y)
        is_treated = np.asarray(w).astype(bool).reshape(-1)

        # Fit two separate models on the treatment and control subsets.
        self.treatment_model.fit(X[is_treated], y[is_treated])
        self.control_model.fit(X[~is_treated], y[~is_treated])

        return self

    def predict(self, X):
        """
        Predicts the response under treatment and under control given covariates.

        Parameters
        ----------
            X : numpy.ndarray : (num_units, num_features) : int, float
                New data on which to make predictions.

        Returns
        -------
            predictions : numpy.ndarray : (num_units, 2) : float
                Column 0 holds the treatment model predictions and column 1 the
                control model predictions for all units.
        """
        pred_treatment = self.treatment_model.predict(X)
        pred_control = self.control_model.predict(X)

        # Pair up the separate predictions of each model unit by unit.
        return np.stack((pred_treatment, pred_control), axis=1)

    def predict_proba(self, X):
        """
        Predicts class probabilities with both models given covariates.

        Parameters
        ----------
            X : numpy.ndarray : (num_units, num_features) : int, float
                New data on which to make predictions.

        Returns
        -------
            probas : numpy.ndarray : (num_units, 2) : float
                Column 0 holds the first probability column of the treatment
                model and column 1 the first probability column of the control
                model for all units.
        """
        pred_treatment = np.asarray(self.treatment_model.predict_proba(X))
        pred_control = np.asarray(self.control_model.predict_proba(X))

        # For each model, select the first probability column.
        # NOTE(review): this was originally described as the probability to
        # respond; which class column 0 refers to depends on the wrapped
        # models' class ordering - confirm it is the responding class.
        return np.stack((pred_treatment[:, 0], pred_control[:, 0]), axis=1)