Source code for Taweret.mix.trees

# Name:
# trees.py
# Author: John Yannotty (yannotty.1@osu.edu)
# Start Date: 10/05/22
# Version: 1.0

import numpy as np
from openbtmixing import Openbtmix

from Taweret.core.base_mixer import BaseMixer



[docs]
class Trees(BaseMixer):
    r'''
        Constructor for the Trees mixing class,
        which implements a mean-mixing strategy.
        The weight functions are modeled using
        Bayesian Additive Regression Trees (BART).
        Please read the installation page of the
        documentation to ensure the BART-BMM Ubuntu
        package is downloaded and installed.

        .. math::
            f_\dagger(x) = \sum_{k = 1}^K w_k(x)\;f_k(x)

        Example:
        --------

        .. code-block:: python

            # Initialize trees class
            mix = Trees(model_dict = model_dict)

            # Set prior information
            mix.set_prior(k=2.5,ntree=30,overallnu=5,
            overallsd=0.01,inform_prior=False)

            # Train the model
            fit = mix.train(X=x_train, y=y_train, ndpost = 10000,
            nadapt = 2000, nskip = 2000, adaptevery = 500, minnumbot = 4)

            # Get predictions and posterior weight functions.
            ppost, pmean, pci, pstd = mix.predict(X = x_test, ci = 0.95)
            wpost, wmean, wci, wstd = mix.predict_weights(X=x_test,ci = 0.95)

    '''

    def __init__(self, model_dict: dict, **kwargs):
        '''

        Parameters:
        -----------
        :param dict model_dict:
            Dictionary of models where each item is an
            instance of BaseModel.

        :param dict kwargs:
            Additional arguments to pass to the constructor.

        Returns:
        ---------
        :returns: None.
        '''
# Store model dictionary if all models are instances of BaseModel
        self.model_dict = model_dict
        self.nummodels = len(model_dict)
        self.obt = Openbtmix(**kwargs)


[docs]
    def evaluate(self):
        '''
        Evaluate the mixed-model to get a point prediction.
        This method is not applicable to BART-based mixing.
        '''

        raise Exception("Not applicable for trees.")



[docs]
    def evaluate_weights(self):
        '''
        Evaluate the weight functions to get a point prediction.
        This method is not applicable to BART-based mixing.
        '''
        raise Exception("Not applicable for trees.")


    @property
    def map(self):
        '''
        Return the map values for parameters in the model.
        This method is not applicable to BART-based mixing.
        '''
        return super().map

    @property
    def posterior(self):
        '''
        Returns the posterior distribution of the error standard deviation,
        which is learned during the training process.

        Parameters:
        ------------
        :param: None.

        Returns:
        ---------
        :returns: The posterior of the error standard deviation .
        :rtype: np.ndarray

        '''
        return self._posterior

    @property
    def prior(self):
        '''
        Returns a dictionary of the hyperparameter settings used in the
        various prior distributions.

        Parameters:
        -----------
        :param: None.

        Returns:
        --------
        :returns: A dictionary of the hyperparameters used in the model.
        :rtype: dict

        '''
        return self.obt.get_prior()


[docs]
    def set_prior(
            self,
            ntree: int = 1,
            ntreeh: int = 1,
            k: float = 2,
            power: float = 2.0,
            base: float = 0.95,
            sighat: float = 1,
            nu: int = 10,
            inform_prior: bool = True):
        '''
        Sets the hyperparameters in the tree and terminal node priors. Also
        specifies if an informative or non-informative prior will be used
        when mixing EFTs.

        Parameters:
        -----------
        :param int ntree:
            The number of trees used in the sum-of-trees model for
            the weights.
        :param int ntreeh:
            The number of trees used in the product-of-trees model
            for the error standard deviation. Set to 1 for
            homoscedastic variance assumption.
        :param float k:
            The tuning parameter in the prior variance of the terminal node
            parameter prior. This is a value greater than zero.
        :param float power:
            The power parameter in the tree prior.
        :param float base:
            The base parameter in the tree prior.
        :param float overallsd:
            An initial estimate of the error standard deviation.
            This value is used to calibrate the scale parameter in
            variance prior.
        :param float overallnu:
            The shape parameter in the error variance prior.
        :param bool inform_prior:
            Controls if the informative or non-informative prior
            is used.
            Specify true for the informative prior.
        :param np.ndarray tauvec:
            A K-dimensional array (where K is the number of models)
            that contains the prior standard deviation of the terminal node
            parameter priors. This is used when specifying different
            priors for the different model weights.
        :param np.ndarray betavec:
            A K-dimensional array (where K is the number of models) that
            contains the prior mean of the terminal node
            parameter priors. This is used when specifying different
            priors for the different model weights.

        Returns:
        --------
        :returns: None.

        '''
        self.obt.set_prior(ntree, ntreeh, k, power, base,
                           sighat, nu, inform_prior)



[docs]
    def prior_predict(self):
        '''
        Return the prior predictive distribution of the mixed-model.
        This method is not applicable to BART-based mixing.
        '''
        raise Exception("Not applicable for trees at the moment.")



[docs]
    def train(self, X: np.ndarray, y: np.ndarray, **kwargs):
        '''
        Train the mixed-model using a set of observations y at inputs x.

        Parameters:
        -----------
        :param np.ndarray X: input parameter values of dimension (n x p).
        :param np.ndarray y: observed data at inputs X of dimension  (n x 1).
        :param dict kwargs: dictionary of arguments

        Returns:
        --------
        :returns: A dictionary which contains relevant information to the
            model such as values of tuning parameters.  The MCMC results are
            written to a text file and stored in a temporary directory as
            defined by the fpath key in the results dictionary.
        :rtype: dict
        '''
        # Cast data to arrays if not already and reshape if needed
        if isinstance(X, list):
            X = np.array(X)

        if len(X.shape) == 1:
            X = X.reshape(X.shape[0], 1)
        # Get predictions from the model set at X's
        fhat_list = []
        shat_list = []
        for m in list(self.model_dict.values()):
            # Get predictions from selected model
            fhat_col, shat_col = m.evaluate(X)

            # Append predictions to respective lists
            fhat_list.append(fhat_col)
            shat_list.append(shat_col)

        # Construct two matrices using concatenate
        f_matrix = np.concatenate(fhat_list, axis=1)
        s_matrix = np.concatenate(shat_list, axis=1)

        # Run the train command in openbtmixing
        res = self.obt.train(x_train=X, y_train=y, f_train=f_matrix,
                             s_train=s_matrix, **kwargs)

        # Get predictions at training points -- more importanlty,
        # get the posterior of sigma
        # ci level doesn't matter here, all we want is the posterior
        # using tc*2 just to get a small subset of data that
        # won't break the array structures when reading in results
        res_sig = self.obt.predict(X[0:(self.obt.tc*2),],
                                   f_matrix[0:(self.obt.tc*2),], ci=0.68)
        self._posterior = res_sig["sigma"]["draws"][:, 0]

        return res



[docs]
    def predict(self, X: np.ndarray, ci: float = 0.95):
        '''
        Obtain the posterior predictive distribution of the mixed-model
        at a set of inputs X.

        Parameters:
        -----------
        :param np.ndarray X: design matrix of testing inputs.
        :param float ci: credible interval width, must be a value
                within the interval (0,1).

        Returns:
        --------
        :returns: The posterior prediction draws and summaries.
        :rtype: np.ndarray, np.ndarray, np.ndarray, np.ndarray
        :return value: the posterior predictive distribution
                evaluated at the specified test points
        :return value: the posterior mean of the mixed-model
                at each input in X.
        :return value: the pointwise credible intervals at each input in X.
        :return value: the posterior standard deviation of the
                mixed-model at each input in X.
        '''

        # Set q_lower and q_upper
        alpha = (1 - ci)
        q_lower = alpha / 2
        q_upper = 1 - alpha / 2

        # Casting lists to arrays when needed
        if (isinstance(X, list)):
            X = np.array(X)
        if (len(X.shape) == 1):  # If shape is (n, ), change it to (n, 1):
            X = X.reshape(len(X), 1)

        # Get predictions from the model set at X's
        fhat_list = []
        shat_list = []
        for m in list(self.model_dict.values()):
            # Get predictions from selected model
            fhat_col, shat_col = m.evaluate(X)

            # Append predictions to respective lists
            fhat_list.append(fhat_col)
            shat_list.append(shat_col)

        # Construct F matrix using concatenate
        f_test = np.concatenate(fhat_list, axis=1)
        # Set control values
        self.p_test = X.shape[1]
        self.n_test = X.shape[0]

        self.q_lower = q_lower
        self.q_upper = q_upper

        # predict via openbtmixing grid
        res = self.obt.predict(X, f_test, ci)

        posterior = res["pred"]["draws"]
        post_mean = res["pred"]["mean"]
        post_sd = res["pred"]["sd"]
        post_credible_interval = [res["pred"]["lb"], res["pred"]["ub"]]

        return posterior, post_mean, post_credible_interval, post_sd



[docs]
    def predict_weights(self, X: np.ndarray, ci: float = 0.95):
        '''
        Obtain posterior distribution of the weight functions at a set
        of inputs X.

        Parameters:
        -----------
        :param np.ndarray X: design matrix of testing inputs.
        :param float ci: credible interval width, must be a value
                within the interval (0,1).

        Returns:
        --------
        :returns: The posterior weight function draws and summaries.
        :rtype: np.ndarray, np.ndarray, np.ndarray, np.ndarray
        :return value: the posterior draws of the weight functions
                at each input in X.
        :return value: posterior mean of the weight functions at
                each input in X.
        :return value: pointwise credible intervals for
                the weight functions.
        :return value: posterior standard deviation of
                the weight functions at each input in X.
        '''

        # Set q_lower and q_upper
        alpha = (1 - ci)
        q_lower = alpha / 2
        q_upper = 1 - alpha / 2
        self.q_lower = q_lower
        self.q_upper = q_upper

        # predict via openbtmixing grid
        res = self.obt.predict_weights(X, ci)

        posterior = res["wts"]["draws"]
        post_mean = res["wts"]["mean"]
        post_sd = res["wts"]["sd"]
        post_credible_interval = [res["wts"]["lb"], res["wts"]["ub"]]

        return posterior, post_mean, post_credible_interval, post_sd