Skip to content

Оценщики

Этот модуль содержит оценщики. По сути, это обертка над сетями bamt с дополнительными ограничениями. Пожалуйста, используйте этот модуль, только если вы являетесь разработчиком, так как он низкоуровневый. В противном случае используйте предназначенные для этого интерфейсы (например, конвейеры, объяснители и т.д.).

Подробнее читайте в Руководстве пользователя.

BNEstimator

Bases: BaseEstimator

A Bayesian Network Estimator class that extends scikit-learn's BaseEstimator.

Source code in applybn/core/estimators/base_estimator.py
class BNEstimator(BaseEstimator):
    """
    A Bayesian Network Estimator class that extends scikit-learn's BaseEstimator.
    """

    _parameter_constraints = {
        "has_logit": [bool],
        "use_mixture": [bool],
        "bn_type": [str, None],
        "partial": [Options(object, {False, "parameters", "structure"})],
        "learning_params": [None, dict],
    }

    def __init__(
        self,
        has_logit: bool = False,
        use_mixture: bool = False,
        partial: False | Literal["parameters", "structure"] = False,
        bn_type: Literal["hybrid", "disc", "cont"] | None = None,
        learning_params: Unpack[ParamDict] | None = None,
    ):
        """
        Initializes the BNEstimator with the given parameters.

        Args:
            has_logit: Indicates if logit transformation is used.
            use_mixture: Indicates if mixture model is used.
            partial: Indicates if partial fitting is used.
            bn_type: Type of Bayesian Network.
            learning_params: Parameters for learning.
        """
        self.has_logit = has_logit
        self.use_mixture = use_mixture
        self.bn_type = bn_type
        self.partial = partial
        self.learning_params = {} if learning_params is None else learning_params

    def _is_fitted(self):
        """
        Checks whether the estimator is fitted or not by checking "bn_" key if __dict__.
        This has to be done because check_is_fitted(self) does not imply correct and goes into recursion because of
        delegating strategy in getattr method.
        """
        return True if "bn_" in self.__dict__ else False

    def __getattr__(self, attr: str):
        """If attribute is not found in the pipeline, look in the last step of the pipeline."""
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:
            if self._is_fitted():
                return getattr(self.bn_, attr)
            else:
                raise NotFittedError("BN Estimator has not been fitted.")

    @staticmethod
    def detect_bn(data: pd.DataFrame) -> Literal["hybrid", "disc", "cont"]:
        """
        Detects the type of Bayesian Network based on the data.
        Bamt typing is used.

        Args:
            data (pd.DataFrame): The input data to analyze.

        Returns:
            bn_type: The detected type of Bayesian Network.

        Raises:
            None: an error translates into bamt logger.
                Possible errors:
                    "Unsupported data type. Dtype: {dtypes}"
        """

        node_types = nodes_types(data)

        if len(node_types.keys()) != len(data.columns):
            diff = set(data.columns) - set(node_types.keys())
            raise NodesAutoTypingError(diff)

        nodes_types_unique = set(node_types.values())

        net_types2unqiue = {
            "hybrid": [
                {"cont", "disc", "disc_num"},
                {"cont", "disc_num"},
                {"cont", "disc"},
            ],
            "disc": [{"disc"}, {"disc_num"}, {"disc", "disc_num"}],
            "cont": [{"cont"}],
        }
        find_matching_key = (
            {frozenset(s): k for k, v in net_types2unqiue.items() for s in v}
        ).get
        return find_matching_key(frozenset(nodes_types_unique))

    def init_bn(
        self, bn_type: Literal["hybrid", "disc", "cont"]
    ) -> HybridBN | DiscreteBN | ContinuousBN:
        """
        Initializes the Bayesian Network based on the type.

        Args:
            bn_type: The type of Bayesian Network to initialize.

        Returns:
            An instance of the corresponding Bayesian Network class.

        Raises:
            TypeError: Invalid bn_type.
        """
        str2net = {"hybrid": HybridBN, "disc": DiscreteBN, "cont": ContinuousBN}

        params = dict()
        match bn_type:
            case "hybrid":
                params = dict(use_mixture=self.use_mixture, has_logit=self.has_logit)
            case "cont":
                params = dict(use_mixture=self.use_mixture)
            case "disc":
                ...
            case _:
                raise TypeError(f"Invalid bn_type, obtained bn_type: {bn_type}")
        return str2net[bn_type](**params)

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """
        Fits the Bayesian Network to the data.

        Args:
            X (tuple): a tuple with (X, descriptor, clean_data).
                If partial is "structure", clean_data can be None (not used).
            y (None): not used.

        Returns:
            self (BNEstimator): The fitted estimator.
        """

        # this has to be done because scikit learn unpacking problem
        # inside pipeline there is unpacking.
        X, descriptor, clean_data = X
        if not self.partial == "parameters":
            if not self.bn_type in ["hybrid", "disc", "cont"]:
                bn_type_ = self.detect_bn(clean_data)
            else:
                bn_type_ = self.bn_type

            bn = self.init_bn(bn_type_)

            self.bn_ = bn
            self.bn_type = bn_type_

        match self.partial:
            case "parameters":
                if not self.bn_.edges:
                    raise NotFittedError(
                        "Trying to learn parameters on unfitted estimator. Call fit method first."
                    )
                self.bn_.fit_parameters(clean_data)
            case "structure":
                self.bn_.add_nodes(descriptor)
                self.bn_.add_edges(X, progress_bar=False, **self.learning_params)
            case False:
                self.bn_.add_nodes(descriptor)
                self.bn_.add_edges(X, progress_bar=False, **self.learning_params)
                self.bn_.fit_parameters(clean_data)

        return self

__getattr__(attr)

If the attribute is not found on the estimator, it is looked up on the fitted Bayesian network (bn_). If the estimator has not been fitted yet, NotFittedError is raised.

Source code in applybn/core/estimators/base_estimator.py
def __getattr__(self, attr: str):
    """
    Delegates attributes missing on the estimator to the fitted network.

    Args:
        attr: Name of the requested attribute.

    Returns:
        The attribute of the estimator if the lookup succeeds, otherwise
        the attribute of ``bn_``.

    Raises:
        NotFittedError: The attribute is missing and the estimator is not fitted.
    """
    try:
        return object.__getattribute__(self, attr)
    except AttributeError:
        # Without a fitted network there is nothing to delegate to.
        if not self._is_fitted():
            raise NotFittedError("BN Estimator has not been fitted.")
        return getattr(self.bn_, attr)

__init__(has_logit=False, use_mixture=False, partial=False, bn_type=None, learning_params=None)

Initializes the BNEstimator with the given parameters.

Parameters:

Name Type Description Default
has_logit bool

Indicates if logit transformation is used.

False
use_mixture bool

Indicates if mixture model is used.

False
partial False | Literal['parameters', 'structure']

Indicates if partial fitting is used.

False
bn_type Literal['hybrid', 'disc', 'cont'] | None

Type of Bayesian Network.

None
learning_params Unpack[ParamDict] | None

Parameters for learning.

None
Source code in applybn/core/estimators/base_estimator.py
def __init__(
    self,
    has_logit: bool = False,
    use_mixture: bool = False,
    partial: Literal[False, "parameters", "structure"] = False,
    bn_type: Literal["hybrid", "disc", "cont"] | None = None,
    learning_params: Unpack[ParamDict] | None = None,
):
    """
    Initializes the BNEstimator with the given parameters.

    Args:
        has_logit: Indicates if logit transformation is used.
        use_mixture: Indicates if mixture model is used.
        partial: Indicates if partial fitting is used: "parameters",
            "structure", or False for no partial fitting.
        bn_type: Type of Bayesian Network.
        learning_params: Parameters for learning. None is stored as an empty dict.
    """
    self.has_logit = has_logit
    self.use_mixture = use_mixture
    self.bn_type = bn_type
    self.partial = partial
    # Normalise None to an empty dict so the attribute is always a dict.
    self.learning_params = {} if learning_params is None else learning_params

detect_bn(data) staticmethod

Detects the type of Bayesian Network based on the data. Bamt typing is used.

Parameters:

Name Type Description Default
data DataFrame

The input data to analyze.

required

Returns:

Name Type Description
bn_type Literal['hybrid', 'disc', 'cont']

The detected type of Bayesian Network.

Raises:

Type Description
NodesAutoTypingError

Raised when some columns could not be assigned a node type. Unsupported dtypes themselves are reported through the bamt logger, e.g. "Unsupported data type. Dtype: {dtypes}"

Source code in applybn/core/estimators/base_estimator.py
@staticmethod
def detect_bn(data: pd.DataFrame) -> Literal["hybrid", "disc", "cont"] | None:
    """
    Detects the type of Bayesian Network based on the data.
    Bamt typing is used.

    Args:
        data (pd.DataFrame): The input data to analyze.

    Returns:
        bn_type: The detected type of Bayesian Network, or None when the
            set of node types found in the data matches no known network type.

    Raises:
        NodesAutoTypingError: The number of typed nodes differs from the
            number of columns; the columns without a type are passed to the error.
    """
    node_types = nodes_types(data)

    if len(node_types.keys()) != len(data.columns):
        untyped = set(data.columns) - set(node_types.keys())
        raise NodesAutoTypingError(untyped)

    # Every supported combination of node types, mapped to its network type.
    net_type_by_node_types = {
        frozenset({"cont", "disc", "disc_num"}): "hybrid",
        frozenset({"cont", "disc_num"}): "hybrid",
        frozenset({"cont", "disc"}): "hybrid",
        frozenset({"disc"}): "disc",
        frozenset({"disc_num"}): "disc",
        frozenset({"disc", "disc_num"}): "disc",
        frozenset({"cont"}): "cont",
    }
    return net_type_by_node_types.get(frozenset(node_types.values()))

fit(X, y=None)

Fits the Bayesian Network to the data.

Parameters:

Name Type Description Default
X tuple

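a tuple with (X, descriptor, clean_data). If partial is "structure" and bn_type is set explicitly, clean_data can be None (not used); when bn_type is not set, clean_data is needed to detect the network type.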

required
y None

not used.

None

Returns:

Name Type Description
self BNEstimator

The fitted estimator.

Source code in applybn/core/estimators/base_estimator.py
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y=None):
    """
    Fits the Bayesian Network to the data.

    Args:
        X (tuple): a tuple with (X, descriptor, clean_data).
            If partial is "structure", clean_data can be None (not used).
        y (None): not used.

    Returns:
        self (BNEstimator): The fitted estimator.
    """

    # this has to be done because scikit learn unpacking problem
    # inside pipeline there is unpacking.
    X, descriptor, clean_data = X
    if not self.partial == "parameters":
        if not self.bn_type in ["hybrid", "disc", "cont"]:
            bn_type_ = self.detect_bn(clean_data)
        else:
            bn_type_ = self.bn_type

        bn = self.init_bn(bn_type_)

        self.bn_ = bn
        self.bn_type = bn_type_

    match self.partial:
        case "parameters":
            if not self.bn_.edges:
                raise NotFittedError(
                    "Trying to learn parameters on unfitted estimator. Call fit method first."
                )
            self.bn_.fit_parameters(clean_data)
        case "structure":
            self.bn_.add_nodes(descriptor)
            self.bn_.add_edges(X, progress_bar=False, **self.learning_params)
        case False:
            self.bn_.add_nodes(descriptor)
            self.bn_.add_edges(X, progress_bar=False, **self.learning_params)
            self.bn_.fit_parameters(clean_data)

    return self

init_bn(bn_type)

Initializes the Bayesian Network based on the type.

Parameters:

Name Type Description Default
bn_type Literal['hybrid', 'disc', 'cont']

The type of Bayesian Network to initialize.

required

Returns:

Type Description
HybridBN | DiscreteBN | ContinuousBN

An instance of the corresponding Bayesian Network class.

Raises:

Type Description
TypeError

Invalid bn_type.

Source code in applybn/core/estimators/base_estimator.py
def init_bn(
    self, bn_type: Literal["hybrid", "disc", "cont"]
) -> HybridBN | DiscreteBN | ContinuousBN:
    """
    Builds an empty Bayesian Network of the requested type.

    Args:
        bn_type: The type of Bayesian Network to initialize.

    Returns:
        A new HybridBN, DiscreteBN or ContinuousBN instance. The hybrid network
        receives use_mixture and has_logit, the continuous one only use_mixture,
        the discrete one no arguments.

    Raises:
        TypeError: Invalid bn_type.
    """
    if bn_type == "hybrid":
        return HybridBN(use_mixture=self.use_mixture, has_logit=self.has_logit)
    if bn_type == "cont":
        return ContinuousBN(use_mixture=self.use_mixture)
    if bn_type == "disc":
        return DiscreteBN()
    raise TypeError(f"Invalid bn_type, obtained bn_type: {bn_type}")