# Source code for artan.mixture.bernoulli_mixture

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at

#  http://www.apache.org/licenses/LICENSE-2.0

#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

from artan.state import StatefulTransformer
from artan.mixture.mixture_params import MixtureParams
from pyspark.ml.param import Params, Param, TypeConverters


class _HasInitialProbabilities(Params):
    """
    Mixin for the ``initialProbabilities`` parameter.

    The parameter carries the initial probabilities of the mixtures as a
    list of floats.
    """

    # The converter turns the supplied value into a list of Python floats at
    # set-time, so integer entries such as ``[0, 1]`` are coerced and a value
    # that is not a numeric sequence raises TypeError on the Python side.
    # NOTE(review): presumably the JVM-side param is a double array; confirm
    # against the Scala BernoulliMixture definition.
    initialProbabilities = Param(
        Params._dummy(),
        "initialProbabilities", "Initial Probabilities vectors of mixtures, as a list of floats",
        typeConverter=TypeConverters.toListFloat)

    def __init__(self):
        super(_HasInitialProbabilities, self).__init__()

    def getInitialProbabilities(self):
        """
        Gets the value of initial probabilities or its default value.

        :return: the value stored for ``initialProbabilities``, falling back
            to the parameter's default when it has not been set explicitly.
        """
        return self.getOrDefault(self.initialProbabilities)


class _HasInitialProbabilitiesCol(Params):
    """
    Mixin for the ``initialProbabilitiesCol`` parameter, which names the
    dataframe column supplying the initial probabilities of the mixtures.
    """

    # The value is converted to a string when it is set.
    initialProbabilitiesCol = Param(
        Params._dummy(),
        "initialProbabilitiesCol",
        "Initial probabilities vectors of mixtures from dataframe column",
        typeConverter=TypeConverters.toString)

    def __init__(self):
        super(_HasInitialProbabilitiesCol, self).__init__()

    def getInitialProbabilitiesCol(self):
        """
        Gets the value of the initial probabilities column or its default value.

        :return: the value stored for ``initialProbabilitiesCol``, or the
            parameter's default when none was set.
        """
        column_param = self.initialProbabilitiesCol
        return self.getOrDefault(column_param)


class _HasBernoulliMixtureModelCol(Params):
    """
    Mixin for the ``bernoulliMixtureModelCol`` parameter, which names the
    dataframe column supplying the initial mixture model.
    """

    # The value is converted to a string when it is set.
    bernoulliMixtureModelCol = Param(
        Params._dummy(),
        "bernoulliMixtureModelCol",
        "Initial mixture model from dataframe column",
        typeConverter=TypeConverters.toString)

    def __init__(self):
        super(_HasBernoulliMixtureModelCol, self).__init__()

    def getBernoulliMixtureModelCol(self):
        """
        Gets the value of the Bernoulli mixture model column or its default value.

        :return: the value stored for ``bernoulliMixtureModelCol``, or the
            parameter's default when none was set.
        """
        column_param = self.bernoulliMixtureModelCol
        return self.getOrDefault(column_param)


class BernoulliMixture(StatefulTransformer, MixtureParams, _HasInitialProbabilities,
                       _HasInitialProbabilitiesCol, _HasBernoulliMixtureModelCol):
    """
    Online bernoulli mixture estimator with a stateful transformer, based on Cappe (2011) Online
    Expectation-Maximisation paper.

    Outputs an estimate for each input sample in a single pass, by replacing the E-step in EM with a
    recursively averaged stochastic E-step.
    """

    def __init__(self, mixtureCount):
        """
        Creates the Python wrapper and its backing JVM object.

        :param mixtureCount: mixture count, passed to the JVM constructor together with this
            instance's uid. The initial probabilities list is expected to have this length.
        """
        super(BernoulliMixture, self).__init__()
        # The JVM class is instantiated with the same uid as the Python wrapper.
        self._java_obj = self._new_java_obj(
            "com.github.ozancicek.artan.ml.mixture.BernoulliMixture", mixtureCount, self.uid)

    def setInitialProbabilities(self, value):
        """
        Sets the initial bernoulli probabilities of the mixtures. The length of the array should be equal to
        mixture count, each element in the array should be between 0 and 1.

        Default is equally spaced probabilities between 0 and 1

        :param value: List[Float]
        :return: BernoulliMixture
        """
        return self._set(initialProbabilities=value)

    def setInitialProbabilitiesCol(self, value):
        """
        Sets the initial probabilities from dataframe column to set different probabilities across different
        models. Overrides the parameter set by setInitialProbabilities.

        :param value: String
        :return: BernoulliMixture
        """
        return self._set(initialProbabilitiesCol=value)