Custom algorithm template

You can use the following custom algorithm template to help get you started with adding a custom algorithm to MLTK.

BaseAlgo class

from base import BaseAlgo



class CustomAlgoTemplate(BaseAlgo):
    """Skeleton to copy when adding a custom algorithm."""

    def __init__(self, options):
        """Check options and perform initializations here."""

    def fit(self, df, options):
        """Fit an estimator to df, a pandas DataFrame of the search results."""

    def partial_fit(self, df, options):
        """Incrementally fit a model."""

    def apply(self, df, options):
        """Apply a saved model.

        Modify df, a pandas DataFrame of the search results, and return it.
        """
        return df

    @staticmethod
    def register_codecs():
        """Add codecs to the codec manager."""

Using the BaseAlgo template in a search reflects the input data back to the search, as shown in the following example.

The template methods are all described in detail in the BaseAlgo class in $SPLUNK_HOME/etc/apps/Splunk_ML_Toolkit/bin/base.py, as shown below.

Pygment.png

Example

The following is an example of a custom Correlation Matrix:

from base import BaseAlgo


class CorrelationMatrix(BaseAlgo):
    """Compute and return a correlation matrix.

    The algorithm takes one or more fields and no ``from`` clause. The only
    supported parameter is ``method``, which defaults to ``pearson``.
    """

    def __init__(self, options):
        """Validate the search options and store the settings used by fit.

        Args:
            options (dict): Options parsed from the search. The keys read
                here are 'feature_variables', 'target_variable' and 'params'.

        Raises:
            RuntimeError: If no fields are supplied, if a from clause
                (target variable) is present, if 'method' is not one of the
                supported values, or if any parameter other than 'method'
                is given.
        """
        # `or` fallbacks also cover keys that are present but set to None
        feature_variables = options.get('feature_variables') or []
        target_variable = options.get('target_variable') or []
        params = options.get('params') or {}

        if not feature_variables:
            raise RuntimeError('You must supply one or more fields')

        if target_variable:
            raise RuntimeError('CorrelationMatrix does not support the from clause')

        valid_methods = ['spearman', 'kendall', 'pearson']

        # Fall back to pearson when the search does not specify a method
        method = params.get('method', 'pearson')
        if method not in valid_methods:
            error_msg = 'Invalid value for method: must be one of {}'.format(
                ', '.join(valid_methods))
            raise RuntimeError(error_msg)

        # Reject unknown parameters whether or not 'method' was supplied
        if any(name != 'method' for name in params):
            raise RuntimeError('The only valid parameter is method.')

        # Assign to self for later usage in fit
        self.method = method

        # fit reads this attribute, so make sure it always exists.
        # NOTE(review): the MLTK processor presumably re-assigns it with the
        # wildcard-expanded field names before fit is called - confirm
        # against base.py.
        self.feature_variables = feature_variables

    def fit(self, df, options):
        """Compute the correlations and return a DataFrame.

        Args:
            df (pandas.DataFrame): All the search results, including hidden
                fields.
            options (dict): Search options; not used by this method.

        Returns:
            pandas.DataFrame: Correlations between the requested fields,
            with the index reset so that all the data are in columns.
        """
        # Keep only the fields that were requested in the search
        requested_columns = df[self.feature_variables]

        # Get correlations
        correlations = requested_columns.corr(method=self.method)

        # Reset index so that all the data are in columns
        # (this is usually not necessary, but is for the corr method)
        return correlations.reset_index()