Custom algorithm template
You can use the following custom algorithm template to help get you started with adding a custom algorithm to MLTK.
BaseAlgo class
from base import BaseAlgo


class CustomAlgoTemplate(BaseAlgo):
    """Starting point for a custom algorithm.

    Every method here is a placeholder: replace the bodies with the logic of
    your own algorithm.
    """

    def __init__(self, options):
        """Validate the options and set up any state the algorithm needs.

        Args:
            options: the options handed to the algorithm (this template does
                not inspect them).
        """
        # Option checking & initializations here
        pass

    def fit(self, df, options):
        """Fit an estimator to the search results.

        Args:
            df: a pandas DataFrame of the search results.
            options: the options handed to the algorithm (unused here).
        """
        # Fit an estimator to df, a pandas DataFrame of the search results
        pass

    def partial_fit(self, df, options):
        """Incrementally fit a model.

        Args:
            df: the data to fit on (unused here).
            options: the options handed to the algorithm (unused here).
        """
        # Incrementally fit a model
        pass

    def apply(self, df, options):
        """Apply a saved model to the search results.

        Args:
            df: a pandas DataFrame of the search results.
            options: the options handed to the algorithm (unused here).

        Returns:
            The DataFrame to send back; this template returns ``df`` unchanged.
        """
        # Apply a saved model
        # Modify df, a pandas DataFrame of the search results
        return df

    @staticmethod
    def register_codecs():
        """Register any codecs the algorithm needs with the codec manager."""
        # Add codecs to the codec manager
        pass
Using the BaseAlgo template in a search reflects the input data back to the search, as shown in the following example.
| fit CustomAlgoTemplate *
These methods are all described in detail in the BaseAlgo class, which is defined in
$SPLUNK_HOME/etc/apps/Splunk_ML_Toolkit/bin/base.py.
Example
The following is an example of a custom Correlation Matrix algorithm:
from base import BaseAlgo
class CorrelationMatrix(BaseAlgo):
    """Compute and return a correlation matrix."""

    def __init__(self, options):
        """Validate the search options and store the correlation method.

        Args:
            options: dict of search options. The keys read here are
                'feature_variables', 'target_variable' and 'params'.

        Raises:
            RuntimeError: if no feature fields are supplied, if a target
                variable (the "from" clause) is supplied, if 'method' is not
                one of the supported correlation methods, or if any parameter
                other than 'method' is present.
        """
        feature_variables = options.get('feature_variables', {})
        target_variable = options.get('target_variable', {})

        if not feature_variables:
            raise RuntimeError('You must supply one or more fields')

        if target_variable:
            raise RuntimeError('CorrelationMatrix does not support the from clause')

        valid_methods = ['spearman', 'kendall', 'pearson']

        # Check to see if parameters exist
        params = options.get('params', {})

        if 'method' in params:
            # An explicit method must be one of the supported ones
            method = params['method']
            if method not in valid_methods:
                error_msg = 'Invalid value for method: must be one of {}'.format(
                    ', '.join(valid_methods))
                raise RuntimeError(error_msg)
        else:
            # Default method for correlation
            method = 'pearson'

        # Reject unknown parameters whether or not method was supplied, so
        # that a mistyped parameter next to a valid method is not ignored.
        if any(name != 'method' for name in params):
            raise RuntimeError('The only valid parameter is method.')

        # Assign method to self for later usage
        self.method = method

    def fit(self, df, options):
        """Compute the correlations and return a DataFrame.

        Args:
            df: pandas DataFrame holding all the search results, including
                hidden fields.
            options: dict of search options (unused here).

        Returns:
            A DataFrame of pairwise correlations with the index moved into a
            regular column.
        """
        # df contains all the search results, including hidden fields
        # but the fields we requested are saved as self.feature_variables.
        # NOTE(review): this class never assigns self.feature_variables;
        # presumably the framework attaches it before fit is called - confirm.
        requested_columns = df[self.feature_variables]

        # Get correlations
        correlations = requested_columns.corr(method=self.method)

        # Reset index so that all the data are in columns
        # (this is usually not necessary, but is for the corr method)
        return correlations.reset_index()