Scikit-learn Pipeline and GridSearchCV with the OPU

Since the lightonml API is scikit-learn compliant, you can use lightonml transforms in a Pipeline and, for example, run a grid search over hyperparameters using GridSearchCV.

import warnings
# Silence all warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')
# `IPython.display` is the public import location; importing `display` from
# `IPython.core.display` is deprecated in recent IPython releases.
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

from sklearn.model_selection import train_test_split
from sklearn.linear_model import RidgeClassifier
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from lightonml.datasets import MNIST

# Seeded NumPy random generator (seed 1234).
random_state = np.random.RandomState(1234)

# Load the MNIST train/test partitions, then merge them so that a custom
# split can be drawn from the full dataset.
(X_train, y_train), (X_test, y_test) = MNIST()
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test])

# Hold out 10000 samples for testing; the integer seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=10000, random_state=42
)

OPU pipeline

To define a flow of operations, we can conveniently use sklearn.pipeline.Pipeline. In this way, we can easily perform cross-validation on the hyperparameters of the model.

from sklearn.pipeline import Pipeline
# Ordered list of (name, estimator) tuples; the sections below each append one stage.
pipeline_steps = []

Data and data encoding

from lightonml.encoding.base import BinaryThresholdEncoder
# Instantiate the encoder with its default settings.
encoder = BinaryThresholdEncoder()
# Show the threshold the encoder will use (its `threshold_enc` attribute).
print('Encoder threshold: ', encoder.threshold_enc)
Encoder threshold:  25
# Register the encoder as the first pipeline stage, under the name 'encoder'.
pipeline_steps.append(('encoder', encoder))

Random Mapping on the OPU

from lightonopu.opu import OPU
from lightonml.random_projections.opu import OPURandomMapping

# Object used to interface with the OPU hardware.
# NOTE(review): the meaning of the two positional arguments (700, 400) is not
# visible here -- confirm against the lightonopu `OPU` signature.
opu = OPU(700, 400)

# Number of random projections.
n_components = 10000
# '2d_macro_pixels' repeats the same value in a given area on the DMD.
position = '2d_macro_pixels'

# disable_pbar=True turns off the progress bar that is otherwise displayed
# while the random projections are being performed.
random_mapping = OPURandomMapping(
    opu=opu,
    n_components=n_components,
    position=position,
    disable_pbar=True,
)
pipeline_steps.append(('mapping', random_mapping))

Decoding

Some encoders, like SeparatedBitPlanEncoder, need a specific decoder to decode the random features. In this case we don’t need one, so we can use the NoDecoding class or just skip this pipeline step.

from lightonml.encoding.base import NoDecoding
# Placeholder decoding stage: the encoder used here needs no dedicated decoder.
pipeline_steps.append(('decoding', NoDecoding()))

Model

# Final stage: a ridge classifier with default hyperparameters.
classifier = RidgeClassifier()
pipeline_steps.append(('classifier', classifier))

Instantiate and run the pipeline

# Assemble the collected stages into a single scikit-learn estimator.
pipe = Pipeline(steps=pipeline_steps)
# Notebook display: show the name -> estimator mapping of the pipeline steps.
pipe.named_steps
{'classifier': RidgeClassifier(alpha=1.0, class_weight=None, copy_X=True, fit_intercept=True,
         max_iter=None, normalize=False, random_state=None, solver='auto',
         tol=0.001),
 'decoding': NoDecoding(),
 'encoder': BinaryThresholdEncoder(greater_is_one=True, threshold_enc=25),
 'mapping': OPURandomMapping(disable_pbar=True, n_components=10000,
          opu=<lightonopu.opu.OPU object at 0x7f2cffd2ae10>,
          position='2d_macro_pixels', roi_position=(0, 0),
          roi_shape=(1140, 912))}

Opening the OPU

# Open the OPU before any projection is requested; the matching
# `opu.close()` comes after the grid search further down.
opu.open()
print('Fitting the model...')
# Run the training data through every stage in order and fit the classifier.
pipe.fit(X_train, y_train)
Fitting the model...
OPU: random projections of an array of size (60000,784)
Pipeline(memory=None,
     steps=[('encoder', BinaryThresholdEncoder(greater_is_one=True, threshold_enc=25)), ('mapping', OPURandomMapping(disable_pbar=True, n_components=10000,
         opu=<lightonopu.opu.OPU object at 0x7f2cffd2ae10>,
         position='2d_macro_pixels', roi_position=(0, 0),
         roi_shape=(1140, 912))...True,
        max_iter=None, normalize=False, random_state=None, solver='auto',
        tol=0.001))])
# Each `score` call pushes the data through the whole pipeline again, so the
# OPU recomputes the random projections for both the train and the test set.
train_accuracy = pipe.score(X_train, y_train)
test_accuracy = pipe.score(X_test, y_test)

# Report the accuracies as percentages with two decimals.
print('Train accuracy {:.2f}'.format(train_accuracy * 100))
print('Test accuracy {:.2f}'.format(test_accuracy * 100))
OPU: random projections of an array of size (60000,784)
OPU: random projections of an array of size (10000,784)
Train accuracy 98.38
Test accuracy 96.13
from sklearn.model_selection import ShuffleSplit, GridSearchCV

# Candidate values of the ridge regularization strength alpha: 10**-1 and 10**0.
alpha_values = 10. ** np.arange(-1, 1)
# Parameter grid; the 'classifier__' prefix routes alpha to the pipeline
# step named 'classifier'.
grid_parameters = [{'classifier__alpha': alpha_values}]

# Cross-validation scheme: 2 random splits, each holding out 15% of the
# training data. The splitter is seeded with the notebook-level
# `random_state` so that the same folds are drawn whenever the notebook is
# run from the top (an unseeded ShuffleSplit changes folds on every run).
cv_scheme = ShuffleSplit(n_splits=2, test_size=0.15, random_state=random_state)

# refit=False: only collect the cross-validation scores, without retraining a
# final model on the full training set.
# return_train_score=True: also record the score on each training split.
grid_search = GridSearchCV(pipe, grid_parameters, cv=cv_scheme, refit=False, return_train_score=True)
grid_search.fit(X_train, y_train)
OPU: random projections of an array of size (51000,784)
OPU: random projections of an array of size (9000,784)
OPU: random projections of an array of size (51000,784)
OPU: random projections of an array of size (51000,784)
OPU: random projections of an array of size (9000,784)
OPU: random projections of an array of size (51000,784)
OPU: random projections of an array of size (51000,784)
OPU: random projections of an array of size (9000,784)
OPU: random projections of an array of size (51000,784)
OPU: random projections of an array of size (51000,784)
OPU: random projections of an array of size (9000,784)
OPU: random projections of an array of size (51000,784)
GridSearchCV(cv=ShuffleSplit(n_splits=2, random_state=None, test_size=0.15, train_size=None),
       error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('encoder', BinaryThresholdEncoder(greater_is_one=True, threshold_enc=25)), ('mapping', OPURandomMapping(disable_pbar=True, n_components=10000,
         opu=<lightonopu.opu.OPU object at 0x7f2cffd2ae10>,
         position='2d_macro_pixels', roi_position=(0, 0),
         roi_shape=(1140, 912))...True,
        max_iter=None, normalize=False, random_state=None, solver='auto',
        tol=0.001))]),
       fit_params=None, iid=True, n_jobs=1,
       param_grid=[{'classifier__alpha': array([ 0.1,  1. ])}],
       pre_dispatch='2*n_jobs', refit=False, return_train_score=True,
       scoring=None, verbose=0)
# Release the OPU now that fitting, scoring and the grid search are done.
opu.close()
import pandas as pd

# Tabulate the cross-validation results: one row per parameter setting,
# one column per recorded metric.
pd.DataFrame(grid_search.cv_results_)
  mean_fit_time mean_score_time mean_test_score mean_train_score param_classifier_alpha rank_test_score split0_test_score split0_train_score split1_test_score split1_train_score std_fit_time std_score_time std_test_score std_train_score
0 99.118136 12.622288 0.957111 0.985490 0.1 2 0.961444 0.985118 0.952778 0.985863 0.307551 0.231072 0.004333 0.000373
1 99.969269 13.894830 0.961056 0.985549 1 1 0.963889 0.985529 0.958222 0.985569 0.742741 1.466716 0.002833 0.000020