Source code for bsmart.ml.mls_NN

"""

Helper functions for the xBit MLS scan


"""

import math
import numpy as np
import os
import random
from bsmart import core
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable

"""

Functions from xBit aux

"""


[docs]
def LINEAR_DIFF(a, b):
    return 1. * abs(a - b)


# gauss likelihood: do we need something else?

[docs]
def gauss(val, m, s):
    return math.exp(-(val-m)**2/(2.*s)**2)




[docs]
def logL(val, m, s):
    return 1./(1.+(val-m)**2/(2.*s)**2)




[docs]
def limit_lh(steps):
    if (steps < 1.0e4):
        return 0.1
    elif (steps < 1.0e5):
        return 0.01
    elif (steps < 1.0e6):
        return 0.001
    else:
        return 1.0e-5




[docs]
def step(x):
    if x > 0.5:
        return 1.
    else:
        return 0.




[docs]
def exp_safe(x):
    try:
        out = math.exp(x)
    except OverflowError:
        out = float('inf')
    return out




[docs]
def make_safe_lh(x):
    """
        curb extremes of a function, plateau at 100000
    """
    #return 100.0*math.tanh(x/100.0)
    return 1000000.0*math.tanh(x/1000000.0)



[docs]
class NNMLS:
    def __init__(self, inputs):
        self.input_dim = len(inputs['Variables'])

        #self.train_lh = self.inputs['ML']['TrainLH']
        self.train_lh = eval(inputs['Setup'].get('TrainLH',"True"))


        
        if self.train_lh:
            self.log.info('Will train predictor on Likelihood')
            self.output_dim = 1
        else:
            self.log.info('Will train predictor on observables')
            #self.output_dim = len(self.inputs['Observables']) # this picks up all observables defined even in the tools
            # need to find how many observables we are *actually* using. 
            #self.output_dim = len(self.likelihoodfns)
            self.output_dim= len([x for x in self.observable_masks if x]) # these are false if masked
        
        self.log.info('Setting up predictor with %d outputs' % self.output_dim)
        self.epochs = inputs['Setup'].get('Epochs',500)
        self.iterations = inputs['Setup'].get('Iterations',20)
        self.neurons = inputs['Setup'].get('Neurons',[50,50,50])
        self.LR = inputs['Setup'].get('LR',0.001)
        
        if 'DensityPenalty' not in inputs['Setup']:
            self.distance_penalty = 0.1
        else:
            self.distance_penalty = inputs['Setup'].get('DensityPenality')  ## string because of eval
        
        
        self.use_classifier = eval(inputs['Setup'].get('Classifier',"False"))


[docs]
    def get_train_lh(self):
        return self.train_lh



[docs]
    def set_predictor(self):
        self.log.debug("Neurons: %s" %str(self.neurons))
        self.map1 = nn.Linear(self.input_dim, self.neurons[0])
        layers = [self.map1, nn.ReLU(), nn.Dropout(0.1)]

        for i in range(1,len(self.neurons)):
            layers.append(nn.Linear(self.neurons[i - 1], self.neurons[i]))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(0.1))

        layers.append(nn.Linear(self.neurons[-1], self.output_dim))

        self.predictor = nn.Sequential(*layers)
        self.predictor_optimizer = optim.Adam(self.predictor.parameters(),lr=self.LR)
        self.predictor_criterion = nn.MSELoss()



[docs]
    def set_classifier(self):
        self.classifier = nn.Sequential(nn.Linear(self.input_dim, 50), nn.ReLU(), nn.Dropout(0.1),nn.Linear(50, 50), nn.ReLU(), nn.Dropout(0.1), nn.Linear(50,2), nn.Sigmoid())
        self.classifier_optimizer = optim.Adam(self.classifier.parameters(),lr=self.LR)
        self.classifier_criterion = nn.BCELoss()



[docs]
    def splitter(self, x, y):   # split torch tensors into training and testing sets
        setlen=x.size(0)
        self.log.debug("Splitting dataset of size %d "%setlen)
        try:
            assert(y.size(0) == setlen)
        except Exception as e:
            self.log.info("Length of inputs/ouptuts %d vs  %d are not equal, %s" %(setlen,y.size(0),str(e)))
            raise
        perm = torch.randperm(setlen)
        splitind=int(0.8*float(setlen))
        idtrain = perm[:splitind]
        idtest=perm[splitind:]
        return x[idtrain], y[idtrain], x[idtest], y[idtest]



[docs]
    def do_train(self, x_val, y_val, name):
        #self.log.info(f"calling do_train with {len(x_val)} points and {self.inputs['Setup']['Cores']} cores")
        self.log.info("calling do_train with %d points and %d cores" %(len(x_val),self.inputs['Setup']['Cores']))
        #print(x_val)
        #print(y_val)
        #print(f"x: {x_val} \n y: {y_val}")
        #self.classifier = nn.Sequential(nn.Linear(self.input_dim, 50), nn.ReLU(), nn.Dropout(0.1),nn.Linear(50, 50), nn.ReLU(), nn.Dropout(0.1), nn.Linear(50,2), nn.Sigmoid())
        torch.set_num_threads(self.inputs['Setup']['Cores'])
        
        best = 1.0e6        
        wait = 0
        batch_size = 64
        nr_batches = max(1, int(len(x_val) / batch_size))
        #patience = max(100, self.epochs / 5 / nr_batches)
        patience = max(20, self.epochs / 5 / nr_batches)  ## number of epochs to wait if there is no improvement in loss

        #self.log.info(f"Patience is {patience}")
        self.log.info("Patience is %.1f" %patience)
        #torchx=torch.tensor(x_val)
        #torchx=x_val

        if name is self.predictor:
            criterion = nn.MSELoss()                                        #self.predictor_criterion
            optimizer = optim.Adam(self.predictor.parameters(),lr=self.LR)  #self.predictor_optimizer
            torchx=x_val # feed it a torch tensor if it is a predictor
            torchy=torch.tensor(y_val).reshape((-1,self.output_dim))
            checkpoint_name=os.path.join(self.inputs['Setup']['RunName'],'checkpoint_predictor.pt')
        else:
            criterion = nn.BCELoss()                                        #self.classifier_criterion
            optimizer = optim.Adam(self.classifier.parameters(),lr=self.LR) #self.classifier_optimizer
            torchx=torch.tensor(x_val)
            torchy=torch.tensor(y_val).reshape((-1,2))  # for some stupid reason have two outputs
            checkpoint_name=os.path.join(self.inputs['Setup']['RunName'],'checkpoint_classifier.pt')
        
        
        x_train, y_train, x_test, y_test = self.splitter(torchx,torchy)

        permutation = torch.randperm(x_train.size(0))

        ## want to get the best lost from *previous* run!
        if os.path.exists(checkpoint_name):
            old_checkpoint = torch.load(checkpoint_name)
            previous_best = old_checkpoint['loss']

        for epoch in range(self.epochs):
            name.train()
            for _ in range(x_train.size()[1]):
                for i in range(0, x_train.size(0), batch_size):
                    optimizer.zero_grad()
                    indices = permutation[i:i+batch_size]
                    x_batch = x_train[indices]
                    y_batch = y_train[indices] #[indices]
                    self.loss = criterion(name(Variable(x_batch)), Variable(y_batch))
                    self.loss.backward()
                    optimizer.step()
                name.eval()

            y_pred = name(Variable(x_test).data.cpu())
            loss = criterion(Variable(y_pred), Variable(y_test))

            if epoch == 0:  # have to deal with the case that the initial loss is *worse* than 1e6!
                best = loss

            if epoch % int(self.epochs/10) == 0:
                self.log.info("Epoch: %i; loss %f, best loss %f" %(epoch, loss,best))

            if loss < best:
                best = loss
                wait = 0
                
                torch.save({'state_dict': name.state_dict(), 'optimizer': optimizer.state_dict(), 'loss': self.loss}, checkpoint_name)
            else:
                wait = wait + 1

            if (wait > patience or epoch == (self.epochs - 1)) and os.path.exists(checkpoint_name):
                """
                    This stops training when there is no more gain and reloads the best network
                """
                self.log.info("Loading state dict %s" % checkpoint_name)
                checkpoint = torch.load(checkpoint_name)
                name.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                self.loss = checkpoint['loss']
                break
        #self.log.info(f"Completed training after {epoch+1} epochs")
        self.log.info("Completed training after %d epochs" % (epoch+1))



[docs]
    def scale_data(self, input):
        """

            This is needed to curb excess values 
            'input' is a list of observables
       
        """
        self.scalings = []
        transposedinput = np.array(input).T
        data_out = None
        for data in transposedinput:
            #print(data)
            if max(data) / min(data) > 100:
                if data_out is None:
                    data_out = np.array([np.log(data)])
                else:
                    data_out = np.concatenate((data_out, [np.log(data)]), axis = 0)
                self.scalings.append("log")
            else:
                if data_out is None:
                    data_out = np.array([data])
                else:
                    data_out = np.concatenate((data_out, [data]), axis = 0)
                self.scalings.append("id")

        return((data_out.T).tolist())

    

[docs]
    def rescale_data(self, input):
        """
            undoes the effect of the rescaling, to return to the actual observables
        """
        transposedinput = np.array(input).T
        data_out = None
        for data, scale in zip(transposedinput, self.scalings):
            if scale == "log":
                if data_out is None:  # why doesn't it work with empty lists?
                    data_out = np.array([np.exp(data)])
                else:
                    data_out = np.concatenate((data_out, [np.exp(data)]),axis=0)
            elif scale == "id":
                if data_out is None:
                    data_out = np.array([(data)])
                else:
                    data_out = np.concatenate((data_out, [data]), axis=0)
        return((data_out.T).tolist())