CLN-ML Application

This section explains the internal mechanics of the CLN-ML pipeline, which can be used to classify medium to large volumes of numeric data. It covers the accepted data formats, the model architectures available, training details, and deployment instructions for using the .bin model file after download.

Data Processing Details

Ensuring your data is in the correct format is crucial for the CLT applet to function properly. The applet supports six different formats:

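The file should be a .json file containing a list of rows. As in the example below, each row is a list of numeric feature values followed by the class label (0 or 1) as its last element: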

[
    [42, 42, 0],
    [73, 73, 0],
    [10, 10, 0],
    [26, 26, 0],
    [91, 91, 0],
    [34, 11, 1],
    [90, 68, 1],
    [19, 72, 1],
    [88, 53, 1],
    [47, 39, 1]
]
  

Model Variants

You can choose one of these models:

Logistic Regression

Used for small, medium, and potentially large-scale number classification.

class LogisticRegressionCLN(Model, nn.Module):
    """Logistic-regression classifier: one linear layer followed by a sigmoid.

    The linear layer is not built in the constructor; ``initialise`` creates
    it once the number of input features is known.
    """

    def __init__(self):
        super().__init__()
        # Placeholder until ``initialise`` builds the real layer.
        self.linear = None

    def initialise(self, d):
        """Create the linear layer mapping ``d`` input features to one output."""
        self.linear = nn.Linear(in_features=d, out_features=1)

    def forward(self, x):
        """Return ``(y_predicted, 0)`` for the batch ``x``.

        ``x`` is cast to float32 before the linear layer; the sigmoid of the
        layer output is the prediction. The second element is always 0.
        """
        logits = self.linear(x.to(torch.float32))
        return torch.sigmoid(logits), 0

Shallow Neural Network

class NeuralNetworkCLN(Model, nn.Module):
    """One-hidden-layer network: Linear -> ReLU -> Linear -> Sigmoid.

    The two linear layers are created by ``initialise``; the activation
    modules are created in the constructor.
    """

    def __init__(self):
        super().__init__()
        # Linear layers are placeholders until ``initialise`` is called.
        self.linear = None
        self.relu = nn.ReLU()
        self.output_layer = None
        self.sigmoid = nn.Sigmoid()

    def initialise(self, d):
        """Build the hidden and output layers for ``d`` input features.

        The hidden width is chosen by ``decide_hs(d)``.
        """
        width = decide_hs(d)
        self.linear = nn.Linear(in_features=d, out_features=width)
        self.output_layer = nn.Linear(in_features=width, out_features=1)

    def forward(self, input):
        """Return ``(y_predicted, 0)``; ``input`` is cast to float32 first."""
        features = input.to(torch.float32)
        hidden = self.relu(self.linear(features))
        logits = self.output_layer(hidden)
        return self.sigmoid(logits), 0

Deep Neural Network

class DeepNeuralNetworkCLN(Model, nn.Module):
    """Two-hidden-layer network wrapped in a single ``nn.Sequential``.

    ``self.model`` only exists after ``initialise`` has been called.
    """

    def __init__(self):
        super().__init__()

    def initialise(self, d):
        """Build the layer stack for ``d`` input features.

        Both hidden layers share the width returned by ``decide_hs(d)``.
        """
        width = decide_hs(d)
        layers = [
            nn.Linear(d, width),
            nn.ReLU(),
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Linear(width, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, input):
        """Return ``(y_predicted, 0)``; ``input`` is cast to float32 first."""
        features = input.to(torch.float32)
        return self.model(features), 0

Regularized Models

  • L2 Regularization
class NeuralNetworkCLNL2(Model, nn.Module):
    """One-hidden-layer network that also reports an L2 penalty.

    ``forward`` returns the prediction together with the sum of squared
    2-norms of all parameters (weights and biases alike).
    """

    def __init__(self):
        super().__init__()

    def initialise(self, d):
        """Build the hidden and output layers; width comes from ``decide_hs(d)``."""
        width = decide_hs(d)
        self.linear = nn.Linear(in_features=d, out_features=width)
        self.output_layer = nn.Linear(in_features=width, out_features=1)

    def forward(self, input):
        """Return ``(y_predicted, l2_penalty)``; ``input`` is cast to float32."""
        features = input.to(torch.float32)
        hidden = F.relu(self.linear(features))
        y_predicted = torch.sigmoid(self.output_layer(hidden))

        # Accumulate ||p||_2^2 over every parameter tensor, starting from 0.
        l2_penalty = 0
        for param in self.parameters():
            l2_penalty = l2_penalty + torch.norm(param, p=2) ** 2
        return y_predicted, l2_penalty
  • L1 Regularization
class NeuralNetworkCLNL1(Model, nn.Module):
    """One-hidden-layer network that also reports an L1 penalty.

    ``forward`` returns the prediction together with the sum of absolute
    values of all parameters (weights and biases alike).
    """

    def __init__(self):
        super().__init__()

    def initialise(self, d):
        """Build the hidden and output layers; width comes from ``decide_hs(d)``."""
        width = decide_hs(d)
        self.linear = nn.Linear(in_features=d, out_features=width)
        self.output_layer = nn.Linear(in_features=width, out_features=1)

    def forward(self, input):
        """Return ``(y_predicted, l1_penalty)``; ``input`` is cast to float32."""
        features = input.to(torch.float32)
        hidden = F.relu(self.linear(features))
        y_predicted = torch.sigmoid(self.output_layer(hidden))

        # Accumulate sum(|p|) over every parameter tensor, starting from 0.
        l1_penalty = 0
        for param in self.parameters():
            l1_penalty = l1_penalty + torch.abs(param).sum()
        return y_predicted, l1_penalty
  • Elastic Net (L1 + L2)
class NeuralNetworkCLNElasticNet(Model, nn.Module):
    """One-hidden-layer network that reports a combined L1 + L2 penalty.

    The penalty is the unweighted sum of the L1 term (absolute values) and
    the L2 term (squared 2-norms) taken over all parameters.
    """

    def __init__(self):
        super().__init__()

    def initialise(self, d):
        """Build the hidden and output layers; width comes from ``decide_hs(d)``."""
        width = decide_hs(d)
        self.linear = nn.Linear(in_features=d, out_features=width)
        self.output_layer = nn.Linear(in_features=width, out_features=1)

    def forward(self, input):
        """Return ``(y_predicted, l1_penalty + l2_penalty)``."""
        features = input.to(torch.float32)
        hidden = F.relu(self.linear(features))
        y_predicted = torch.sigmoid(self.output_layer(hidden))

        # The two terms are accumulated separately and only added at the end.
        l1_penalty = 0
        for param in self.parameters():
            l1_penalty = l1_penalty + torch.abs(param).sum()

        l2_penalty = 0
        for param in self.parameters():
            l2_penalty = l2_penalty + torch.norm(param, p=2) ** 2

        return y_predicted, l1_penalty + l2_penalty

Dropout-Enhanced Neural Network

class NeuralNetworkCLNDropout(Model, nn.Module):
    """One-hidden-layer network with dropout (p=0.5) after the hidden ReLU."""

    def __init__(self):
        super().__init__()

    def initialise(self, d):
        """Build the hidden layer, output layer and dropout module.

        The hidden width is chosen by ``decide_hs(d)``.
        """
        width = decide_hs(d)
        self.linear = nn.Linear(in_features=d, out_features=width)
        self.output_layer = nn.Linear(in_features=width, out_features=1)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, input):
        """Return ``(y_predicted, 0)``; ``input`` is cast to float32 first."""
        features = input.to(torch.float32)
        hidden = F.relu(self.linear(features))
        # nn.Dropout only drops units while the module is in training mode.
        hidden = self.dropout(hidden)
        logits = self.output_layer(hidden)
        return torch.sigmoid(logits), 0

k-Nearest Neighbors (k-NN)

class kNN(Model, nn.Module):
    """k-nearest-neighbours binary classifier over a stored sample set.

    ``initialise`` stores the reference samples and their 0/1 labels;
    ``forward`` labels each query row by majority vote among its ``k``
    closest stored samples (Euclidean distance via ``torch.cdist``).

    NOTE(review): ``data`` and ``labels`` are plain attributes, not
    registered buffers, so they do not appear in ``state_dict()`` — confirm
    how a trained kNN is meant to be saved to a .bin file.
    """

    def __init__(self, k=1):
        """Create an empty classifier.

        Args:
            k: Number of neighbours that vote. Defaults to 1, the value that
                was previously hard-coded in ``forward``.
        """
        super(kNN, self).__init__()
        # Unused by this class; kept so the attribute still exists for any
        # external code that inspects it.
        self.linear = None
        self.k = k

    @staticmethod
    def _to_float_tensor(values):
        """Return an independent float32 tensor copy of ``values``."""
        if isinstance(values, torch.Tensor):
            # torch.tensor(tensor) warns; detach().clone() is the copy idiom.
            return values.detach().clone().to(torch.float32)
        return torch.tensor(values, dtype=torch.float32)

    def initialise(self, input, output):
        """Store the reference samples ``input`` and their labels ``output``."""
        self.data = self._to_float_tensor(input)
        self.labels = self._to_float_tensor(output)

    def forward(self, input):
        """Return ``(predictions, 0)`` for a 2-D batch of query rows.

        Raises:
            ValueError: If ``input`` is not 2-D or no samples are stored.
        """
        # Cast like the other models do, so integer inputs work with cdist.
        queries = input.to(torch.float32)
        if queries.dim() != 2:
            raise ValueError("kNN.forward expects a 2-D (samples, features) input")

        n_stored = self.data.shape[0]
        if n_stored == 0:
            raise ValueError("kNN has no stored samples; call initialise() first")
        # topk raises when k exceeds the number of candidates, so clamp it.
        k = max(1, min(int(self.k), n_stored))

        # Compute pairwise distances
        dist = torch.cdist(queries, self.data)

        # Get k nearest neighbors
        knn_indices = dist.topk(k, largest=False).indices
        knn_labels = self.labels[knn_indices]

        # Count votes
        votes_for_zero = (knn_labels == 0).sum(dim=1)
        votes_for_one = (knn_labels == 1).sum(dim=1)

        # Pick the label with more votes; a tie resolves to class 0, which is
        # what argmax over [votes_0, votes_1] gave.
        final_predictions = votes_for_one > votes_for_zero

        # Make shape and dtype compatible with BCELoss
        return final_predictions.unsqueeze(1).float(), 0

Hidden Size Decision Helper

def decide_hs(d):
    """Choose the hidden-layer width for an input of ``d`` features.

    Returns 4 when ``d`` is below 4, ``d`` itself when below 16, and
    otherwise ``1.5 * d`` truncated to an int and capped at 256.
    """
    minimum_width, maximum_width = 4, 256

    if d < minimum_width:
        hidden = minimum_width
    elif d < 16:
        hidden = d
    else:
        # For d < 64 the cap can never apply (1.5 * d < 96), so one
        # expression covers both the mid-size and the large case.
        hidden = min(int(1.5 * d), maximum_width)
    return hidden

Training Loop

def train(self, qthread=None, progress_updated=None, loss_updated=None):
    """Run one pass over ``self.train_loader``, updating the model each batch.

    Reads ``self.model``, ``self.loss_fn``, ``self.optimizer`` and
    ``self.train_loader``; writes ``self.num_batches``.

    Args:
        qthread: Optional object with ``msleep(ms)``; called with 1 after
            every batch. Skipped when ``None``.
        progress_updated: Optional signal-like object with ``emit(int)``;
            receives the percentage of batches completed after every batch.
        loss_updated: Optional signal-like object with ``emit(float)``;
            receives the average batch loss once the pass is finished.
    """
    total_steps = len(self.train_loader)
    self.num_batches = total_steps
    total_loss = 0.0

    # evaluate() switches the model to eval mode and never switches back;
    # re-enable training mode so layers such as Dropout are active again.
    self.model.train()

    for current_step, (x, y) in enumerate(self.train_loader, start=1):
        # Clear gradients from the previous batch; backward() accumulates
        # into .grad, so without this every step uses a running sum.
        self.optimizer.zero_grad()

        output = self.model(x)
        if isinstance(output, tuple):
            # The CLN models return (prediction, regularisation penalty).
            # NOTE(review): the penalty is added unweighted because no
            # coefficient is visible here — confirm the intended weighting.
            y_predicted, penalty = output
        else:
            y_predicted, penalty = output, 0

        y = y.unsqueeze(1).to(torch.float32)
        loss = self.loss_fn(y_predicted, y) + penalty
        total_loss += loss.item()

        loss.backward()
        self.optimizer.step()

        if progress_updated is not None:
            progress_updated.emit(int((current_step / total_steps) * 100))
        if qthread is not None:
            qthread.msleep(1)

    # An empty loader would otherwise divide by zero.
    avg_loss = total_loss / total_steps if total_steps else 0.0
    if loss_updated is not None:
        loss_updated.emit(avg_loss)

Evaluation Loop

def evaluate(self, eval_dataset=None):
    """Return the fraction of samples whose rounded prediction equals the label.

    Puts ``self.model`` into eval mode (and leaves it there) and runs under
    ``torch.no_grad()``.

    Args:
        eval_dataset: Optional dataset of ``(x, y)`` pairs. When given it is
            wrapped in a ``DataLoader`` with ``batch_size=1`` and no
            shuffling; otherwise ``self.val_loader`` is used.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when there are no samples.
    """
    self.model.eval()
    correct = 0
    seen = 0

    if eval_dataset is not None:
        val_loader = DataLoader(eval_dataset, batch_size=1, shuffle=False)
    else:
        val_loader = self.val_loader

    with torch.no_grad():
        for x, y in val_loader:
            outputs = self.model(x)
            if isinstance(outputs, tuple):
                # The CLN models return (prediction, penalty); only the
                # prediction is needed for scoring.
                outputs = outputs[0]

            # Compare element-wise so each sample counts once; torch.equal
            # scored a whole batch as a single hit or miss.
            predictions = torch.round(outputs).reshape(-1)
            targets = y.to(torch.float32).reshape(-1)
            correct += (predictions == targets).sum().item()
            seen += targets.numel()

    return correct / seen if seen else 0.0

Deployment (Use the Trained Model)

class UserCLN(User):
    """Inference wrapper that turns a CLN model's output into a hard 0/1 label.

    All constructor arguments are forwarded unchanged to ``User``.
    NOTE(review): ``use_model`` reads ``self.model``, presumably set by
    ``User.__init__`` from the ``model`` argument — confirm.
    """

    def __init__(self, model, tokenizer, device, max_length):
        super().__init__(model, tokenizer, device, max_length)

    def use_model(self, input):
        """Run the model on ``input`` and return the rounded prediction."""
        output = self.model(input)
        if isinstance(output, tuple):
            # The CLN models return (prediction, penalty); torch.round cannot
            # take the tuple, so keep only the prediction.
            output = output[0]
        return torch.round(output)

Example Usage

class UserCLN(User):
    """Inference wrapper that turns a CLN model's output into a hard 0/1 label.

    All constructor arguments are forwarded unchanged to ``User``.
    NOTE(review): ``use_model`` reads ``self.model``, presumably set by
    ``User.__init__`` from the ``model`` argument — confirm.
    """

    def __init__(self, model, tokenizer, device, max_length):
        super().__init__(model, tokenizer, device, max_length)

    def use_model(self, input):
        """Run the model on ``input`` and return the rounded prediction."""
        output = self.model(input)
        if isinstance(output, tuple):
            # The CLN models return (prediction, penalty); torch.round cannot
            # take the tuple, so keep only the prediction.
            output = output[0]
        return torch.round(output)


# Rebuild the architecture first, then load the trained weights into it.
model_path = "path/to/model.bin"
model = LogisticRegressionCLN()
model.initialise(d=10)  # Same feature dimension used in training

# NOTE(security): torch.load unpickles the file; only load .bin files from a
# source you trust (recent PyTorch versions also offer weights_only=True).
model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
model.eval()

input_tensor = torch.rand(1, 10)  # Simulate test input

user = UserCLN(model=model, tokenizer=None, device="cpu", max_length=None)

# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    prediction = user.use_model(input_tensor)
print("Predicted class:", int(prediction.item()))