CLN-ML Application

This section explains the internal mechanics of the CLN-ML pipeline, which classifies medium- to large-scale numeric datasets. It covers the accepted data formats, the available model architectures, training details, and deployment instructions for using the .bin model file after download.

Data Processing Details

Ensuring your data is in the correct format is crucial for the CLN-ML applet to function properly. The applet supports six formats:

Folder with two .txt files

Use a folder containing input.txt and output.txt, where each line in input.txt corresponds to a label in output.txt.

input.txt

23321
44432

output.txt

0
1
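
A rough illustration of how this layout could be read into parallel lists (load_folder is an illustrative name, not part of the applet's API):

def load_folder(path):
    # Line i of input.txt pairs with line i of output.txt.
    with open(f"{path}/input.txt") as fi, open(f"{path}/output.txt") as fo:
        inputs = [line.strip() for line in fi if line.strip()]
        labels = [line.strip() for line in fo if line.strip()]
    assert len(inputs) == len(labels), "line counts must match"
    return inputs, labels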

TXT file

A .txt file can also be used, where each line contains an input-output pair separated by a comma.

33212, 0
33215, 1
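
A comparable sketch for the comma-separated layout (again illustrative only):

def load_txt(path):
    # Each line is "input, output"; split on the first comma only.
    with open(path) as f:
        pairs = [line.split(",", 1) for line in f if line.strip()]
    return [p[0].strip() for p in pairs], [p[1].strip() for p in pairs]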

JSON file

The file should contain a list of objects, each with input and output fields.

[
  {
    "input": "88587",
    "output": "0"
  },
  {
    "input": "98747",
    "output": "1"
  }
]
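
This layout maps directly onto json.load; a minimal sketch (load_json is an illustrative name):

import json

def load_json(path):
    # Expects a top-level list of {"input": ..., "output": ...} objects.
    with open(path) as f:
        records = json.load(f)
    return [r["input"] for r in records], [r["output"] for r in records]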

CSV file

Ensure the first row contains headers: input and output.

input,output
88574,0
66547,1
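
Python's csv.DictReader keys each row by those headers, which is why they are mandatory; a minimal sketch:

import csv

def load_csv(path):
    # DictReader relies on the "input" and "output" header row.
    with open(path, newline="") as f:
        rows = list(csv.DictReader(f))
    return [r["input"] for r in rows], [r["output"] for r in rows]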

XML file

Use <input> and <output> tags inside <entry> elements.

<qaPairs>
  <entry>
    <input>88784</input>
    <output>0</output>
  </entry>
</qaPairs>
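
With the standard library, the <entry> elements can be walked like this (a sketch; the applet's own parser may differ):

import xml.etree.ElementTree as ET

def load_xml(path):
    # Collects the <input>/<output> children of every <entry>.
    entries = ET.parse(path).getroot().iter("entry")
    pairs = [(e.findtext("input"), e.findtext("output")) for e in entries]
    return [p[0] for p in pairs], [p[1] for p in pairs]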

YAML file

Each entry should include input and output fields.

- input: "8874"
  output: "0"
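
With PyYAML (an assumption; the applet may use a different parser), the whole list loads in one call:

import yaml  # third-party PyYAML, assumed available

def load_yaml(path):
    # Expects a top-level list of {input, output} mappings.
    with open(path) as f:
        records = yaml.safe_load(f)
    return [str(r["input"]) for r in records], [str(r["output"]) for r in records]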

Model Variants

You can choose one of these models:
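
The model code below references torch, nn, F, np, and a shared Model base class. The base class lives in the CLN-ML codebase; the imports are assumed to look roughly like this:

# Assumed imports for the model snippets in this section; Model is the
# CLN-ML base class and is not reproduced here.
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F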

Logistic Regression

A lightweight linear baseline suited to small and medium datasets; it can also scale to larger ones when the classes are close to linearly separable.

class LogisticRegressionCLN(Model, nn.Module):
    def __init__(self):
        super(LogisticRegressionCLN, self).__init__()
        self.linear = None  # created lazily once the feature dimension is known

    def initialise(self, d):
        # d is the number of input features.
        self.linear = nn.Linear(d, 1)

    def forward(self, x):
        x = x.to(torch.float32)
        y_predicted = torch.sigmoid(self.linear(x))
        # All CLN models return (prediction, regularization penalty);
        # unregularized models return a penalty of 0.
        return y_predicted, 0

Shallow Neural Network

class NeuralNetworkCLN(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLN, self).__init__()
        self.linear = None
        self.relu = nn.ReLU()
        self.output_layer = None
        self.sigmoid = nn.Sigmoid()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = self.relu(output)
        y_predicted = self.sigmoid(self.output_layer(output))
        return y_predicted, 0

Deep Neural Network

class DeepNeuralNetworkCLN(Model, nn.Module):
    def __init__(self):
        super(DeepNeuralNetworkCLN, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.model = nn.Sequential(
            nn.Linear(d, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid()
        )

    def forward(self, input):
        y_predicted = self.model(input.to(torch.float32))
        return y_predicted, 0

Regularized Models

These variants return a nonzero penalty as the second element of forward(); the training loop adds it to the data loss.

L2 Regularization

class NeuralNetworkCLNL2(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNL2, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        l2_penalty = sum(torch.norm(param, p=2) ** 2 for param in self.parameters())
        return y_predicted, l2_penalty

L1 Regularization

class NeuralNetworkCLNL1(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNL1, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        l1_penalty = sum(torch.abs(param).sum() for param in self.parameters())
        return y_predicted, l1_penalty

Elastic Net (L1 + L2)

class NeuralNetworkCLNElasticNet(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNElasticNet, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        l1_penalty = sum(torch.abs(param).sum() for param in self.parameters())
        l2_penalty = sum(torch.norm(param, p=2) ** 2 for param in self.parameters())
        return y_predicted, l1_penalty + l2_penalty

Dropout-Enhanced Neural Network

class NeuralNetworkCLNDropout(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNDropout, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(p=0.5)  # zero out half the hidden units during training

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        output = self.dropout(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        return y_predicted, 0

k-Nearest Neighbors (k-NN)

class kNN(Model, nn.Module):
    def __init__(self):
        super(kNN, self).__init__()

    def initialise(self, input, output):
        # k-NN is non-parametric: it stores the full training set.
        self.data = torch.tensor(input, dtype=torch.float32)
        self.labels = torch.tensor(output, dtype=torch.float32)

    def forward(self, input):
        input = torch.as_tensor(input, dtype=torch.float32)
        n_samples, d_features = input.shape
        votes = np.zeros((n_samples, 2), dtype=int)
        dist = torch.cdist(input, self.data)

        # Ensemble over neighbourhood sizes: every k from 1 to d casts
        # votes, which are accumulated rather than overwritten.
        for k in range(1, d_features + 1):
            knn_indices = dist.topk(k, largest=False).indices
            knn_labels = self.labels[knn_indices]
            votes[:, 0] += (knn_labels == 0).sum(dim=1).numpy()
            votes[:, 1] += (knn_labels == 1).sum(dim=1).numpy()

        final_predictions = np.argmax(votes, axis=1)
        return torch.tensor(final_predictions), 0
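
Unlike the parametric variants, kNN.initialise takes the training data itself rather than a feature dimension; a minimal, hypothetical usage sketch:

# k-NN memorizes the training set instead of learning weights.
knn = kNN()
knn.initialise(input=[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], output=[0, 1, 1])
predictions, _ = knn.forward(torch.tensor([[2.0, 3.0]]))  # one query row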

Hidden Size Decision Helper

def decide_hs(d):
    if d < 4:
        return 4
    if d < 16:
        return d
    if d < 64:
        return int(1.5 * d)
    return min(int(1.5 * d), 256)
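
A few worked values make the thresholds concrete:

>>> decide_hs(3)    # d < 4: floor of 4
4
>>> decide_hs(10)   # 4 <= d < 16: hidden size equals d
10
>>> decide_hs(40)   # 16 <= d < 64: widened to 1.5 * d
60
>>> decide_hs(400)  # d >= 64: 1.5 * d capped at 256
256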

Training Loop

def train(self, qthread=None, progress_updated=None, loss_updated=None):
    self.num_batches = len(self.train_loader)
    current_step = 0
    total_loss = 0

    self.model.train()  # enable training-mode behaviour (e.g. dropout)
    for x, y in self.train_loader:
        # CLN models return (prediction, regularization penalty).
        y_predicted, penalty = self.model(x)
        y = y.unsqueeze(1).to(torch.float32)
        loss = self.loss_fn(y_predicted, y) + penalty

        # Clear stale gradients before backpropagating; otherwise they
        # accumulate across batches.
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        total_loss += loss.item()
        current_step += 1
        if progress_updated is not None:
            progress_updated.emit(int((current_step / self.num_batches) * 100))
        if qthread is not None:
            qthread.msleep(1)  # brief yield so the GUI stays responsive

    avg_loss = total_loss / self.num_batches
    if loss_updated is not None:
        loss_updated.emit(avg_loss)
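
For orientation, here is a hypothetical sketch of how the attributes this loop depends on (self.model, self.train_loader, self.loss_fn, self.optimizer) might be wired up; the actual CLN-ML trainer may differ:

from torch.utils.data import DataLoader, TensorDataset

# Illustrative setup only; dimensions and hyperparameters are made up.
model = NeuralNetworkCLN()
model.initialise(d=5)

features = torch.randint(0, 10, (100, 5)).float()  # toy numeric inputs
targets = torch.randint(0, 2, (100,)).float()      # binary labels
train_loader = DataLoader(TensorDataset(features, targets),
                          batch_size=8, shuffle=True)

loss_fn = nn.BCELoss()  # pairs with the models' sigmoid outputs
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)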

Evaluation Loop

def evaluate(self, eval_dataset=None):
    self.model.eval()
    num_correct = 0
    num = 0

    # batch_size=1 so each torch.equal check scores exactly one sample.
    val_loader = DataLoader(eval_dataset, batch_size=1, shuffle=False) if eval_dataset else self.val_loader

    with torch.no_grad():
        for x, y in val_loader:
            outputs, _ = self.model(x)  # discard the regularization penalty
            y = y.unsqueeze(1).to(torch.float32)
            if torch.equal(y, torch.round(outputs)):
                num_correct += 1
            num += 1

    # Returns accuracy (fraction of correctly classified samples).
    return num_correct / num

Deployment (Use the Trained Model)

class UserCLN(User):
    def __init__(self, model, tokenizer, device, max_length):
        super().__init__(model, tokenizer, device, max_length)

    def use_model(self, input):
        # CLN models return (prediction, penalty); keep only the prediction.
        output, _ = self.model(input)
        return torch.round(output)

Example Usage

model_path = "path/to/model.bin"
model = LogisticRegressionCLN()
model.initialise(d=10)  # Same feature dimension used in training
model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
model.eval()

input_tensor = torch.rand(1, 10)  # Simulate test input

user = UserCLN(model=model, tokenizer=None, device="cpu", max_length=None)
prediction = user.use_model(input_tensor)
print("Predicted class:", int(prediction.item()))