CLN-ML Application
This section explains the internal mechanics of the CLN-ML pipeline, which classifies medium- to large-scale numeric datasets. It covers the accepted data formats, the available model architectures, training and evaluation details, and deployment instructions for using the downloaded .bin model file.
Data Processing Details
Ensuring your data is in the correct format is crucial for the CLN-ML applet to function properly. The applet supports six input formats; the one documented here is a .json file containing a list of rows, where each row holds the feature values followed by a binary class label (0 or 1):
[
[42, 42, 0],
[73, 73, 0],
[10, 10, 0],
[26, 26, 0],
[91, 91, 0],
[34, 11, 1],
[90, 68, 1],
[19, 72, 1],
[88, 53, 1],
[47, 39, 1]
]
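A minimal loading sketch for this format (the helper name load_cln_dataset is illustrative, not part of the applet's API):

import json
import torch

def load_cln_dataset(path):
    # Each row is [feature_1, ..., feature_d, label]; the last column is the binary label.
    with open(path) as f:
        rows = json.load(f)
    data = torch.tensor(rows, dtype=torch.float32)
    return data[:, :-1], data[:, -1]  # features, labels

x, y = load_cln_dataset("train.json")
print(x.shape, y.shape)  # e.g. torch.Size([10, 2]) torch.Size([10])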
Model Variants
You can choose one of these models:
Logistic Regression
A linear baseline suited to small-, medium-, and potentially large-scale number classification.
# Shared imports for all model variants below; Model is the applet's common base class.
import torch
import torch.nn as nn
import torch.nn.functional as F

class LogisticRegressionCLN(Model, nn.Module):
    def __init__(self):
        super(LogisticRegressionCLN, self).__init__()
        self.linear = None  # created once the feature dimension is known

    def initialise(self, d):
        self.linear = nn.Linear(d, 1)  # d input features -> one logit

    def forward(self, x):
        x = x.to(torch.float32)
        y_predicted = torch.sigmoid(self.linear(x))
        # The second return value is the regularization penalty (0 here).
        return y_predicted, 0
Shallow Neural Network
class NeuralNetworkCLN(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLN, self).__init__()
        self.linear = None
        self.relu = nn.ReLU()
        self.output_layer = None
        self.sigmoid = nn.Sigmoid()

    def initialise(self, d):
        hidden_size = decide_hs(d)  # see the hidden size helper below
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = self.relu(output)
        y_predicted = self.sigmoid(self.output_layer(output))
        return y_predicted, 0
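As a quick sanity check, every variant follows the same two-step construct/initialise pattern and can be probed with a dummy batch (shapes below assume the two-feature JSON example):

model = NeuralNetworkCLN()
model.initialise(d=2)                   # two features per sample
batch = torch.randint(0, 100, (4, 2))   # four integer samples
y_pred, penalty = model(batch)          # forward casts to float32 internally
print(y_pred.shape)                     # torch.Size([4, 1]), probabilities in (0, 1)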
Deep Neural Network
class DeepNeuralNetworkCLN(Model, nn.Module):
    def __init__(self):
        super(DeepNeuralNetworkCLN, self).__init__()
        self.model = None  # built in initialise, once d is known

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.model = nn.Sequential(
            nn.Linear(d, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid()
        )

    def forward(self, input):
        y_predicted = self.model(input.to(torch.float32))
        return y_predicted, 0
Regularized Models
- L2 Regularization
class NeuralNetworkCLNL2(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNL2, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        # Sum of squared L2 norms over all weights and biases
        l2_penalty = sum(torch.norm(param, p=2) ** 2 for param in self.parameters())
        return y_predicted, l2_penalty
- L1 Regularization
class NeuralNetworkCLNL1(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNL1, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        l1_penalty = sum(torch.abs(param).sum() for param in self.parameters())
        return y_predicted, l1_penalty
- Elastic Net (L1 + L2)
class NeuralNetworkCLNElasticNet(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNElasticNet, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        l1_penalty = sum(torch.abs(param).sum() for param in self.parameters())
        l2_penalty = sum(torch.norm(param, p=2) ** 2 for param in self.parameters())
        return y_predicted, l1_penalty + l2_penalty
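For all three regularized variants, the second value returned by forward is the raw penalty; it still has to be scaled and added to the data loss. A minimal sketch, where reg_lambda is a hypothetical weight (the training loop below adds the penalty unscaled):

criterion = nn.BCELoss()
reg_lambda = 1e-4                        # hypothetical weight; tune per dataset

model = NeuralNetworkCLNL2()
model.initialise(d=2)
y_pred, penalty = model(torch.rand(4, 2))
target = torch.randint(0, 2, (4, 1)).float()
loss = criterion(y_pred, target) + reg_lambda * penalty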
Dropout-Enhanced Neural Network
class NeuralNetworkCLNDropout(Model, nn.Module):
    def __init__(self):
        super(NeuralNetworkCLNDropout, self).__init__()

    def initialise(self, d):
        hidden_size = decide_hs(d)
        self.linear = nn.Linear(d, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)
        self.dropout = nn.Dropout(p=0.5)  # zero out half the hidden units during training

    def forward(self, input):
        x = input.to(torch.float32)
        output = self.linear(x)
        output = F.relu(output)
        output = self.dropout(output)
        y_predicted = torch.sigmoid(self.output_layer(output))
        return y_predicted, 0
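Dropout is only active in training mode; switching to .eval() (as the evaluation loop below does) disables it, which is easy to verify:

model = NeuralNetworkCLNDropout()
model.initialise(d=2)
x = torch.rand(1, 2)

model.train()                    # dropout active: repeated calls can differ
print(model(x)[0], model(x)[0])
model.eval()                     # dropout disabled: calls are deterministic
print(model(x)[0], model(x)[0])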
k-Nearest Neighbors (k-NN)
class kNN(Model, nn.Module):
    def __init__(self):
        super(kNN, self).__init__()

    def initialise(self, input, output):
        # kNN stores the training set instead of learning weights
        self.data = torch.tensor(input, dtype=torch.float32)
        self.labels = torch.tensor(output, dtype=torch.float32)

    def forward(self, input):
        n_samples, d_features = input.shape
        k = 1  # Change if tuning
        # Compute pairwise distances
        dist = torch.cdist(input, self.data)
        # Get k nearest neighbors
        knn_indices = dist.topk(k, largest=False).indices
        knn_labels = self.labels[knn_indices]
        # Count votes
        votes = torch.zeros((n_samples, 2), dtype=torch.int32)
        votes[:, 0] = (knn_labels == 0).sum(dim=1)
        votes[:, 1] = (knn_labels == 1).sum(dim=1)
        # Pick the label with more votes
        final_predictions = torch.argmax(votes, dim=1)
        # Make shape and dtype compatible with BCELoss
        return final_predictions.unsqueeze(1).float(), 0
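Unlike the parametric variants, kNN has no trainable weights: initialise simply stores the training set and forward classifies by majority vote among the k nearest stored points. A small sketch using rows from the JSON example:

knn = kNN()
knn.initialise([[42, 42], [73, 73], [34, 11], [90, 68]],  # features
               [0, 0, 1, 1])                              # labels
query = torch.tensor([[40.0, 45.0]])
pred, _ = knn(query)
print(int(pred.item()))  # 0: the nearest stored point is [42, 42]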
Hidden Size Decision Helper
def decide_hs(d):
    # Heuristic: floor of 4 for tiny inputs, d itself for mid-size inputs,
    # 1.5 * d for larger inputs, capped at 256.
    if d < 4:
        return 4
    if d < 16:
        return d
    if d < 64:
        return int(1.5 * d)
    return min(int(1.5 * d), 256)
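A few worked examples of the heuristic:

decide_hs(2)    # -> 4    (floor of 4 for very small inputs)
decide_hs(10)   # -> 10   (hidden size matches d)
decide_hs(32)   # -> 48   (1.5 * d)
decide_hs(300)  # -> 256  (capped at 256)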
Training Loop
def train(self, qthread=None, progress_updated=None, loss_updated=None):
    self.model.train()  # enable dropout, if any
    self.num_batches = len(self.train_loader)
    total_steps = len(self.train_loader)
    current_step = 0
    total_loss = 0
    for x, y in self.train_loader:
        y_predicted, penalty = self.model(x)  # models return (prediction, penalty)
        y = y.unsqueeze(1).to(torch.float32)
        # penalty is 0 for unregularized models; scale it here if a weight is desired
        loss = self.loss_fn(y_predicted, y) + penalty
        total_loss += loss.item()
        self.optimizer.zero_grad()  # clear gradients from the previous batch
        loss.backward()
        self.optimizer.step()
        current_step += 1
        progress = int((current_step / total_steps) * 100)
        progress_updated.emit(progress)
        qthread.msleep(1)  # yield briefly so the UI thread stays responsive
    avg_loss = total_loss / len(self.train_loader)
    loss_updated.emit(avg_loss)
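The loop assumes the trainer already holds self.model, self.loss_fn, self.optimizer, and self.train_loader. A minimal sketch of equivalent setup (the optimizer choice and hyperparameters here are illustrative, not the applet's defaults):

from torch.utils.data import DataLoader, TensorDataset

x, y = load_cln_dataset("train.json")  # see the loader sketch above
model = NeuralNetworkCLN()
model.initialise(d=x.shape[1])

loss_fn = nn.BCELoss()                 # expects probabilities in (0, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
train_loader = DataLoader(TensorDataset(x, y), batch_size=2, shuffle=True)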
Evaluation Loop
def evaluate(self, eval_dataset=None):
    self.model.eval()  # disable dropout for deterministic predictions
    num_correct = 0
    num = 0
    val_loader = DataLoader(eval_dataset, batch_size=1, shuffle=False) if eval_dataset else self.val_loader
    with torch.no_grad():
        for x, y in val_loader:
            outputs, _ = self.model(x)  # discard the penalty term
            y = y.unsqueeze(1).to(torch.float32)
            if torch.equal(y, torch.round(outputs)):
                num_correct += 1
            num += 1
    return num_correct / num  # accuracy over the evaluation set
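The return value is the fraction of samples whose rounded prediction matches the label, i.e. accuracy. A sketch of calling it on a held-out set (trainer is a hypothetical instance of the class holding these methods):

from torch.utils.data import TensorDataset

x_val, y_val = load_cln_dataset("val.json")  # see the loader sketch above
accuracy = trainer.evaluate(eval_dataset=TensorDataset(x_val, y_val))
print(f"Validation accuracy: {accuracy:.2%}")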
Deployment (Use the Trained Model)
class UserCLN(User):
    def __init__(self, model, tokenizer, device, max_length):
        super().__init__(model, tokenizer, device, max_length)

    def use_model(self, input):
        output, _ = self.model(input)  # discard the penalty term
        return torch.round(output)     # threshold probabilities at 0.5
Example Usage
model_path = "path/to/model.bin"
model = LogisticRegressionCLN()
model.initialise(d=10) # Same feature dimension used in training
model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
model.eval()
input_tensor = torch.rand(1, 10) # Simulate test input
user = UserCLN(model=model, tokenizer=None, device="cpu", max_length=None)
prediction = user.use_model(input_tensor)
print("Predicted class:", int(prediction.item()))