-
Notifications
You must be signed in to change notification settings - Fork 22
/
Copy pathmnist_KANvolve.py
104 lines (84 loc) · 4.01 KB
/
mnist_KANvolve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
import os
from KANvolver import KANvolver as KAN
# Initialize Weights & Biases for experiment tracking and visualization
import wandb
wandb.init(project="KANvolution_evaluation", entity="1ssb")
class Trainer:
def __init__(self, model, device, train_loader, val_loader, optimizer, scheduler, criterion):
self.model = model.to(device)
self.device = device
self.train_loader = train_loader
self.val_loader = val_loader
self.optimizer = optimizer
self.scheduler = scheduler
self.criterion = criterion
def train_epoch(self):
self.model.train()
total_loss, total_accuracy = 0, 0
for images, labels in self.train_loader:
images, labels = images.view(-1, 28 * 28).to(self.device), labels.to(self.device)
self.optimizer.zero_grad()
outputs = self.model(images)
loss = self.criterion(outputs, labels)
loss.backward()
self.optimizer.step()
accuracy = (outputs.argmax(dim=1) == labels).float().mean().item()
total_loss += loss.item()
total_accuracy += accuracy
return total_loss / len(self.train_loader), total_accuracy / len(self.train_loader)
def validate_epoch(self):
self.model.eval()
val_loss, val_accuracy = 0, 0
with torch.no_grad():
for images, labels in self.val_loader:
images, labels = images.view(-1, 28 * 28).to(self.device), labels.to(self.device)
outputs = self.model(images)
val_loss += self.criterion(outputs, labels).item()
val_accuracy += (outputs.argmax(dim=1) == labels).float().mean().item()
return val_loss / len(self.val_loader), val_accuracy / len(self.val_loader)
def fit(self, epochs):
train_accuracies, val_accuracies = [], []
pbar = tqdm(range(epochs), desc="Epoch Progress")
for epoch in pbar:
train_loss, train_accuracy = self.train_epoch()
val_loss, val_accuracy = self.validate_epoch()
wandb.log({
"Train Loss": train_loss,
"Train Accuracy": train_accuracy,
"Validation Loss": val_loss,
"Validation Accuracy": val_accuracy
})
pbar.set_description(f"Epoch {epoch+1} | Train Loss: {train_loss:.4f} | Val Accuracy: {val_accuracy:.4f}")
self.scheduler.step()
train_accuracies.append(train_accuracy)
val_accuracies.append(val_accuracy)
return train_accuracies, val_accuracies
def train_and_validate(epochs=15):
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
trainset = torchvision.datasets.MNIST(root="./data", train=True, download=True, transform=transform)
valset = torchvision.datasets.MNIST(root="./data", train=False, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
valloader = DataLoader(valset, batch_size=64, shuffle=False)
model = KAN([28 * 28, 64, 10])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-5)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.85)
criterion = nn.CrossEntropyLoss()
trainer = Trainer(model, device, trainloader, valloader, optimizer, scheduler, criterion)
train_accuracies, val_accuracies = trainer.fit(epochs)
model_save_dir = "./models"
os.makedirs(model_save_dir, exist_ok=True)
torch.save(model.state_dict(), os.path.join(model_save_dir, "mononet_model.pth"))
print(f"Train Accuracies: {train_accuracies}")
print(f"Validation Accuracies: {val_accuracies}")
wandb.finish()
train_and_validate()