[Standalone DL] 11 Lab - #18 Handling Visualization of Many Experiments
Save
What to Save?
Before thinking about what to visualize, let's first think about what needs to be saved; a rough sketch of one such record follows the list below.
- The setting values stored in args.
- The train loss, val loss, train acc, and val acc for each epoch.
- The final train acc, val acc, and test acc.
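Below is a hypothetical sketch of one saved record (the keys mirror what the experiment code later in this lab stores; the numbers are illustrative):
record = {
    # settings coming from args
    'n_layer': 3, 'hid_dim': 300, 'lr': 0.0015,
    # loss/accuracy curves, one entry per epoch
    'train_losses': [1.76, 1.50, 1.40], 'val_losses': [1.67, 1.55, 1.53],
    'train_accs': [37.3, 46.0, 49.9], 'val_accs': [40.3, 44.1, 46.3],
    # final metrics
    'train_acc': 49.9, 'val_acc': 46.3, 'test_acc': 50.1,
}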
How to Save?
Put each experiment's results into a dictionary and keep appending to it?
→ If the internet connection drops or the session dies, everything accumulated in memory is lost.
→ Use the JSON format instead: it can be read from any language, and if results are written out one at a time, the records already saved survive a disconnection (a line-by-line sketch follows the basic example below).
import json
a = {'value1': 5, 'value2': 10, 'seq': [1, 2, 3, 4, 5]}
filename = 'test.json'
with open(filename, 'w') as f:
json.dump(a, f)
with open(filename, 'r') as f:
result = json.load(f)
print(result)
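As a minimal sketch of the line-by-line idea above (the helper names append_result and load_results are made up for illustration), each finished experiment can be appended as one JSON line, so earlier records survive a crash:
import json

def append_result(record, filename='results.jsonl'):
    # append one experiment record as a single JSON line
    with open(filename, 'a') as f:
        f.write(json.dumps(record) + '\n')

def load_results(filename='results.jsonl'):
    # read back every record written before a disconnection
    with open(filename, 'r') as f:
        return [json.loads(line) for line in f if line.strip()]

append_result({'value1': 5, 'val_acc': 49.7})
print(load_results())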
Let's write a separate JSON file for every experiment run. How should we tell the files apart?
→ If we name them by the current time or a random number, re-running the same experiment setting produces yet another JSON file, and we can no longer tell which one is the right one.
→ Instead, put the variable values into the file name.
import hashlib
a = 'my name is rachel'
hash_key = hashlib.sha1(a.encode()).hexdigest()[:6] # take the first 6 characters
print(hash_key) # a 6-character hex string
setting = {'value1': 5, 'value2': 10, 'seq': [1, 2, 3, 4, 5]} # think of these as our experiment settings
hash_key = hashlib.sha1(str(setting).encode()).hexdigest()[:6]
print(hash_key) # 9e23e4 (a hash key derived from these setting values)
setting = {'value1': 6, 'value2': 10, 'seq': [1, 2, 3, 4, 5]}
hash_key = hashlib.sha1(str(setting).encode()).hexdigest()[:6]
print(hash_key) # 6f3bbe (a different hash key for different setting values)
Now identical settings give the same hash value, and different settings give different hash values.
setting = {'value1': 5, 'value2': 10, 'seq': [1, 2, 3, 4, 5], 'exp_name': 'exp1'}
exp_name = setting['exp_name']
hash_key = hashlib.sha1(str(setting).encode()).hexdigest()[:6]
filename = '{}-{}.json'.format(exp_name, hash_key)
print(filename) # e.g. exp1-628721.json, a filename with the hash_key embedded in it
Code
basic
!mkdir results # create a folder named results
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np
import time
from copy import deepcopy # Add Deepcopy for args
data preparation
transform = transforms.Compose(
[transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
download=True, transform=transform)
trainset, valset = torch.utils.data.random_split(trainset, [40000, 10000])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
download=True, transform=transform)
partition = {'train': trainset, 'val':valset, 'test':testset}
model architecture
class MLP(nn.Module):
def __init__(self, in_dim, out_dim, hid_dim, n_layer, act, dropout, use_bn, use_xavier):
super(MLP, self).__init__()
self.in_dim = in_dim
self.out_dim = out_dim
self.hid_dim = hid_dim
self.n_layer = n_layer
self.act = act
self.dropout = dropout
self.use_bn = use_bn
self.use_xavier = use_xavier
# ====== Create Linear Layers ====== #
self.fc1 = nn.Linear(self.in_dim, self.hid_dim)
self.linears = nn.ModuleList()
self.bns = nn.ModuleList()
for i in range(self.n_layer-1):
self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
if self.use_bn:
self.bns.append(nn.BatchNorm1d(self.hid_dim))
self.fc2 = nn.Linear(self.hid_dim, self.out_dim)
# ====== Create Activation Function ====== #
if self.act == 'relu':
self.act = nn.ReLU()
elif self.act == 'tanh':
self.act = nn.Tanh()
elif self.act == 'sigmoid':
self.act = nn.Sigmoid()
else:
raise ValueError('no valid activation function selected!')
# ====== Create Regularization Layer ======= #
self.dropout = nn.Dropout(self.dropout)
if self.use_xavier:
self.xavier_init()
def forward(self, x):
x = self.act(self.fc1(x))
for i in range(len(self.linears)):
x = self.act(self.linears[i](x))
if self.use_bn:
    x = self.bns[i](x)
x = self.dropout(x)
x = self.fc2(x)
return x
def xavier_init(self):
for linear in self.linears:
nn.init.xavier_normal_(linear.weight)
linear.bias.data.fill_(0.01)
net = MLP(3072, 10, 100, 4, 'relu', 0.1, True, True) # Testing Model Construction
train, validate, test, exp
def train(net, partition, optimizer, criterion, args):
trainloader = torch.utils.data.DataLoader(partition['train'],
batch_size=args.train_batch_size,
shuffle=True, num_workers=2)
net.train()
correct = 0
total = 0
train_loss = 0.0
for i, data in enumerate(trainloader, 0):
optimizer.zero_grad()
# get the inputs
inputs, labels = data
inputs = inputs.view(-1, 3072)
inputs = inputs.cuda()
labels = labels.cuda()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
train_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
train_loss = train_loss / len(trainloader)
train_acc = 100 * correct / total
return net, train_loss, train_acc
def validate(net, partition, criterion, args):
valloader = torch.utils.data.DataLoader(partition['val'],
batch_size=args.test_batch_size,
shuffle=False, num_workers=2)
net.eval()
correct = 0
total = 0
val_loss = 0
with torch.no_grad():
for data in valloader:
images, labels = data
images = images.view(-1, 3072)
images = images.cuda()
labels = labels.cuda()
outputs = net(images)
loss = criterion(outputs, labels)
val_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
val_loss = val_loss / len(valloader)
val_acc = 100 * correct / total
return val_loss, val_acc
def test(net, partition, args):
testloader = torch.utils.data.DataLoader(partition['test'],
batch_size=args.test_batch_size,
shuffle=False, num_workers=2)
net.eval()
correct = 0
total = 0
with torch.no_grad():
for data in testloader:
images, labels = data
images = images.view(-1, 3072)
images = images.cuda()
labels = labels.cuda()
outputs = net(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
test_acc = 100 * correct / total
return test_acc
def experiment(partition, args):
net = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act, args.dropout, args.use_bn, args.use_xavier)
net.cuda()
criterion = nn.CrossEntropyLoss()
if args.optim == 'SGD':
optimizer = optim.SGD(net.parameters(), lr=args.lr, weight_decay=args.l2)
elif args.optim == 'RMSprop':
optimizer = optim.RMSprop(net.parameters(), lr=args.lr, weight_decay=args.l2)
elif args.optim == 'Adam':
optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.l2)
else:
raise ValueError('In-valid optimizer choice')
# ===== List for epoch-wise data ====== #
train_losses = []
val_losses = []
train_accs = []
val_accs = []
# ===================================== #
for epoch in range(args.epoch): # loop over the dataset multiple times
ts = time.time()
net, train_loss, train_acc = train(net, partition, optimizer, criterion, args)
val_loss, val_acc = validate(net, partition, criterion, args)
te = time.time()
# ====== Add Epoch Data ====== #
train_losses.append(train_loss)
val_losses.append(val_loss)
train_accs.append(train_acc)
val_accs.append(val_acc)
# ============================ #
print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.2f}/{:2.2f}. Took {:2.2f} sec'.format(epoch, train_acc, val_acc, train_loss, val_loss, te-ts))
test_acc = test(net, partition, args)
# ======= Add Result to Dictionary ======= #
result = {}
result['train_losses'] = train_losses
result['val_losses'] = val_losses
result['train_accs'] = train_accs
result['val_accs'] = val_accs
result['train_acc'] = train_acc
result['val_acc'] = val_acc
result['test_acc'] = test_acc
return vars(args), result
# ===================================== #
Exp results
import hashlib
import json
from os import listdir
from os.path import isfile, join
import pandas as pd
def save_exp_result(setting, result):
exp_name = setting['exp_name']
del setting['epoch'] # drop 'epoch' so that re-running with a different epoch count updates the same file instead of creating a new one
del setting['test_batch_size'] # same idea
hash_key = hashlib.sha1(str(setting).encode()).hexdigest()[:6]
filename = './results/{}-{}.json'.format(exp_name, hash_key)
result.update(setting) # store the settings in the result dictionary as well
with open(filename, 'w') as f:
json.dump(result, f)
def load_exp_result(exp_name):
dir_path = './results'
filenames = [f for f in listdir(dir_path) if isfile(join(dir_path, f)) if '.json' in f]
list_result = []
for filename in filenames:
if exp_name in filename:
with open(join(dir_path, filename), 'r') as infile:
results = json.load(infile)
list_result.append(results)
df = pd.DataFrame(list_result) # .drop(columns=[])
return df
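A quick usage sketch (assuming the exp1 result files created in the next section already exist under ./results; the columns selected are among the keys saved by save_exp_result):
df = load_exp_result('exp1')
print(df[['n_layer', 'hid_dim', 'train_acc', 'val_acc', 'test_acc']]) # one row per saved experiment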
Exp
# ====== Random Seed Initialization ====== #
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)
parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.exp_name = "exp1_n_layer_hid_dim" # the name records that this experiment varies n_layer and hid_dim
# ====== Model Capacity ====== #
args.in_dim = 3072
args.out_dim = 10
args.hid_dim = 100
args.act = 'relu'
# ====== Regularization ======= #
args.dropout = 0.2
args.use_bn = True
args.l2 = 0.00001
args.use_xavier = True
# ====== Optimizer & Training ====== #
args.optim = 'RMSprop' # one of SGD, RMSprop, Adam
args.lr = 0.0015
args.epoch = 10
args.train_batch_size = 256
args.test_batch_size = 1024
# ====== Experiment Variable ====== #
name_var1 = 'n_layer'
name_var2 = 'hid_dim'
list_var1 = [1, 2, 3]
list_var2 = [500, 300]
for var1 in list_var1:
for var2 in list_var2:
setattr(args, name_var1, var1) # equivalent to args.n_layer = var1, since name_var1 is 'n_layer'
setattr(args, name_var2, var2) # taking the names and values from the lists above helps avoid manual mistakes
print(args)
setting, result = experiment(partition, deepcopy(args))
save_exp_result(setting, result)
!ls results
lists the results folder, so we can check that the files were created properly.
!cat results/exp1_n_layer_hid_dim-41b634.json
prints the saved values:
{"train_losses": [1.7552594508335089, 1.5035573031492293, 1.399491632819935, 1.3134926542354997, 1.246969583687509, 1.189121641930501, 1.1298883709178609, 1.0882677378927825, 1.0231012895608405, 0.978762801285762],
"val_losses": [1.670531678199768, 1.547693169116974, 1.530729103088379, 1.5106648325920105, 1.7050195932388306, 1.4638991713523866, 1.4923243045806884, 1.4569032311439514, 1.5163418173789978, 1.5914841771125794],
"train_accs": [37.3225, 46.01, 49.93, 53.055, 55.1475, 57.57, 59.64, 61.3175, 63.5725, 65.0825],
"val_accs": [40.28, 44.13, 46.26, 47.4, 42.84, 49.12, 48.65, 50.43, 49.2, 49.72],
"train_acc": 65.0825, "val_acc": 49.72, "test_acc": 50.12,
"exp_name": "exp1_n_layer_hid_dim", "in_dim": 3072, "out_dim": 10, "hid_dim": 300, "act": "relu", "dropout": 0.2, "use_bn": true, "l2": 1e-05, "use_xavier": true, "optim": "RMSprop", "lr": 0.0015, "train_batch_size": 256, "n_layer": 3}
With this, experiment results can now be saved to the hard disk.
visualization
After running several experiments in this way, let's visualize how the accuracy and loss change with the variables we varied.
import seaborn as sns
import matplotlib.pyplot as plt
df = load_exp_result('exp1') # use the function defined above to collect the saved results into a DataFrame
fig, ax = plt.subplots(1, 3)
fig.set_size_inches(15, 6)
sns.set_style("darkgrid", {"axes.facecolor": ".9"})
sns.barplot(x='n_layer', y='train_acc', hue='hid_dim', data=df, ax=ax[0])
sns.barplot(x='n_layer', y='val_acc', hue='hid_dim', data=df, ax=ax[1])
sns.barplot(x='n_layer', y='test_acc', hue='hid_dim', data=df, ax=ax[2])
var1 = 'n_layer'
var2 = 'hid_dim'
df = load_exp_result('exp1')
list_v1 = df[var1].unique()
list_v2 = df[var2].unique()
list_data = []
for value1 in list_v1:
for value2 in list_v2:
row = df.loc[df[var1]==value1]
row = row.loc[row[var2]==value2]
train_losses = list(row.train_losses)[0]
val_losses = list(row.val_losses)[0]
for epoch, train_loss in enumerate(train_losses):
list_data.append({'type':'train', 'loss':train_loss, 'epoch':epoch, var1:value1, var2:value2})
for epoch, val_loss in enumerate(val_losses):
list_data.append({'type':'val', 'loss':val_loss, 'epoch':epoch, var1:value1, var2:value2})
df = pd.DataFrame(list_data)
g = sns.FacetGrid(df, row=var2, col=var1, hue='type', margin_titles=True, sharey=False)
g = g.map(plt.plot, 'epoch', 'loss', marker='.')
g.add_legend()
g.fig.suptitle('Train loss vs Val loss')
plt.subplots_adjust(top=0.89)
var1 = 'n_layer'
var2 = 'hid_dim'
df = load_exp_result('exp1')
list_v1 = df[var1].unique()
list_v2 = df[var2].unique()
list_data = []
for value1 in list_v1:
for value2 in list_v2:
row = df.loc[df[var1]==value1]
row = row.loc[row[var2]==value2]
train_accs = list(row.train_accs)[0]
val_accs = list(row.val_accs)[0]
test_acc = list(row.test_acc)[0]
for epoch, train_acc in enumerate(train_accs):
list_data.append({'type':'train', 'Acc':train_acc, 'test_acc':test_acc, 'epoch':epoch, var1:value1, var2:value2})
for epoch, val_acc in enumerate(val_accs):
list_data.append({'type':'val', 'Acc':val_acc, 'test_acc':test_acc, 'epoch':epoch, var1:value1, var2:value2})
df = pd.DataFrame(list_data)
g = sns.FacetGrid(df, row=var2, col=var1, hue='type', margin_titles=True, sharey=False)
g = g.map(plt.plot, 'epoch', 'Acc', marker='.')
def show_acc(x, y, metric, **kwargs):
plt.scatter(x, y, alpha=0.3, s=1)
metric = "Test Acc: {:1.3f}".format(list(metric.values)[0])
plt.text(0.05, 0.95, metric, horizontalalignment='left', verticalalignment='center', transform=plt.gca().transAxes, bbox=dict(facecolor='yellow', alpha=0.5, boxstyle="round,pad=0.1"))
g = g.map(show_acc, 'epoch', 'Acc', 'test_acc')
g.add_legend()
g.fig.suptitle('Train Accuracy vs Val Accuracy')
plt.subplots_adjust(top=0.89)