[study] tutorial Dataset, DataLoader
import torch
import numpy as np
# Build a 2x2 tensor straight from a nested Python list; torch infers the
# element type from the values.
data = [[1, 2], [3, 4]]
t_data = torch.tensor(data)
tensor([[1, 2], [3, 4]])
torch.Size([2, 2])
# Same values via NumPy: materialise an ndarray from the nested list, then
# wrap that array as a tensor with from_numpy.
np_array = np.array(data)
t_np = torch.from_numpy(np_array)
tensor([[1, 2], [3, 4]])
torch.Size([2, 2])
torch.Size([2, 2])
# Tensor of ones mirroring t_data; the recorded output shows a 2x2 block of
# integer ones.
x_ones = torch.ones_like(t_data)
tensor([[1, 1], [1, 1]])
# Random tensor with t_data's shape; dtype is overridden to float because
# t_data was built from integers.
x_rand = torch.rand_like(t_data, dtype=torch.float)
tensor([[0.4474, 0.7949], [0.4205, 0.8668]])
# One shape tuple drives three constructors: constant zeros, constant ones,
# and uniform random values.
shape = (2, 3)
zeros_tensor = torch.zeros(shape)
ones_tensor = torch.ones(shape)
rand_tensor = torch.rand(shape)
tensor([[0.8367, 0.4293, 0.0447], [0.2943, 0.1218, 0.8405]]) tensor([[1., 1., 1.], [1., 1., 1.]]) tensor([[0., 0., 0.], [0., 0., 0.]])
# 3x4 tensor of random values; the recorded output reports its shape, dtype
# (float32) and device (cpu).
tensor = torch.rand(3,4)
torch.Size([3, 4]) torch.float32 cpu
# Pick the accelerator when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Indexing and slicing on a 4x4 tensor of ones.
tensor = torch.ones(4, 4)
print("First row", tensor[0])
print("First column", tensor[:, 0])
print("Last Column", tensor[..., -1])

# In-place write through a slice: zero out the second column.
tensor[:, 1] = 0
tensor([[1., 1., 1., 1.], [1., 1., 1., 1.], [1., 1., 1., 1.], [1., 1., 1., 1.]]) First row tensor([1., 1., 1., 1.]) First column tensor([1., 1., 1., 1.]) Last Column tensor([1., 1., 1., 1.]) tensor([[1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.]])
# Join three copies along dim=1: rows stay the same, columns are laid side by
# side (the recorded output has 12 columns per row).
t1 = torch.cat([tensor, tensor, tensor], dim=1)
tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.], [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.], [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.], [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])
# Join three copies along dim=0: the copies are stacked row-wise (the recorded
# shape is [12, 4]).
t2 = torch.cat([tensor,tensor,tensor], dim=0)
tensor([[1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.]])
torch.Size([12, 4])
# Three spellings of the same matrix product of `tensor` with its transpose:
# the @ operator, the .matmul method, and torch.matmul writing into out=.
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)
# y3 only provides a correctly-shaped buffer; its random contents are
# overwritten by the out= call below.
y3 = torch.rand_like(tensor)
torch.matmul(tensor, tensor.T, out=y3)
tensor([[0.3516, 0.7082, 0.2357, 0.0189], [0.5164, 0.8764, 0.1845, 0.9110], [0.4364, 0.2171, 0.0746, 0.9275], [0.3004, 0.5628, 0.6385, 0.0230]]) tensor([[3., 3., 3., 3.], [3., 3., 3., 3.], [3., 3., 3., 3.], [3., 3., 3., 3.]]) tensor([[3., 3., 3., 3.], [3., 3., 3., 3.], [3., 3., 3., 3.], [3., 3., 3., 3.]]) tensor([[3., 3., 3., 3.], [3., 3., 3., 3.], [3., 3., 3., 3.], [3., 3., 3., 3.]])
# Three spellings of the element-wise product: the * operator, the .mul
# method, and torch.mul writing into out=.
z1 = tensor * tensor
z2 = tensor.mul(tensor)
# z3 only provides a correctly-shaped buffer; its random contents are
# overwritten by the out= call below.
z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)
tensor([[1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.]]) tensor([[1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.]]) tensor([[1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.], [1., 0., 1., 1.]])
# Reduce every element to a single sum, then cast that result to int32.
agg = tensor.sum().type(torch.int32)

# Out-of-place subtraction: produces a new tensor and rebinds `tensor` to it.
tensor = tensor.sub(1)
tensor([[ 0., -1., 0., 0.], [ 0., -1., 0., 0.], [ 0., -1., 0., 0.], [ 0., -1., 0., 0.]])
# Out-of-place addition: the result is a new tensor that `tensor` is rebound
# to (recorded output: every element is 2 higher than before).
tensor = tensor.add(2)
tensor([[2., 1., 2., 2.], [2., 1., 2., 2.], [2., 1., 2., 2.], [2., 1., 2., 2.]])
# Tensor -> NumPy array -> tensor round trip.
# NOTE(review): the recorded output shows t, n and at all changing from 1 to 2
# together, consistent with the three objects sharing one buffer; the statement
# that performed the update is not visible here.
t = torch.ones(5)
n = t.numpy()
at = torch.from_numpy(n)
tensor([1., 1., 1., 1., 1.]) <class 'torch.Tensor'> [1. 1. 1. 1. 1.] <class 'numpy.ndarray'> tensor([1., 1., 1., 1., 1.]) <class 'torch.Tensor'>
tensor([2., 2., 2., 2., 2.]) [2. 2. 2. 2. 2.] tensor([2., 2., 2., 2., 2.])
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
# Load the Fashion-MNIST training and test splits, downloading them on first
# use. The original calls were truncated after the opening parenthesis; the
# arguments below are reconstructed from the recorded run: files were
# downloaded into data/FashionMNIST/raw (root="data", download=True) and
# samples come back as [1, 28, 28] tensors (transform=ToTensor()).
train_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz
Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz
Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz
Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw
# Display names for the ten integer class ids used as dataset labels.
labels_map = {
    0: "T-Shirt",
    1: "Trouser",
    2: "Pullover",
    3: "Dress",
    4: "Coat",
    5: "Sandal",
    6: "Shirt",
    7: "Sneaker",
    8: "Bag",
    9: "Ankle Boot",
}
# Draw a 4x4 grid of randomly chosen training samples, printing each sample's
# tensor shape and class name as it is placed.
cols, rows = 4, 4
figure = plt.figure(figsize=(9, 9))
for i in range(1, cols * rows + 1):
    # randint yields a 1-element tensor; .item() turns it into a plain int.
    sample_idx = torch.randint(len(train_data), size=(1,)).item()
    img, label = train_data[sample_idx]
    figure.add_subplot(rows, cols, i)
    print(img.shape, labels_map[label])
    # squeeze() drops the size-1 leading axis so imshow receives a 2-D image.
    plt.imshow(img.squeeze(), cmap="gray")
torch.Size([1, 28, 28]) Sneaker torch.Size([1, 28, 28]) T-Shirt torch.Size([1, 28, 28]) Shirt torch.Size([1, 28, 28]) Bag torch.Size([1, 28, 28]) Pullover torch.Size([1, 28, 28]) Bag torch.Size([1, 28, 28]) Trouser torch.Size([1, 28, 28]) Trouser torch.Size([1, 28, 28]) T-Shirt torch.Size([1, 28, 28]) Shirt torch.Size([1, 28, 28]) Ankle Boot torch.Size([1, 28, 28]) Coat torch.Size([1, 28, 28]) Pullover torch.Size([1, 28, 28]) Sandal torch.Size([1, 28, 28]) Shirt torch.Size([1, 28, 28]) Shirt
# Draw one uniformly random index in [0, len(train_data)) as a plain int.
sample_idx = torch.randint(0, len(train_data), (1,)).item()
import os
import pandas as pd
from torchvision.io import read_image
class myDataset(Dataset):
    """Image dataset backed by an annotations CSV and a directory of images.

    The CSV is read without a header row; its two columns are named
    ``file_name`` and ``label``. Each ``file_name`` is resolved relative to
    ``img_dir`` when a sample is fetched.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Read the annotation table and store the image location and transforms.

        Args:
            annotations_file: Path to the CSV of ``file_name,label`` rows.
            img_dir: Directory that the CSV's file names are relative to.
            transform: Optional callable applied to each loaded image.
            target_transform: Optional callable applied to each label.
        """
        self.img_labels = pd.read_csv(annotations_file, names=["file_name", "label"])
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``.

        The image is loaded with ``read_image`` from the joined path; the
        configured transforms, when set, are applied before returning.
        """
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        # Must match the attribute set in __init__; the earlier misspelling
        # ("target_trasnform") raised AttributeError on every item access.
        if self.target_transform:
            label = self.target_transform(label)
        return image, label
from torch.utils.data import DataLoader
# Wrap each split in a loader that serves shuffled batches of 64 samples.
train_dataloader = DataLoader(dataset=train_data, shuffle=True, batch_size=64)
test_dataloader = DataLoader(dataset=test_data, shuffle=True, batch_size=64)
모델을 학습할 때는 일반적으로 샘플들을 “미니배치(minibatch)”로 전달하고, 매 에폭(epoch)마다 데이터를 다시 섞어서 과적합(overfit)을 막고, Python의 multiprocessing을 사용하여 데이터 검색 속도를 높이려고 합니다. DataLoader는 이러한 과정을 간단한 API 뒤로 감춘 순회 가능한 객체(iterable)입니다.
# Pull a single batch from the training loader and report its tensor shapes.
train_features, train_labels = next(iter(train_dataloader))
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")

# Show the first sample of the batch; squeeze() removes the size-1 channel
# axis so imshow receives a 2-D image.
img, label = train_features[0].squeeze(), train_labels[0]
plt.imshow(img, cmap="gray")
print(f"Label: {label}")
Feature batch shape: torch.Size([64, 1, 28, 28]) Labels batch shape: torch.Size([64])
Label: 9