import torch
 import torchvision
 from torch.utils.data import Dataset, DataLoader
 import numpy as np
 import math
 import pandas as pd
 import io
# Needed to fetch wine.csv from GitHub
import requests

# Location of the raw wine.csv file in the pytorchTutorial GitHub repository.
url = "https://raw.githubusercontent.com/python-engineer/pytorchTutorial/master/data/wine/wine.csv"

# Bound the wait and fail loudly on an HTTP error, so that a hung connection
# or an error page is never handed to the CSV parser below.
response = requests.get(url, timeout=30)
response.raise_for_status()
download = response.content

# Decode the raw bytes as UTF-8 and parse them from an in-memory text buffer.
df = pd.read_csv(io.StringIO(download.decode('utf-8')))
# pd.set_option('max_columns', 100)
# pd.set_option('max_rows', 100)

# pd.options.display.float_format = lambda x: f'{x:.8f}'

# Bare expression: has no effect when run as a script (presumably kept so a
# notebook cell displays the frame).
df
Wine Alcohol Malic.acid Ash Acl Mg Phenols Flavanoids Nonflavanoid.phenols Proanth Color.int Hue OD Proline
0 1 14.23 1.71 2.43 15.6 127 2.80 3.06 0.28 2.29 5.64 1.04 3.92 1065
1 1 13.20 1.78 2.14 11.2 100 2.65 2.76 0.26 1.28 4.38 1.05 3.40 1050
2 1 13.16 2.36 2.67 18.6 101 2.80 3.24 0.30 2.81 5.68 1.03 3.17 1185
3 1 14.37 1.95 2.50 16.8 113 3.85 3.49 0.24 2.18 7.80 0.86 3.45 1480
4 1 13.24 2.59 2.87 21.0 118 2.80 2.69 0.39 1.82 4.32 1.04 2.93 735
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
173 3 13.71 5.65 2.45 20.5 95 1.68 0.61 0.52 1.06 7.70 0.64 1.74 740
174 3 13.40 3.91 2.48 23.0 102 1.80 0.75 0.43 1.41 7.30 0.70 1.56 750
175 3 13.27 4.28 2.26 20.0 120 1.59 0.69 0.43 1.35 10.20 0.59 1.56 835
176 3 13.17 2.59 2.37 20.0 120 1.65 0.68 0.53 1.46 9.30 0.60 1.62 840
177 3 14.13 4.10 2.74 24.5 96 2.05 0.76 0.56 1.35 9.20 0.61 1.60 560

178 rows × 14 columns

# Save the pandas DataFrame to a file
# df.to_csv("wine.csv")

# The data loaded above could be used directly; this just shows how to load it from a file as well.
 class WineDataset(Dataset):
     """Dataset over a comma-separated wine file.

     The first row of the file is skipped as a header. In every remaining
     row, column 0 is used as the label and all other columns as features.
     All values are read as float32.
     """

     def __init__(self, path='/content/sample_data/wine.csv'):
         """Load the whole CSV at ``path`` into memory.

         Args:
             path: Location of the CSV file. Defaults to the path the
                 original code hard-coded.
         """
         super().__init__()
         # ndmin=2 keeps the array two-dimensional even when the file holds a
         # single data row, so the column slicing below always works.
         xy = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
         self.x = torch.from_numpy(xy[:, 1:])
         # Indexing with [0] (a list) keeps the column axis: shape (n_samples, 1).
         self.y = torch.from_numpy(xy[:, [0]])
         self.n_samples = xy.shape[0]

     def __getitem__(self, index):
         """Return the ``(features, label)`` pair stored at ``index``."""
         return self.x[index], self.y[index]

     def __len__(self):
         """Return the number of data rows that were loaded."""
         # Fixed: the original read the misspelled attribute ``n_smaples``.
         return self.n_samples
# Build the dataset and wrap it in a loader that yields batches of 4 samples.
dataset = WineDataset()
dataloader = DataLoader(dataset=dataset, batch_size=4)
# Alternative with shuffling and worker processes:
# dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True, num_workers=2)

# Pull a single batch to inspect it. The builtin next() is used because the
# iterator's Python-2-style .next() method is not available on current
# PyTorch DataLoader iterators.
dataiter = iter(dataloader)
data = next(dataiter)
features, labels = data
print(features, labels)

댓글남기기