# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv
train = pd.read_csv("/kaggle/input/titanic/train.csv")
train
|     | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 886 | 887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
| 887 | 888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
| 888 | 889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
| 889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
| 890 | 891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |

891 rows × 12 columns
test = pd.read_csv("/kaggle/input/titanic/test.csv")
test
|     | PassengerId | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 892 | 3 | Kelly, Mr. James | male | 34.5 | 0 | 0 | 330911 | 7.8292 | NaN | Q |
| 1 | 893 | 3 | Wilkes, Mrs. James (Ellen Needs) | female | 47.0 | 1 | 0 | 363272 | 7.0000 | NaN | S |
| 2 | 894 | 2 | Myles, Mr. Thomas Francis | male | 62.0 | 0 | 0 | 240276 | 9.6875 | NaN | Q |
| 3 | 895 | 3 | Wirz, Mr. Albert | male | 27.0 | 0 | 0 | 315154 | 8.6625 | NaN | S |
| 4 | 896 | 3 | Hirvonen, Mrs. Alexander (Helga E Lindqvist) | female | 22.0 | 1 | 1 | 3101298 | 12.2875 | NaN | S |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 413 | 1305 | 3 | Spector, Mr. Woolf | male | NaN | 0 | 0 | A.5. 3236 | 8.0500 | NaN | S |
| 414 | 1306 | 1 | Oliva y Ocana, Dona. Fermina | female | 39.0 | 0 | 0 | PC 17758 | 108.9000 | C105 | C |
| 415 | 1307 | 3 | Saether, Mr. Simon Sivertsen | male | 38.5 | 0 | 0 | SOTON/O.Q. 3101262 | 7.2500 | NaN | S |
| 416 | 1308 | 3 | Ware, Mr. Frederick | male | NaN | 0 | 0 | 359309 | 8.0500 | NaN | S |
| 417 | 1309 | 3 | Peter, Master. Michael J | male | NaN | 1 | 1 | 2668 | 22.3583 | NaN | C |

418 rows × 11 columns
gender = pd.read_csv("/kaggle/input/titanic/gender_submission.csv")
gender
|     | PassengerId | Survived |
| --- | --- | --- |
| 0 | 892 | 0 |
| 1 | 893 | 1 |
| 2 | 894 | 0 |
| 3 | 895 | 0 |
| 4 | 896 | 1 |
| ... | ... | ... |
| 413 | 1305 | 0 |
| 414 | 1306 | 1 |
| 415 | 1307 | 0 |
| 416 | 1308 | 0 |
| 417 | 1309 | 0 |

418 rows × 2 columns
train.columns
Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')
df_train = pd.DataFrame(train)  # note: train is already a DataFrame, so this wrapping is redundant
df_train
|     | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 2 | 3 | 1 | 3 | Heikkinen, Miss. Laina | female | 26.0 | 0 | 0 | STON/O2. 3101282 | 7.9250 | NaN | S |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 886 | 887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
| 887 | 888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
| 888 | 889 | 0 | 3 | Johnston, Miss. Catherine Helen "Carrie" | female | NaN | 1 | 2 | W./C. 6607 | 23.4500 | NaN | S |
| 889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
| 890 | 891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |

891 rows × 12 columns
import matplotlib.pyplot as plt
# plt.figure(12.5)
plt.plot(train['Sex'], train['Survived'])  # a line plot over a categorical column is hard to read; see the groupby sketch below
plt.xlabel('sex')
plt.ylabel('survived')
plt.show()
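The line plot above just connects the raw 0/1 values, which is hard to interpret for a categorical column like Sex. The survival rate per group is a clearer view; a minimal sketch, not in the original notebook (survival_by_sex is an illustrative name):

survival_by_sex = train.groupby("Sex")["Survived"].mean()  # fraction of survivors within each sex
print(survival_by_sex)
survival_by_sex.plot(kind="bar", rot=0)  # a bar chart reads better than a line for categories
plt.ylabel("survival rate")
plt.show()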
female = train.loc[train.Sex =='female']
male = train.loc[train.Sex =='male']
male

|     | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 1 | 0 | 3 | Braund, Mr. Owen Harris | male | 22.0 | 1 | 0 | A/5 21171 | 7.2500 | NaN | S |
| 4 | 5 | 0 | 3 | Allen, Mr. William Henry | male | 35.0 | 0 | 0 | 373450 | 8.0500 | NaN | S |
| 5 | 6 | 0 | 3 | Moran, Mr. James | male | NaN | 0 | 0 | 330877 | 8.4583 | NaN | Q |
| 6 | 7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
| 7 | 8 | 0 | 3 | Palsson, Master. Gosta Leonard | male | 2.0 | 3 | 1 | 349909 | 21.0750 | NaN | S |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 883 | 884 | 0 | 2 | Banfield, Mr. Frederick James | male | 28.0 | 0 | 0 | C.A./SOTON 34068 | 10.5000 | NaN | S |
| 884 | 885 | 0 | 3 | Sutehall, Mr. Henry Jr | male | 25.0 | 0 | 0 | SOTON/OQ 392076 | 7.0500 | NaN | S |
| 886 | 887 | 0 | 2 | Montvila, Rev. Juozas | male | 27.0 | 0 | 0 | 211536 | 13.0000 | NaN | S |
| 889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |
| 890 | 891 | 0 | 3 | Dooley, Mr. Patrick | male | 32.0 | 0 | 0 | 370376 | 7.7500 | NaN | Q |

577 rows × 12 columns
plt.figure(figsize=(10, 6))
plt.subplot(121)
plt.title("female")
plt.hist(female.Age, bins=10)
plt.subplot(122)
plt.title("male")
plt.hist(male.Age, bins=10, color='orange')
plt.show()
len(train[train["Survived"] == 0]), len(train[train["Survived"] == 1])
(549, 342)
train["Survived"].value_counts()
0 549
1 342
Name: Survived, dtype: int64
train[train["Pclass"] == 1]
|     | PassengerId | Survived | Pclass | Name | Sex | Age | SibSp | Parch | Ticket | Fare | Cabin | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 1 | 2 | 1 | 1 | Cumings, Mrs. John Bradley (Florence Briggs Th... | female | 38.0 | 1 | 0 | PC 17599 | 71.2833 | C85 | C |
| 3 | 4 | 1 | 1 | Futrelle, Mrs. Jacques Heath (Lily May Peel) | female | 35.0 | 1 | 0 | 113803 | 53.1000 | C123 | S |
| 6 | 7 | 0 | 1 | McCarthy, Mr. Timothy J | male | 54.0 | 0 | 0 | 17463 | 51.8625 | E46 | S |
| 11 | 12 | 1 | 1 | Bonnell, Miss. Elizabeth | female | 58.0 | 0 | 0 | 113783 | 26.5500 | C103 | S |
| 23 | 24 | 1 | 1 | Sloper, Mr. William Thompson | male | 28.0 | 0 | 0 | 113788 | 35.5000 | A6 | S |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 871 | 872 | 1 | 1 | Beckwith, Mrs. Richard Leonard (Sallie Monypeny) | female | 47.0 | 1 | 1 | 11751 | 52.5542 | D35 | S |
| 872 | 873 | 0 | 1 | Carlsson, Mr. Frans Olof | male | 33.0 | 0 | 0 | 695 | 5.0000 | B51 B53 B55 | S |
| 879 | 880 | 1 | 1 | Potter, Mrs. Thomas Jr (Lily Alexenia Wilson) | female | 56.0 | 0 | 1 | 11767 | 83.1583 | C50 | C |
| 887 | 888 | 1 | 1 | Graham, Miss. Margaret Edith | female | 19.0 | 0 | 0 | 112053 | 30.0000 | B42 | S |
| 889 | 890 | 1 | 1 | Behr, Mr. Karl Howell | male | 26.0 | 0 | 0 | 111369 | 30.0000 | C148 | C |

216 rows × 12 columns
train["Pclass"].value_counts()
3 491
1 216
2 184
Name: Pclass, dtype: int64
train.isnull().sum()
PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64
train.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   PassengerId  891 non-null    int64
 1   Survived     891 non-null    int64
 2   Pclass       891 non-null    int64
 3   Name         891 non-null    object
 4   Sex          891 non-null    object
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64
 7   Parch        891 non-null    int64
 8   Ticket       891 non-null    object
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object
 11  Embarked     889 non-null    object
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
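Age (177 missing), Cabin (687 missing) and Embarked (2 missing) are the only incomplete columns. The notebook later fills every remaining NaN with -1; a common alternative is median/mode imputation, sketched below on a copy (train_imputed is an illustrative name, not part of the original flow):

train_imputed = train.copy()
train_imputed["Age"] = train_imputed["Age"].fillna(train_imputed["Age"].median())                  # median age
train_imputed["Embarked"] = train_imputed["Embarked"].fillna(train_imputed["Embarked"].mode()[0])  # most common port
# Cabin is ~77% missing, so it is simply dropped below via useless_features
print(train_imputed.isnull().sum())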
for col in train:
    print(col)
PassengerId
Survived
Pclass
Name
Sex
Age
SibSp
Parch
Ticket
Fare
Cabin
Embarked
input_features = [col for col in train]
input_features
['PassengerId',
'Survived',
'Pclass',
'Name',
'Sex',
'Age',
'SibSp',
'Parch',
'Ticket',
'Fare',
'Cabin',
'Embarked']
output_feature = ["Survived"]
useless_features = ['PassengerId', 'Name', 'Survived', 'Ticket', 'Cabin']
for f in useless_features:
    input_features.remove(f)
X, y = train[input_features], train[output_feature]
X

|     | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
| 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
| 2 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
| 3 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
| 4 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 886 | 2 | male | 27.0 | 0 | 0 | 13.0000 | S |
| 887 | 1 | female | 19.0 | 0 | 0 | 30.0000 | S |
| 888 | 3 | female | NaN | 1 | 2 | 23.4500 | S |
| 889 | 1 | male | 26.0 | 0 | 0 | 30.0000 | C |
| 890 | 3 | male | 32.0 | 0 | 0 | 7.7500 | Q |

891 rows × 7 columns
# X['Sex'] = X['Sex'].astype("category").cat.codes
X["Sex"] = X["Sex"].astype("category").cat.codes
X["Embarked"] = X["Embarked"].astype("category").cat.codes
X
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
This is separate from the ipykernel package so we can avoid doing imports until
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
after removing the cwd from sys.path.
|     | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 3 | 1 | 22.0 | 1 | 0 | 7.2500 | 2 |
| 1 | 1 | 0 | 38.0 | 1 | 0 | 71.2833 | 0 |
| 2 | 3 | 0 | 26.0 | 0 | 0 | 7.9250 | 2 |
| 3 | 1 | 0 | 35.0 | 1 | 0 | 53.1000 | 2 |
| 4 | 3 | 1 | 35.0 | 0 | 0 | 8.0500 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 886 | 2 | 1 | 27.0 | 0 | 0 | 13.0000 | 2 |
| 887 | 1 | 0 | 19.0 | 0 | 0 | 30.0000 | 2 |
| 888 | 3 | 0 | NaN | 1 | 2 | 23.4500 | 2 |
| 889 | 1 | 1 | 26.0 | 0 | 0 | 30.0000 | 0 |
| 890 | 3 | 1 | 32.0 | 0 | 0 | 7.7500 | 1 |

891 rows × 7 columns
X["Embarked"].unique()  # cat.codes maps C/Q/S to 0/1/2 and missing values to -1
array([ 2, 0, 1, -1], dtype=int8)
X[X["Embarked"] == -1]  # the two passengers whose Embarked was NaN (encoded as -1)

|     | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 61 | 1 | 0 | 38.0 | 0 | 0 | 80.0 | -1 |
| 829 | 1 | 0 | 62.0 | 0 | 0 | 80.0 | -1 |
X.isnull().sum()
Pclass        0
Sex           0
Age         177
SibSp         0
Parch         0
Fare          0
Embarked      0
dtype: int64
X = X.fillna(-1)  # assumed fill value: mirrors the fillna(-1) applied to the test set below
X.isnull().sum()
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Embarked    0
dtype: int64
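The SettingWithCopyWarning above is raised because X was created as a slice of train, so pandas cannot tell whether the assignments also modify train. Taking an explicit copy gives the same encoded features without the warning; a sketch under that assumption (X_clean is an illustrative name):

X_clean = train[input_features].copy()   # explicit copy: assignments below modify X_clean, not a view of train
for col in ["Sex", "Embarked"]:
    X_clean[col] = X_clean[col].astype("category").cat.codes   # female/male -> 0/1, C/Q/S -> 0/1/2, NaN -> -1
X_clean = X_clean.fillna(-1)             # fill the remaining NaN (Age), matching the pipeline above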
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2)
X_train.shape, y_train.shape, X_test.shape, y_test.shape
((712, 7), (712, 1), (179, 7), (179, 1))
X.shape, y.shape
((891, 7), (891, 1))
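train_test_split is called without random_state, so the 712/179 split (and every score below) changes from run to run. Passing random_state, and optionally stratify=y to keep the 549/342 class ratio in both parts, makes the experiment repeatable; a sketch, not in the original notebook:

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0, stratify=y)  # fixed seed, stratified on the label
X_train.shape, X_test.shape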
4. Decision Tree Classifier
4.1 Train a model with DecisionTreeClassifier
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=0, max_depth=3)
model.fit(X_train, y_train)
DecisionTreeClassifier(max_depth=3, random_state=0)
4.2 Use sklearn to print the accuracy, draw a confusion matrix, generate a classification report, and extract feature importances
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
def summarize(model, X, y):
    y_pred = model.predict(X)
    print(accuracy_score(y, y_pred))
    print(classification_report(y, y_pred))
    print(confusion_matrix(y, y_pred))
summarize(model,X_test, y_test)
0.8156424581005587
precision recall f1-score support
0 0.80 0.92 0.85 105
1 0.86 0.66 0.75 74
accuracy 0.82 179
macro avg 0.83 0.79 0.80 179
weighted avg 0.82 0.82 0.81 179
[[97 8]
[25 49]]
plt.bar(x = X.columns, height = model.feature_importances_)
<BarContainer object of 7 artists>
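The bar chart shows the importances in column order; sorting them as a labelled Series makes the ranking easier to read. A small sketch, not part of the original notebook (importances is an illustrative name):

importances = pd.Series(model.feature_importances_, index=X.columns).sort_values()
print(importances)
importances.plot(kind="barh")  # horizontal bars, largest importance at the top
plt.show()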
4.3 Plot the fitted decision tree model with plot_tree from sklearn.tree
import matplotlib.pyplot as plt
from sklearn import tree
plt.figure(figsize=(20, 15))
tree.plot_tree(model,
               feature_names=X.columns,
               impurity=True, filled=True,
               rounded=True, max_depth=24)
plt.show()
summarize(model,X_test, y_test)
0.8156424581005587
precision recall f1-score support
0 0.80 0.92 0.85 105
1 0.86 0.66 0.75 74
accuracy 0.82 179
macro avg 0.83 0.79 0.80 179
weighted avg 0.82 0.82 0.81 179
[[97 8]
[25 49]]
y_train.shape, y_test.shape
((712, 1), (179, 1))
X_train.shape, X_test.shape
((712, 7), (179, 7))
5. Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV
max_leaf_nodes = range(20, 30)
criterion = ["gini", "entropy"]
params = {"max_leaf_nodes": max_leaf_nodes, "criterion": criterion}
tree_grid = GridSearchCV(DecisionTreeClassifier(), params, cv=5, n_jobs=-1, verbose=1, scoring="accuracy")
tree_grid.fit(X_train, y_train)
Fitting 5 folds for each of 20 candidates, totalling 100 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 56 tasks | elapsed: 2.9s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 3.0s finished
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
param_grid={'criterion': ['gini', 'entropy'],
'max_leaf_nodes': range(20, 30)},
scoring='accuracy', verbose=1)
tree_grid.best_estimator_
DecisionTreeClassifier(criterion='entropy', max_leaf_nodes=28)
tree_grid.best_params_
{'criterion': 'entropy', 'max_leaf_nodes': 28}
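The grid search settles on criterion='entropy' and max_leaf_nodes=28, but the submission below is still produced by the depth-3 model. The tuned tree could be evaluated (and, if it scores better, used instead) as in this sketch (best_model is an illustrative name):

best_model = tree_grid.best_estimator_   # already refit on X_train by GridSearchCV
summarize(best_model, X_test, y_test)    # same accuracy / report / confusion-matrix summary as above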
df_test = pd.read_csv("/kaggle/input/titanic/test.csv")
submit_x = df_test[input_features]
submit_x["Sex"] = submit_x["Sex"].astype("category").cat.codes
submit_x["Embarked"] = submit_x["Embarked"].astype("category").cat.codes
submit_x = submit_x.fillna(-1)
submit_y = model.predict(submit_x)  # note: predictions come from the depth-3 model, not from tree_grid.best_estimator_
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
"""Entry point for launching an IPython kernel.
/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
submit_y
array([0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0,
1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1,
0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,
0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,
1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0])
submit_x

|     | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked |
| --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 3 | 1 | 34.5 | 0 | 0 | 7.8292 | 1 |
| 1 | 3 | 0 | 47.0 | 1 | 0 | 7.0000 | 2 |
| 2 | 2 | 1 | 62.0 | 0 | 0 | 9.6875 | 1 |
| 3 | 3 | 1 | 27.0 | 0 | 0 | 8.6625 | 2 |
| 4 | 3 | 0 | 22.0 | 1 | 1 | 12.2875 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 413 | 3 | 1 | -1.0 | 0 | 0 | 8.0500 | 2 |
| 414 | 1 | 0 | 39.0 | 0 | 0 | 108.9000 | 0 |
| 415 | 3 | 1 | 38.5 | 0 | 0 | 7.2500 | 2 |
| 416 | 3 | 1 | -1.0 | 0 | 0 | 8.0500 | 2 |
| 417 | 3 | 1 | -1.0 | 1 | 1 | 22.3583 | 0 |

418 rows × 7 columns
df_submit = pd.read_csv("/kaggle/input/titanic/gender_submission.csv")
df_submit["Survived"] = submit_y
df_submit

|     | PassengerId | Survived |
| --- | --- | --- |
| 0 | 892 | 0 |
| 1 | 893 | 1 |
| 2 | 894 | 0 |
| 3 | 895 | 0 |
| 4 | 896 | 1 |
| ... | ... | ... |
| 413 | 1305 | 0 |
| 414 | 1306 | 1 |
| 415 | 1307 | 0 |
| 416 | 1308 | 0 |
| 417 | 1309 | 0 |

418 rows × 2 columns
df_submit.to_csv("submit2.csv", index=False)
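Reading the file back is a quick way to confirm it matches the expected submission format (418 rows, PassengerId and Survived columns). A minimal sanity check, not in the original notebook (check is an illustrative name):

check = pd.read_csv("submit2.csv")
print(check.shape)             # expected (418, 2)
print(check.columns.tolist())  # expected ['PassengerId', 'Survived']
check.head()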