Simple CNN Model with Keras
Author: https://www.kaggle.com/frules11
From: https://www.kaggle.com/frules11/cnn-model-with-keras
License: Apache 2.0
Score: 1.0000
In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
import os
print(os.listdir("../input"))
# Any results you write to the current directory are saved as output.
['test', 'train', 'train.csv', 'sample_submission.csv']
In [2]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
Using TensorFlow backend.
In [3]:
class DataLoader:
    def __init__(self, npy_file: str = "npy_data"):
        # Directory where decoded images are cached as .npy files.
        self.npy_file = npy_file
        self.csv_name = "../input/train.csv"
        self.df = self.read_csv()
        self.n_classes = 2
        os.makedirs(self.npy_file, exist_ok=True)

    def read_csv(self):
        df = pd.read_csv(self.csv_name)
        return df

    def read_data(self, load_from_npy: bool = True, size2resize: tuple = (75, 75), make_gray: bool = True,
                  save: bool = True, categorical: bool = False, n_classes: int = 2):
        x_data = []
        y_data = []
        if load_from_npy:
            # Try the cache first; fall back to decoding the images.
            try:
                x_data = np.load(f"{self.npy_file}/x_data.npy")
                y_data = np.load(f"{self.npy_file}/y_data.npy")
            except FileNotFoundError:
                load_from_npy = False
                print("NPY files not found!")
        if not load_from_npy:
            x_data = []
            y_data = []
            for dir_label in tqdm(self.df.values):
                # Each row of train.csv is (filename, label).
                img = cv2.imread(os.path.join("../input", "train/train", dir_label[0]))
                if make_gray:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = cv2.resize(img, size2resize)
                x_data.append(img)
                y_data.append(int(dir_label[1]))
                del img
            x_data = np.array(x_data)
            y_data = np.array(y_data)
            if save:
                np.save(f"{self.npy_file}/x_data.npy", x_data)
                np.save(f"{self.npy_file}/y_data.npy", y_data)
        if categorical:
            y_data = tf.keras.utils.to_categorical(y_data, num_classes=n_classes)
        else:
            y_data = y_data.reshape(-1, 1)
        if load_from_npy and make_gray:
            # Cached arrays may still be BGR; skip conversion if they are already single-channel.
            try:
                x_data = [cv2.cvtColor(n, cv2.COLOR_BGR2GRAY) for n in x_data]
            except cv2.error:
                pass
        if make_gray:
            x_data = np.expand_dims(x_data, axis=-1)
        return x_data, y_data

    def read_test_data(self, load_from_npy: bool = True, size2resize: tuple = (75, 75), make_gray: bool = True,
                       save: bool = True, categorical: bool = False, n_classes: int = 2):
        test_df = pd.read_csv("../input/sample_submission.csv")
        x_data = []
        y_data = []
        if load_from_npy:
            try:
                x_data = np.load(f"{self.npy_file}/x_data_test.npy")
                y_data = np.load(f"{self.npy_file}/y_data_test.npy")
            except FileNotFoundError:
                load_from_npy = False
                print("NPY files not found!")
        if not load_from_npy:
            x_data = []
            y_data = []
            for dir_label in tqdm(test_df.values):
                # The "labels" here are just sample-submission placeholders;
                # only x_data is meaningful for the test set.
                img = cv2.imread(os.path.join("../input", "test/test", dir_label[0]))
                if make_gray:
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                img = cv2.resize(img, size2resize)
                x_data.append(img)
                y_data.append(int(dir_label[1]))
                del img
            x_data = np.array(x_data)
            y_data = np.array(y_data)
            if save:
                np.save(f"{self.npy_file}/x_data_test.npy", x_data)
                np.save(f"{self.npy_file}/y_data_test.npy", y_data)
        if categorical:
            y_data = tf.keras.utils.to_categorical(y_data, num_classes=n_classes)
        else:
            y_data = y_data.reshape(-1, 1)
        if load_from_npy and make_gray:
            try:
                x_data = [cv2.cvtColor(n, cv2.COLOR_BGR2GRAY) for n in x_data]
            except cv2.error:
                pass
        if make_gray:
            x_data = np.expand_dims(x_data, axis=-1)
        return x_data, y_data
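Note that the loader hands back raw uint8 pixels in [0, 255]; nothing in the pipeline rescales them before training (the BatchNormalization layer below partly compensates). Networks like this one usually converge faster on normalized inputs, so an optional step such as the sketch below could be slipped in between loading and training. This is not part of the original kernel, and the normalize_images name is hypothetical.

def normalize_images(x: np.ndarray) -> np.ndarray:
    # Cast to float32 and scale raw [0, 255] pixel values into [0, 1].
    return x.astype(np.float32) / 255.0

# Usage: X_data = normalize_images(X_data) before constructing TrainWithKeras.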
In [4]:
class TrainWithKeras:
    def __init__(self, x_data, y_data, lr: float = 0.001, epochs: int = 10, batch_size: int = 32,
                 loss: str = "categorical_crossentropy", model_path: str = "model.h5"):
        self.x_data = x_data
        self.y_data = y_data
        self.model_path = model_path
        self.epochs = epochs
        self.batch_size = batch_size
        self.optimizer = Adam(lr=lr)
        self.loss = loss

    def make_model(self, summarize: bool = True):
        # Plain convolutional stack: widths double from 64 to 1024, with
        # pooling, dropout and one batch-norm layer in between.
        model = Sequential()
        model.add(Conv2D(64, (3, 3), strides=1, activation="relu",
                         input_shape=(self.x_data.shape[1], self.x_data.shape[2], self.x_data.shape[3])))
        model.add(MaxPooling2D())
        model.add(Conv2D(128, (3, 3), strides=1, activation="relu"))
        model.add(Dropout(0.3))
        model.add(BatchNormalization())
        model.add(Conv2D(256, (3, 3), strides=1, activation="relu"))
        model.add(MaxPooling2D())
        model.add(Conv2D(512, (3, 3), strides=1, activation="relu"))
        model.add(Dropout(0.3))
        model.add(Conv2D(1024, (3, 3), strides=1, activation="relu"))
        model.add(Flatten())
        model.add(Dense(1024, activation="relu"))
        model.add(Dropout(0.3))
        model.add(Dense(2, activation="softmax"))
        if summarize:
            model.summary()
        return model

    def compile(self, kmodel: Sequential):
        kmodel.compile(loss=self.loss, optimizer=self.optimizer, metrics=["acc"])
        return kmodel

    def train(self, kmodel: Sequential, save: bool = True):
        # validation_split=0.0: the model trains (and reports accuracy) on all samples.
        history = kmodel.fit(self.x_data, self.y_data, batch_size=self.batch_size, epochs=self.epochs,
                             validation_split=0.0)
        if save:
            kmodel.save(self.model_path)
        return history, kmodel
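Since train fixes validation_split=0.0, every accuracy figure printed during fitting is measured on the training set itself. If a held-out check is wanted, fit accepts a nonzero split and Keras ships an EarlyStopping callback; below is a minimal sketch of an alternative, assuming a Keras version recent enough to support restore_best_weights (2.2.3+). The train_with_validation name is hypothetical, not the author's code.

from keras.callbacks import EarlyStopping

def train_with_validation(kmodel, x_data, y_data, epochs=50, batch_size=1024):
    # Hold out 10% of the samples and stop once val_loss stalls for 5 epochs.
    stopper = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
    return kmodel.fit(x_data, y_data, batch_size=batch_size, epochs=epochs,
                      validation_split=0.1, callbacks=[stopper])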
In [5]:
class MakeSubmission:
    def __init__(self, x_test: np.ndarray, model_path: str, csv_path: str):
        self.x_test = x_test
        self.model_path = model_path
        self.csv_path = csv_path
        self.model = tf.keras.models.load_model(self.model_path)
        self.df = pd.read_csv(self.csv_path)
        preds = self.make_predictions()
        submission = pd.DataFrame({'id': self.df['id'], 'has_cactus': preds})
        submission.to_csv("sample_submission.csv", index=False)

    def make_predictions(self, make_it_ready: bool = True):
        preds = self.model.predict(self.x_test)
        if make_it_ready:
            # Collapse the two softmax outputs into a hard 0/1 label.
            preds = [np.argmax(n) for n in preds]
        return preds
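make_predictions collapses the softmax pair into a hard 0/1 label via argmax. If the leaderboard metric is ranking-based (e.g. ROC AUC), submitting the class-1 probability itself preserves the model's confidence and generally scores at least as well; a sketch of that variant (a hypothetical helper, not the author's method):

def predict_probabilities(model, x_test):
    # Column 1 of the (N, 2) softmax output is the predicted P(has_cactus = 1);
    # submit it directly instead of the argmax label.
    return model.predict(x_test)[:, 1]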
In [6]:
os.makedirs("models", exist_ok=True)
dl = DataLoader()
X_data, Y_data = dl.read_data(True, (32, 32), False, True, True, 2)
0%| | 71/17500 [00:00<00:24, 703.67it/s]
NPY files not found!
100%|██████████| 17500/17500 [00:25<00:00, 692.41it/s]
In [7]:
trainer = TrainWithKeras(X_data, Y_data, model_path="models/model.h5", epochs=50, batch_size=1024, lr=0.0002)
model = trainer.make_model()
model = trainer.compile(model)
history, model = trainer.train(model)
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_1 (Conv2D) (None, 30, 30, 64) 1792
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 15, 15, 64) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 13, 13, 128) 73856
_________________________________________________________________
dropout_1 (Dropout) (None, 13, 13, 128) 0
_________________________________________________________________
batch_normalization_1 (Batch (None, 13, 13, 128) 512
_________________________________________________________________
conv2d_3 (Conv2D) (None, 11, 11, 256) 295168
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 5, 5, 256) 0
_________________________________________________________________
conv2d_4 (Conv2D) (None, 3, 3, 512) 1180160
_________________________________________________________________
dropout_2 (Dropout) (None, 3, 3, 512) 0
_________________________________________________________________
conv2d_5 (Conv2D) (None, 1, 1, 1024) 4719616
_________________________________________________________________
flatten_1 (Flatten) (None, 1024) 0
_________________________________________________________________
dense_1 (Dense) (None, 1024) 1049600
_________________________________________________________________
dropout_3 (Dropout) (None, 1024) 0
_________________________________________________________________
dense_2 (Dense) (None, 2) 2050
=================================================================
Total params: 7,322,754
Trainable params: 7,322,498
Non-trainable params: 256
_________________________________________________________________
Epoch 1/50
17500/17500 [==============================] - 5s 291us/step - loss: 0.4455 - acc: 0.7814
Epoch 2/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.1582 - acc: 0.9434
Epoch 3/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0925 - acc: 0.9665
Epoch 4/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0686 - acc: 0.9747
Epoch 5/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0623 - acc: 0.9774
Epoch 6/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0475 - acc: 0.9833
Epoch 7/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0484 - acc: 0.9819
Epoch 8/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0393 - acc: 0.9862
Epoch 9/50
17500/17500 [==============================] - 2s 89us/step - loss: 0.0394 - acc: 0.9858
Epoch 10/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0325 - acc: 0.9884
Epoch 11/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0468 - acc: 0.9848
Epoch 12/50
17500/17500 [==============================] - 1s 85us/step - loss: 0.0314 - acc: 0.9895
Epoch 13/50
17500/17500 [==============================] - 1s 86us/step - loss: 0.0414 - acc: 0.9859
Epoch 14/50
17500/17500 [==============================] - 1s 85us/step - loss: 0.0295 - acc: 0.9893
Epoch 15/50
17500/17500 [==============================] - 1s 86us/step - loss: 0.0203 - acc: 0.9929
Epoch 16/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0251 - acc: 0.9920
Epoch 17/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0193 - acc: 0.9935
Epoch 18/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0247 - acc: 0.9914
Epoch 19/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0179 - acc: 0.9938
Epoch 20/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0149 - acc: 0.9946
Epoch 21/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0119 - acc: 0.9957
Epoch 22/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0110 - acc: 0.9963
Epoch 23/50
17500/17500 [==============================] - 1s 85us/step - loss: 0.0120 - acc: 0.9958
Epoch 24/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0228 - acc: 0.9921
Epoch 25/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0134 - acc: 0.9960
Epoch 26/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0104 - acc: 0.9963
Epoch 27/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0136 - acc: 0.9954
Epoch 28/50
17500/17500 [==============================] - 1s 85us/step - loss: 0.0086 - acc: 0.9970
Epoch 29/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0096 - acc: 0.9967
Epoch 30/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0080 - acc: 0.9971
Epoch 31/50
17500/17500 [==============================] - 1s 86us/step - loss: 0.0204 - acc: 0.9934
Epoch 32/50
17500/17500 [==============================] - 1s 86us/step - loss: 0.0141 - acc: 0.9948
Epoch 33/50
17500/17500 [==============================] - 2s 86us/step - loss: 0.0067 - acc: 0.9974
Epoch 34/50
17500/17500 [==============================] - 1s 85us/step - loss: 0.0070 - acc: 0.9975
Epoch 35/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0129 - acc: 0.9950
Epoch 36/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0072 - acc: 0.9976
Epoch 37/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0204 - acc: 0.9926
Epoch 38/50
17500/17500 [==============================] - 2s 88us/step - loss: 0.0097 - acc: 0.9964
Epoch 39/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0054 - acc: 0.9983
Epoch 40/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0055 - acc: 0.9979
Epoch 41/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0059 - acc: 0.9978
Epoch 42/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0095 - acc: 0.9965
Epoch 43/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0057 - acc: 0.9979
Epoch 44/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0058 - acc: 0.9983
Epoch 45/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0055 - acc: 0.9982
Epoch 46/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0036 - acc: 0.9986
Epoch 47/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0049 - acc: 0.9983
Epoch 48/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0055 - acc: 0.9981
Epoch 49/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0044 - acc: 0.9984
Epoch 50/50
17500/17500 [==============================] - 2s 87us/step - loss: 0.0031 - acc: 0.9989
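Because train returns the Keras History object alongside the model, the numbers above can also be plotted rather than read from the log; a quick sketch, assuming matplotlib is available in the image:

import matplotlib.pyplot as plt

# history.history holds one list per tracked metric, indexed by epoch.
plt.plot(history.history["loss"], label="loss")
plt.plot(history.history["acc"], label="acc")
plt.xlabel("epoch")
plt.legend()
plt.show()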
In [8]:
X_data_test, Y_data_test = dl.read_test_data(True, (32, 32), False, True, False)
ms = MakeSubmission(X_data_test, "models/model.h5", "../input/sample_submission.csv")
2%|▏ | 62/4000 [00:00<00:06, 614.87it/s]
NPY files not found!
100%|██████████| 4000/4000 [00:05<00:00, 669.71it/s]