In [1]:
!pip install aicrowd-cli==0.1
API_KEY = "4c49d255257272f8caf90b1e74b8cccd"
!aicrowd login --api-key $API_KEY

[32mAPI Key valid[0m
[32mSaved API Key successfully![0m


In [2]:
!aicrowd dataset download --challenge rover-classification -j 3

sample_submission.csv:   0% 0.00/164k [00:00<?, ?B/s]
sample_submission.csv: 100% 164k/164k [00:00<00:00, 384kB/s]
sample_submission.csv: 100% 164k/164k [00:00<00:00, 384kB/s]

train.csv: 100% 689k/689k [00:00<00:00, 961kB/s]
train.csv: 100% 689k/689k [00:00<00:00, 961kB/s]

train.zip:   0% 0.00/266M [00:00<?, ?B/s][A

val.csv:   0% 0.00/64.8k [00:00<?, ?B/s][A[A

val.csv: 100% 64.8k/64.8k [00:00<00:00, 261kB/s]


val.zip:   0% 0.00/26.6M [00:00<?, ?B/s][A[A
train.zip:  13% 33.6M/266M [00:02<00:18, 12.9MB/s][A

val.zip: 100% 26.6M/26.6M [00:02<00:00, 10.8MB/s]

test.zip:  51% 33.6M/66.4M [00:05<00:05, 6.31MB/s]
train.zip:  38% 101M/266M [00:05<00:10, 15.8MB/s] [A
test.zip: 100% 66.4M/66.4M [00:08<00:00, 7.08MB/s]
test.zip: 100% 66.4M/66.4M [00:08<00:00, 7.68MB/s]
train.zip: 100% 266M/266M [00:14<00:00, 18.9MB/s]


In [3]:
!rm -rf data
!mkdir data


!unzip train.zip -d data/train >/dev/null
!unzip val.zip -d data/val >/dev/null
!unzip test.zip -d data/test >/dev/null

In [4]:
import pandas as pd
import os
import re
import tensorflow as tf

In [5]:
df_train = pd.read_csv("train.csv")

In [6]:
df_val = pd.read_csv("val.csv")

In [7]:
df_train['ImageID'] = df_train['ImageID'].astype(str)+".jpg"
df_val['ImageID'] = df_val['ImageID'].astype(str)+".jpg"

In [8]:
INPUT_SIZE = 256

In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [10]:
datagen=ImageDataGenerator(rescale=1./255.)

train_generator=datagen.flow_from_dataframe(
dataframe=df_train,
directory="data/train/",
x_col="ImageID",
y_col="label",
batch_size=32,
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(INPUT_SIZE,INPUT_SIZE))

Found 40000 validated image filenames belonging to 2 classes.


In [11]:
val_generator=datagen.flow_from_dataframe(
dataframe=df_val,
directory="data/val/",
x_col="ImageID",
y_col="label",
batch_size=64,
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(INPUT_SIZE,INPUT_SIZE))

Found 4000 validated image filenames belonging to 2 classes.


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten, Dropout, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers, optimizers
import os
import numpy as np
import pandas as pd

In [13]:
class CustomAugment(object):
    def __call__(self, image):        
        # Random flips and grayscale with some stochasticity
        img = self._random_apply(tf.image.flip_left_right, image, p=0.5)
        img = self._random_apply(self._color_drop, img, p=0.8)
        return img

    def _color_drop(self, x):
        image = tf.image.rgb_to_grayscale(x)
        image = tf.tile(x, [1, 1, 1, 3])
        return x
    
    def _random_apply(self, func, x, p):
        return tf.cond(
          tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32),
                  tf.cast(p, tf.float32)),
          lambda: func(x),
          lambda: x)

In [14]:
data_augmentation = tf.keras.Sequential(
  [
    tf.keras.layers.Lambda(CustomAugment()),
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal", 
                                                 input_shape=(INPUT_SIZE, 
                                                              INPUT_SIZE,
                                                              3)),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
    tf.keras.layers.experimental.preprocessing.RandomZoom(0.1),
  ]
)

In [15]:
model = Sequential()
model.add(data_augmentation)
model.add(tf.keras.applications.ResNet152V2(
    include_top=False,
    weights="imagenet",
    input_shape=(INPUT_SIZE, INPUT_SIZE, 3),
))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))
model.compile(optimizers.RMSprop(lr=0.0001/10), loss="categorical_crossentropy", metrics=["Recall", "Precision"])

In [16]:
model.layers[1].trainable = False

In [17]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VAL=val_generator.n//train_generator.batch_size

In [18]:
model.fit(train_generator, validation_data=val_generator, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f20abdad210>

In [19]:
model.layers[1].trainable = True

In [20]:
len(model.layers[1].layers)

564

In [21]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, None, None, None)  0         
_________________________________________________________________
resnet152v2 (Functional)     (None, 8, 8, 2048)        58331648  
_________________________________________________________________
dropout (Dropout)            (None, 8, 8, 2048)        0         
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 512)               67109376  
_________________________________________________________________
activation (Activation)      (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)              

In [22]:
model.layers[1].trainable = True
for layer in model.layers[1].layers[:100]:
    layer.trainable =  False

In [23]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, None, None, None)  0         
_________________________________________________________________
resnet152v2 (Functional)     (None, 8, 8, 2048)        58331648  
_________________________________________________________________
dropout (Dropout)            (None, 8, 8, 2048)        0         
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 512)               67109376  
_________________________________________________________________
activation (Activation)      (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)              

In [24]:
tf.keras.backend.set_value(model.optimizer.learning_rate, 0.0001/100)

In [25]:
model.fit(train_generator, validation_data=val_generator, epochs=2)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f20abf9d810>

In [26]:
df_test = pd.read_csv("sample_submission.csv",dtype=str)
df_test["ImageID"] = df_test["ImageID"].astype(str)+".jpg"

In [27]:
test_generator=datagen.flow_from_dataframe(
dataframe=df_test,
directory="data/test/",
x_col="ImageID",
y_col="label",
batch_size=1,
seed=42,
shuffle=False,
class_mode="categorical",
target_size=(INPUT_SIZE,INPUT_SIZE))

Found 10000 validated image filenames belonging to 2 classes.


In [28]:
STEP_SIZE_TEST = test_generator.n//test_generator.batch_size

In [29]:
STEP_SIZE_TEST

10000

In [30]:
test_generator.reset()
pred = model.predict(test_generator,
steps=STEP_SIZE_TEST, verbose=1)



In [31]:
predicted_class_indices = np.argmax(pred,axis=1)

In [32]:
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [33]:
df_test["pred"] = predictions

In [34]:
df_test.head()

Unnamed: 0,ImageID,label,pred
0,0.jpg,curiosity,curiosity
1,1.jpg,perseverance,curiosity
2,2.jpg,curiosity,curiosity
3,3.jpg,perseverance,perseverance
4,4.jpg,perseverance,curiosity


In [35]:
df_test.drop("label", axis=1, inplace=True)
df_test.rename(columns={"pred": "label"}, inplace=True)

In [36]:
df_test.head()

Unnamed: 0,ImageID,label
0,0.jpg,curiosity
1,1.jpg,curiosity
2,2.jpg,curiosity
3,3.jpg,perseverance
4,4.jpg,curiosity


In [37]:
df_test["ImageID"] = df_test["ImageID"].map(lambda x: re.sub(r"\D", "", str(x)))

In [38]:
df_test.head()

Unnamed: 0,ImageID,label
0,0,curiosity
1,1,curiosity
2,2,curiosity
3,3,perseverance
4,4,curiosity


In [39]:
df_test.to_csv("data/03_sub.csv", index=False)

In [40]:
!aicrowd submission create -c rover-classification -f data/03_sub.csv

[2K[1;34m03_sub.csv[0m [90m━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100.0%[0m • [32m165.5/163.9 KB[0m • [31m3.3 MB/s[0m • [36m0:00:00[0m
[?25h                                    ╭─────────────────────────╮                                     
                                    │ [1mSuccessfully submitted![0m │                                     
                                    ╰─────────────────────────╯                                     
[3m                                          Important links                                           [0m
┌──────────────────┬───────────────────────────────────────────────────────────────────────────────┐
│  This submission │[1;94m [0m[1;94mhttps://www.aicrowd.com/challenges/ai-blitz-7/submissions/126678             [0m[1;94m [0m│
│                  │                                                                               │
│  All submissions │[1;94m [0m[1;94mhttps://www.aicrowd.com/challenges/ai-blitz-7/submissions?