I have a dataset of geological images. They often have unnecessary padding on the left, right, and bottom. I also have a folder of cropped versions of these images with the padding removed. My goal is to train a CNN that, given a new geological image (or a whole dataset), can crop away the unwanted parts.
I am framing this as image segmentation: for each cropped image I create a binary mask over the original, where the area to keep is 1 and the area to crop out is 0. I train the model to predict this mask rather than crop coordinates, because the cropped image's dimensions often don't map directly back to coordinates in the original image. The images vary in size, generally around 25000x1565 pixels, and the cropped versions are of course smaller.
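Conceptually, the mask for each training pair looks like this (a toy sketch with a made-up crop box; in practice I don't know the crop offsets, which is why my code below derives the mask from the cropped image instead):

```
import numpy as np

orig_h, orig_w = 1566, 34813                      # original image size
top, left, kept_h, kept_w = 0, 120, 1500, 34000   # hypothetical crop box

mask = np.zeros((orig_h, orig_w, 1), dtype=np.float32)
mask[top:top + kept_h, left:left + kept_w] = 1.0  # 1 = keep, 0 = crop away
```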
I am not resizing the images to a fixed dimension; instead, in the preprocessing function I take the maximum height and width across the dataset and pad every image to that size. Running this code gives me a shape mismatch error: ValueError: Arguments target and output must have the same shape. Received: target.shape=(1, 1566, 34813, 1), output.shape=(1, 1568, 34816, 1). I have print statements for sizes and shapes throughout the code, and nothing looked out of place there.
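I suspect those numbers come from the two MaxPooling2D layers rounding odd dimensions up (with padding='same') while the two UpSampling2D layers double exactly; this minimal arithmetic sketch reproduces the shapes from the error message:

```
import math

def through_encoder_decoder(dim, n_pools=2):
    for _ in range(n_pools):
        dim = math.ceil(dim / 2)  # MaxPooling2D((2, 2), padding='same') rounds up
    for _ in range(n_pools):
        dim *= 2                  # UpSampling2D((2, 2)) doubles exactly
    return dim

print(through_encoder_decoder(1566))   # 1568, but the target height is 1566
print(through_encoder_decoder(34813))  # 34816, but the target width is 34813
```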
Where am I going wrong? I am a beginner in this field, but I feel like training a model to crop images shouldn't be too hard. Could this be done more simply, or what am I missing to make it work?
```
import tensorflow as tf
from tensorflow.keras import layers, models, Input
import numpy as np
from PIL import Image
import os
import matplotlib.pyplot as plt
def create_model(input_shape):
    inputs = Input(shape=input_shape)
    # Downsampling
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2), padding='same')(x)
    # Upsampling
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = layers.UpSampling2D((2, 2))(x)
    # Ensure output has same dimensions as input
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    outputs = layers.Conv2D(1, (1, 1), activation='sigmoid', padding='same')(x)
    model = models.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model
def train_model(model, X_train, y_train, epochs=100, batch_size=1):
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
    return history
def load_and_preprocess_data(original_dir, cropped_dir):
    originals = []
    masks = []
    for img_name in os.listdir(original_dir):
        # Process original image
        orig_img = Image.open(os.path.join(original_dir, img_name))
        print(f"Original image size: {orig_img.size}")
        orig_array = np.array(orig_img)
        print(f"Original array shape: {orig_array.shape}")
        # Process cropped image
        cropped_img = Image.open(os.path.join(cropped_dir, img_name))
        print(f"Cropped image size: {cropped_img.size}")
        cropped_array = np.array(cropped_img)
        print(f"Cropped array shape: {cropped_array.shape}")
        # Create mask based on original image size
        mask = np.zeros((*orig_array.shape[:-1], 1), dtype=np.float32)
        # Resize cropped array to match original array size
        resized_cropped = np.array(Image.fromarray(cropped_array).resize(orig_img.size))
        # Create mask
        mask[np.any(resized_cropped > 0, axis=2)] = 1
        print(f"Mask shape: {mask.shape}")
        originals.append(orig_array)
        masks.append(mask)
    # Find the maximum dimensions
    max_height = max(img.shape[0] for img in originals)
    max_width = max(img.shape[1] for img in originals)
    # Pad images to the maximum size
    padded_originals = []
    padded_masks = []
    for orig, mask in zip(originals, masks):
        pad_height = max_height - orig.shape[0]
        pad_width = max_width - orig.shape[1]
        padded_orig = np.pad(orig, ((0, pad_height), (0, pad_width), (0, 0)), mode='constant')
        padded_mask = np.pad(mask, ((0, pad_height), (0, pad_width), (0, 0)), mode='constant')
        padded_originals.append(padded_orig)
        padded_masks.append(padded_mask)
    X = np.array(padded_originals)
    y = np.array(padded_masks)
    print(f"Final X shape: {X.shape}")
    print(f"Final y shape: {y.shape}")
    return X, y
# Main execution
original_dir = "uncropped"
cropped_dir = "cropped"
X, y = load_and_preprocess_data(original_dir, cropped_dir)
# Convert to tensors
X = tf.convert_to_tensor(X)
y = tf.convert_to_tensor(y)
print(f"X tensor shape: {X.shape}")
print(f"y tensor shape: {y.shape}")
# Create and train the model
input_shape = X.shape[1:] # (height, width, channels)
model = create_model(input_shape)
history = train_model(model, X, y, epochs=100, batch_size=1)
```
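For what it's worth, the mismatch shows up even without training: building the model with my actual padded dimensions and inspecting the symbolic output shape (a quick standalone check, not part of the script above) already shows the height and width growing:

```
model = create_model((1566, 34813, 3))
print(model.output_shape)  # (None, 1568, 34816, 1) -- larger than the input
```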