Create dataset from a folder of images and a txt file with labels - image-processing

I have two folders containing train and test images. And I have two files train.txt and test.txt containing the labels. I want to create a dataset for a PyTorch setting. I mean, I want to use DataLoader and I want the tuples (x_train, y_train) and (x_test, y_test). How can I do? Basically I think that in tensorflow I can use flow_from_dataframe, but in PyTorch?
As an example, in this image I show my pandas dataframe containing filenames and labels:
I am trying with a custom dataset class:
from import Dataset, DataLoader
import os
import pandas as pd
from import read_image
class CoViDxDataset(Dataset):
def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
self.img_labels = annotations_file
self.img_dir = img_dir
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.img_labels)
def __getitem__(self, idx):
img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
image = read_image(img_path)
label = self.img_labels.iloc[idx, 1]
if self.transform:
image = self.transform(image)
if self.target_transform:
label = self.target_transform(label)
return image, label
train_dataset = CoViDxDataset(train_df, train_path)
When I do train_dataset[0] I have this error:
RuntimeError Traceback (most recent call last)
Input In [84], in <cell line: 1>()
----> 1 train_dataset[0]
Input In [82], in CoViDxDataset.__getitem__(self, idx)
17 img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
18 print(img_path)
---> 19 image = read_image(img_path)
20 label = self.img_labels.iloc[idx, 1]
21 if self.transform:
File ~/anaconda3/envs/openfl/lib/python3.8/site-packages/torchvision/io/, in read_image(path, mode)
243 if not torch.jit.is_scripting() and not torch.jit.is_tracing():
244 _log_api_usage_once(read_image)
--> 245 data = read_file(path)
246 return decode_image(data, mode)
File ~/anaconda3/envs/openfl/lib/python3.8/site-packages/torchvision/io/, in read_file(path)
45 if not torch.jit.is_scripting() and not torch.jit.is_tracing():
46 _log_api_usage_once(read_file)
---> 47 data = torch.ops.image.read_file(path)
48 return data
RuntimeError: Expected a non empty file
where train_path = 'train/' and train_df is the image below:


Pytorch: "KeyError: Caught KeyError in DataLoader worker process 0."

Problem Description:
I am trying to load image data using Pytorch custom dataset. I did a little dive deep and found that my images set consist of 2 types of shape (512,512,3) and (1024,1024) . My assumption is, because of the above reason, it is throwing the below error.
Note: The code is able to read some of the images but, it is throwing the below error message for few of them. This was the reason to do a little EDA on the image data and found that there were 2 different shapes of images in the dataset.
Q1. How to preprocess such image data for training?
Q2. Is there any other reasons why I might be seeing the below error message?
Error message:
KeyError Traceback (most recent call last)
<ipython-input-163-aa3385de8026> in <module>
----> 1 train_features, train_labels = next(iter(train_dataloader))
2 print(f"Feature batch shape: {train_features.size()}")
3 print(f"Labels batch shape: {train_labels.size()}")
4 img = train_features[0].squeeze()
5 label = train_labels[0]
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils /data/ in __next__(self)
519 if self._sampler_iter is None:
520 self._reset()
521 data = self._next_data()
522 self._num_yielded += 1
523 if self._dataset_kind == _DatasetKind.Iterable and \
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils/data/ in _next_data(self)
1201 else:
1202 del self._task_info[idx]
1203 return self._process_data(data)
1205 def _try_put_index(self):
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils/data/ in _process_data(self, data)
1227 self._try_put_index()
1228 if isinstance(data, ExceptionWrapper):
1229 data.reraise()
1230 return data
~/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/ in reraise(self)
423 # have message field
424 raise self.exc_type(message=msg)
425 raise self.exc_type(msg)
KeyError: Caught KeyError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas /core/indexes/", line 2898, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 70, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 101, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 1032, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 1039, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: 16481
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils/data/_utils/", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils/data/_utils/", line 44, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/torch/utils/data/_utils/", line 44, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "<ipython-input-161-f38b78d77dcb>", line 19, in __getitem__
img_path =os.path.join(self.img_dir,self.image_ids[idx])
File "/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/", line 882, in __getitem__
return self._get_value(key)
File "/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/", line 990, in _get_value
loc = self.index.get_loc(label)
File "/home/ec2-user/anaconda3/envs/pytorch_p36/lib/python3.6/site-packages/pandas/core/indexes/", line 2900, in get_loc
raise KeyError(key) from err
KeyError: 16481
from import read_image
import torch
from torchvision import transforms
from sklearn.model_selection import train_test_split
from import Dataset
class CustomImageDataset(Dataset):
# init
def __init__(self,dataset,transforms=None,target_transforms=None):
#self.train_data = pd.read_csv("Data/train_data.csv")
self.image_ids = dataset.image_id
self.image_labels = dataset.label
self.img_dir = 'Data/images'
self.transforms = transforms
self.target_transforms = target_transforms
# len
def __len__(self):
return len(self.image_ids)
# getitem
def __getitem__(self,idx):
# image path
img_path =os.path.join(self.img_dir,self.image_ids[idx])
# image
image = read_image(img_path)
label = self.image_labels[idx]
# transform image
if self.transforms:
image = self.transforms(image)
# transform target
if self.target_transforms:
label = self.target_transforms(label)
return image, label
Code: train_data is the pandas object of the csv file which has the image id, labesl information.
from sklearn.model_selection import train_test_split
X_train, X_test = train_test_split(train_data, test_size=0.1, random_state=42)
train_df = CustomImageDataset(X_train)
train_dataloader =
found the issue with the code.
Pytorch Custom Dataloader function "getitem" uses idx to retrieve data and my guess is, it know the range of idx from len function, ex: 0, till len(rows in dataset).
In my case, I already had a panda dataset (train_data) with idx as one of the column. When I randomly split it into X_train and X_test, few of the data rows were moved to X_test along with the idx.
Now, when I send X_train to the custom dataloader, it is trying to get row's image_id with an idx and that idx just happens to be in X_test dataset. This lead to error as keyerror: 16481 i.e row with idx=16481 is not present in the X_train dataset. It was moved to X_test during split.
I got the same error while fine-tuning the DistilBertModel transformers-based model in PyTorch while replacing its head.
I've forgotten to reset the indices of train_dataframe and test_dataframe after train_test_split that caused my CustomDatasetto index improperly.

TypeError: Cannot convert a symbolic Keras input/output to a numpy array

I am trying to do a predictions on my pretrained model , I have total of 40 classes , it is showing me predictions in epsilon numbers , I want to choose maximum from it and by using if-else , I want to classify it . It is giving me above error !
from keras.applications.inception_resnet_v2 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import imagenet_utils
import numpy as np
def prepare_image(file):
img_path = '/content/drive/MyDrive/test imgs/'
img = image.load_img(img_path + file, target_size=(224, 224))
img_array = image.img_to_array(img)
img_array_expanded_dims = np.expand_dims(img_array, axis=0)
return tf.keras.applications.inception_resnet_v2.preprocess_input(img_array_expanded_dims)
model = tf.keras.models.load_model("CNN ResNet.h5")
preprocessed_image = prepare_image('mcd.jpg')
predictions = model.predict(preprocessed_image)
print("Highest position : ", highest)
print("This class is Acer")

RuntimeError: output with shape [512] doesn't match the broadcast shape [1, 512, 1, 512] while extracting feature vector using pytorch

I am not able to resolve this error. This code is taken from
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
from PIL import Image
pic_one = '/content/drive/My Drive/Video_Recommender/zframe1.jpg'
pic_two = '/content/drive/My Drive/Video_Recommender/zframe2.jpg'
model = models.resnet18(pretrained=True)
layer = model._modules.get('avgpool')
scaler = transforms.Scale((224, 224))
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
to_tensor = transforms.ToTensor()
def get_vector(image_name):
# 1. Load the image with Pillow library
img =
# 2. Create a PyTorch Variable with the transformed image
t_img = Variable(normalize(to_tensor(scaler(img))).unsqueeze(0))
# 3. Create a vector of zeros that will hold our feature vector
# The 'avgpool' layer has an output size of 512
my_embedding = torch.zeros(512)
# 4. Define a function that will copy the output of a layer
def copy_data(m, i, o):
# 5. Attach that function to our selected layer
h = layer.register_forward_hook(copy_data)
# 6. Run the model on our transformed image
# 7. Detach our copy function from the layer
# 8. Return the feature vector
return my_embedding
pic_one_vector = get_vector(pic_one)
pic_two_vector = get_vector(pic_two)
RuntimeError Traceback (most recent call last)
<ipython-input-41-ca2d66de2d9c> in <module>()
----> 1 pic_one_vector = get_vector(pic_one)
2 pic_two_vector = get_vector(pic_two)
5 frames
<ipython-input-40-a45affe9d8f7> in get_vector(image_name)
13 h = layer.register_forward_hook(copy_data)
14 # 6. Run the model on our transformed image
---> 15 model(t_img)
16 # 7. Detach our copy function from the layer
17 h.remove()
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ in __call__(self, *input, **kwargs)
548 result = self._slow_forward(*input, **kwargs)
549 else:
--> 550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
552 hook_result = hook(self, input, result)
/usr/local/lib/python3.6/dist-packages/torchvision/models/ in forward(self, x)
219 def forward(self, x):
--> 220 return self._forward_impl(x)
/usr/local/lib/python3.6/dist-packages/torchvision/models/ in _forward_impl(self, x)
211 x = self.layer4(x)
--> 213 x = self.avgpool(x)
214 x = torch.flatten(x, 1)
215 x = self.fc(x)
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ in __call__(self, *input, **kwargs)
550 result = self.forward(*input, **kwargs)
551 for hook in self._forward_hooks.values():
--> 552 hook_result = hook(self, input, result)
553 if hook_result is not None:
554 result = hook_result
<ipython-input-40-a45affe9d8f7> in copy_data(m, i, o)
9 # 4. Define a function that will copy the output of a layer
10 def copy_data(m, i, o):
---> 11 my_embedding.copy_(
12 # 5. Attach that function to our selected layer
13 h = layer.register_forward_hook(copy_data)
RuntimeError: output with shape [512] doesn't match the broadcast shape [1, 512, 1, 512]
What I am actually trying to do is trying to extract feature vector from images that I want to further use for building a recommendation system. Do inform me if there is any other alternative available.
Thanks in advance!!!
You need to reshape the output data after avgpool:
def copy_data(m, i, o):
Alternatively, you may replace the hook function with the following, just so you don't have to deal with adjusting the output shape:
# step 3 and 4
my_embedding = None
def my_hook(module_, input_, output_):
nonlocal my_output
my_embedding = output_
then simply call the following
# step 5
h = layer.register_forward_hook(my_hook)

How to use custom image loader for DataGenerator keras?

I'm trying to use a custom preprocessing function that uses OpenCV but there's a mismatch between the image loaded by the DataGenerator and the CV2 default type.
Is it possible to specify which function to use to load images?
Here is my code.
def read_and_process_image(im,im_size):
#read image from file
gray = cv2.cvtColor(im,cv2.COLOR_RGB2GRAY) # convert 2 grayscale
im_pil = Image.fromarray(gray)
_,thresh = cv2.threshold(gray,10,255,cv2.THRESH_BINARY) # turn it into a binary image
contours,hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) # find contours
if len(contours) != 0:
#find the biggest area
cnt = max(contours, key = cv2.contourArea)
#find the bounding rect
x,y,w,h = cv2.boundingRect(cnt)
crop = im[y+r:y+h-r,x+r:x+w-r]# crop image
# resize to im_size X im_size size
#crop1 = cv2.convertScaleAbs(crop, alpha=1, beta=0.0001)
#clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
#crop1 = clahe.apply(crop1)
return crop1
return( normalize_histograms(cv2.resize(im,(im_size,im_size))) )
the preprocessing function to call:
def preprocessing_image(image):
global IM_SIZE
return image
and the DataGenerator:
train_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1./255,
val_gen = train_datagen.flow_from_dataframe(dataframe=val_data,
I get the following error:
error Traceback (most recent call last)
<ipython-input-130-c8fee3202272> in <module>
----> 1 plt.imshow(val_gen[0])
~\Anaconda3\lib\site-packages\keras_preprocessing\image\ in __getitem__(self, idx)
63 index_array = self.index_array[self.batch_size * idx:
64 self.batch_size * (idx + 1)]
---> 65 return self._get_batches_of_transformed_samples(index_array)
67 def __len__(self):
~\Anaconda3\lib\site-packages\keras_preprocessing\image\ in _get_batches_of_transformed_samples(self, index_array)
237 params = self.image_data_generator.get_random_transform(x.shape)
238 x = self.image_data_generator.apply_transform(x, params)
--> 239 x = self.image_data_generator.standardize(x)
240 batch_x[i] = x
241 # optionally save augmented images to disk for debugging purposes
~\Anaconda3\lib\site-packages\keras_preprocessing\image\ in standardize(self, x)
702 """
703 if self.preprocessing_function:
--> 704 x = self.preprocessing_function(x)
705 if self.rescale:
706 x *= self.rescale
<ipython-input-101-3a910a8620ec> in preprocessing_image(image)
15 """
16 # TODO: augment more here
---> 17 image=read_and_process_image(image,IM_SIZE)
18 return image
<ipython-input-128-aa711687f072> in read_and_process_image(im, im_size)
8 im_pil = Image.fromarray(gray)
9 _,thresh = cv2.threshold(gray,10,255,cv2.THRESH_BINARY) # turn it into a binary image
---> 10 contours,hierarchy = cv2.findContours(thresh,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_SIMPLE) # find contours
12 if len(contours) != 0:
error: OpenCV(4.1.2) C:\projects\opencv-python\opencv\modules\imgproc\src\contours.cpp:197: error: (-210:Unsupported format or combination of formats) [Start]FindContours supports only CV_8UC1 images when mode != CV_RETR_FLOODFILL otherwise supports CV_32SC1 images only in function 'cvStartFindContours_Impl'
A cv2 image is nothing but a numpy array.
You can easily transform a PIL image (Keras) into a cv2 image by simply calling cv2_image = np.array(pil_image).
Since cv2 works with BGR instead of RGB, you may call cv2_image = np.flip(cv2_image, axis=-1) (if there are 3 channels)

index 400 is out of bounds for axis 1 with size 368

im trying to build a face recognition model using CNN i have an image dataset that consists of 368 classes each class contains 15 images . im trying to use keras.utils.to_categorical on the image labels(class names which are numbers from 1-368 ) to set a specific label to 1 while set the others to zero during is what i have done so far
import cv2
import numpy as np
import os
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from pathlib import Path
from sklearn.model_selection import train_test_split
SEED = 44000
data = []
label = []
BASE_FOLDER = r'C:\Users\Desktop\images\\'
folders = os.listdir(BASE_FOLDER)
for folder in folders:
for file in os.listdir(BASE_FOLDER + folder + '//'):
img = cv2.imread(BASE_FOLDER + folder + '//' + file)
train_data, test_data, train_label, test_label = train_test_split(data, label, test_size=0.1, random_state=SEED)
train_data = np.array(train_data, dtype=np.float32)
test_data = np.array(test_data, dtype=np.float32)
train_data = train_data / 180 # to make the array values between 0-1. image size is 180 X 180
test_data = test_data / 180
train_label = list(map(int, train_label))
train_label = keras.utils.to_categorical(train_label, 368)
but i am getting this error
IndexError Traceback (most recent call last)
<ipython-input-69-f087078ef22a> in <module>
----> 1 train_label = keras.utils.to_categorical(train_label, 368)
~\Miniconda3\envs\tf_gpu\lib\site-packages\keras\utils\ in to_categorical(y, num_classes, dtype)
32 n = y.shape[0]
33 categorical = np.zeros((n, num_classes), dtype=dtype)
---> 34 categorical[np.arange(n), y] = 1
35 output_shape = input_shape + (num_classes,)
36 categorical = np.reshape(categorical, output_shape)
IndexError: index 400 is out of bounds for axis 1 with size 368
whats the meaning of this error and how to solve it ?
